├── LICENSE
├── Net_archs.py
├── Net_deploy.py
├── README.md
├── Train.py
├── data_loader_fsnet.py
├── gcn3d.py
├── prepare_data
│   ├── gen_pts.py
│   ├── imgs
│   │   ├── 3140-teaser.gif
│   │   ├── 3DGC.png
│   │   ├── B.gif
│   │   ├── M.gif
│   │   ├── XZ.gif
│   │   ├── Y.gif
│   │   ├── arch.png
│   │   ├── lap_green.gif
│   │   └── lap_red.gif
│   ├── inout.py
│   ├── misc.py
│   ├── renderer.py
│   ├── renderer_py.py
│   └── transform.py
├── pyTorchChamferDistance
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-36.pyc
│   │   └── chamfer_distance.cpython-36.pyc
│   ├── chamfer_distance.cpp
│   ├── chamfer_distance.cu
│   └── chamfer_distance.py
├── requirements.txt
├── uti_tool.py
└── yolov3_fsnet
    ├── detect_fsnet.py
    ├── models
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-36.pyc
    │   │   ├── common.cpython-36.pyc
    │   │   ├── experimental.cpython-36.pyc
    │   │   └── yolo.cpython-36.pyc
    │   ├── common.py
    │   ├── experimental.py
    │   ├── export.py
    │   ├── yolo.py
    │   ├── yolov3-spp.yaml
    │   ├── yolov3-tiny.yaml
    │   └── yolov3.yaml
    └── utils
        ├── __init__.py
        ├── __pycache__
        │   ├── __init__.cpython-36.pyc
        │   ├── autoanchor.cpython-36.pyc
        │   ├── datasets.cpython-36.pyc
        │   ├── general.cpython-36.pyc
        │   ├── google_utils.cpython-36.pyc
        │   ├── metrics.cpython-36.pyc
        │   ├── plots.cpython-36.pyc
        │   └── torch_utils.cpython-36.pyc
        ├── activations.py
        ├── autoanchor.py
        ├── aws
        │   ├── __init__.py
        │   ├── mime.sh
        │   ├── resume.py
        │   └── userdata.sh
        ├── datasets.py
        ├── general.py
        ├── google_app_engine
        │   ├── Dockerfile
        │   ├── additional_requirements.txt
        │   └── app.yaml
        ├── google_utils.py
        ├── loss.py
        ├── metrics.py
        ├── plots.py
        ├── torch_utils.py
        └── wandb_logging
            ├── __init__.py
            ├── log_dataset.py
            └── wandb_utils.py
/LICENSE: --------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2021 Wei Chen
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /Net_archs.py: -------------------------------------------------------------------------------- 1 | # @Time : 06/05/2021 2 | # @Author : Wei Chen 3 | # @Project : Pycharm 4 | import torch.nn as nn 5 | import gcn3d 6 | import torch 7 | import torch.nn.functional as F 8 | 9 | 10 | class GCN3D_segR(nn.Module): 11 | def __init__(self, class_num,vec_num, support_num, neighbor_num): 12 | super(GCN3D_segR, self).__init__() 13 | self.neighbor_num = neighbor_num 14 | 15 | self.conv_0 = gcn3d.Conv_surface(kernel_num= 128, support_num= support_num) 16 | self.conv_1 = gcn3d.Conv_layer(128, 128, support_num= support_num) 17 | self.pool_1 = gcn3d.Pool_layer(pooling_rate= 4, neighbor_num= 4) 18 | self.conv_2 = gcn3d.Conv_layer(128, 256, support_num= support_num) 19 | self.conv_3 = gcn3d.Conv_layer(256, 256, support_num= support_num) 20 | self.pool_2 = gcn3d.Pool_layer(pooling_rate= 4, neighbor_num= 4) 21 | self.conv_4 = gcn3d.Conv_layer(256, 512, support_num= support_num) 22 | 23 | self.bn1 = nn.BatchNorm1d(128) 24 | self.bn2 = nn.BatchNorm1d(256) 25 | self.bn3 = nn.BatchNorm1d(256) 26 | 27 | 28 | self.classnum = class_num 29 | self.vecnum = vec_num*3 30 | dim_fuse = sum([128, 128, 256, 256, 512, 512, 16]) 31 | self.conv1d_block = nn.Sequential( 32 | nn.Conv1d(dim_fuse, 512, 1), 33 | nn.ReLU(inplace= True), 34 | nn.Conv1d(512, 512, 1), 35 | nn.ReLU(inplace= True), 36 | nn.Conv1d(512, class_num+vec_num*3, 1), 37 | ) 38 | 39 | def forward(self, 40 | vertices: "tensor (bs, vetice_num, 3)", 41 | onehot: "tensor (bs, cat_num)"): 42 | """ 43 | Return: (bs, vertice_num, class_num) 44 | """ 45 | 46 | bs, vertice_num, _ = vertices.size() 47 | 48 | neighbor_index = gcn3d.get_neighbor_index(vertices, self.neighbor_num) 49 | # ss = time.time() 50 | fm_0 = F.relu(self.conv_0(neighbor_index, vertices), inplace= True) 51 | 52 | 53 | fm_1 = F.relu(self.bn1(self.conv_1(neighbor_index, vertices, fm_0).transpose(1,2)).transpose(1,2), inplace= True) 54 | v_pool_1, fm_pool_1 = self.pool_1(vertices, fm_1) 55 | # neighbor_index = gcn3d.get_neighbor_index(v_pool_1, self.neighbor_num) 56 | neighbor_index = gcn3d.get_neighbor_index(v_pool_1, 57 | min(self.neighbor_num, v_pool_1.shape[1] // 8)) 58 | fm_2 = F.relu(self.bn2(self.conv_2(neighbor_index, v_pool_1, fm_pool_1).transpose(1,2)).transpose(1,2), inplace= True) 59 | fm_3 = F.relu(self.bn3(self.conv_3(neighbor_index, v_pool_1, fm_2).transpose(1,2)).transpose(1,2), inplace= True) 60 | v_pool_2, fm_pool_2 = self.pool_2(v_pool_1, fm_3) 61 | # neighbor_index = gcn3d.get_neighbor_index(v_pool_2, self.neighbor_num) 62 | neighbor_index = gcn3d.get_neighbor_index(v_pool_2, min(self.neighbor_num, 63 | v_pool_2.shape[1] // 8)) 64 | fm_4 = self.conv_4(neighbor_index, v_pool_2, fm_pool_2) 65 | f_global = fm_4.max(1)[0] #(bs, f) 66 | 67 | nearest_pool_1 = gcn3d.get_nearest_index(vertices, v_pool_1) 68 | nearest_pool_2 = gcn3d.get_nearest_index(vertices, v_pool_2) 69 | fm_2 = gcn3d.indexing_neighbor(fm_2, nearest_pool_1).squeeze(2) 70 | fm_3 = gcn3d.indexing_neighbor(fm_3, nearest_pool_1).squeeze(2) 71 | fm_4 = gcn3d.indexing_neighbor(fm_4, nearest_pool_2).squeeze(2) 72 | f_global = f_global.unsqueeze(1).repeat(1, vertice_num, 1) 73 | onehot = onehot.unsqueeze(1).repeat(1, vertice_num, 1) #(bs, vertice_num, cat_one_hot) 74 | 75 | feat = torch.cat([fm_0, fm_1, fm_2, fm_3, fm_4,onehot], dim= 2) 76 | fm_fuse = torch.cat([fm_0, fm_1, fm_2, fm_3, fm_4, f_global, onehot], dim= 2) 77 | 78 | conv1d_input = 
fm_fuse.permute(0, 2, 1) #(bs, fuse_ch, vertice_num) 79 | conv1d_out = self.conv1d_block(conv1d_input) 80 | pred = conv1d_out.permute(0, 2, 1) #(bs, vertice_num, ch) ## B N 50? 81 | seg = pred[:,:,0:self.classnum] 82 | vecs = pred[:,:, self.classnum:self.classnum+self.vecnum] 83 | return seg, vecs, feat 84 | 85 | 86 | class Point_center(nn.Module): 87 | def __init__(self): 88 | super(Point_center, self).__init__() 89 | 90 | # self.conv1 = torch.nn.Conv2d(12, 64, 1) ##c 91 | self.conv1 = torch.nn.Conv1d(3, 128, 1) ## no c 92 | self.conv2 = torch.nn.Conv1d(128, 256, 1) 93 | 94 | ##here 95 | self.conv3 = torch.nn.Conv1d(256, 512, 1) 96 | self.conv4 = torch.nn.Conv1d(512, 1024, 1) 97 | 98 | # self.conv4 = torch.nn.Conv1d(1024,1024,1) 99 | 100 | self.bn1 = nn.BatchNorm1d(128) 101 | self.bn2 = nn.BatchNorm1d(256) 102 | self.bn3 = nn.BatchNorm1d(512) 103 | 104 | # self.bn4 = nn.BatchNorm1d(1024) 105 | # self.global_feat = global_feat 106 | 107 | def forward(self, x,obj):## 5 6 30 1000 108 | batchsize = x.size()[0] 109 | n_pts = x.size()[2] 110 | 111 | x = F.relu(self.bn1(self.conv1(x))) ## 5 64 30 1000 112 | x = F.relu(self.bn2(self.conv2(x))) ## 5 64 1 1000 113 | x = (self.bn3(self.conv3(x))) 114 | # x = F.relu(self.bn4(self.conv4(x))) 115 | x2 = torch.max(x, -1, keepdim=True)[0]#5 512 1 116 | # x2=torch.mean(x, -1, keepdim=True) 117 | obj = obj.view(-1, 1) 118 | one_hot = torch.zeros(batchsize, 16).scatter_(1, obj.cpu().long(), 1) 119 | # print(one_hot[1,:]) 120 | if torch.cuda.is_available(): 121 | one_hot = one_hot.cuda() 122 | one_hot2 = one_hot.unsqueeze(2) 123 | return torch.cat([x2, one_hot2],1) 124 | # 125 | # return x2 126 | # return pointfeat2 127 | 128 | class Point_center_res_cate(nn.Module): 129 | def __init__(self): 130 | super(Point_center_res_cate, self).__init__() 131 | 132 | # self.feat = Point_vec_edge() 133 | self.feat = Point_center() 134 | self.conv1 = torch.nn.Conv1d(512+16, 256,1) 135 | self.conv2 = torch.nn.Conv1d(256, 128,1) 136 | # self.drop1 = nn.Dropout(0.1) 137 | self.conv3 = torch.nn.Conv1d(128, 6,1 ) 138 | 139 | 140 | self.bn1 = nn.BatchNorm1d(256) 141 | self.bn2 = nn.BatchNorm1d(128) 142 | self.drop1 = nn.Dropout(0.2) 143 | 144 | def forward(self, x, obj): 145 | batchsize = x.size()[0] 146 | n_pts = x.size()[2] 147 | # print(x.size()) 148 | # tes 149 | x = self.feat(x, obj) ## Bx1024x1xN 150 | T_feat = x 151 | # x=x.squeeze(2) 152 | 153 | x = F.relu(self.bn1(self.conv1(x))) 154 | x = (self.bn2(self.conv2(x))) 155 | 156 | x=self.drop1(x) 157 | x = self.conv3(x) 158 | 159 | 160 | 161 | x = x.squeeze(2) 162 | x=x.contiguous()##Bx6 163 | xt = x[:,0:3] 164 | xs = x[:,3:6] 165 | 166 | return xt,xs 167 | 168 | class Rot_green(nn.Module): 169 | def __init__(self, k=24,F=1036): 170 | super(Rot_green, self).__init__() 171 | self.f=F 172 | self.k = k 173 | 174 | 175 | self.conv1 = torch.nn.Conv1d(self.f , 1024, 1) 176 | 177 | self.conv2 = torch.nn.Conv1d(1024, 256, 1) 178 | self.conv3 = torch.nn.Conv1d(256,256,1) 179 | self.conv4 = torch.nn.Conv1d(256,self.k,1) 180 | self.drop1 = nn.Dropout(0.2) 181 | self.bn1 = nn.BatchNorm1d(1024) 182 | self.bn2 = nn.BatchNorm1d(256) 183 | self.bn3 = nn.BatchNorm1d(256) 184 | 185 | 186 | def forward(self, x): 187 | 188 | x = F.relu(self.bn1(self.conv1(x))) 189 | x = F.relu(self.bn2(self.conv2(x))) 190 | 191 | x = torch.max(x, 2, keepdim=True)[0] 192 | 193 | x = F.relu(self.bn3(self.conv3(x))) 194 | x=self.drop1(x) 195 | x = self.conv4(x) 196 | 197 | x=x.squeeze(2) 198 | x = x.contiguous() 199 | 200 | 201 | return x 202 | 203 | 204 | class 
Rot_red(nn.Module): 205 | def __init__(self, k=24,F=1036): 206 | super(Rot_red, self).__init__() 207 | self.f=F 208 | self.k = k 209 | 210 | self.conv1 = torch.nn.Conv1d(self.f , 1024, 1) 211 | self.conv2 = torch.nn.Conv1d(1024, 256, 1) 212 | self.conv3 = torch.nn.Conv1d(256,256,1) 213 | self.conv4 = torch.nn.Conv1d(256,self.k,1) 214 | self.drop1 = nn.Dropout(0.2) 215 | self.bn1 = nn.BatchNorm1d(1024) 216 | self.bn2 = nn.BatchNorm1d(256) 217 | self.bn3 = nn.BatchNorm1d(256) 218 | 219 | 220 | def forward(self, x): 221 | 222 | x = F.relu(self.bn1(self.conv1(x))) 223 | x = F.relu(self.bn2(self.conv2(x))) 224 | 225 | x = torch.max(x, 2, keepdim=True)[0] 226 | 227 | x = F.relu(self.bn3(self.conv3(x))) 228 | x=self.drop1(x) 229 | x = self.conv4(x) 230 | 231 | x=x.squeeze(2) 232 | x = x.contiguous() 233 | 234 | 235 | return x -------------------------------------------------------------------------------- /Net_deploy.py: -------------------------------------------------------------------------------- 1 | # @Time : 11/05/2021 2 | # @Author : Wei Chen 3 | # @Project : Pycharm 4 | 5 | from __future__ import print_function 6 | 7 | 8 | import os 9 | from uti_tool import compute_3d_IoU 10 | import argparse 11 | import numpy as np 12 | from Net_archs import GCN3D_segR, Rot_green, Rot_red, Point_center_res_cate 13 | import torch 14 | import torch.nn as nn 15 | import cv2 16 | 17 | from uti_tool import load_ply, draw_cors_withsize, draw_cors, get_3D_corner, trans_3d, gettrans,get6dpose1 18 | 19 | def load_models(cat): 20 | classifier_seg3D = GCN3D_segR(class_num=2, vec_num = 1,support_num= 7, neighbor_num= 10) 21 | classifier_ce = Point_center_res_cate() ## translation estimation 22 | classifier_Rot_red = Rot_red(F=1296, k= 6) ## rotation red 23 | classifier_Rot_green = Rot_green(F=1296, k=6)### rotation green 24 | 25 | 26 | # optimizer = optim.SGD(classifier.parameters(), lr=0.01, momentum=0.9) 27 | 28 | classifier_seg3D = nn.DataParallel(classifier_seg3D) 29 | classifier_ce = nn.DataParallel(classifier_ce) 30 | classifier_Rot_red = nn.DataParallel(classifier_Rot_red) 31 | classifier_Rot_green = nn.DataParallel(classifier_Rot_green) 32 | 33 | 34 | classifier_seg3D = classifier_seg3D.eval() 35 | classifier_ce = classifier_ce.eval() 36 | classifier_Rot_red = classifier_Rot_red.eval() 37 | classifier_Rot_green = classifier_Rot_green.eval() 38 | # 39 | 40 | classifier_seg3D.cuda() 41 | classifier_ce.cuda() 42 | classifier_Rot_green.cuda() 43 | classifier_Rot_red.cuda() 44 | 45 | outf = 'trained_models/' 46 | 47 | Seg3d = '%s/Seg3D_last_obj%s.pth' % (outf, cat) 48 | Tes = '%s/Tres_last_obj%s.pth' % (outf, cat) 49 | Rot = '%s/Rot_g_last_obj%s.pth' % (outf, cat) 50 | Rot_res = '%s/Rot_r_last_obj%s.pth' % (outf, cat) 51 | 52 | classifier_seg3D.load_state_dict(torch.load(Seg3d)) 53 | classifier_ce.load_state_dict(torch.load(Tes)) 54 | classifier_Rot_green.load_state_dict(torch.load(Rot)) 55 | classifier_Rot_red.load_state_dict(torch.load(Rot_res)) 56 | model_sizes = np.array( 57 | [[87, 220, 89], [165, 80, 165], [88, 128, 156], [68, 146, 72], [346, 200, 335], [146, 83, 114]]) ## 6x3 58 | 59 | cats = ['bottle', 'bowl', 'camera', 'can', 'laptop', 'mug'] 60 | cate_id0 = np.where(np.array(cats) == cat)[0][0] 61 | model_size = model_sizes[cate_id0] 62 | 63 | return classifier_seg3D, classifier_ce, classifier_Rot_green,classifier_Rot_red, model_size,cate_id0 64 | def FS_Net_Test(points, pc, rgb, Rt, Tt, classifier_seg3D, classifier_ce, classifier_Rot_green,classifier_Rot_red, 65 | cat, model_size,cate_id0,num_cor=3): 
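    # --- Expected inputs (inferred from how they are used below; shapes are indicative) ---
    # points : (N, 3) numpy array of back-projected depth points inside the detected 2D box, in mm
    # pc     : (M, 3) numpy array with the object model points (used for its 3D extent and visualization)
    # rgb    : BGR image used for drawing the predicted and ground-truth boxes
    # Rt, Tt : ground-truth rotation (1, 3, 3) and translation (1, 3) tensors, used to report R/T errors
    # classifier_* : the four trained sub-networks returned by load_models()
    # cat, model_size, cate_id0 : category name, mean category size, and category index from load_models()
    # num_cor : number of anchor corners (3 uses both the green and red rotation vectors, 2 only the green one)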
66 | 67 | OR, x_r, y_r, z_r = get_3D_corner(pc) 68 | points = torch.from_numpy(points).unsqueeze(0) 69 | 70 | Rt0 = Rt[0].numpy() 71 | Tt = Tt[0].numpy().reshape(3,1) 72 | 73 | ptsori = points.clone() 74 | points= points.numpy().copy() 75 | 76 | res = np.mean(points[0],0) 77 | points[0, :, 0:3] = points[0, :, 0:3] - np.array([res[0], res[1], res[2]]) 78 | 79 | 80 | points = torch.from_numpy(points).cuda() 81 | 82 | pointsf = points[:, :, 0:3].unsqueeze(2) ##128 1500 1 12 83 | 84 | points = pointsf.transpose(3, 1) 85 | points_n = pointsf.squeeze(2) 86 | 87 | obj_idh = torch.zeros((1, 1)) 88 | 89 | if obj_idh.shape[0] == 1: 90 | obj_idh = obj_idh.view(-1, 1).repeat(points.shape[0], 1) 91 | else: 92 | obj_idh = obj_idh.view(-1, 1) 93 | 94 | one_hot = torch.zeros(points.shape[0], 16).scatter_(1, obj_idh.cpu().long(), 1) 95 | 96 | one_hot = one_hot.cuda() 97 | 98 | pred_seg, point_recon, feavecs = classifier_seg3D(points_n, one_hot) 99 | 100 | pred_choice = pred_seg.data.max(2)[1] 101 | 102 | p = pred_choice 103 | 104 | ptsori=ptsori.cuda() 105 | pts_ = torch.index_select(ptsori[0, :, 0:3], 0, p[0,:].nonzero()[:,0]) ##Nx3 106 | 107 | feat = torch.index_select(feavecs[0, :, :], 0, p[0, :].nonzero()[:, 0]) 108 | 109 | if len(pts_)<10: 110 | print('No object pts') 111 | else: 112 | pts_s = pts_[:, :].unsqueeze(0).float() 113 | # print(ib) 114 | 115 | # p[0, 10:31] 116 | # feas = torch.index_select(feass[ib, :, :], 0, indexs[ib, :].nonzero()[:, 0]) 117 | 118 | if num_cor == 3: 119 | corners0 = torch.Tensor(np.array([[0, 0, 0], [0, 200, 0], [200, 0, 0]])) 120 | else: 121 | corners0 = torch.Tensor(np.array([[0, 0, 0], [0, 200, 0]])) 122 | 123 | pts_s=pts_s.cuda() 124 | feat = feat.cuda() 125 | corners0 = corners0.cuda() 126 | 127 | 128 | pts_s=pts_s.transpose(2,1) 129 | 130 | cen_pred,obj_size = classifier_ce((pts_s - pts_s.mean(dim=2, keepdim=True)),torch.Tensor([cate_id0])) 131 | T_pred = pts_s.mean(dim=2, keepdim=True) + cen_pred.unsqueeze(2) ## 1x3x1 132 | 133 | 134 | # feavec = torch.cat([box_pred, feat.unsqueeze(0)], 2) ## 135 | feavec = feat.unsqueeze(0).transpose(1, 2) 136 | kp_m = classifier_Rot_green(feavec) 137 | 138 | if num_cor == 3: 139 | corners_ = np.array([[0, 0, 0], [0, 1, 0], [1, 0, 0]]) 140 | else: 141 | corners_ = np.array([[0, 0, 0], [0, 1, 0]]) 142 | 143 | 144 | 145 | kpm_gt = (trans_3d(corners_, Rt0, np.array([0, 0, 0]).T).T).flatten() 146 | 147 | 148 | 149 | bbx_3D = model_size+obj_size.detach().cpu().numpy() 150 | model_3D = np.array([x_r, y_r, z_r]) 151 | 152 | 153 | 154 | box_pred_gan = classifier_Rot_red(feat.unsqueeze(0).transpose(1, 2)) 155 | 156 | pred_axis = np.zeros((num_cor,3)) 157 | 158 | pred_axis[0:2,:] = kp_m.view((2, 3)).detach().cpu().numpy() 159 | if num_cor==3: 160 | pred_axis[2,:] = box_pred_gan.view((2, 3)).detach().cpu().numpy()[1,:] 161 | 162 | box_pred_gan=box_pred_gan.detach().cpu().numpy() 163 | box_pred_gan = box_pred_gan / np.linalg.norm(box_pred_gan) 164 | cor0 = corners0.cpu().numpy() 165 | cor0= cor0/np.linalg.norm(cor0) 166 | kpm_gt = kpm_gt.reshape((num_cor,3)) 167 | kpm_gt = kpm_gt/np.linalg.norm(kpm_gt) 168 | 169 | 170 | pred_axis = pred_axis/np.linalg.norm(pred_axis) 171 | 172 | pose_gt = gettrans(cor0.reshape((num_cor, 3)), kpm_gt.reshape((num_cor, 1, 3))) 173 | Rt = pose_gt[0][0:3, 0:3] 174 | 175 | pose = gettrans(cor0.reshape((num_cor, 3)), pred_axis.reshape((num_cor, 1, 3))) 176 | R = pose[0][0:3, 0:3] 177 | 178 | 179 | T = (pts_s.mean(dim=2, keepdim=True) + cen_pred.unsqueeze(2)).view(1,3).detach().cpu().numpy() 180 | # T = res[0:3]+( 
cen_pred.unsqueeze(2)).view(1, 3).detach().cpu().numpy() 181 | #noise_batch_drop_numofloss_loss__cls_model_epoch.pth 182 | torch.cuda.empty_cache() 183 | 184 | show = 1 185 | if show == 1: 186 | R_loss, T_loss = get6dpose1(Rt, Tt, R, T, cat) 187 | size_2 = bbx_3D.reshape(3) 188 | K = np.array([[591.0125, 0, 322.525], [0, 590.16775, 244.11084], [0, 0, 1]]) 189 | 190 | rgb0 = rgb 191 | rgb0 = draw_cors(rgb0, pc, K, Rt, Tt, [255, 255, 255]) 192 | rgb0 = draw_cors_withsize(rgb0, K, R, T, [255, 0, 0], xr=size_2[0], yr=size_2[1], zr=size_2[2]) 193 | font = cv2.FONT_HERSHEY_SIMPLEX 194 | cv2.putText(rgb0, 'R_loss: %s' % (R_loss), (10, 20), font, 0.5, (0, 0, 0), 1, 0) 195 | cv2.putText(rgb0, 'T_loss(mm): %s' % (T_loss), (10, 40), font, 0.5, (0, 0, 0), 1, 0) 196 | cv2.imshow('show', rgb0 / 255) 197 | cv2.waitKey(10) 198 | eva = 1 199 | # if eva==1: 200 | # 201 | # sRT_1 = np.eye(4) 202 | # sRT_1[0:3, 0:3] = Rt 203 | # sRT_1[0:3, 3:4] = Tt 204 | # sRT_2 = np.eye(4) 205 | # sRT_2[0:3, 0:3] = R 206 | # sRT_2[0:3, 3:4] = T.reshape(3,1) 207 | # size_2= bbx_3D.reshape(3) 208 | # size_1 = model_3D 209 | # 210 | # # size_2 = size_1 211 | # class_name_1 = cat 212 | # class_name_2 = cat 213 | # iou3d = compute_3d_IoU(sRT_1, sRT_2, size_1, size_2, class_name_1, class_name_2, 214 | # handle_visibility=1) 215 | # 216 | # return iou3d, R_loss, T_loss 217 | 218 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Category-Level 6D Pose Estimation 2 | 3 | This code is for our CVPR2021 oral paper: FS-Net: Fast Shape-based Network for Category-Level 6D Object Pose Estimation with Decoupled Rotation Mechanism. If you have any questions, please leave your comments or email me. 4 | ## Experiment setup 5 | 6 | OS: Ubuntu 16.04 7 | 8 | GPU: 1080 Ti 9 | 10 | Programme language: Python 3.6, Pytorch. 11 | 12 | If you find our paper [link1(arXiv)](http://arxiv.org/abs/2103.07054) [link2(CVF)](https://openaccess.thecvf.com/content/CVPR2021/papers/Chen_FS-Net_Fast_Shape-Based_Network_for_Category-Level_6D_Object_Pose_Estimation_CVPR_2021_paper.pdf) or code is useful, please cite our paper: 13 | 14 | @InProceedings{Chen_2021_CVPR, 15 | author = {Chen, Wei and Jia, Xi and Chang, Hyung Jin and Duan, Jinming and Linlin, Shen and Leonardis, Ales}, 16 | title = {FS-Net: Fast Shape-based Network for Category-Level 6D Object Pose Estimation with Decoupled Rotation Mechanism}, 17 | booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, 18 | month = {June}, 19 | year = {2021}, 20 | pages = {1581-1590} 21 | } 22 | 23 | ## Contributions 24 | Our framework is built on our previous work [G2L-Net](https://github.com/DC1991/G2L_Net), with the following Contributions: 25 | 26 | 1. New latent feature learning 27 | >>> [3D graph convolution](https://github.com/j1a0m0e4sNTU/3dgcn/issues) based observed points reconstruction(orientation preserved; green are reconstructed, yellow are observed) 28 | 29 | 2. New rotation representation 30 | >>> Decomposable vector-based rotation representation 31 | 32 | 33 | 34 | 35 | 3. New 3D data augmentation 36 | >>> Box-cage based, online 3D data augmentation 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | ## Pre requirements 45 | 46 | You can find the main requirements in 'requirement.txt'. 
47 | 
48 | ### Trained model and sample data
49 | >>Please download 'data.zip' [here](https://drive.google.com/file/d/15efs1IIjbRnWIlh-9sXMfbqyL4S08bEG/view?usp=sharing
50 | >), then unzip 'trained_model.zip' under the 'yolov3_fsnet/' folder and
51 | 'test_scene_1
52 | .zip' under the 'yolov3_fsnet/data/' folder.
53 | 
54 | >>The trained model for YOLOv3 will be downloaded automatically. We use the 'yolov5l.pt' provided by this [git](https://github.com/ultralytics/yolov3). According to our tests, this trained model works well for the category 'laptop'; you
55 | > may need to re-train the 2D detection model for other categories.
56 | 
57 | ## Demo
58 | 
59 |     python yolov3_fsnet/detect_fsnet.py
60 | Please note: the code was created and debugged in PyCharm, so you may need to adjust the import paths when running it in another
61 | Python IDE.
62 | ## Training
63 | Please note that some details differ from the original paper to make training more efficient.
64 | ### Data Preparation
65 | To generate your own dataset, first use the data preprocessing code provided in this [git](https://github.com/mentian/object-deformnet/blob/master/preprocess/pose_data.py), and then use the code
66 | provided in 'gen_pts.py'. The render function is borrowed from [BOP](https://github.com/thodan/bop_toolkit); please
67 | refer to that git if you have problems with rendering.
68 | 
69 | ### Training FS_Net
70 | #### YOLOv3 Training
71 | For the 2D detection training part, please refer to this [git](https://github.com/ultralytics/yolov3).
72 | #### FS_Net Training
73 | After the data preparation, run Train.py to train your own model.
74 | 
75 | 
76 | ## Acknowledgment
77 | We borrow some off-the-shelf code from [3dgcn](https://github.com/j1a0m0e4sNTU/3dgcn), [YOLOv3](https://github.com/ultralytics/yolov3), and [BOP](https://github.com/thodan/bop_toolkit). Thanks to the authors for their work.
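For reference, the deploy-time pieces in 'Net_deploy.py' can also be driven directly from Python. The following is a minimal sketch: the file paths, the ground-truth pose placeholders, and the input shapes are assumptions you must replace with your own data; only the function signatures ('load_models' and 'FS_Net_Test') come from this repository.

    import cv2
    import numpy as np
    import torch
    from Net_deploy import load_models, FS_Net_Test
    from uti_tool import load_ply

    cat = 'laptop'
    # load_models reads the weights from 'trained_models/' (see Net_deploy.py)
    seg3d, ce, rot_green, rot_red, model_size, cate_id0 = load_models(cat)

    # Placeholder inputs -- replace with your own files:
    pc = load_ply('path/to/laptop_model.ply')['pts'] * 1000.0              # object model points (mm)
    points = np.loadtxt('path/to/pose00000001.txt')[:, 0:3].astype(np.float32)  # depth points, N x 3 (mm)
    rgb = cv2.imread('path/to/0000_color.png')                             # image used only for visualization
    Rt = torch.eye(3).unsqueeze(0)                                         # ground-truth rotation, (1, 3, 3)
    Tt = torch.zeros(1, 3)                                                 # ground-truth translation, (1, 3), mm

    FS_Net_Test(points, pc, rgb, Rt, Tt,
                seg3d, ce, rot_green, rot_red,
                cat, model_size, cate_id0, num_cor=3)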
78 | -------------------------------------------------------------------------------- /Train.py: -------------------------------------------------------------------------------- 1 | # @Time : 12/05/2021 2 | # @Author : Wei Chen 3 | # @Project : Pycharm 4 | 5 | 6 | 7 | from __future__ import print_function 8 | 9 | import os 10 | import argparse 11 | import torch.optim as optim 12 | from torch.autograd import Variable 13 | 14 | import torch 15 | from Net_archs import GCN3D_segR, Rot_green, Rot_red, Point_center_res_cate 16 | from data_loader_fsnet import load_pts_train_cate 17 | import torch.nn as nn 18 | import numpy as np 19 | import time 20 | from uti_tool import data_augment 21 | 22 | from pyTorchChamferDistance.chamfer_distance import ChamferDistance 23 | 24 | parser = argparse.ArgumentParser() 25 | parser.add_argument('--batchSize', type=int, default=14, help='input batch size') 26 | parser.add_argument('--workers', type=int, help='number of data loading workers', default=4) 27 | parser.add_argument('--nepoch', type=int, default=50, help='number of epochs to train for') 28 | parser.add_argument('--outf', type=str, default='models', help='output folder') 29 | parser.add_argument('--outclass', type=int, default=2, help='point class') 30 | parser.add_argument('--model', type=str, default='', help='model path') 31 | 32 | opt = parser.parse_args() 33 | 34 | 35 | kc = opt.outclass 36 | num_cor = 3 37 | num_vec = 8 38 | nw=0 # number of cpu 39 | localtime = (time.localtime(time.time())) 40 | year = localtime.tm_year 41 | month = localtime.tm_mon 42 | day = localtime.tm_mday 43 | hour = localtime.tm_hour 44 | 45 | cats = ['bottle','bowl','can','camera','laptop','mug'] 46 | 47 | for cat in ['laptop']: 48 | 49 | 50 | classifier_seg3D = GCN3D_segR(class_num=2, vec_num = 1,support_num= 7, neighbor_num= 10) 51 | classifier_ce = Point_center_res_cate() ## translation estimation 52 | classifier_Rot_red = Rot_red(F=1296, k= 6) ## rotation red 53 | classifier_Rot_green = Rot_green(F=1296, k=6)### rotation green 54 | 55 | 56 | num_classes = opt.outclass 57 | 58 | Loss_seg3D = nn.CrossEntropyLoss() 59 | Loss_func_ce = nn.MSELoss() 60 | Loss_func_Rot1 = nn.MSELoss() 61 | Loss_func_Rot2 = nn.MSELoss() 62 | Loss_func_s = nn.MSELoss() 63 | 64 | 65 | 66 | 67 | classifier_seg3D = nn.DataParallel(classifier_seg3D) 68 | classifier_ce = nn.DataParallel(classifier_ce) 69 | classifier_Rot_red = nn.DataParallel(classifier_Rot_red) 70 | classifier_Rot_green = nn.DataParallel(classifier_Rot_green) 71 | 72 | 73 | classifier_seg3D = classifier_seg3D.train() 74 | classifier_ce = classifier_ce.train() 75 | classifier_Rot_red = classifier_Rot_red.train() 76 | classifier_Rot_green = classifier_Rot_green.train() 77 | 78 | 79 | 80 | Loss_seg3D.cuda() 81 | Loss_func_ce.cuda() 82 | Loss_func_Rot1.cuda() 83 | Loss_func_Rot2.cuda() 84 | Loss_func_s.cuda() 85 | 86 | classifier_seg3D.cuda() 87 | classifier_ce.cuda() 88 | classifier_Rot_red.cuda() 89 | classifier_Rot_green.cuda() 90 | 91 | 92 | opt.outf = 'models/FS_Net_%s'%(cat) 93 | try: 94 | os.makedirs(opt.outf) 95 | except OSError: 96 | pass 97 | 98 | sepoch = 0 99 | 100 | batch_size = 12 # 101 | 102 | lr = 0.001 103 | 104 | epochs = opt.nepoch 105 | 106 | optimizer = optim.Adam([{'params': classifier_seg3D.parameters()},{'params': classifier_ce.parameters()},{'params': classifier_Rot_red.parameters()},{'params': classifier_Rot_green.parameters()}], lr=lr, betas=(0.9, 0.99)) 107 | 108 | bbxs = 0 109 | K = np.array([[591.0125, 0, 322.525], [0, 590.16775, 244.11084], [0, 0, 1]]) 
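# K holds the camera intrinsics (fx, fy, cx, cy = 591.0125, 590.16775, 322.525, 244.11084) used to
# back-project depth into 3D points; the same values appear in Net_deploy.py and prepare_data/gen_pts.py.
# Replace them with your own sensor's intrinsics if your training data comes from a different camera.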
110 | 111 | data_path = 'your data path' 112 | dataloader = load_pts_train_cate(data_path, batch_size, K,cat, lim=1, rad=300, shuf=True, drop=True, corners=0,nw=nw) 113 | 114 | for epoch in range(sepoch,epochs): 115 | 116 | if epoch > 0 and epoch % (epochs // 5) == 0: 117 | lr = lr / 4 118 | 119 | 120 | optimizer.param_groups[0]['lr'] = lr 121 | optimizer.param_groups[1]['lr'] = lr * 10 122 | optimizer.param_groups[2]['lr'] = lr * 20 123 | optimizer.param_groups[3]['lr'] = lr * 20 124 | 125 | for i, data in enumerate(dataloader): 126 | 127 | points, target_, Rs, Ts, obj_id,S, imgp= data['points'], data['label'], data['R'], data['T'], data['cate_id'], data['scale'], data['dep'] 128 | ptsori = points.clone() 129 | 130 | target_seg = target_[:, :, 0] ###seg_target 131 | 132 | points_ = points.numpy().copy() 133 | 134 | points, corners, centers, pts_recon = data_augment(points_[:, :, 0:3], Rs, Ts,num_cor, target_seg,a=15.0) 135 | 136 | points, target_seg, pts_recon = Variable(torch.Tensor(points)), Variable(target_seg), Variable(pts_recon) 137 | 138 | points, target_seg,pts_recon = points.cuda(), target_seg.cuda(), pts_recon.cuda() 139 | 140 | pointsf = points[:, :, 0:3].unsqueeze(2) 141 | 142 | optimizer.zero_grad() 143 | points = pointsf.transpose(3, 1) 144 | points_n = pointsf.squeeze(2) 145 | 146 | obj_idh = torch.zeros((1,1)) 147 | 148 | if obj_idh.shape[0] == 1: 149 | obj_idh = obj_idh.view(-1, 1).repeat(points.shape[0], 1) 150 | else: 151 | obj_idh = obj_idh.view(-1, 1) 152 | 153 | one_hot = torch.zeros(points.shape[0], 16).scatter_(1, obj_idh.cpu().long(), 1) 154 | one_hot = one_hot.cuda() ## the pre-defined category ID 155 | 156 | 157 | 158 | pred_seg, box_pred_, feavecs = classifier_seg3D(points_n, one_hot) 159 | 160 | 161 | pred_choice = pred_seg.data.max(2)[1] ## B N 162 | # print(pred_choice[0]) 163 | p = pred_choice # [0].cpu().numpy() B N 164 | N_seg = 1000 165 | pts_s = torch.zeros(points.shape[0], N_seg, 3) 166 | 167 | box_pred = torch.zeros(points.shape[0], N_seg, 3) 168 | 169 | 170 | pts_sv = torch.zeros(points.shape[0], N_seg, 3) 171 | 172 | feat = torch.zeros(points.shape[0], N_seg, feavecs.shape[2]) 173 | 174 | 175 | corners0 = torch.zeros((points.shape[0], num_cor, 3)) 176 | if torch.cuda.is_available(): 177 | ptsori = ptsori.cuda() 178 | 179 | Tt = np.zeros((points.shape[0], 3)) 180 | for ib in range(points.shape[0]): 181 | if len(p[ib, :].nonzero()) < 10: 182 | continue 183 | 184 | pts_ = torch.index_select(ptsori[ib, :, 0:3], 0, p[ib, :].nonzero()[:, 0]) ##Nx3 185 | 186 | 187 | box_pred__ = torch.index_select(box_pred_[ib, :, :], 0, p[ib, :].nonzero()[:, 0]) 188 | feavec_ = torch.index_select(feavecs[ib, :, :], 0, p[ib, :].nonzero()[:, 0]) 189 | 190 | choice = np.random.choice(len(pts_), N_seg, replace=True) 191 | pts_s[ib, :, :] = pts_[choice, :] 192 | 193 | box_pred[ib] = box_pred__[choice] 194 | feat[ib, :, :] = feavec_[choice, :] 195 | 196 | corners0[ib] = torch.Tensor(np.array([[0,0,0],[0,200,0],[200,0,0]])) 197 | 198 | 199 | 200 | 201 | 202 | 203 | pts_s = pts_s.cuda() 204 | 205 | 206 | 207 | pts_s = pts_s.transpose(2, 1) 208 | cen_pred,obj_size = classifier_ce((pts_s - pts_s.mean(dim=2, keepdim=True)), obj_id) 209 | 210 | 211 | feavec = feat.transpose(1, 2) 212 | 213 | kp_m = classifier_Rot_green(feavec) 214 | 215 | 216 | centers = Variable(torch.Tensor((centers))) 217 | 218 | 219 | corners = Variable(torch.Tensor((corners))) 220 | 221 | 222 | 223 | 224 | if torch.cuda.is_available(): 225 | box_pred = box_pred.cuda() 226 | centers = centers.cuda() 227 | S = 
S.cuda() 228 | corners = corners.cuda() 229 | feat = feat.cuda() 230 | corners0 = corners0.cuda() 231 | 232 | loss_seg = Loss_seg3D(pred_seg.reshape(-1, pred_seg.size(-1)), target_seg.view(-1,).long()) 233 | loss_res = Loss_func_ce(cen_pred, centers.float()) 234 | 235 | loss_size = Loss_func_s(obj_size,S.float()) 236 | 237 | 238 | def loss_recon(a, b): 239 | if torch.cuda.is_available(): 240 | chamferdist = ChamferDistance() 241 | dist1, dist2 = chamferdist(a, b) 242 | loss = torch.mean(dist1) + torch.mean(dist2) 243 | else: 244 | loss=torch.Tensor([100.0]) 245 | return loss 246 | loss_vec = loss_recon(box_pred, pts_recon) 247 | 248 | 249 | 250 | kp_m2 = classifier_Rot_red(feat.transpose(1,2)) # .detach()) 251 | 252 | green_v = corners[:, 0:6].float().clone() 253 | red_v = corners[:, (0, 1, 2, 6, 7, 8)].float().clone() 254 | target = torch.tensor([[1]], dtype=torch.float).cuda() 255 | 256 | 257 | loss_rot_g= Loss_func_Rot1(kp_m, green_v) 258 | loss_rot_r = Loss_func_Rot2(kp_m2, red_v) 259 | 260 | 261 | 262 | 263 | 264 | 265 | symme=1 266 | if cat in ['bottle','bowl','can']: 267 | symme=0.0 268 | 269 | 270 | Loss = loss_seg*20.0+loss_res/20.0+loss_vec/200.0+loss_size/20.0+symme*loss_rot_r/100.0+loss_rot_g/100.0 271 | Loss.backward() 272 | optimizer.step() 273 | 274 | print(cat) 275 | print('[%d: %d] train loss_seg: %f, loss_res: %f, loss_recon: %f, loss_size: %f, loss_rot_g: %f, ' 276 | 'loss_rot_r: %f' % ( 277 | epoch, i, loss_seg.item(), loss_res.item(), loss_vec.item(), loss_size.item(), loss_rot_g.item(), 278 | loss_rot_r.item())) 279 | 280 | 281 | print() 282 | 283 | torch.save(classifier_seg3D.state_dict(), '%s/Seg3D_last_obj%s.pth' % (opt.outf, 284 | cat)) 285 | torch.save(classifier_ce.state_dict(), '%s/Tres_last_obj%s.pth' % (opt.outf, cat)) 286 | torch.save(classifier_Rot_green.state_dict(), 287 | '%s/Rot_g_last_obj%s.pth' % (opt.outf, cat)) 288 | torch.save(classifier_Rot_red.state_dict(), 289 | '%s/Rot_r_last_obj%s.pth' % (opt.outf, cat)) 290 | if epoch>0 and epoch %(epochs//5)== 0: ##save mid checkpoints 291 | 292 | torch.save(classifier_seg3D.state_dict(), '%s/Seg3D_epoch%d_obj%s.pth' % (opt.outf, 293 | epoch, cat)) 294 | torch.save(classifier_ce.state_dict(), '%s/Tres_epoch%d_obj%s.pth' % (opt.outf, epoch, cat)) 295 | torch.save(classifier_Rot_green.state_dict(), 296 | '%s/Rot_g_epoch%d_obj%s.pth' % (opt.outf, epoch, cat)) 297 | torch.save(classifier_Rot_red.state_dict(), 298 | '%s/Rot_r_epoch%d_obj%s.pth' % (opt.outf, epoch, cat)) 299 | 300 | 301 | 302 | 303 | -------------------------------------------------------------------------------- /data_loader_fsnet.py: -------------------------------------------------------------------------------- 1 | # @Time : 25/09/2020 18:02 2 | # @Author : Wei Chen 3 | # @Project : Pycharm 4 | import torch 5 | from torch.utils.data import Dataset, DataLoader 6 | import _pickle as pickle 7 | from uti_tool import * 8 | import random 9 | 10 | 11 | def getFiles(file_dir,suf): 12 | L=[] 13 | for root, dirs, files in os.walk(file_dir): 14 | #print('root: ',dirs) 15 | for file in files: 16 | if os.path.splitext(file)[1] == suf: 17 | L.append(os.path.join(root, file)) 18 | L.sort(key=lambda x:int(x[-11:-4])) 19 | return L 20 | 21 | def getDirs(file_dir): 22 | L=[] 23 | 24 | dirs = os.listdir(file_dir) 25 | 26 | return dirs 27 | 28 | 29 | def load_depth(depth_path): 30 | """ Load depth image from img_path. 
""" 31 | 32 | depth = cv2.imread(depth_path, -1) 33 | if len(depth.shape) == 3: 34 | # This is encoded depth image, let's convert 35 | # NOTE: RGB is actually BGR in opencv 36 | depth16 = depth[:, :, 1]*256 + depth[:, :, 2] 37 | depth16 = np.where(depth16==32001, 0, depth16) 38 | depth16 = depth16.astype(np.uint16) 39 | elif len(depth.shape) == 2 and depth.dtype == 'uint16': 40 | depth16 = depth 41 | else: 42 | assert False, '[ Error ]: Unsupported depth type.' 43 | return depth16 44 | 45 | 46 | def chooselimt(pts0, lab, zmin, zmax): 47 | 48 | 49 | pts = pts0.copy() 50 | labs = lab.copy() 51 | 52 | pts1=pts[np.where(pts[:,2] zmin)[0], :] 56 | labs = lab1[np.where(pts1[:, 2] > zmin)[0],:] 57 | 58 | return ptsn,labs 59 | 60 | def circle_iou(pts,lab, dia): 61 | # fx = K[0, 0] 62 | # ux = K[0, 2] 63 | # fy = K[1, 1] 64 | # uy = K[1, 2] 65 | a = pts[lab[:, 0] == 1, :] 66 | ptss = pts[lab[:, 0] == 1, :] 67 | idx = np.random.randint(0, a.shape[0]) 68 | 69 | zmin = max(0,ptss[idx,2]-dia) 70 | zmax = ptss[idx,2]+dia 71 | 72 | return zmin, zmax 73 | 74 | 75 | class CateDataset(Dataset): 76 | def __init__(self, root_dir, K, cate,lim=1,transform=None,corners=0, temp=None): 77 | 78 | cats = ['bottle', 'bowl', 'camera', 'can', 'laptop', 'mug'] 79 | 80 | objs = os.listdir(root_dir) 81 | self.objs_name = objs 82 | self.objs = np.zeros((len(objs),1),dtype=np.uint) 83 | 84 | for i in range(len(objs)): 85 | if cate in objs[i]: 86 | self.objs[i]=1 87 | 88 | self.cate_id = np.where(np.array(cats)==cate)[0][0]+1 89 | self.ids = np.where(self.objs==1) 90 | 91 | self.root_dir = root_dir 92 | self.lim=lim 93 | self.transform=transform 94 | self.cate = cate 95 | self.K = K 96 | self.corners = corners 97 | self.rad=temp 98 | if cate=='labtop': 99 | self.rad = 600 100 | if cate == 'bottle': 101 | self.rad = 400 102 | 103 | 104 | 105 | datapath = 'Real/train/scene_' ## file path of train scenes 106 | model_path = 'real_train/plys/' ##object model 107 | 108 | self.data = datapath 109 | self.c = random.randint(0, len(self.ids) - 1) 110 | self.model_path = model_path 111 | def __len__(self): 112 | 113 | 114 | return 1500 ## 115 | 116 | 117 | def __getitem__(self, index): 118 | 119 | 120 | c = random.randint(0, len(self.ids[0])-1) 121 | 122 | obj_id = self.ids[0][c] 123 | cate = self.objs_name[obj_id] 124 | 125 | pc = load_ply(self.model_path+'/%s.ply'%(cate))['pts']*1000.0 126 | 127 | 128 | root_dir = self.root_dir + '/%s/' % (cate) 129 | pts_ps = getFiles_ab(root_dir+'points/','.txt',-12,-4) 130 | idx = random.randint(0, len(pts_ps) - 1) 131 | pts_name = pts_ps[idx] 132 | lab_name = getFiles_ab(root_dir+'points_labs/','.txt',-12,-4)[idx] 133 | 134 | 135 | 136 | scene_id = int(pts_name[-12:-4])//1000+1 ## you can change according to your own name rules 137 | 138 | img_id = int(pts_name[-12:-4])-(scene_id-1)*1000 139 | 140 | depth_p = self.data+'%d'%(scene_id)+'/%04d_depth.png'%(img_id) 141 | label_p = self.data+'%d'%(scene_id)+'/%04d_label.pkl'%(img_id) 142 | 143 | gts = pickle.load(open(label_p, 'rb')) 144 | idin = np.where(np.array(gts['model_list']) == cate) 145 | 146 | 147 | if len(idin[0])==0: ## fix some wrong cases 148 | bbx = np.array([1,2,3,4]).reshape((1, 4)) 149 | R = np.eye(3) 150 | T = np.array([0,0,0]).reshape(1,3) 151 | else: 152 | bbx = gts['bboxes'][idin[0]].reshape((1, 4)) ## y1 x1 y2 x2 153 | R = gts['rotations'][idin[0]].reshape(3,3) 154 | T = gts['translations'][idin[0]].reshape(1,3)*1000.0 155 | 156 | self.pc = pc 157 | self.R = R 158 | self.T = T 159 | depth = cv2.imread(depth_p,-1) 160 | # 
pts_name = bpp + 'pose%08d.txt' % (idx) 161 | 162 | label = np.loadtxt(lab_name) 163 | 164 | 165 | label_ = label.reshape((-1, 1)) 166 | points_ = np.loadtxt(pts_name) 167 | 168 | 169 | 170 | points_, label_,sx,sy,sz = self.aug_pts_labs(depth,points_,label_,bbx) 171 | 172 | Scale = np.array([sx,sy,sz]) 173 | 174 | 175 | if points_.shape[0]!=label_.shape[0]: 176 | print(self.root_dir[idx]) 177 | 178 | choice = np.random.choice(len(points_), 2000, replace=True) 179 | points = points_[choice, :] 180 | label = label_[choice, :] 181 | 182 | sample = {'points': points, 'label': label, 'R':R, 'T':T,'cate_id':self.cate_id,'scale':Scale,'dep':depth_p} 183 | 184 | return sample 185 | 186 | def aug_pts_labs(self, depth,pts,labs,bbx): 187 | 188 | ## 2D bounding box augmentation and fast relabeling 189 | bbx_gt = [bbx[0,1], bbx[0,3],bbx[0,0],bbx[0,2]]#x1,x2, y1 , y2 190 | bbx = shake_bbx(bbx_gt) ## x1,x2,y1,y2 191 | depth, bbx_iou = depth_out_iou(depth, bbx, bbx_gt) 192 | 193 | mesh = depth_2_mesh_bbx(depth, [bbx[2], bbx[3], bbx[0], bbx[1]], self.K) 194 | mesh = mesh[np.where(mesh[:, 2] > 0.0)] 195 | mesh = mesh[np.where(mesh[:, 2] < 5000.0)] 196 | 197 | if len(mesh) > 1000: 198 | choice = np.random.choice(len(mesh), len(mesh)//2, replace=True) 199 | mesh = mesh[choice, :] 200 | 201 | pts_a, labs_a = pts_iou(pts.copy(), labs.copy(), self.K, bbx_iou) 202 | 203 | assert pts_a.shape[0]==labs_a.shape[0] 204 | 205 | if len(pts_a[labs_a[:, 0] == 1, :])<50: ## too few points in intersection region 206 | pts_=pts_a.copy() 207 | labs_ = labs_a.copy() 208 | else: 209 | pts_ = pts.copy() 210 | labs_ = labs.copy() 211 | 212 | N = pts_.shape[0] 213 | M = mesh.shape[0] 214 | mesh = np.concatenate([mesh, pts_], axis=0) 215 | label = np.zeros((M + N, 1), dtype=np.uint) 216 | label[M:M + N, 0] = labs_[:, 0] 217 | points = mesh 218 | 219 | if self.lim == 1: 220 | zmin, zmax = circle_iou(points.copy(), label.copy(), self.rad) 221 | points, label = chooselimt(points, label,zmin, zmax) 222 | 223 | 224 | 225 | ### 3D deformation 226 | Rt = get_rotation(180,0,0) 227 | self.pc = np.dot(Rt, self.pc.T).T ## the object 3D model is up-side-down along the X axis in our case, you may not need this code to reverse 228 | 229 | 230 | s = 0.8 231 | e = 1.2 232 | pointsn, ex,ey, ez,s = defor_3D(points,label, self.R, self.T, self.pc, scalex=(s, e),scalez=(s, e), 233 | scaley=(s, e), scale=(s, e), cate=self.cate) 234 | sx,sy,sz = var_2_norm(self.pc, ex, ey, ez, c=self.cate) 235 | return pointsn, label.astype(np.uint8), sx,sy,sz 236 | 237 | 238 | def load_pts_train_cate(data_path ,bat,K,cate,lim=1,rad=400,shuf=True,drop=False,corners=0,nw=0): 239 | 240 | data=CateDataset(data_path, K, cate,lim=lim,transform=None,corners=corners, temp=rad) 241 | 242 | dataloader = DataLoader(data, batch_size=bat, shuffle=shuf, drop_last=drop,num_workers=nw) 243 | 244 | return dataloader 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | -------------------------------------------------------------------------------- /gcn3d.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Author: Zhi-Hao Lin 3 | @Contact: r08942062@ntu.edu.tw 4 | @Time: 2020/03/06 5 | @Document: Basic operation/blocks of 3D-GCN 6 | """ 7 | 8 | import math 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | from uti_tool import get_rotation 13 | import time 14 | 15 | def get_neighbor_index(vertices: "(bs, vertice_num, 3)", neighbor_num: int): 16 | """ 17 | Return: (bs, vertice_num, neighbor_num) 
18 | """ 19 | bs, v, _ = vertices.size() 20 | device = vertices.device 21 | inner = torch.bmm(vertices, vertices.transpose(1, 2)) #(bs, v, v) 22 | quadratic = torch.sum(vertices**2, dim= 2) #(bs, v) 23 | distance = inner * (-2) + quadratic.unsqueeze(1) + quadratic.unsqueeze(2) 24 | neighbor_index = torch.topk(distance, k= neighbor_num + 1, dim= -1, largest= False)[1] 25 | neighbor_index = neighbor_index[:, :, 1:] 26 | return neighbor_index 27 | 28 | def get_nearest_index(target: "(bs, v1, 3)", source: "(bs, v2, 3)"): 29 | """ 30 | Return: (bs, v1, 1) 31 | """ 32 | inner = torch.bmm(target, source.transpose(1, 2)) #(bs, v1, v2) 33 | s_norm_2 = torch.sum(source ** 2, dim= 2) #(bs, v2) 34 | t_norm_2 = torch.sum(target ** 2, dim= 2) #(bs, v1) 35 | d_norm_2 = s_norm_2.unsqueeze(1) + t_norm_2.unsqueeze(2) - 2 * inner 36 | nearest_index = torch.topk(d_norm_2, k= 1, dim= -1, largest= False)[1] 37 | return nearest_index 38 | 39 | def indexing_neighbor(tensor: "(bs, vertice_num, dim)", index: "(bs, vertice_num, neighbor_num)" ): 40 | """ 41 | Return: (bs, vertice_num, neighbor_num, dim) 42 | """ 43 | 44 | bs, v, n = index.size() 45 | 46 | # ss = time.time() 47 | if bs==1: 48 | # id_0 = torch.arange(bs).view(-1, 1,1) 49 | tensor_indexed = tensor[torch.Tensor([[0]]).long(), index[0]].unsqueeze(dim=0) 50 | else: 51 | id_0 = torch.arange(bs).view(-1, 1, 1).long() 52 | tensor_indexed = tensor[id_0, index] 53 | # ee = time.time() 54 | # print('tensor_indexed time: ', str(ee - ss)) 55 | return tensor_indexed 56 | 57 | def get_neighbor_direction_norm(vertices: "(bs, vertice_num, 3)", neighbor_index: "(bs, vertice_num, neighbor_num)"): 58 | """ 59 | Return: (bs, vertice_num, neighobr_num, 3) 60 | """ 61 | # ss = time.time() 62 | neighbors = indexing_neighbor(vertices, neighbor_index) # (bs, v, n, 3) 63 | 64 | neighbor_direction = neighbors - vertices.unsqueeze(2) 65 | neighbor_direction_norm = F.normalize(neighbor_direction, dim= -1) 66 | return neighbor_direction_norm.float() 67 | 68 | class Conv_surface(nn.Module): 69 | """Extract structure feafure from surface, independent from vertice coordinates""" 70 | def __init__(self, kernel_num, support_num): 71 | super().__init__() 72 | self.kernel_num = kernel_num 73 | self.support_num = support_num 74 | 75 | self.relu = nn.ReLU(inplace= True) 76 | self.directions = nn.Parameter(torch.FloatTensor(3, support_num * kernel_num)) 77 | self.initialize() 78 | 79 | def initialize(self): 80 | stdv = 1. 
/ math.sqrt(self.support_num * self.kernel_num) 81 | self.directions.data.uniform_(-stdv, stdv) 82 | 83 | def forward(self, 84 | neighbor_index: "(bs, vertice_num, neighbor_num)", 85 | vertices: "(bs, vertice_num, 3)"): 86 | """ 87 | Return vertices with local feature: (bs, vertice_num, kernel_num) 88 | """ 89 | bs, vertice_num, neighbor_num = neighbor_index.size() 90 | # ss = time.time() 91 | neighbor_direction_norm = get_neighbor_direction_norm(vertices, neighbor_index) 92 | 93 | # R = get_rotation(0,0,0) 94 | # R = torch.from_numpy(R).cuda() 95 | # R = R.unsqueeze(0).repeat(bs,1,1).float() ## bs 3,3 96 | # vertices2 = torch.bmm(R,vertices.transpose(1,2)).transpose(2,1) 97 | # neighbor_direction_norm2 = get_neighbor_direction_norm(vertices2, neighbor_index) 98 | 99 | 100 | support_direction_norm = F.normalize(self.directions, dim= 0) #(3, s * k) 101 | 102 | theta = neighbor_direction_norm @ support_direction_norm # (bs, vertice_num, neighbor_num, s*k) 103 | 104 | theta = self.relu(theta) 105 | theta = theta.contiguous().view(bs, vertice_num, neighbor_num, self.support_num, self.kernel_num) 106 | theta = torch.max(theta, dim= 2)[0] # (bs, vertice_num, support_num, kernel_num) 107 | feature = torch.sum(theta, dim= 2) # (bs, vertice_num, kernel_num) 108 | return feature 109 | 110 | class Conv_layer(nn.Module): 111 | def __init__(self, in_channel, out_channel, support_num): 112 | super().__init__() 113 | # arguments: 114 | self.in_channel = in_channel 115 | self.out_channel = out_channel 116 | self.support_num = support_num 117 | 118 | # parameters: 119 | self.relu = nn.ReLU(inplace= True) 120 | self.weights = nn.Parameter(torch.FloatTensor(in_channel, (support_num + 1) * out_channel)) 121 | self.bias = nn.Parameter(torch.FloatTensor((support_num + 1) * out_channel)) 122 | self.directions = nn.Parameter(torch.FloatTensor(3, support_num * out_channel)) 123 | self.initialize() 124 | 125 | def initialize(self): 126 | stdv = 1. 
/ math.sqrt(self.out_channel * (self.support_num + 1)) 127 | self.weights.data.uniform_(-stdv, stdv) 128 | self.bias.data.uniform_(-stdv, stdv) 129 | self.directions.data.uniform_(-stdv, stdv) 130 | 131 | def forward(self, 132 | neighbor_index: "(bs, vertice_num, neighbor_index)", 133 | vertices: "(bs, vertice_num, 3)", 134 | feature_map: "(bs, vertice_num, in_channel)"): 135 | """ 136 | Return: output feature map: (bs, vertice_num, out_channel) 137 | """ 138 | bs, vertice_num, neighbor_num = neighbor_index.size() 139 | neighbor_direction_norm = get_neighbor_direction_norm(vertices, neighbor_index) 140 | support_direction_norm = F.normalize(self.directions, dim= 0) 141 | theta = neighbor_direction_norm @ support_direction_norm # (bs, vertice_num, neighbor_num, support_num * out_channel) 142 | theta = self.relu(theta) 143 | theta = theta.contiguous().view(bs, vertice_num, neighbor_num, -1) 144 | # (bs, vertice_num, neighbor_num, support_num * out_channel) 145 | 146 | feature_out = feature_map @ self.weights + self.bias # (bs, vertice_num, (support_num + 1) * out_channel) 147 | feature_center = feature_out[:, :, :self.out_channel] # (bs, vertice_num, out_channel) 148 | feature_support = feature_out[:, :, self.out_channel:] #(bs, vertice_num, support_num * out_channel) 149 | 150 | # Fuse together - max among product 151 | feature_support = indexing_neighbor(feature_support, neighbor_index) # (bs, vertice_num, neighbor_num, support_num * out_channel) 152 | activation_support = theta * feature_support # (bs, vertice_num, neighbor_num, support_num * out_channel) 153 | activation_support = activation_support.view(bs,vertice_num, neighbor_num, self.support_num, self.out_channel) 154 | activation_support = torch.max(activation_support, dim= 2)[0] # (bs, vertice_num, support_num, out_channel) 155 | activation_support = torch.sum(activation_support, dim= 2) # (bs, vertice_num, out_channel) 156 | feature_fuse = feature_center + activation_support # (bs, vertice_num, out_channel) 157 | return feature_fuse 158 | 159 | class Pool_layer(nn.Module): 160 | def __init__(self, pooling_rate: int= 4, neighbor_num: int= 4): 161 | super().__init__() 162 | self.pooling_rate = pooling_rate 163 | self.neighbor_num = neighbor_num 164 | 165 | def forward(self, 166 | vertices: "(bs, vertice_num, 3)", 167 | feature_map: "(bs, vertice_num, channel_num)"): 168 | """ 169 | Return: 170 | vertices_pool: (bs, pool_vertice_num, 3), 171 | feature_map_pool: (bs, pool_vertice_num, channel_num) 172 | """ 173 | bs, vertice_num, _ = vertices.size() 174 | neighbor_index = get_neighbor_index(vertices, self.neighbor_num) 175 | neighbor_feature = indexing_neighbor(feature_map, neighbor_index) #(bs, vertice_num, neighbor_num, channel_num) 176 | pooled_feature = torch.max(neighbor_feature, dim= 2)[0] #(bs, vertice_num, channel_num) 177 | 178 | pool_num = int(vertice_num / self.pooling_rate) 179 | sample_idx = torch.randperm(vertice_num)[:pool_num] 180 | vertices_pool = vertices[:, sample_idx, :] # (bs, pool_num, 3) 181 | feature_map_pool = pooled_feature[:, sample_idx, :] #(bs, pool_num, channel_num) 182 | return vertices_pool, feature_map_pool 183 | 184 | def test(): 185 | import time 186 | bs = 8 187 | v = 1024 188 | dim = 3 189 | n = 20 190 | vertices = torch.randn(bs, v, dim) 191 | neighbor_index = get_neighbor_index(vertices, n) 192 | 193 | s = 3 194 | conv_1 = Conv_surface(kernel_num= 32, support_num= s) 195 | conv_2 = Conv_layer(in_channel= 32, out_channel= 64, support_num= s) 196 | pool = Pool_layer(pooling_rate= 4, neighbor_num= 
4) 197 | 198 | print("Input size: {}".format(vertices.size())) 199 | start = time.time() 200 | f1 = conv_1(neighbor_index, vertices) 201 | print("\n[1] Time: {}".format(time.time() - start)) 202 | print("[1] Out shape: {}".format(f1.size())) 203 | start = time.time() 204 | f2 = conv_2(neighbor_index, vertices, f1) 205 | print("\n[2] Time: {}".format(time.time() - start)) 206 | print("[2] Out shape: {}".format(f2.size())) 207 | start = time.time() 208 | v_pool, f_pool = pool(vertices, f2) 209 | print("\n[3] Time: {}".format(time.time() - start)) 210 | print("[3] v shape: {}, f shape: {}".format(v_pool.size(), f_pool.size())) 211 | 212 | 213 | if __name__ == "__main__": 214 | test() 215 | -------------------------------------------------------------------------------- /prepare_data/gen_pts.py: -------------------------------------------------------------------------------- 1 | # @Time : 12/05/2021 2 | # @Author : Wei Chen 3 | # @Project : Pycharm 4 | 5 | import cv2 6 | import numpy as np 7 | import os 8 | import _pickle as pickle 9 | from uti_tool import getFiles_cate, depth_2_mesh_all, depth_2_mesh_bbx 10 | from prepare_data.renderer import create_renderer 11 | 12 | def render_pre(model_path): 13 | renderer = create_renderer(640, 480, renderer_type='python') 14 | models = getFiles_ab_cate(model_path, '.ply') #model name example: laptop_air_1_norm.ply please adjust the 15 | # corresponding functions according to the model name. 16 | objs=[] 17 | for model in models: 18 | obj = model.split('.')[1] 19 | objs.append(obj) 20 | renderer.add_object(obj, model) 21 | return renderer 22 | 23 | def getFiles_ab_cate(file_dir,suf): 24 | L=[] 25 | for root, dirs, files in os.walk(file_dir): 26 | for file in files: 27 | if file.split('.')[1] == suf: 28 | L.append(os.path.join(root, file)) 29 | return L 30 | 31 | def get_dis_all(pc,dep,dd=15): 32 | 33 | N=pc.shape[0] 34 | M=dep.shape[0] 35 | depp=np.tile(dep,(1,N)) 36 | 37 | depmm=depp.reshape((M,N,3)) 38 | delta = depmm - pc 39 | diss=np.linalg.norm(delta,2, 2) 40 | 41 | aa=np.min(diss,1) 42 | bb=aa.reshape((M,1)) 43 | 44 | ids,cc=np.where(bb[:] 0.0)] * 1000.0 61 | 62 | numbs = 6000 63 | 64 | numbs2 = 1000 65 | if VIS.shape[0] > numbs2: 66 | choice2 = np.random.choice(VIS.shape[0], numbs2, replace=False) 67 | VIS = VIS[choice2, :] 68 | 69 | 70 | filename = save_path + ("/pose%08d.txt" % (idx)) 71 | w_namei = save_pathlab + ("/lab%08d.txt" % (idx)) 72 | 73 | dep3d_ = depth_2_mesh_bbx(depth, bbx, K, enl=0) 74 | 75 | if dep3d_.shape[0] > numbs: 76 | choice = np.random.choice(dep3d_.shape[0], numbs, replace=False) 77 | 78 | dep3d = dep3d_[choice, :] 79 | else: 80 | choice = np.random.choice(dep3d_.shape[0], numbs, replace=True) 81 | dep3d = dep3d_[choice, :] 82 | 83 | dep3d = dep3d[np.where(dep3d[:, 2] != 0.0)] 84 | 85 | 86 | threshold = 12 87 | 88 | ids = get_dis_all(VIS, dep3d[:, 0:3], dd=threshold) ## find the object points 89 | 90 | if len(ids) <= 10: 91 | if os.path.exists(filename): 92 | os.remove(filename) 93 | if os.path.exists(w_namei): 94 | os.remove(w_namei) 95 | 96 | if len(ids) > 10: 97 | 98 | np.savetxt(filename, dep3d, fmt='%f', delimiter=' ') 99 | lab = np.zeros((dep3d.shape[0], 1), dtype=np.uint) 100 | lab[ids, :] = 1 101 | np.savetxt(w_namei, lab, fmt='%d') 102 | 103 | 104 | 105 | 106 | def get_point_wise_lab(basepath, fold, renderer, sp): 107 | base_path = basepath + '%d/' % (fold) 108 | 109 | 110 | depths = getFiles_cate(base_path, '_depth', 4, -4) 111 | 112 | labels = getFiles_cate(base_path, '_label2', 4, -4) 113 | 114 | 115 | L_dep = 
depths 116 | 117 | Ki = np.array([[591.0125, 0, 322.525], [0, 590.16775, 244.11084], [0, 0, 1]]) 118 | 119 | Lidx = 1000 120 | if fold == 1: 121 | s = 0 122 | else: 123 | s = 0 124 | for i in range(s, len(L_dep)): 125 | 126 | lab = pickle.load(open(labels[i], 'rb')) 127 | 128 | depth = cv2.imread(L_dep[i], -1) 129 | img_id = int(L_dep[i][-14:-10]) 130 | for ii in range(len(lab['class_ids'])): 131 | 132 | 133 | obj = lab['model_list'][ii] 134 | 135 | seg = lab['bboxes'][ii].reshape((1, 4)) ## y1 x1 y2 x2 (ori x1,y1,w h) 136 | 137 | idx = (fold - 1) * Lidx + img_id 138 | 139 | R = lab['rotations'][ii] # .reshape((3, 3)) 140 | 141 | T = lab['translations'][ii].reshape((3, 1)) # -np.array([0,0,100]).reshape((3, 1)) 142 | 143 | 144 | if T[2] < 0: 145 | T[2] = -T[2] 146 | vis_part = renderer.render_object(obj, R, T, Ki[0, 0], Ki[1, 1], Ki[0, 2], Ki[1, 2])['depth'] 147 | 148 | bbx = [seg[0, 0], seg[0, 2], seg[0, 1], seg[0, 3]] 149 | 150 | if vis_part.max() > 0: 151 | get_one(depth, bbx, vis_part, Ki, idx, obj, sp) 152 | 153 | 154 | 155 | 156 | if __name__ == '__main__': 157 | path = 'your own object model path ' 158 | render_pre(path) 159 | 160 | 161 | 162 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /prepare_data/imgs/3140-teaser.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/prepare_data/imgs/3140-teaser.gif -------------------------------------------------------------------------------- /prepare_data/imgs/3DGC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/prepare_data/imgs/3DGC.png -------------------------------------------------------------------------------- /prepare_data/imgs/B.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/prepare_data/imgs/B.gif -------------------------------------------------------------------------------- /prepare_data/imgs/M.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/prepare_data/imgs/M.gif -------------------------------------------------------------------------------- /prepare_data/imgs/XZ.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/prepare_data/imgs/XZ.gif -------------------------------------------------------------------------------- /prepare_data/imgs/Y.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/prepare_data/imgs/Y.gif -------------------------------------------------------------------------------- /prepare_data/imgs/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/prepare_data/imgs/arch.png -------------------------------------------------------------------------------- /prepare_data/imgs/lap_green.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/prepare_data/imgs/lap_green.gif -------------------------------------------------------------------------------- /prepare_data/imgs/lap_red.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/prepare_data/imgs/lap_red.gif -------------------------------------------------------------------------------- /prepare_data/misc.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | import transform 5 | import os 6 | import sys 7 | import datetime 8 | # import pytz 9 | import math 10 | import subprocess 11 | import numpy as np 12 | from scipy.spatial import distance 13 | # from scipy.spatial import distance 14 | import pytz 15 | 16 | 17 | def log(s): 18 | """A logging function. 19 | 20 | :param s: String to print (with the current date and time). 21 | """ 22 | # Use UTC time for logging. 23 | utc_now = pytz.utc.localize(datetime.datetime.utcnow()) 24 | # pst_now = utc_now.astimezone(pytz.timezone("America/Los_Angeles")) 25 | utc_now_str = '{}/{}|{:02d}:{:02d}:{:02d}'.format( 26 | utc_now.month, utc_now.day, utc_now.hour, utc_now.minute, utc_now.second) 27 | 28 | # sys.stdout.write('{}: {}\n'.format(time.strftime('%m/%d|%H:%M:%S'), s)) 29 | sys.stdout.write('{}: {}\n'.format(utc_now_str, s)) 30 | sys.stdout.flush() 31 | 32 | 33 | def ensure_dir(path): 34 | """Ensures that the specified directory exists. 35 | 36 | :param path: Path to the directory. 37 | """ 38 | if not os.path.exists(path): 39 | os.makedirs(path) 40 | 41 | 42 | def get_symmetry_transformations(model_info, max_sym_disc_step): 43 | """Returns a set of symmetry transformations for an object model. 44 | 45 | :param model_info: See files models_info.json provided with the datasets. 46 | :param max_sym_disc_step: The maximum fraction of the object diameter which 47 | the vertex that is the furthest from the axis of continuous rotational 48 | symmetry travels between consecutive discretized rotations. 49 | :return: The set of symmetry transformations. 50 | """ 51 | # Discrete symmetries. 52 | trans_disc = [{'R': np.eye(3), 't': np.array([[0, 0, 0]]).T}] # Identity. 53 | if 'symmetries_discrete' in model_info: 54 | for sym in model_info['symmetries_discrete']: 55 | sym_4x4 = np.reshape(sym, (4, 4)) 56 | R = sym_4x4[:3, :3] 57 | t = sym_4x4[:3, 3].reshape((3, 1)) 58 | trans_disc.append({'R': R, 't': t}) 59 | 60 | # Discretized continuous symmetries. 61 | trans_cont = [] 62 | if 'symmetries_continuous' in model_info: 63 | for sym in model_info['symmetries_continuous']: 64 | axis = np.array(sym['axis']) 65 | offset = np.array(sym['offset']).reshape((3, 1)) 66 | 67 | # (PI * diam.) / (max_sym_disc_step * diam.) = discrete_steps_count 68 | discrete_steps_count = int(np.ceil(np.pi / max_sym_disc_step)) 69 | 70 | # Discrete step in radians. 71 | discrete_step = 2.0 * np.pi / discrete_steps_count 72 | 73 | for i in range(1, discrete_steps_count): 74 | R = transform.rotation_matrix(i * discrete_step, axis)[:3, :3] 75 | t = -R.dot(offset) + offset 76 | trans_cont.append({'R': R, 't': t}) 77 | 78 | # Combine the discrete and the discretized continuous symmetries. 
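# (Each composed transform applies the discrete symmetry first and the discretized continuous one second:
#  R = R_cont · R_disc and t = R_cont · t_disc + t_cont.)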
79 | trans = [] 80 | for tran_disc in trans_disc: 81 | if len(trans_cont): 82 | for tran_cont in trans_cont: 83 | R = tran_cont['R'].dot(tran_disc['R']) 84 | t = tran_cont['R'].dot(tran_disc['t']) + tran_cont['t'] 85 | trans.append({'R': R, 't': t}) 86 | else: 87 | trans.append(tran_disc) 88 | 89 | return trans 90 | 91 | 92 | def project_pts(pts, K, R, t): 93 | """Projects 3D points. 94 | 95 | :param pts: nx3 ndarray with the 3D points. 96 | :param K: 3x3 ndarray with an intrinsic camera matrix. 97 | :param R: 3x3 ndarray with a rotation matrix. 98 | :param t: 3x1 ndarray with a translation vector. 99 | :return: nx2 ndarray with 2D image coordinates of the projections. 100 | """ 101 | assert (pts.shape[1] == 3) 102 | P = K.dot(np.hstack((R, t))) 103 | pts_h = np.hstack((pts, np.ones((pts.shape[0], 1)))) 104 | pts_im = P.dot(pts_h.T) 105 | pts_im /= pts_im[2, :] 106 | return pts_im[:2, :].T 107 | 108 | 109 | class Precomputer(object): 110 | """ Caches pre_Xs, pre_Ys for a 30% speedup of depth_im_to_dist_im() 111 | """ 112 | xs, ys = None, None 113 | pre_Xs, pre_Ys = None, None 114 | depth_im_shape = None 115 | K = None 116 | 117 | @staticmethod 118 | def precompute_lazy(depth_im, K): 119 | """ Lazy precomputation for depth_im_to_dist_im() if depth_im.shape or K changes 120 | 121 | :param depth_im: hxw ndarray with the input depth image, where depth_im[y, x] 122 | is the Z coordinate of the 3D point [X, Y, Z] that projects to pixel [x, y], 123 | or 0 if there is no such 3D point (this is a typical output of the 124 | Kinect-like sensors). 125 | :param K: 3x3 ndarray with an intrinsic camera matrix. 126 | :return: hxw ndarray (Xs/depth_im, Ys/depth_im) 127 | """ 128 | if depth_im.shape != Precomputer.depth_im_shape: 129 | Precomputer.depth_im_shape = depth_im.shape 130 | Precomputer.xs, Precomputer.ys = np.meshgrid( 131 | np.arange(depth_im.shape[1]), np.arange(depth_im.shape[0])) 132 | 133 | if depth_im.shape != Precomputer.depth_im_shape \ 134 | or not np.all(K == Precomputer.K): 135 | Precomputer.K = K 136 | Precomputer.pre_Xs = (Precomputer.xs - K[0, 2]) / np.float64(K[0, 0]) 137 | Precomputer.pre_Ys = (Precomputer.ys - K[1, 2]) / np.float64(K[1, 1]) 138 | 139 | return Precomputer.pre_Xs, Precomputer.pre_Ys 140 | 141 | 142 | def depth_im_to_dist_im_fast(depth_im, K): 143 | """Converts a depth image to a distance image. 144 | 145 | :param depth_im: hxw ndarray with the input depth image, where depth_im[y, x] 146 | is the Z coordinate of the 3D point [X, Y, Z] that projects to pixel [x, y], 147 | or 0 if there is no such 3D point (this is a typical output of the 148 | Kinect-like sensors). 149 | :param K: 3x3 ndarray with an intrinsic camera matrix. 150 | :return: hxw ndarray with the distance image, where dist_im[y, x] is the 151 | distance from the camera center to the 3D point [X, Y, Z] that projects to 152 | pixel [x, y], or 0 if there is no such 3D point. 153 | """ 154 | # Only recomputed if depth_im.shape or K changes. 155 | pre_Xs, pre_Ys = Precomputer.precompute_lazy(depth_im, K) 156 | 157 | dist_im = np.sqrt( 158 | np.multiply(pre_Xs, depth_im) ** 2 + 159 | np.multiply(pre_Ys, depth_im) ** 2 + 160 | depth_im.astype(np.float64) ** 2) 161 | 162 | return dist_im 163 | 164 | 165 | def depth_im_to_dist_im(depth_im, K): 166 | """Converts a depth image to a distance image. 
167 | :param depth_im: hxw ndarray with the input depth image, where depth_im[y, x] 168 | is the Z coordinate of the 3D point [X, Y, Z] that projects to pixel [x, y], 169 | or 0 if there is no such 3D point (this is a typical output of the 170 | Kinect-like sensors). 171 | :param K: 3x3 ndarray with an intrinsic camera matrix. 172 | :return: hxw ndarray with the distance image, where dist_im[y, x] is the 173 | distance from the camera center to the 3D point [X, Y, Z] that projects to 174 | pixel [x, y], or 0 if there is no such 3D point. 175 | """ 176 | xs, ys = np.meshgrid( 177 | np.arange(depth_im.shape[1]), np.arange(depth_im.shape[0])) 178 | 179 | Xs = np.multiply(xs - K[0, 2], depth_im) * (1.0 / K[0, 0]) 180 | Ys = np.multiply(ys - K[1, 2], depth_im) * (1.0 / K[1, 1]) 181 | 182 | dist_im = np.sqrt( 183 | Xs.astype(np.float64) ** 2 + 184 | Ys.astype(np.float64) ** 2 + 185 | depth_im.astype(np.float64) ** 2) 186 | # dist_im = np.linalg.norm(np.dstack((Xs, Ys, depth_im)), axis=2) # Slower. 187 | 188 | return dist_im 189 | 190 | 191 | def clip_pt_to_im(pt, im_size): 192 | """Clips a 2D point to the image frame. 193 | 194 | :param pt: 2D point (x, y). 195 | :param im_size: Image size (width, height). 196 | :return: Clipped 2D point (x, y). 197 | """ 198 | return [min(max(pt[0], 0), im_size[0] - 1), 199 | min(max(pt[1], 0), im_size[1] - 1)] 200 | 201 | 202 | def calc_2d_bbox(xs, ys, im_size=None, clip=False): 203 | """Calculates 2D bounding box of the given set of 2D points. 204 | 205 | :param xs: 1D ndarray with x-coordinates of 2D points. 206 | :param ys: 1D ndarray with y-coordinates of 2D points. 207 | :param im_size: Image size (width, height) (used for optional clipping). 208 | :param clip: Whether to clip the bounding box (default == False). 209 | :return: 2D bounding box (x, y, w, h), where (x, y) is the top-left corner 210 | and (w, h) is width and height of the bounding box. 211 | """ 212 | bb_min = [xs.min(), ys.min()] 213 | bb_max = [xs.max(), ys.max()] 214 | if clip: 215 | assert (im_size is not None) 216 | bb_min = clip_pt_to_im(bb_min, im_size) 217 | bb_max = clip_pt_to_im(bb_max, im_size) 218 | return [bb_min[0], bb_min[1], bb_max[0] - bb_min[0], bb_max[1] - bb_min[1]] 219 | 220 | 221 | def calc_3d_bbox(xs, ys, zs): 222 | """Calculates 3D bounding box of the given set of 3D points. 223 | 224 | :param xs: 1D ndarray with x-coordinates of 3D points. 225 | :param ys: 1D ndarray with y-coordinates of 3D points. 226 | :param zs: 1D ndarray with z-coordinates of 3D points. 227 | :return: 3D bounding box (x, y, z, w, h, d), where (x, y, z) is the top-left 228 | corner and (w, h, d) is width, height and depth of the bounding box. 229 | """ 230 | bb_min = [xs.min(), ys.min(), zs.min()] 231 | bb_max = [xs.max(), ys.max(), zs.max()] 232 | return [bb_min[0], bb_min[1], bb_min[2], 233 | bb_max[0] - bb_min[0], bb_max[1] - bb_min[1], bb_max[2] - bb_min[2]] 234 | 235 | 236 | def iou(bb_a, bb_b): 237 | """Calculates the Intersection over Union (IoU) of two 2D bounding boxes. 238 | 239 | :param bb_a: 2D bounding box (x1, y1, w1, h1) -- see calc_2d_bbox. 240 | :param bb_b: 2D bounding box (x2, y2, w2, h2) -- see calc_2d_bbox. 241 | :return: The IoU value. 242 | """ 243 | # [x1, y1, width, height] --> [x1, y1, x2, y2] 244 | tl_a, br_a = (bb_a[0], bb_a[1]), (bb_a[0] + bb_a[2], bb_a[1] + bb_a[3]) 245 | tl_b, br_b = (bb_b[0], bb_b[1]), (bb_b[0] + bb_b[2], bb_b[1] + bb_b[3]) 246 | 247 | # Intersection rectangle. 
248 | tl_inter = max(tl_a[0], tl_b[0]), max(tl_a[1], tl_b[1]) 249 | br_inter = min(br_a[0], br_b[0]), min(br_a[1], br_b[1]) 250 | 251 | # Width and height of the intersection rectangle. 252 | w_inter = br_inter[0] - tl_inter[0] 253 | h_inter = br_inter[1] - tl_inter[1] 254 | 255 | if w_inter > 0 and h_inter > 0: 256 | area_inter = w_inter * h_inter 257 | area_a = bb_a[2] * bb_a[3] 258 | area_b = bb_b[2] * bb_b[3] 259 | iou = area_inter / float(area_a + area_b - area_inter) 260 | else: 261 | iou = 0.0 262 | 263 | return iou 264 | 265 | 266 | def transform_pts_Rt(pts, R, t): 267 | """Applies a rigid transformation to 3D points. 268 | 269 | :param pts: nx3 ndarray with 3D points. 270 | :param R: 3x3 ndarray with a rotation matrix. 271 | :param t: 3x1 ndarray with a translation vector. 272 | :return: nx3 ndarray with transformed 3D points. 273 | """ 274 | assert (pts.shape[1] == 3) 275 | pts_t = R.dot(pts.T) + t.reshape((3, 1)) 276 | return pts_t.T 277 | 278 | 279 | def calc_pts_diameter(pts): 280 | """Calculates the diameter of a set of 3D points (i.e. the maximum distance 281 | between any two points in the set). 282 | 283 | :param pts: nx3 ndarray with 3D points. 284 | :return: The calculated diameter. 285 | """ 286 | diameter = -1.0 287 | for pt_id in range(pts.shape[0]): 288 | pt_dup = np.tile(np.array([pts[pt_id, :]]), [pts.shape[0] - pt_id, 1]) 289 | pts_diff = pt_dup - pts[pt_id:, :] 290 | max_dist = math.sqrt((pts_diff * pts_diff).sum(axis=1).max()) 291 | if max_dist > diameter: 292 | diameter = max_dist 293 | return diameter 294 | 295 | 296 | def calc_pts_diameter2(pts): 297 | """Calculates the diameter of a set of 3D points (i.e. the maximum distance 298 | between any two points in the set). Faster but requires more memory than 299 | calc_pts_diameter. 300 | 301 | :param pts: nx3 ndarray with 3D points. 302 | :return: The calculated diameter. 303 | """ 304 | dists = distance.cdist(pts, pts, 'euclidean') 305 | diameter = np.max(dists) 306 | return diameter 307 | 308 | 309 | def overlapping_sphere_projections(radius, p1, p2): 310 | """Checks if projections of two spheres overlap (approximated). 311 | 312 | :param radius: Radius of the two spheres. 313 | :param p1: [X1, Y1, Z1] center of the first sphere. 314 | :param p2: [X2, Y2, Z2] center of the second sphere. 315 | :return: True if the projections of the two spheres overlap. 316 | """ 317 | if p1[2] == 0 or p2[2] == 0: 318 | return False 319 | 320 | # 2D projections of centers of the spheres. 321 | proj1 = (p1 / p1[2])[:2] 322 | proj2 = (p2 / p2[2])[:2] 323 | 324 | # Distance between the center projections. 325 | proj_dist = np.linalg.norm(proj1 - proj2) 326 | 327 | # The max. distance of the center projections at which the sphere projections, 328 | # i.e. sphere silhouettes, still overlap (approximated). 329 | proj_dist_thresh = radius * (1.0 / p1[2] + 1.0 / p2[2]) 330 | 331 | return proj_dist < proj_dist_thresh 332 | 333 | 334 | def get_error_signature(error_type, n_top, **kwargs): 335 | """Generates a signature for the specified settings of pose error calculation. 336 | 337 | :param error_type: Type of error. 338 | :param n_top: Top N pose estimates (with the highest score) to be evaluated 339 | for each object class in each image. 340 | :return: Generated signature. 
341 | """ 342 | error_sign = 'error=' + error_type + '_ntop=' + str(n_top) 343 | if error_type == 'vsd': 344 | if kwargs['vsd_tau'] == float('inf'): 345 | vsd_tau_str = 'inf' 346 | else: 347 | vsd_tau_str = '{:.3f}'.format(kwargs['vsd_tau']) 348 | error_sign += '_delta={:.3f}_tau={}'.format( 349 | kwargs['vsd_delta'], vsd_tau_str) 350 | return error_sign 351 | 352 | 353 | def get_score_signature(correct_th, visib_gt_min): 354 | """Generates a signature for a performance score. 355 | 356 | :param visib_gt_min: Minimum visible surface fraction of a valid GT pose. 357 | :return: Generated signature. 358 | """ 359 | eval_sign = 'th=' + '-'.join(['{:.3f}'.format(t) for t in correct_th]) 360 | eval_sign += '_min-visib={:.3f}'.format(visib_gt_min) 361 | return eval_sign 362 | 363 | 364 | def run_meshlab_script(meshlab_server_path, meshlab_script_path, model_in_path, 365 | model_out_path, attrs_to_save): 366 | """Runs a MeshLab script on a 3D model. 367 | 368 | meshlabserver depends on X server. To remove this dependence (on linux), run: 369 | 1) Xvfb :100 & 370 | 2) export DISPLAY=:100.0 371 | 3) meshlabserver 372 | 373 | :param meshlab_server_path: Path to meshlabserver.exe. 374 | :param meshlab_script_path: Path to an MLX MeshLab script. 375 | :param model_in_path: Path to the input 3D model saved in the PLY format. 376 | :param model_out_path: Path to the output 3D model saved in the PLY format. 377 | :param attrs_to_save: Attributes to save: 378 | - vc -> vertex colors 379 | - vf -> vertex flags 380 | - vq -> vertex quality 381 | - vn -> vertex normals 382 | - vt -> vertex texture coords 383 | - fc -> face colors 384 | - ff -> face flags 385 | - fq -> face quality 386 | - fn -> face normals 387 | - wc -> wedge colors 388 | - wn -> wedge normals 389 | - wt -> wedge texture coords 390 | """ 391 | meshlabserver_cmd = [meshlab_server_path, '-s', meshlab_script_path, '-i', 392 | model_in_path, '-o', model_out_path] 393 | 394 | if len(attrs_to_save): 395 | meshlabserver_cmd += ['-m'] + attrs_to_save 396 | 397 | log(' '.join(meshlabserver_cmd)) 398 | if subprocess.call(meshlabserver_cmd) != 0: 399 | exit(-1) 400 | -------------------------------------------------------------------------------- /prepare_data/renderer.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Abstract class of a renderer and a factory function to create a renderer. 5 | 6 | The renderer produces an RGB/depth image of a 3D mesh model in a specified pose 7 | for given camera parameters and illumination settings. 8 | """ 9 | 10 | 11 | class Renderer(object): 12 | """Abstract class of a renderer.""" 13 | 14 | def __init__(self, width, height): 15 | """Constructor. 16 | 17 | :param width: Width of the rendered image. 18 | :param height: Height of the rendered image. 19 | """ 20 | self.width = width 21 | self.height = height 22 | 23 | # 3D location of a point light (in the camera coordinates). 24 | self.light_cam_pos = (0, 0, 0) 25 | 26 | # Set light color and weights. 27 | self.light_color = (1.0, 1.0, 1.0) # Used only in C++ renderer. 28 | self.light_ambient_weight = 0.5 29 | self.light_diffuse_weight = 1.0 # Used only in C++ renderer. 30 | self.light_specular_weight = 0.0 # Used only in C++ renderer. 31 | self.light_specular_shininess = 0.0 # Used only in C++ renderer. 
32 | 33 | def set_light_cam_pos(self, light_cam_pos): 34 | """Sets the 3D location of a point light. 35 | 36 | :param light_cam_pos: [X, Y, Z]. 37 | """ 38 | self.light_cam_pos = light_cam_pos 39 | 40 | def set_light_ambient_weight(self, light_ambient_weight): 41 | """Sets weight of the ambient light. 42 | 43 | :param light_ambient_weight: Scalar from 0 to 1. 44 | """ 45 | self.light_ambient_weight = light_ambient_weight 46 | 47 | def add_object(self, obj_id, model_path, **kwargs): 48 | """Loads an object model. 49 | 50 | :param obj_id: Object identifier. 51 | :param model_path: Path to the object model file. 52 | """ 53 | raise NotImplementedError 54 | 55 | def remove_object(self, obj_id): 56 | """Removes an object model. 57 | 58 | :param obj_id: Identifier of the object to remove. 59 | """ 60 | raise NotImplementedError 61 | 62 | def render_object(self, obj_id, R, t, fx, fy, cx, cy): 63 | """Renders an object model in the specified pose. 64 | 65 | :param obj_id: Object identifier. 66 | :param R: 3x3 ndarray with a rotation matrix. 67 | :param t: 3x1 ndarray with a translation vector. 68 | :param fx: Focal length (X axis). 69 | :param fy: Focal length (Y axis). 70 | :param cx: The X coordinate of the principal point. 71 | :param cy: The Y coordinate of the principal point. 72 | :return: Returns a dictionary with rendered images. 73 | """ 74 | raise NotImplementedError 75 | 76 | 77 | def create_renderer(width, height, renderer_type='cpp', mode='rgb+depth', 78 | shading='phong', bg_color=(0.0, 0.0, 0.0, 0.0)): 79 | """A factory to create a renderer. 80 | 81 | Note: Parameters mode, shading and bg_color are currently supported only by 82 | the Python renderer (renderer_type='python'). 83 | 84 | :param width: Width of the rendered image. 85 | :param height: Height of the rendered image. 86 | :param renderer_type: Type of renderer (options: 'cpp', 'python'). 87 | :param mode: Rendering mode ('rgb+depth', 'rgb', 'depth'). 88 | :param shading: Type of shading ('flat', 'phong'). 89 | :param bg_color: Color of the background (R, G, B, A). 90 | :return: Instance of a renderer of the specified type. 91 | """ 92 | if renderer_type == 'python': 93 | from . import renderer_py 94 | return renderer_py.RendererPython(width, height, mode, shading, bg_color) 95 | 96 | elif renderer_type == 'cpp': 97 | from . 
import renderer_cpp 98 | return renderer_cpp.RendererCpp(width, height) 99 | 100 | else: 101 | raise ValueError('Unknown renderer type.') 102 | -------------------------------------------------------------------------------- /pyTorchChamferDistance/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /pyTorchChamferDistance/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/pyTorchChamferDistance/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /pyTorchChamferDistance/__pycache__/chamfer_distance.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/pyTorchChamferDistance/__pycache__/chamfer_distance.cpython-36.pyc -------------------------------------------------------------------------------- /pyTorchChamferDistance/chamfer_distance.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // CUDA forward declarations 4 | int ChamferDistanceKernelLauncher( 5 | const int b, const int n, 6 | const float* xyz, 7 | const int m, 8 | const float* xyz2, 9 | float* result, 10 | int* result_i, 11 | float* result2, 12 | int* result2_i); 13 | 14 | int ChamferDistanceGradKernelLauncher( 15 | const int b, const int n, 16 | const float* xyz1, 17 | const int m, 18 | const float* xyz2, 19 | const float* grad_dist1, 20 | const int* idx1, 21 | const float* grad_dist2, 22 | const int* idx2, 23 | float* grad_xyz1, 24 | float* grad_xyz2); 25 | 26 | 27 | void chamfer_distance_forward_cuda( 28 | const at::Tensor xyz1, 29 | const at::Tensor xyz2, 30 | const at::Tensor dist1, 31 | const at::Tensor dist2, 32 | const at::Tensor idx1, 33 | const at::Tensor idx2) 34 | { 35 | ChamferDistanceKernelLauncher(xyz1.size(0), xyz1.size(1), xyz1.data(), 36 | xyz2.size(1), xyz2.data(), 37 | dist1.data(), idx1.data(), 38 | dist2.data(), idx2.data()); 39 | } 40 | 41 | void chamfer_distance_backward_cuda( 42 | const at::Tensor xyz1, 43 | const at::Tensor xyz2, 44 | at::Tensor gradxyz1, 45 | at::Tensor gradxyz2, 46 | at::Tensor graddist1, 47 | at::Tensor graddist2, 48 | at::Tensor idx1, 49 | at::Tensor idx2) 50 | { 51 | ChamferDistanceGradKernelLauncher(xyz1.size(0), xyz1.size(1), xyz1.data(), 52 | xyz2.size(1), xyz2.data(), 53 | graddist1.data(), idx1.data(), 54 | graddist2.data(), idx2.data(), 55 | gradxyz1.data(), gradxyz2.data()); 56 | } 57 | 58 | 59 | void nnsearch( 60 | const int b, const int n, const int m, 61 | const float* xyz1, 62 | const float* xyz2, 63 | float* dist, 64 | int* idx) 65 | { 66 | for (int i = 0; i < b; i++) { 67 | for (int j = 0; j < n; j++) { 68 | const float x1 = xyz1[(i*n+j)*3+0]; 69 | const float y1 = xyz1[(i*n+j)*3+1]; 70 | const float z1 = xyz1[(i*n+j)*3+2]; 71 | double best = 0; 72 | int besti = 0; 73 | for (int k = 0; k < m; k++) { 74 | const float x2 = xyz2[(i*m+k)*3+0] - x1; 75 | const float y2 = xyz2[(i*m+k)*3+1] - y1; 76 | const float z2 = xyz2[(i*m+k)*3+2] - z1; 77 | const double d=x2*x2+y2*y2+z2*z2; 78 | if (k==0 || d < best){ 79 | best = d; 80 | besti = k; 81 | } 82 | } 83 | dist[i*n+j] = best; 84 | idx[i*n+j] = besti; 85 | } 86 | } 87 | } 88 | 89 | 90 | void 
chamfer_distance_forward( 91 | const at::Tensor xyz1, 92 | const at::Tensor xyz2, 93 | const at::Tensor dist1, 94 | const at::Tensor dist2, 95 | const at::Tensor idx1, 96 | const at::Tensor idx2) 97 | { 98 | const int batchsize = xyz1.size(0); 99 | const int n = xyz1.size(1); 100 | const int m = xyz2.size(1); 101 | 102 | const float* xyz1_data = xyz1.data(); 103 | const float* xyz2_data = xyz2.data(); 104 | float* dist1_data = dist1.data(); 105 | float* dist2_data = dist2.data(); 106 | int* idx1_data = idx1.data(); 107 | int* idx2_data = idx2.data(); 108 | 109 | nnsearch(batchsize, n, m, xyz1_data, xyz2_data, dist1_data, idx1_data); 110 | nnsearch(batchsize, m, n, xyz2_data, xyz1_data, dist2_data, idx2_data); 111 | } 112 | 113 | 114 | void chamfer_distance_backward( 115 | const at::Tensor xyz1, 116 | const at::Tensor xyz2, 117 | at::Tensor gradxyz1, 118 | at::Tensor gradxyz2, 119 | at::Tensor graddist1, 120 | at::Tensor graddist2, 121 | at::Tensor idx1, 122 | at::Tensor idx2) 123 | { 124 | const int b = xyz1.size(0); 125 | const int n = xyz1.size(1); 126 | const int m = xyz2.size(1); 127 | 128 | const float* xyz1_data = xyz1.data(); 129 | const float* xyz2_data = xyz2.data(); 130 | float* gradxyz1_data = gradxyz1.data(); 131 | float* gradxyz2_data = gradxyz2.data(); 132 | float* graddist1_data = graddist1.data(); 133 | float* graddist2_data = graddist2.data(); 134 | const int* idx1_data = idx1.data(); 135 | const int* idx2_data = idx2.data(); 136 | 137 | for (int i = 0; i < b*n*3; i++) 138 | gradxyz1_data[i] = 0; 139 | for (int i = 0; i < b*m*3; i++) 140 | gradxyz2_data[i] = 0; 141 | for (int i = 0;i < b; i++) { 142 | for (int j = 0; j < n; j++) { 143 | const float x1 = xyz1_data[(i*n+j)*3+0]; 144 | const float y1 = xyz1_data[(i*n+j)*3+1]; 145 | const float z1 = xyz1_data[(i*n+j)*3+2]; 146 | const int j2 = idx1_data[i*n+j]; 147 | 148 | const float x2 = xyz2_data[(i*m+j2)*3+0]; 149 | const float y2 = xyz2_data[(i*m+j2)*3+1]; 150 | const float z2 = xyz2_data[(i*m+j2)*3+2]; 151 | const float g = graddist1_data[i*n+j]*2; 152 | 153 | gradxyz1_data[(i*n+j)*3+0] += g*(x1-x2); 154 | gradxyz1_data[(i*n+j)*3+1] += g*(y1-y2); 155 | gradxyz1_data[(i*n+j)*3+2] += g*(z1-z2); 156 | gradxyz2_data[(i*m+j2)*3+0] -= (g*(x1-x2)); 157 | gradxyz2_data[(i*m+j2)*3+1] -= (g*(y1-y2)); 158 | gradxyz2_data[(i*m+j2)*3+2] -= (g*(z1-z2)); 159 | } 160 | for (int j = 0; j < m; j++) { 161 | const float x1 = xyz2_data[(i*m+j)*3+0]; 162 | const float y1 = xyz2_data[(i*m+j)*3+1]; 163 | const float z1 = xyz2_data[(i*m+j)*3+2]; 164 | const int j2 = idx2_data[i*m+j]; 165 | const float x2 = xyz1_data[(i*n+j2)*3+0]; 166 | const float y2 = xyz1_data[(i*n+j2)*3+1]; 167 | const float z2 = xyz1_data[(i*n+j2)*3+2]; 168 | const float g = graddist2_data[i*m+j]*2; 169 | gradxyz2_data[(i*m+j)*3+0] += g*(x1-x2); 170 | gradxyz2_data[(i*m+j)*3+1] += g*(y1-y2); 171 | gradxyz2_data[(i*m+j)*3+2] += g*(z1-z2); 172 | gradxyz1_data[(i*n+j2)*3+0] -= (g*(x1-x2)); 173 | gradxyz1_data[(i*n+j2)*3+1] -= (g*(y1-y2)); 174 | gradxyz1_data[(i*n+j2)*3+2] -= (g*(z1-z2)); 175 | } 176 | } 177 | } 178 | 179 | 180 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 181 | m.def("forward", &chamfer_distance_forward, "ChamferDistance forward"); 182 | m.def("forward_cuda", &chamfer_distance_forward_cuda, "ChamferDistance forward (CUDA)"); 183 | m.def("backward", &chamfer_distance_backward, "ChamferDistance backward"); 184 | m.def("backward_cuda", &chamfer_distance_backward_cuda, "ChamferDistance backward (CUDA)"); 185 | } 186 | 
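The CPU implementation above is a brute-force nearest-neighbour search in both directions, and the CUDA file that follows computes the same quantities with a tiled kernel. A minimal PyTorch-only sketch of this bidirectional Chamfer distance, independent of the compiled extension (the function name and shapes are chosen here only for illustration):

import torch

def chamfer_distance_naive(xyz1, xyz2):
    # xyz1: (b, n, 3), xyz2: (b, m, 3) point sets.
    d2 = torch.cdist(xyz1, xyz2) ** 2  # (b, n, m) pairwise squared distances
    dist1, idx1 = d2.min(dim=2)        # nearest xyz2 point for each xyz1 point
    dist2, idx2 = d2.min(dim=1)        # nearest xyz1 point for each xyz2 point
    return dist1, dist2, idx1, idx2

dist1 and dist2 correspond to the dist1/dist2 buffers filled by forward/forward_cuda; chamfer_distance.py combines them into a loss as torch.mean(dist1) + torch.mean(dist2).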
-------------------------------------------------------------------------------- /pyTorchChamferDistance/chamfer_distance.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | __global__ 7 | void ChamferDistanceKernel( 8 | int b, 9 | int n, 10 | const float* xyz, 11 | int m, 12 | const float* xyz2, 13 | float* result, 14 | int* result_i) 15 | { 16 | const int batch=512; 17 | __shared__ float buf[batch*3]; 18 | for (int i=blockIdx.x;ibest){ 130 | result[(i*n+j)]=best; 131 | result_i[(i*n+j)]=best_i; 132 | } 133 | } 134 | __syncthreads(); 135 | } 136 | } 137 | } 138 | 139 | void ChamferDistanceKernelLauncher( 140 | const int b, const int n, 141 | const float* xyz, 142 | const int m, 143 | const float* xyz2, 144 | float* result, 145 | int* result_i, 146 | float* result2, 147 | int* result2_i) 148 | { 149 | ChamferDistanceKernel<<>>(b, n, xyz, m, xyz2, result, result_i); 150 | ChamferDistanceKernel<<>>(b, m, xyz2, n, xyz, result2, result2_i); 151 | 152 | cudaError_t err = cudaGetLastError(); 153 | if (err != cudaSuccess) 154 | printf("error in chamfer distance updateOutput: %s\n", cudaGetErrorString(err)); 155 | } 156 | 157 | 158 | __global__ 159 | void ChamferDistanceGradKernel( 160 | int b, int n, 161 | const float* xyz1, 162 | int m, 163 | const float* xyz2, 164 | const float* grad_dist1, 165 | const int* idx1, 166 | float* grad_xyz1, 167 | float* grad_xyz2) 168 | { 169 | for (int i = blockIdx.x; i>>(b, n, xyz1, m, xyz2, grad_dist1, idx1, grad_xyz1, grad_xyz2); 204 | ChamferDistanceGradKernel<<>>(b, m, xyz2, n, xyz1, grad_dist2, idx2, grad_xyz2, grad_xyz1); 205 | 206 | cudaError_t err = cudaGetLastError(); 207 | if (err != cudaSuccess) 208 | printf("error in chamfer distance get grad: %s\n", cudaGetErrorString(err)); 209 | } 210 | -------------------------------------------------------------------------------- /pyTorchChamferDistance/chamfer_distance.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | 4 | from torch.utils.cpp_extension import load 5 | import platform 6 | 7 | path = 'your own path' 8 | cd = load(name="cd", 9 | sources=[path+ "pyTorchChamferDistance/chamfer_distance.cpp", 10 | path + "pyTorchChamferDistance/chamfer_distance.cu"]) 11 | 12 | class ChamferDistanceFunction(torch.autograd.Function): 13 | @staticmethod 14 | def forward(ctx, xyz1, xyz2): 15 | batchsize, n, _ = xyz1.size() 16 | _, m, _ = xyz2.size() 17 | xyz1 = xyz1.contiguous() 18 | xyz2 = xyz2.contiguous() 19 | dist1 = torch.zeros(batchsize, n) 20 | dist2 = torch.zeros(batchsize, m) 21 | 22 | idx1 = torch.zeros(batchsize, n, dtype=torch.int) 23 | idx2 = torch.zeros(batchsize, m, dtype=torch.int) 24 | 25 | if not xyz1.is_cuda: 26 | cd.forward(xyz1, xyz2, dist1, dist2, idx1, idx2) 27 | else: 28 | dist1 = dist1.cuda() 29 | dist2 = dist2.cuda() 30 | idx1 = idx1.cuda() 31 | idx2 = idx2.cuda() 32 | cd.forward_cuda(xyz1, xyz2, dist1, dist2, idx1, idx2) 33 | 34 | ctx.save_for_backward(xyz1, xyz2, idx1, idx2) 35 | 36 | return dist1, dist2 37 | 38 | @staticmethod 39 | def backward(ctx, graddist1, graddist2): 40 | xyz1, xyz2, idx1, idx2 = ctx.saved_tensors 41 | 42 | graddist1 = graddist1.contiguous() 43 | graddist2 = graddist2.contiguous() 44 | 45 | gradxyz1 = torch.zeros(xyz1.size()) 46 | gradxyz2 = torch.zeros(xyz2.size()) 47 | 48 | if not graddist1.is_cuda: 49 | cd.backward(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2) 50 | else: 51 | gradxyz1 = gradxyz1.cuda() 52 | 
gradxyz2 = gradxyz2.cuda() 53 | cd.backward_cuda(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2) 54 | 55 | return gradxyz1, gradxyz2 56 | 57 | 58 | class ChamferDistance(torch.nn.Module): 59 | def forward(self, xyz1, xyz2): 60 | return ChamferDistanceFunction.apply(xyz1, xyz2) 61 | 62 | 63 | if __name__ == '__main__': 64 | 65 | 66 | chamfer_dist = ChamferDistance() 67 | a = torch.randn(1, 100, 3) 68 | b = torch.randn(1, 50, 5) 69 | dist1, dist2 = chamfer_dist(a, b) 70 | loss = (torch.mean(dist1)) + (torch.mean(dist2)) 71 | print(loss) 72 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib==3.3.4 2 | nn-distance==0.0.0 3 | numpy==1.19.4 4 | opencv-contrib-python==4.5.2.52 5 | opencv-python==4.5.2.52 6 | torch==1.8.1 7 | torchvision==0.9.1 8 | -------------------------------------------------------------------------------- /yolov3_fsnet/detect_fsnet.py: -------------------------------------------------------------------------------- 1 | # @Time : 10/05/2021 2 | # @Author : Wei Chen 3 | # @Project : Pycharm 4 | import argparse 5 | import time 6 | from pathlib import Path 7 | import numpy as np 8 | import cv2 9 | import torch 10 | import torch.backends.cudnn as cudnn 11 | from numpy import random 12 | 13 | from yolov3_fsnet.models.experimental import attempt_load 14 | from yolov3_fsnet.utils.datasets import LoadStreams, LoadImages, LoadImages_fsnet 15 | from yolov3_fsnet.utils.general import check_img_size, check_requirements, check_imshow, non_max_suppression, apply_classifier, \ 16 | scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path 17 | from yolov3_fsnet.utils.plots import plot_one_box 18 | from yolov3_fsnet.utils.torch_utils import select_device, load_classifier, time_synchronized 19 | from uti_tool import getFiles_ab_cate, depth_2_mesh_bbx, load_ply 20 | from Net_deploy import load_models, FS_Net_Test 21 | from torch.utils.data import DataLoader 22 | 23 | def detect(opt,data_path, classifier_seg3D, classifier_ce, classifier_Rot_green, classifier_Rot_red, 24 | model_size, cate_id0): 25 | source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size 26 | save_img = not opt.nosave and not source.endswith('.txt') # save inference images 27 | webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith( 28 | ('rtsp://', 'rtmp://', 'http://', 'https://')) 29 | 30 | # Directories 31 | save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run 32 | (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir 33 | 34 | # Initialize 35 | set_logging() 36 | device = select_device(opt.device) 37 | half = device.type != 'cpu' # half precision only supported on CUDA 38 | 39 | # Load model 40 | model = attempt_load(weights, map_location=device) # load FP32 model 41 | stride = int(model.stride.max()) # model stride 42 | imgsz = check_img_size(imgsz, s=stride) # check img_size 43 | if half: 44 | model.half() # to FP16 45 | 46 | 47 | # Set Dataloader 48 | dataset = LoadImages_fsnet(data_path, img_size=imgsz, stride=stride) 49 | 50 | dataloader = DataLoader(dataset, batch_size=1, shuffle=False) 51 | # Get names and colors 52 | names = model.module.names if hasattr(model, 'module') else model.names 53 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] 54 | 55 | # 
Run inference 56 | if device.type != 'cpu': 57 | model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once 58 | 59 | for icc, data in enumerate(dataloader): 60 | path, img, im0s, depth_, Rt, Tt, pc =data 61 | 62 | img = img[0].to(device) 63 | img = img.half() if half else img.float() # uint8 to fp16/32 64 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 65 | if img.ndimension() == 3: 66 | img = img.unsqueeze(0) 67 | 68 | # Inference 69 | 70 | pred = model(img, augment=opt.augment)[0] 71 | 72 | # Apply NMS 73 | pred, cenxy = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, 74 | agnostic=opt.agnostic_nms) 75 | # pred2 = pred[0][(np.where(pred[0][:,-1].cpu()==63))] ##labtop 76 | K = np.array([[591.0125, 0, 322.525], [0, 590.16775, 244.11084], [0, 0, 1]]) 77 | DR = int(cenxy.cpu().numpy()[1]) 78 | DC = int(cenxy.cpu().numpy()[0]) 79 | depth = depth_[0].numpy() 80 | if depth[DR, DC] == 0: 81 | while depth[DR, DC] == 0: 82 | DR = min(max(0, DR + np.random.randint(-10, 10)), 480) 83 | DC = min(max(0, DC + np.random.randint(-10, 10)), 640) 84 | XC = [0, 0] 85 | XC[0] = np.float32(DC - K[0, 2]) * np.float32(depth[DR, DC] / K[0, 0]) 86 | XC[1] = np.float32(DR - K[1, 2]) * np.float32(depth[DR, DC] / K[1, 1]) 87 | cen_depth = np.zeros((1, 3)) 88 | cen_depth[0, 0:3] = [XC[0], XC[1], depth[DR, DC]] 89 | 90 | # Process detections 91 | for i, det in enumerate(pred): # detections per image 92 | 93 | p, s, im0 = path[0], '', im0s[0].numpy() 94 | mode = 'image' 95 | p = Path(p) # to Path 96 | 97 | s += '%gx%g ' % img.shape[2:] # print string 98 | 99 | if len(det): 100 | # Rescale boxes from img_size to im0 size 101 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() 102 | 103 | # Write results 104 | for *xyxy, conf, cls in reversed(det): 105 | 106 | label = f'{names[int(cls)]} {conf:.2f}' 107 | plot_one_box(xyxy, im0, label='', color=colors[int(cls)], line_thickness=3) 108 | 109 | dep3d = depth_2_mesh_bbx(depth, [det[0][1],det[0][3],det[0][0],det[0][2]], K) 110 | dep3d = dep3d[np.where(dep3d[:, 2]>0.0)] 111 | # show_mulit_mesh([dep3d]) 112 | dep3d = chooselimt_test(dep3d, 400, cen_depth) ##3 *N 113 | choice = np.random.choice(len(dep3d), 1500, replace=True) 114 | dep3d = dep3d[choice, :] 115 | # 116 | 117 | FS_Net_Test(dep3d, pc[0].numpy(), im0, Rt, Tt, classifier_seg3D, classifier_ce, 118 | classifier_Rot_green, 119 | classifier_Rot_red, 120 | 'laptop', model_size, cate_id0, num_cor=3) 121 | 122 | 123 | 124 | print(icc) 125 | 126 | 127 | def chooselimt_test(pts0, dia, cen): ##replace the 3D sphere with 3D cube 128 | 129 | pts = pts0.copy() 130 | pts = pts[np.where(pts[:, 2] > 20)[0], :] 131 | ptsn = pts[np.where(np.abs(pts[:, 2] - cen[:, 2].min()) < dia)[0], :] 132 | if ptsn.shape[0] < 1000: 133 | ptsn = pts[np.where(np.abs(pts[:, 2] - cen[:, 2].min()) < dia * 2)[0], :] 134 | if ptsn.shape[0] < 500: 135 | ptsn = pts[np.where(np.abs(pts[:, 2] - cen[:, 2].min()) < dia * 3)[0], :] 136 | return ptsn 137 | 138 | 139 | 140 | if __name__ == '__main__': 141 | parser = argparse.ArgumentParser() 142 | parser.add_argument('--weights', nargs='+', type=str, default='yolov5l.pt', help='model.pt path(s)') 143 | parser.add_argument('--source', type=str, default='', help='source') # file/folder, 0 for webcam 144 | parser.add_argument('--img-size', type=int, default=320, help='inference size (pixels)') 145 | parser.add_argument('--conf-thres', type=float, default=0.6, help='object confidence threshold') 146 | parser.add_argument('--iou-thres', 
type=float, default=0.45, help='IOU threshold for NMS') 147 | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') 148 | parser.add_argument('--view-img', action='store_true', help='display results') 149 | parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') 150 | parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') 151 | parser.add_argument('--nosave', action='store_true', help='do not save images/videos') 152 | parser.add_argument('--classes',default=63, nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3') 153 | parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') 154 | parser.add_argument('--augment', action='store_true', help='augmented inference') 155 | parser.add_argument('--update', action='store_true', help='update all models') 156 | parser.add_argument('--project', default='runs/detect', help='save results to project/name') 157 | parser.add_argument('--name', default='exp', help='save results to project/name') 158 | parser.add_argument('--exist-ok',default='False', action='store_true', help='existing project/name ok, ' 159 | 'do not increment') 160 | opt = parser.parse_args() 161 | print(opt) 162 | 163 | cate = 'laptop' 164 | fold = 'FS_Net/yolov3_fsnet/data/test_scene_1/' ##should be absolute path 165 | 166 | classifier_seg3D, classifier_ce, classifier_Rot_green, classifier_Rot_red, model_size, cate_id0 = load_models( 167 | cate) 168 | with torch.no_grad(): 169 | 170 | 171 | detect(opt, fold ,classifier_seg3D, classifier_ce, classifier_Rot_green, 172 | classifier_Rot_red, model_size, cate_id0) 173 | -------------------------------------------------------------------------------- /yolov3_fsnet/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/yolov3_fsnet/models/__init__.py -------------------------------------------------------------------------------- /yolov3_fsnet/models/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/yolov3_fsnet/models/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /yolov3_fsnet/models/__pycache__/common.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/yolov3_fsnet/models/__pycache__/common.cpython-36.pyc -------------------------------------------------------------------------------- /yolov3_fsnet/models/__pycache__/experimental.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/yolov3_fsnet/models/__pycache__/experimental.cpython-36.pyc -------------------------------------------------------------------------------- /yolov3_fsnet/models/__pycache__/yolo.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/yolov3_fsnet/models/__pycache__/yolo.cpython-36.pyc 
-------------------------------------------------------------------------------- /yolov3_fsnet/models/common.py: -------------------------------------------------------------------------------- 1 | # YOLOv3 common modules 2 | 3 | import math 4 | from copy import copy 5 | from pathlib import Path 6 | 7 | import numpy as np 8 | import pandas as pd 9 | import requests 10 | import torch 11 | import torch.nn as nn 12 | from PIL import Image 13 | # from torch.cuda import amp 14 | # from apex import amp 15 | 16 | 17 | from yolov3_fsnet.utils.datasets import letterbox 18 | from yolov3_fsnet.utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh 19 | from yolov3_fsnet.utils.plots import color_list, plot_one_box 20 | from yolov3_fsnet.utils.torch_utils import time_synchronized 21 | 22 | 23 | def autopad(k, p=None): # kernel, padding 24 | # Pad to 'same' 25 | if p is None: 26 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad 27 | return p 28 | 29 | 30 | def DWConv(c1, c2, k=1, s=1, act=True): 31 | # Depthwise convolution 32 | return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act) 33 | 34 | 35 | class Conv(nn.Module): 36 | # Standard convolution 37 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 38 | super(Conv, self).__init__() 39 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) 40 | self.bn = nn.BatchNorm2d(c2) 41 | self.act = nn.LeakyReLU(0.1) if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) 42 | 43 | def forward(self, x): 44 | return self.act(self.bn(self.conv(x))) 45 | 46 | def fuseforward(self, x): 47 | return self.act(self.conv(x)) 48 | 49 | 50 | class TransformerLayer(nn.Module): 51 | # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance) 52 | def __init__(self, c, num_heads): 53 | super().__init__() 54 | self.q = nn.Linear(c, c, bias=False) 55 | self.k = nn.Linear(c, c, bias=False) 56 | self.v = nn.Linear(c, c, bias=False) 57 | self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads) 58 | self.fc1 = nn.Linear(c, c, bias=False) 59 | self.fc2 = nn.Linear(c, c, bias=False) 60 | 61 | def forward(self, x): 62 | x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x 63 | x = self.fc2(self.fc1(x)) + x 64 | return x 65 | 66 | 67 | class TransformerBlock(nn.Module): 68 | # Vision Transformer https://arxiv.org/abs/2010.11929 69 | def __init__(self, c1, c2, num_heads, num_layers): 70 | super().__init__() 71 | self.conv = None 72 | if c1 != c2: 73 | self.conv = Conv(c1, c2) 74 | self.linear = nn.Linear(c2, c2) # learnable position embedding 75 | self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)]) 76 | self.c2 = c2 77 | 78 | def forward(self, x): 79 | if self.conv is not None: 80 | x = self.conv(x) 81 | b, _, w, h = x.shape 82 | p = x.flatten(2) 83 | p = p.unsqueeze(0) 84 | p = p.transpose(0, 3) 85 | p = p.squeeze(3) 86 | e = self.linear(p) 87 | x = p + e 88 | 89 | x = self.tr(x) 90 | x = x.unsqueeze(3) 91 | x = x.transpose(0, 3) 92 | x = x.reshape(b, self.c2, w, h) 93 | return x 94 | 95 | 96 | class Bottleneck(nn.Module): 97 | # Standard bottleneck 98 | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion 99 | super(Bottleneck, self).__init__() 100 | c_ = int(c2 * e) # hidden channels 101 | self.cv1 = Conv(c1, c_, 1, 1) 102 | self.cv2 = Conv(c_, c2, 3, 1, g=g) 103 | self.add = shortcut and c1 
== c2 104 | 105 | def forward(self, x): 106 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 107 | 108 | 109 | class BottleneckCSP(nn.Module): 110 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks 111 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 112 | super(BottleneckCSP, self).__init__() 113 | c_ = int(c2 * e) # hidden channels 114 | self.cv1 = Conv(c1, c_, 1, 1) 115 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) 116 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) 117 | self.cv4 = Conv(2 * c_, c2, 1, 1) 118 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) 119 | self.act = nn.LeakyReLU(0.1, inplace=True) 120 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 121 | 122 | def forward(self, x): 123 | y1 = self.cv3(self.m(self.cv1(x))) 124 | y2 = self.cv2(x) 125 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) 126 | 127 | 128 | class C3(nn.Module): 129 | # CSP Bottleneck with 3 convolutions 130 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 131 | super(C3, self).__init__() 132 | c_ = int(c2 * e) # hidden channels 133 | self.cv1 = Conv(c1, c_, 1, 1) 134 | self.cv2 = Conv(c1, c_, 1, 1) 135 | self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2) 136 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 137 | # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)]) 138 | 139 | def forward(self, x): 140 | return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1)) 141 | 142 | 143 | class C3TR(C3): 144 | # C3 module with TransformerBlock() 145 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): 146 | super().__init__(c1, c2, n, shortcut, g, e) 147 | c_ = int(c2 * e) 148 | self.m = TransformerBlock(c_, c_, 4, n) 149 | 150 | 151 | class SPP(nn.Module): 152 | # Spatial pyramid pooling layer used in YOLOv3-SPP 153 | def __init__(self, c1, c2, k=(5, 9, 13)): 154 | super(SPP, self).__init__() 155 | c_ = c1 // 2 # hidden channels 156 | self.cv1 = Conv(c1, c_, 1, 1) 157 | self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) 158 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) 159 | 160 | def forward(self, x): 161 | x = self.cv1(x) 162 | return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) 163 | 164 | 165 | class Focus(nn.Module): 166 | # Focus wh information into c-space 167 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 168 | super(Focus, self).__init__() 169 | self.conv = Conv(c1 * 4, c2, k, s, p, g, act) 170 | # self.contract = Contract(gain=2) 171 | 172 | def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) 173 | return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)) 174 | # return self.conv(self.contract(x)) 175 | 176 | 177 | class Contract(nn.Module): 178 | # Contract width-height into channels, i.e. 
x(1,64,80,80) to x(1,256,40,40) 179 | def __init__(self, gain=2): 180 | super().__init__() 181 | self.gain = gain 182 | 183 | def forward(self, x): 184 | N, C, H, W = x.size() # assert (H / s == 0) and (W / s == 0), 'Indivisible gain' 185 | s = self.gain 186 | x = x.view(N, C, H // s, s, W // s, s) # x(1,64,40,2,40,2) 187 | x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40) 188 | return x.view(N, C * s * s, H // s, W // s) # x(1,256,40,40) 189 | 190 | 191 | class Expand(nn.Module): 192 | # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160) 193 | def __init__(self, gain=2): 194 | super().__init__() 195 | self.gain = gain 196 | 197 | def forward(self, x): 198 | N, C, H, W = x.size() # assert C / s ** 2 == 0, 'Indivisible gain' 199 | s = self.gain 200 | x = x.view(N, s, s, C // s ** 2, H, W) # x(1,2,2,16,80,80) 201 | x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2) 202 | return x.view(N, C // s ** 2, H * s, W * s) # x(1,16,160,160) 203 | 204 | 205 | class Concat(nn.Module): 206 | # Concatenate a list of tensors along dimension 207 | def __init__(self, dimension=1): 208 | super(Concat, self).__init__() 209 | self.d = dimension 210 | 211 | def forward(self, x): 212 | return torch.cat(x, self.d) 213 | 214 | 215 | class NMS(nn.Module): 216 | # Non-Maximum Suppression (NMS) module 217 | conf = 0.25 # confidence threshold 218 | iou = 0.45 # IoU threshold 219 | classes = None # (optional list) filter by class 220 | 221 | def __init__(self): 222 | super(NMS, self).__init__() 223 | 224 | def forward(self, x): 225 | return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) 226 | 227 | 228 | class autoShape(nn.Module): 229 | # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS 230 | conf = 0.25 # NMS confidence threshold 231 | iou = 0.45 # NMS IoU threshold 232 | classes = None # (optional list) filter by class 233 | 234 | def __init__(self, model): 235 | super(autoShape, self).__init__() 236 | self.model = model.eval() 237 | 238 | def autoshape(self): 239 | print('autoShape already enabled, skipping... ') # model already converted to model.autoshape() 240 | return self 241 | 242 | @torch.no_grad() 243 | def forward(self, imgs, size=640, augment=False, profile=False): 244 | # Inference from various sources. For height=640, width=1280, RGB images example inputs are: 245 | # filename: imgs = 'data/samples/zidane.jpg' 246 | # URI: = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg' 247 | # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3) 248 | # PIL: = Image.open('image.jpg') # HWC x(640,1280,3) 249 | # numpy: = np.zeros((640,1280,3)) # HWC 250 | # torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values) 251 | # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] 
# list of images 252 | 253 | t = [time_synchronized()] 254 | p = next(self.model.parameters()) # for device and type 255 | if isinstance(imgs, torch.Tensor): # torch 256 | with amp.autocast(enabled=p.device.type != 'cpu'): 257 | return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference 258 | 259 | # Pre-process 260 | n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs]) # number of images, list of images 261 | shape0, shape1, files = [], [], [] # image and inference shapes, filenames 262 | for i, im in enumerate(imgs): 263 | f = f'image{i}' # filename 264 | if isinstance(im, str): # filename or uri 265 | im, f = np.asarray(Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im)), im 266 | elif isinstance(im, Image.Image): # PIL Image 267 | im, f = np.asarray(im), getattr(im, 'filename', f) or f 268 | files.append(Path(f).with_suffix('.jpg').name) 269 | if im.shape[0] < 5: # image in CHW 270 | im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1) 271 | im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3) # enforce 3ch input 272 | s = im.shape[:2] # HWC 273 | shape0.append(s) # image shape 274 | g = (size / max(s)) # gain 275 | shape1.append([y * g for y in s]) 276 | imgs[i] = im # update 277 | shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape 278 | x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs] # pad 279 | x = np.stack(x, 0) if n > 1 else x[0][None] # stack 280 | x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW 281 | x = torch.from_numpy(x).to(p.device).type_as(p) / 255. # uint8 to fp16/32 282 | t.append(time_synchronized()) 283 | 284 | with amp.autocast(enabled=p.device.type != 'cpu'): 285 | # Inference 286 | y = self.model(x, augment, profile)[0] # forward 287 | t.append(time_synchronized()) 288 | 289 | # Post-process 290 | y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) # NMS 291 | for i in range(n): 292 | scale_coords(shape1, y[i][:, :4], shape0[i]) 293 | 294 | t.append(time_synchronized()) 295 | return Detections(imgs, y, files, t, self.names, x.shape) 296 | 297 | 298 | class Detections: 299 | # detections class for YOLOv3 inference results 300 | def __init__(self, imgs, pred, files, times=None, names=None, shape=None): 301 | super(Detections, self).__init__() 302 | d = pred[0].device # device 303 | gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs] # normalizations 304 | self.imgs = imgs # list of images as numpy arrays 305 | self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls) 306 | self.names = names # class names 307 | self.files = files # image filenames 308 | self.xyxy = pred # xyxy pixels 309 | self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels 310 | self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized 311 | self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized 312 | self.n = len(self.pred) # number of images (batch size) 313 | self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) # timestamps (ms) 314 | self.s = shape # inference BCHW shape 315 | 316 | def display(self, pprint=False, show=False, save=False, render=False, save_dir=''): 317 | colors = color_list() 318 | for i, (img, pred) in enumerate(zip(self.imgs, self.pred)): 319 | str = f'image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} ' 320 | if pred is not None: 321 | for c in 
pred[:, -1].unique(): 322 | n = (pred[:, -1] == c).sum() # detections per class 323 | str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string 324 | if show or save or render: 325 | for *box, conf, cls in pred: # xyxy, confidence, class 326 | label = f'{self.names[int(cls)]} {conf:.2f}' 327 | plot_one_box(box, img, label=label, color=colors[int(cls) % 10]) 328 | img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img # from np 329 | if pprint: 330 | print(str.rstrip(', ')) 331 | if show: 332 | img.show(self.files[i]) # show 333 | if save: 334 | f = self.files[i] 335 | img.save(Path(save_dir) / f) # save 336 | print(f"{'Saved' * (i == 0)} {f}", end=',' if i < self.n - 1 else f' to {save_dir}\n') 337 | if render: 338 | self.imgs[i] = np.asarray(img) 339 | 340 | def print(self): 341 | self.display(pprint=True) # print results 342 | print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t) 343 | 344 | def show(self): 345 | self.display(show=True) # show results 346 | 347 | def save(self, save_dir='runs/hub/exp'): 348 | save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/hub/exp') # increment save_dir 349 | Path(save_dir).mkdir(parents=True, exist_ok=True) 350 | self.display(save=True, save_dir=save_dir) # save results 351 | 352 | def render(self): 353 | self.display(render=True) # render results 354 | return self.imgs 355 | 356 | def pandas(self): 357 | # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0]) 358 | new = copy(self) # return copy 359 | ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns 360 | cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns 361 | for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]): 362 | a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update 363 | setattr(new, k, [pd.DataFrame(x, columns=c) for x in a]) 364 | return new 365 | 366 | def tolist(self): 367 | # return a list of Detections objects, i.e. 'for result in results.tolist():' 368 | x = [Detections([self.imgs[i]], [self.pred[i]], self.names, self.s) for i in range(self.n)] 369 | for d in x: 370 | for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']: 371 | setattr(d, k, getattr(d, k)[0]) # pop out of list 372 | return x 373 | 374 | def __len__(self): 375 | return self.n 376 | 377 | 378 | class Classify(nn.Module): 379 | # Classification head, i.e. 
x(b,c1,20,20) to x(b,c2) 380 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups 381 | super(Classify, self).__init__() 382 | self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1) 383 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1) 384 | self.flat = nn.Flatten() 385 | 386 | def forward(self, x): 387 | z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list 388 | return self.flat(self.conv(z)) # flatten to x(b,c2) 389 | -------------------------------------------------------------------------------- /yolov3_fsnet/models/experimental.py: -------------------------------------------------------------------------------- 1 | # YOLOv3 experimental modules 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from yolov3_fsnet.models.common import Conv, DWConv 8 | from yolov3_fsnet.utils.google_utils import attempt_download 9 | 10 | 11 | class CrossConv(nn.Module): 12 | # Cross Convolution Downsample 13 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 14 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 15 | super(CrossConv, self).__init__() 16 | c_ = int(c2 * e) # hidden channels 17 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 18 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 19 | self.add = shortcut and c1 == c2 20 | 21 | def forward(self, x): 22 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 23 | 24 | 25 | class Sum(nn.Module): 26 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 27 | def __init__(self, n, weight=False): # n: number of inputs 28 | super(Sum, self).__init__() 29 | self.weight = weight # apply weights boolean 30 | self.iter = range(n - 1) # iter object 31 | if weight: 32 | self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights 33 | 34 | def forward(self, x): 35 | y = x[0] # no weight 36 | if self.weight: 37 | w = torch.sigmoid(self.w) * 2 38 | for i in self.iter: 39 | y = y + x[i + 1] * w[i] 40 | else: 41 | for i in self.iter: 42 | y = y + x[i + 1] 43 | return y 44 | 45 | 46 | class GhostConv(nn.Module): 47 | # Ghost Convolution https://github.com/huawei-noah/ghostnet 48 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups 49 | super(GhostConv, self).__init__() 50 | c_ = c2 // 2 # hidden channels 51 | self.cv1 = Conv(c1, c_, k, s, None, g, act) 52 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) 53 | 54 | def forward(self, x): 55 | y = self.cv1(x) 56 | return torch.cat([y, self.cv2(y)], 1) 57 | 58 | 59 | class GhostBottleneck(nn.Module): 60 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet 61 | def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride 62 | super(GhostBottleneck, self).__init__() 63 | c_ = c2 // 2 64 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw 65 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 66 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear 67 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), 68 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() 69 | 70 | def forward(self, x): 71 | return self.conv(x) + self.shortcut(x) 72 | 73 | 74 | class MixConv2d(nn.Module): 75 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 76 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 77 | super(MixConv2d, self).__init__() 78 | groups = len(k) 79 | if equal_ch: # equal c_ per group 80 | i = 
torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices 81 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 82 | else: # equal weight.numel() per group 83 | b = [c2] + [0] * groups 84 | a = np.eye(groups + 1, groups, k=-1) 85 | a -= np.roll(a, 1, axis=1) 86 | a *= np.array(k) ** 2 87 | a[0] = 1 88 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 89 | 90 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) 91 | self.bn = nn.BatchNorm2d(c2) 92 | self.act = nn.LeakyReLU(0.1, inplace=True) 93 | 94 | def forward(self, x): 95 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 96 | 97 | 98 | class Ensemble(nn.ModuleList): 99 | # Ensemble of models 100 | def __init__(self): 101 | super(Ensemble, self).__init__() 102 | 103 | def forward(self, x, augment=False): 104 | y = [] 105 | for module in self: 106 | y.append(module(x, augment)[0]) 107 | # y = torch.stack(y).max(0)[0] # max ensemble 108 | # y = torch.stack(y).mean(0) # mean ensemble 109 | y = torch.cat(y, 1) # nms ensemble 110 | return y, None # inference, train output 111 | 112 | 113 | def attempt_load(weights, map_location=None): 114 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 115 | model = Ensemble() 116 | for w in weights if isinstance(weights, list) else [weights]: 117 | attempt_download(w) 118 | ckpt = torch.load(w, map_location=map_location) # load 119 | model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval()) # FP32 model 120 | 121 | # Compatibility updates 122 | for m in model.modules(): 123 | if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: 124 | m.inplace = True # pytorch 1.7.0 compatibility 125 | elif type(m) is Conv: 126 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 127 | 128 | if len(model) == 1: 129 | return model[-1] # return model 130 | else: 131 | print('Ensemble created with %s\n' % weights) 132 | for k in ['names', 'stride']: 133 | setattr(model, k, getattr(model[-1], k)) 134 | return model # return ensemble 135 | -------------------------------------------------------------------------------- /yolov3_fsnet/models/export.py: -------------------------------------------------------------------------------- 1 | """Exports a YOLOv3 *.pt model to ONNX and TorchScript formats 2 | 3 | Usage: 4 | $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov3.pt --img 640 --batch 1 5 | """ 6 | 7 | import argparse 8 | import sys 9 | import time 10 | 11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | import models 17 | from models.experimental import attempt_load 18 | from utils.activations import Hardswish, SiLU 19 | from utils.general import set_logging, check_img_size 20 | from utils.torch_utils import select_device 21 | 22 | if __name__ == '__main__': 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument('--weights', type=str, default='./yolov3.pt', help='weights path') # from yolov3/models/ 25 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width 26 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 27 | parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes') 28 | parser.add_argument('--grid', action='store_true', help='export Detect() layer grid') 29 | 
parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') 30 | opt = parser.parse_args() 31 | opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand 32 | print(opt) 33 | set_logging() 34 | t = time.time() 35 | 36 | # Load PyTorch model 37 | device = select_device(opt.device) 38 | model = attempt_load(opt.weights, map_location=device) # load FP32 model 39 | labels = model.names 40 | 41 | # Checks 42 | gs = int(max(model.stride)) # grid size (max stride) 43 | opt.img_size = [check_img_size(x, gs) for x in opt.img_size] # verify img_size are gs-multiples 44 | 45 | # Input 46 | img = torch.zeros(opt.batch_size, 3, *opt.img_size).to(device) # image size(1,3,320,192) iDetection 47 | 48 | # Update model 49 | for k, m in model.named_modules(): 50 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 51 | if isinstance(m, models.common.Conv): # assign export-friendly activations 52 | if isinstance(m.act, nn.Hardswish): 53 | m.act = Hardswish() 54 | elif isinstance(m.act, nn.SiLU): 55 | m.act = SiLU() 56 | # elif isinstance(m, models.yolo.Detect): 57 | # m.forward = m.forward_export # assign forward (optional) 58 | model.model[-1].export = not opt.grid # set Detect() layer grid export 59 | y = model(img) # dry run 60 | 61 | # TorchScript export 62 | try: 63 | print('\nStarting TorchScript export with torch %s...' % torch.__version__) 64 | f = opt.weights.replace('.pt', '.torchscript.pt') # filename 65 | ts = torch.jit.trace(model, img, strict=False) 66 | ts.save(f) 67 | print('TorchScript export success, saved as %s' % f) 68 | except Exception as e: 69 | print('TorchScript export failure: %s' % e) 70 | 71 | # ONNX export 72 | try: 73 | import onnx 74 | 75 | print('\nStarting ONNX export with onnx %s...' % onnx.__version__) 76 | f = opt.weights.replace('.pt', '.onnx') # filename 77 | torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'], 78 | output_names=['classes', 'boxes'] if y is None else ['output'], 79 | dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'}, # size(1,3,640,640) 80 | 'output': {0: 'batch', 2: 'y', 3: 'x'}} if opt.dynamic else None) 81 | 82 | # Checks 83 | onnx_model = onnx.load(f) # load onnx model 84 | onnx.checker.check_model(onnx_model) # check onnx model 85 | # print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model 86 | print('ONNX export success, saved as %s' % f) 87 | except Exception as e: 88 | print('ONNX export failure: %s' % e) 89 | 90 | # CoreML export 91 | try: 92 | import coremltools as ct 93 | 94 | print('\nStarting CoreML export with coremltools %s...' % ct.__version__) 95 | # convert model from torchscript and apply pixel scaling as per detect.py 96 | model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])]) 97 | f = opt.weights.replace('.pt', '.mlmodel') # filename 98 | model.save(f) 99 | print('CoreML export success, saved as %s' % f) 100 | except Exception as e: 101 | print('CoreML export failure: %s' % e) 102 | 103 | # Finish 104 | print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' 
% (time.time() - t)) 105 | -------------------------------------------------------------------------------- /yolov3_fsnet/models/yolo.py: -------------------------------------------------------------------------------- 1 | # YOLOv3 YOLO-specific modules 2 | 3 | import argparse 4 | import logging 5 | import sys 6 | from copy import deepcopy 7 | import matplotlib; matplotlib.use('TkAgg') 8 | import matplotlib.pyplot as plt 9 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 10 | logger = logging.getLogger(__name__) 11 | import cv2 12 | from yolov3_fsnet.models.common import * 13 | from yolov3_fsnet.models.experimental import * 14 | from yolov3_fsnet.utils.autoanchor import check_anchor_order 15 | from yolov3_fsnet.utils.general import make_divisible, check_file, set_logging 16 | from yolov3_fsnet.utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \ 17 | select_device, copy_attr 18 | 19 | try: 20 | import thop # for FLOPS computation 21 | except ImportError: 22 | thop = None 23 | 24 | 25 | class Detect(nn.Module): 26 | stride = None # strides computed during build 27 | export = False # onnx export 28 | 29 | def __init__(self, nc=80, anchors=(), ch=()): # detection layer 30 | super(Detect, self).__init__() 31 | self.nc = nc # number of classes 32 | self.no = nc + 5 # number of outputs per anchor 33 | self.nl = len(anchors) # number of detection layers 34 | self.na = len(anchors[0]) // 2 # number of anchors 35 | self.grid = [torch.zeros(1)] * self.nl # init grid 36 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 37 | self.register_buffer('anchors', a) # shape(nl,na,2) 38 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 39 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 40 | 41 | def forward(self, x): 42 | # x = x.copy() # for profiling 43 | z = [] # inference output 44 | self.training |= self.export 45 | 46 | for i in range(self.nl): 47 | x[i] = self.m[i](x[i]) # conv 48 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 49 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 50 | 51 | if not self.training: # inference 52 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 53 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 54 | 55 | y = x[i].sigmoid() 56 | y[..., 0:2] = (y[..., 0:2] * 2. 
- 0.5 + self.grid[i]) * self.stride[i] # xy 57 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 58 | z.append(y.view(bs, -1, self.no)) 59 | idd=68 60 | xmap = ((x[0][0,:,:,:,idd]-x[0][0,:,:,:,idd].min())/(x[0][0,:,:,:,idd].max()-x[0][0,:,:,:,idd].min())).transpose( 61 | 0,2).transpose(0,1).cpu().float().numpy() 62 | return x if self.training else (torch.cat(z, 1), x) 63 | 64 | @staticmethod 65 | def _make_grid(nx=20, ny=20): 66 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 67 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 68 | 69 | 70 | class Model(nn.Module): 71 | def __init__(self, cfg='yolov3.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes 72 | super(Model, self).__init__() 73 | if isinstance(cfg, dict): 74 | self.yaml = cfg # model dict 75 | else: # is *.yaml 76 | import yaml # for torch hub 77 | self.yaml_file = Path(cfg).name 78 | with open(cfg) as f: 79 | self.yaml = yaml.load(f, Loader=yaml.SafeLoader) # model dict 80 | 81 | # Define model 82 | ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels 83 | if nc and nc != self.yaml['nc']: 84 | logger.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") 85 | self.yaml['nc'] = nc # override yaml value 86 | if anchors: 87 | logger.info(f'Overriding model.yaml anchors with anchors={anchors}') 88 | self.yaml['anchors'] = round(anchors) # override yaml value 89 | self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist 90 | self.names = [str(i) for i in range(self.yaml['nc'])] # default names 91 | # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) 92 | 93 | # Build strides, anchors 94 | m = self.model[-1] # Detect() 95 | if isinstance(m, Detect): 96 | s = 256 # 2x min stride 97 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward 98 | m.anchors /= m.stride.view(-1, 1, 1) 99 | check_anchor_order(m) 100 | self.stride = m.stride 101 | self._initialize_biases() # only run once 102 | # print('Strides: %s' % m.stride.tolist()) 103 | 104 | # Init weights, biases 105 | initialize_weights(self) 106 | self.info() 107 | logger.info('') 108 | 109 | def forward(self, x, augment=False, profile=False): 110 | if augment: 111 | img_size = x.shape[-2:] # height, width 112 | s = [1, 0.83, 0.67] # scales 113 | f = [None, 3, None] # flips (2-ud, 3-lr) 114 | y = [] # outputs 115 | for si, fi in zip(s, f): 116 | xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) 117 | yi = self.forward_once(xi)[0] # forward 118 | # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save 119 | yi[..., :4] /= si # de-scale 120 | if fi == 2: 121 | yi[..., 1] = img_size[0] - yi[..., 1] # de-flip ud 122 | elif fi == 3: 123 | yi[..., 0] = img_size[1] - yi[..., 0] # de-flip lr 124 | y.append(yi) 125 | return torch.cat(y, 1), None # augmented inference, train 126 | else: 127 | return self.forward_once(x, profile) # single-scale inference, train 128 | 129 | def forward_once(self, x, profile=False): 130 | y, dt = [], [] # outputs 131 | for m in self.model: 132 | if m.f != -1: # if not from previous layer 133 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers 134 | 135 | if profile: 136 | o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS 137 | t = time_synchronized() 138 | for _ in range(10): 139 | _ = m(x) 140 | dt.append((time_synchronized() - t) * 
100) 141 | print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type)) 142 | 143 | x = m(x) # run 144 | y.append(x if m.i in self.save else None) # save output 145 | 146 | if profile: 147 | print('%.1fms total' % sum(dt)) 148 | return x 149 | 150 | def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency 151 | # https://arxiv.org/abs/1708.02002 section 3.3 152 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 153 | m = self.model[-1] # Detect() module 154 | for mi, s in zip(m.m, m.stride): # from 155 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 156 | b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 157 | b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 158 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 159 | 160 | def _print_biases(self): 161 | m = self.model[-1] # Detect() module 162 | for mi in m.m: # from 163 | b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) 164 | print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) 165 | 166 | # def _print_weights(self): 167 | # for m in self.model.modules(): 168 | # if type(m) is Bottleneck: 169 | # print('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights 170 | 171 | def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers 172 | print('Fusing layers... ') 173 | for m in self.model.modules(): 174 | if type(m) is Conv and hasattr(m, 'bn'): 175 | m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv 176 | delattr(m, 'bn') # remove batchnorm 177 | m.forward = m.fuseforward # update forward 178 | self.info() 179 | return self 180 | 181 | def nms(self, mode=True): # add or remove NMS module 182 | present = type(self.model[-1]) is NMS # last layer is NMS 183 | if mode and not present: 184 | print('Adding NMS... ') 185 | m = NMS() # module 186 | m.f = -1 # from 187 | m.i = self.model[-1].i + 1 # index 188 | self.model.add_module(name='%s' % m.i, module=m) # add 189 | self.eval() 190 | elif not mode and present: 191 | print('Removing NMS... ') 192 | self.model = self.model[:-1] # remove 193 | return self 194 | 195 | def autoshape(self): # add autoShape module 196 | print('Adding autoShape... 
') 197 | m = autoShape(self) # wrap model 198 | copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes 199 | return m 200 | 201 | def info(self, verbose=False, img_size=640): # print model information 202 | model_info(self, verbose, img_size) 203 | 204 | 205 | def parse_model(d, ch): # model_dict, input_channels(3) 206 | logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) 207 | anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] 208 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors 209 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5) 210 | 211 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 212 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args 213 | m = eval(m) if isinstance(m, str) else m # eval strings 214 | for j, a in enumerate(args): 215 | try: 216 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 217 | except: 218 | pass 219 | 220 | n = max(round(n * gd), 1) if n > 1 else n # depth gain 221 | if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, 222 | C3, C3TR]: 223 | c1, c2 = ch[f], args[0] 224 | if c2 != no: # if not output 225 | c2 = make_divisible(c2 * gw, 8) 226 | 227 | args = [c1, c2, *args[1:]] 228 | if m in [BottleneckCSP, C3, C3TR]: 229 | args.insert(2, n) # number of repeats 230 | n = 1 231 | elif m is nn.BatchNorm2d: 232 | args = [ch[f]] 233 | elif m is Concat: 234 | c2 = sum([ch[x] for x in f]) 235 | elif m is Detect: 236 | args.append([ch[x] for x in f]) 237 | if isinstance(args[1], int): # number of anchors 238 | args[1] = [list(range(args[1] * 2))] * len(f) 239 | elif m is Contract: 240 | c2 = ch[f] * args[0] ** 2 241 | elif m is Expand: 242 | c2 = ch[f] // args[0] ** 2 243 | else: 244 | c2 = ch[f] 245 | 246 | m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module 247 | t = str(m)[8:-2].replace('__main__.', '') # module type 248 | np = sum([x.numel() for x in m_.parameters()]) # number params 249 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params 250 | logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print 251 | save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist 252 | layers.append(m_) 253 | if i == 0: 254 | ch = [] 255 | ch.append(c2) 256 | return nn.Sequential(*layers), sorted(save) 257 | 258 | 259 | if __name__ == '__main__': 260 | parser = argparse.ArgumentParser() 261 | parser.add_argument('--cfg', type=str, default='yolov3.yaml', help='model.yaml') 262 | parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') 263 | opt = parser.parse_args() 264 | opt.cfg = check_file(opt.cfg) # check file 265 | set_logging() 266 | device = select_device(opt.device) 267 | 268 | # Create model 269 | model = Model(opt.cfg).to(device) 270 | model.train() 271 | 272 | # Profile 273 | # img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device) 274 | # y = model(img, profile=True) 275 | 276 | # Tensorboard 277 | # from torch.utils.tensorboard import SummaryWriter 278 | # tb_writer = SummaryWriter() 279 | # print("Run 'tensorboard --logdir=models/runs' to view tensorboard at http://localhost:6006/") 280 | # tb_writer.add_graph(model.model, img) # add model to tensorboard 281 | # tb_writer.add_image('test', img[0], dataformats='CWH') # add model to tensorboard 282 | -------------------------------------------------------------------------------- /yolov3_fsnet/models/yolov3-spp.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3-SPP head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, SPP, [512, [5, 9, 13]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /yolov3_fsnet/models/yolov3-tiny.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,14, 23,27, 37,58] # P4/16 9 | - [81,82, 135,169, 344,319] # P5/32 10 | 11 | # YOLOv3-tiny backbone 12 | backbone: 13 | # [from, number, module, args] 14 | [[-1, 1, Conv, [16, 3, 1]], # 0 15 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2 16 | [-1, 1, Conv, [32, 3, 1]], 17 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4 18 | [-1, 1, Conv, [64, 3, 1]], 19 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8 20 | [-1, 1, Conv, [128, 3, 1]], 21 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16 22 | [-1, 1, Conv, [256, 3, 1]], 23 | [-1, 
1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32 24 | [-1, 1, Conv, [512, 3, 1]], 25 | [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11 26 | [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12 27 | ] 28 | 29 | # YOLOv3-tiny head 30 | head: 31 | [[-1, 1, Conv, [1024, 3, 1]], 32 | [-1, 1, Conv, [256, 1, 1]], 33 | [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large) 34 | 35 | [-2, 1, Conv, [128, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 38 | [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium) 39 | 40 | [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5) 41 | ] 42 | -------------------------------------------------------------------------------- /yolov3_fsnet/models/yolov3.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3 head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, Conv, [512, [1, 1]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /yolov3_fsnet/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/yolov3_fsnet/utils/__init__.py -------------------------------------------------------------------------------- /yolov3_fsnet/utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/yolov3_fsnet/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /yolov3_fsnet/utils/__pycache__/autoanchor.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/yolov3_fsnet/utils/__pycache__/autoanchor.cpython-36.pyc 
-------------------------------------------------------------------------------- /yolov3_fsnet/utils/__pycache__/datasets.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/yolov3_fsnet/utils/__pycache__/datasets.cpython-36.pyc -------------------------------------------------------------------------------- /yolov3_fsnet/utils/__pycache__/general.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/yolov3_fsnet/utils/__pycache__/general.cpython-36.pyc -------------------------------------------------------------------------------- /yolov3_fsnet/utils/__pycache__/google_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/yolov3_fsnet/utils/__pycache__/google_utils.cpython-36.pyc -------------------------------------------------------------------------------- /yolov3_fsnet/utils/__pycache__/metrics.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/yolov3_fsnet/utils/__pycache__/metrics.cpython-36.pyc -------------------------------------------------------------------------------- /yolov3_fsnet/utils/__pycache__/plots.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/yolov3_fsnet/utils/__pycache__/plots.cpython-36.pyc -------------------------------------------------------------------------------- /yolov3_fsnet/utils/__pycache__/torch_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/yolov3_fsnet/utils/__pycache__/torch_utils.cpython-36.pyc -------------------------------------------------------------------------------- /yolov3_fsnet/utils/activations.py: -------------------------------------------------------------------------------- 1 | # Activation functions 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | # SiLU https://arxiv.org/pdf/1606.08415.pdf ---------------------------------------------------------------------------- 9 | class SiLU(nn.Module): # export-friendly version of nn.SiLU() 10 | @staticmethod 11 | def forward(x): 12 | return x * torch.sigmoid(x) 13 | 14 | 15 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() 16 | @staticmethod 17 | def forward(x): 18 | # return x * F.hardsigmoid(x) # for torchscript and CoreML 19 | return x * F.hardtanh(x + 3, 0., 6.) / 6. 
# for torchscript, CoreML and ONNX 20 | 21 | 22 | class MemoryEfficientSwish(nn.Module): 23 | class F(torch.autograd.Function): 24 | @staticmethod 25 | def forward(ctx, x): 26 | ctx.save_for_backward(x) 27 | return x * torch.sigmoid(x) 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | x = ctx.saved_tensors[0] 32 | sx = torch.sigmoid(x) 33 | return grad_output * (sx * (1 + x * (1 - sx))) 34 | 35 | def forward(self, x): 36 | return self.F.apply(x) 37 | 38 | 39 | # Mish https://github.com/digantamisra98/Mish -------------------------------------------------------------------------- 40 | class Mish(nn.Module): 41 | @staticmethod 42 | def forward(x): 43 | return x * F.softplus(x).tanh() 44 | 45 | 46 | class MemoryEfficientMish(nn.Module): 47 | class F(torch.autograd.Function): 48 | @staticmethod 49 | def forward(ctx, x): 50 | ctx.save_for_backward(x) 51 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 52 | 53 | @staticmethod 54 | def backward(ctx, grad_output): 55 | x = ctx.saved_tensors[0] 56 | sx = torch.sigmoid(x) 57 | fx = F.softplus(x).tanh() 58 | return grad_output * (fx + x * sx * (1 - fx * fx)) 59 | 60 | def forward(self, x): 61 | return self.F.apply(x) 62 | 63 | 64 | # FReLU https://arxiv.org/abs/2007.11824 ------------------------------------------------------------------------------- 65 | class FReLU(nn.Module): 66 | def __init__(self, c1, k=3): # ch_in, kernel 67 | super().__init__() 68 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 69 | self.bn = nn.BatchNorm2d(c1) 70 | 71 | def forward(self, x): 72 | return torch.max(x, self.bn(self.conv(x))) 73 | -------------------------------------------------------------------------------- /yolov3_fsnet/utils/autoanchor.py: -------------------------------------------------------------------------------- 1 | # Auto-anchor utils 2 | 3 | import numpy as np 4 | import torch 5 | import yaml 6 | from scipy.cluster.vq import kmeans 7 | from tqdm import tqdm 8 | 9 | from utils.general import colorstr 10 | 11 | 12 | def check_anchor_order(m): 13 | # Check anchor order against stride order for YOLOv3 Detect() module m, and correct if necessary 14 | a = m.anchor_grid.prod(-1).view(-1) # anchor area 15 | da = a[-1] - a[0] # delta a 16 | ds = m.stride[-1] - m.stride[0] # delta s 17 | if da.sign() != ds.sign(): # same order 18 | print('Reversing anchor order') 19 | m.anchors[:] = m.anchors.flip(0) 20 | m.anchor_grid[:] = m.anchor_grid.flip(0) 21 | 22 | 23 | def check_anchors(dataset, model, thr=4.0, imgsz=640): 24 | # Check anchor fit to data, recompute if necessary 25 | prefix = colorstr('autoanchor: ') 26 | print(f'\n{prefix}Analyzing anchors... ', end='') 27 | m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect() 28 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) 29 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale 30 | wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh 31 | 32 | def metric(k): # compute metric 33 | r = wh[:, None] / k[None] 34 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 35 | best = x.max(1)[0] # best_x 36 | aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold 37 | bpr = (best > 1. 
/ thr).float().mean() # best possible recall 38 | return bpr, aat 39 | 40 | anchors = m.anchor_grid.clone().cpu().view(-1, 2) # current anchors 41 | bpr, aat = metric(anchors) 42 | print(f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', end='') 43 | if bpr < 0.98: # threshold to recompute 44 | print('. Attempting to improve anchors, please wait...') 45 | na = m.anchor_grid.numel() // 2 # number of anchors 46 | try: 47 | anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) 48 | except Exception as e: 49 | print(f'{prefix}ERROR: {e}') 50 | new_bpr = metric(anchors)[0] 51 | if new_bpr > bpr: # replace anchors 52 | anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors) 53 | m.anchor_grid[:] = anchors.clone().view_as(m.anchor_grid) # for inference 54 | m.anchors[:] = anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss 55 | check_anchor_order(m) 56 | print(f'{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future.') 57 | else: 58 | print(f'{prefix}Original anchors better than new anchors. Proceeding with original anchors.') 59 | print('') # newline 60 | 61 | 62 | def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): 63 | """ Creates kmeans-evolved anchors from training dataset 64 | 65 | Arguments: 66 | path: path to dataset *.yaml, or a loaded dataset 67 | n: number of anchors 68 | img_size: image size used for training 69 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 70 | gen: generations to evolve anchors using genetic algorithm 71 | verbose: print all results 72 | 73 | Return: 74 | k: kmeans evolved anchors 75 | 76 | Usage: 77 | from utils.autoanchor import *; _ = kmean_anchors() 78 | """ 79 | thr = 1. / thr 80 | prefix = colorstr('autoanchor: ') 81 | 82 | def metric(k, wh): # compute metrics 83 | r = wh[:, None] / k[None] 84 | x = torch.min(r, 1. 
/ r).min(2)[0] # ratio metric 85 | # x = wh_iou(wh, torch.tensor(k)) # iou metric 86 | return x, x.max(1)[0] # x, best_x 87 | 88 | def anchor_fitness(k): # mutation fitness 89 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh) 90 | return (best * (best > thr).float()).mean() # fitness 91 | 92 | def print_results(k): 93 | k = k[np.argsort(k.prod(1))] # sort small to large 94 | x, best = metric(k, wh0) 95 | bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr 96 | print(f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr') 97 | print(f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' 98 | f'past_thr={x[x > thr].mean():.3f}-mean: ', end='') 99 | for i, x in enumerate(k): 100 | print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg 101 | return k 102 | 103 | if isinstance(path, str): # *.yaml file 104 | with open(path) as f: 105 | data_dict = yaml.load(f, Loader=yaml.SafeLoader) # model dict 106 | from utils.datasets import LoadImagesAndLabels 107 | dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) 108 | else: 109 | dataset = path # dataset 110 | 111 | # Get label wh 112 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) 113 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh 114 | 115 | # Filter 116 | i = (wh0 < 3.0).any(1).sum() 117 | if i: 118 | print(f'{prefix}WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size.') 119 | wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels 120 | # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1 121 | 122 | # Kmeans calculation 123 | print(f'{prefix}Running kmeans for {n} anchors on {len(wh)} points...') 124 | s = wh.std(0) # sigmas for whitening 125 | k, dist = kmeans(wh / s, n, iter=30) # points, mean distance 126 | assert len(k) == n, print(f'{prefix}ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}') 127 | k *= s 128 | wh = torch.tensor(wh, dtype=torch.float32) # filtered 129 | wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered 130 | k = print_results(k) 131 | 132 | # Plot 133 | # k, d = [None] * 20, [None] * 20 134 | # for i in tqdm(range(1, 21)): 135 | # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance 136 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True) 137 | # ax = ax.ravel() 138 | # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') 139 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh 140 | # ax[0].hist(wh[wh[:, 0]<100, 0],400) 141 | # ax[1].hist(wh[wh[:, 1]<100, 1],400) 142 | # fig.savefig('wh.png', dpi=200) 143 | 144 | # Evolve 145 | npr = np.random 146 | f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma 147 | pbar = tqdm(range(gen), desc=f'{prefix}Evolving anchors with Genetic Algorithm:') # progress bar 148 | for _ in pbar: 149 | v = np.ones(sh) 150 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates) 151 | v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) 152 | kg = (k.copy() * v).clip(min=2.0) 153 | fg = anchor_fitness(kg) 154 | if fg > f: 155 | f, k = fg, kg.copy() 156 | pbar.desc = f'{prefix}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}' 157 | if verbose: 158 | print_results(k) 159 | 160 | return print_results(k) 161 
| -------------------------------------------------------------------------------- /yolov3_fsnet/utils/aws/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/yolov3_fsnet/utils/aws/__init__.py -------------------------------------------------------------------------------- /yolov3_fsnet/utils/aws/mime.sh: -------------------------------------------------------------------------------- 1 | # AWS EC2 instance startup 'MIME' script https://aws.amazon.com/premiumsupport/knowledge-center/execute-user-data-ec2/ 2 | # This script will run on every instance restart, not only on first start 3 | # --- DO NOT COPY ABOVE COMMENTS WHEN PASTING INTO USERDATA --- 4 | 5 | Content-Type: multipart/mixed; boundary="//" 6 | MIME-Version: 1.0 7 | 8 | --// 9 | Content-Type: text/cloud-config; charset="us-ascii" 10 | MIME-Version: 1.0 11 | Content-Transfer-Encoding: 7bit 12 | Content-Disposition: attachment; filename="cloud-config.txt" 13 | 14 | #cloud-config 15 | cloud_final_modules: 16 | - [scripts-user, always] 17 | 18 | --// 19 | Content-Type: text/x-shellscript; charset="us-ascii" 20 | MIME-Version: 1.0 21 | Content-Transfer-Encoding: 7bit 22 | Content-Disposition: attachment; filename="userdata.txt" 23 | 24 | #!/bin/bash 25 | # --- paste contents of userdata.sh here --- 26 | --// 27 | -------------------------------------------------------------------------------- /yolov3_fsnet/utils/aws/resume.py: -------------------------------------------------------------------------------- 1 | # Resume all interrupted trainings in yolov5/ dir including DDP trainings 2 | # Usage: $ python utils/aws/resume.py 3 | 4 | import os 5 | import sys 6 | from pathlib import Path 7 | 8 | import torch 9 | import yaml 10 | 11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 12 | 13 | port = 0 # --master_port 14 | path = Path('').resolve() 15 | for last in path.rglob('*/**/last.pt'): 16 | ckpt = torch.load(last) 17 | if ckpt['optimizer'] is None: 18 | continue 19 | 20 | # Load opt.yaml 21 | with open(last.parent.parent / 'opt.yaml') as f: 22 | opt = yaml.load(f, Loader=yaml.SafeLoader) 23 | 24 | # Get device count 25 | d = opt['device'].split(',') # devices 26 | nd = len(d) # number of devices 27 | ddp = nd > 1 or (nd == 0 and torch.cuda.device_count() > 1) # distributed data parallel 28 | 29 | if ddp: # multi-GPU 30 | port += 1 31 | cmd = f'python -m torch.distributed.launch --nproc_per_node {nd} --master_port {port} train.py --resume {last}' 32 | else: # single-GPU 33 | cmd = f'python train.py --resume {last}' 34 | 35 | cmd += ' > /dev/null 2>&1 &' # redirect output to dev/null and run in daemon thread 36 | print(cmd) 37 | os.system(cmd) 38 | -------------------------------------------------------------------------------- /yolov3_fsnet/utils/aws/userdata.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # AWS EC2 instance startup script https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html 3 | # This script will run only once on first instance start (for a re-start script see mime.sh) 4 | # /home/ubuntu (ubuntu) or /home/ec2-user (amazon-linux) is working dir 5 | # Use >300 GB SSD 6 | 7 | cd home/ubuntu 8 | if [ ! -d yolov5 ]; then 9 | echo "Running first-time script." 
# install dependencies, download COCO, pull Docker 10 | git clone https://github.com/ultralytics/yolov5 && sudo chmod -R 777 yolov5 11 | cd yolov5 12 | bash data/scripts/get_coco.sh && echo "Data done." & 13 | sudo docker pull ultralytics/yolov5:latest && echo "Docker done." & 14 | python -m pip install --upgrade pip && pip install -r requirements.txt && python detect.py && echo "Requirements done." & 15 | wait && echo "All tasks done." # finish background tasks 16 | else 17 | echo "Running re-start script." # resume interrupted runs 18 | i=0 19 | list=$(sudo docker ps -qa) # container list i.e. $'one\ntwo\nthree\nfour' 20 | while IFS= read -r id; do 21 | ((i++)) 22 | echo "restarting container $i: $id" 23 | sudo docker start $id 24 | # sudo docker exec -it $id python train.py --resume # single-GPU 25 | sudo docker exec -d $id python utils/aws/resume.py # multi-scenario 26 | done <<<"$list" 27 | fi 28 | -------------------------------------------------------------------------------- /yolov3_fsnet/utils/google_app_engine/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gcr.io/google-appengine/python 2 | 3 | # Create a virtualenv for dependencies. This isolates these packages from 4 | # system-level packages. 5 | # Use -p python3 or -p python3.7 to select python version. Default is version 2. 6 | RUN virtualenv /env -p python3 7 | 8 | # Setting these environment variables are the same as running 9 | # source /env/bin/activate. 10 | ENV VIRTUAL_ENV /env 11 | ENV PATH /env/bin:$PATH 12 | 13 | RUN apt-get update && apt-get install -y python-opencv 14 | 15 | # Copy the application's requirements.txt and run pip to install all 16 | # dependencies into the virtualenv. 17 | ADD requirements.txt /app/requirements.txt 18 | RUN pip install -r /app/requirements.txt 19 | 20 | # Add the application source code. 21 | ADD . /app 22 | 23 | # Run a WSGI server to serve the application. gunicorn must be declared as 24 | # a dependency in requirements.txt. 
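# (see additional_requirements.txt in this repo, which pins pip==18.1, Flask==1.0.2 and gunicorn==19.9.0 for this App Engine app)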
25 | CMD gunicorn -b :$PORT main:app 26 | -------------------------------------------------------------------------------- /yolov3_fsnet/utils/google_app_engine/additional_requirements.txt: -------------------------------------------------------------------------------- 1 | # add these requirements in your app on top of the existing ones 2 | pip==18.1 3 | Flask==1.0.2 4 | gunicorn==19.9.0 5 | -------------------------------------------------------------------------------- /yolov3_fsnet/utils/google_app_engine/app.yaml: -------------------------------------------------------------------------------- 1 | runtime: custom 2 | env: flex 3 | 4 | service: yolov3app 5 | 6 | liveness_check: 7 | initial_delay_sec: 600 8 | 9 | manual_scaling: 10 | instances: 1 11 | resources: 12 | cpu: 1 13 | memory_gb: 4 14 | disk_size_gb: 20 -------------------------------------------------------------------------------- /yolov3_fsnet/utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # Google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | 3 | import os 4 | import platform 5 | import subprocess 6 | import time 7 | from pathlib import Path 8 | 9 | import requests 10 | import torch 11 | 12 | 13 | def gsutil_getsize(url=''): 14 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du 15 | s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8') 16 | return eval(s.split(' ')[0]) if len(s) else 0 # bytes 17 | 18 | 19 | def attempt_download(file, repo='ultralytics/yolov3'): 20 | # Attempt file download if does not exist 21 | file = Path(str(file).strip().replace("'", '').lower()) 22 | 23 | if not file.exists(): 24 | try: 25 | response = requests.get(f'https://api.github.com/repos/{repo}/releases/latest').json() # github api 26 | assets = [x['name'] for x in response['assets']] # release assets, i.e. ['yolov5s.pt', 'yolov5m.pt', ...] 27 | tag = response['tag_name'] # i.e. 'v1.0' 28 | except: # fallback plan 29 | assets = ['yolov3.pt', 'yolov3-spp.pt', 'yolov3-tiny.pt'] 30 | tag = subprocess.check_output('git tag', shell=True).decode().split()[-1] 31 | 32 | name = file.name 33 | if name in assets: 34 | msg = f'{file} missing, try downloading from https://github.com/{repo}/releases/' 35 | redundant = False # second download option 36 | try: # GitHub 37 | url = f'https://github.com/{repo}/releases/download/{tag}/{name}' 38 | print(f'Downloading {url} to {file}...') 39 | torch.hub.download_url_to_file(url, file) 40 | assert file.exists() and file.stat().st_size > 1E6 # check 41 | except Exception as e: # GCP 42 | print(f'Download error: {e}') 43 | assert redundant, 'No secondary mirror' 44 | url = f'https://storage.googleapis.com/{repo}/ckpt/{name}' 45 | print(f'Downloading {url} to {file}...') 46 | os.system(f'curl -L {url} -o {file}') # torch.hub.download_url_to_file(url, weights) 47 | finally: 48 | if not file.exists() or file.stat().st_size < 1E6: # check 49 | 50 | file.unlink(missing_ok=True) 51 | # remove partial downloads 52 | print(f'ERROR: Download failure: {msg}') 53 | print('') 54 | return 55 | 56 | 57 | def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'): 58 | # Downloads a file from Google Drive. from yolov3.utils.google_utils import *; gdrive_download() 59 | t = time.time() 60 | file = Path(file) 61 | cookie = Path('cookie') # gdrive cookie 62 | print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... 
', end='') 63 | file.unlink(missing_ok=True) # remove existing file 64 | cookie.unlink(missing_ok=True) # remove existing cookie 65 | 66 | # Attempt file download 67 | out = "NUL" if platform.system() == "Windows" else "/dev/null" 68 | os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}') 69 | if os.path.exists('cookie'): # large file 70 | s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}' 71 | else: # small file 72 | s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"' 73 | r = os.system(s) # execute, capture return 74 | cookie.unlink(missing_ok=True) # remove existing cookie 75 | 76 | # Error check 77 | if r != 0: 78 | file.unlink(missing_ok=True) # remove partial 79 | print('Download error ') # raise Exception('Download error') 80 | return r 81 | 82 | # Unzip if archive 83 | if file.suffix == '.zip': 84 | print('unzipping... ', end='') 85 | os.system(f'unzip -q {file}') # unzip 86 | file.unlink() # remove zip to free space 87 | 88 | print(f'Done ({time.time() - t:.1f}s)') 89 | return r 90 | 91 | 92 | def get_token(cookie="./cookie"): 93 | with open(cookie) as f: 94 | for line in f: 95 | if "download" in line: 96 | return line.split()[-1] 97 | return "" 98 | 99 | # def upload_blob(bucket_name, source_file_name, destination_blob_name): 100 | # # Uploads a file to a bucket 101 | # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 102 | # 103 | # storage_client = storage.Client() 104 | # bucket = storage_client.get_bucket(bucket_name) 105 | # blob = bucket.blob(destination_blob_name) 106 | # 107 | # blob.upload_from_filename(source_file_name) 108 | # 109 | # print('File {} uploaded to {}.'.format( 110 | # source_file_name, 111 | # destination_blob_name)) 112 | # 113 | # 114 | # def download_blob(bucket_name, source_blob_name, destination_file_name): 115 | # # Uploads a blob from a bucket 116 | # storage_client = storage.Client() 117 | # bucket = storage_client.get_bucket(bucket_name) 118 | # blob = bucket.blob(source_blob_name) 119 | # 120 | # blob.download_to_filename(destination_file_name) 121 | # 122 | # print('Blob {} downloaded to {}.'.format( 123 | # source_blob_name, 124 | # destination_file_name)) 125 | -------------------------------------------------------------------------------- /yolov3_fsnet/utils/loss.py: -------------------------------------------------------------------------------- 1 | # Loss functions 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from utils.general import bbox_iou 7 | from utils.torch_utils import is_parallel 8 | 9 | 10 | def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 11 | # return positive, negative label smoothing BCE targets 12 | return 1.0 - 0.5 * eps, 0.5 * eps 13 | 14 | 15 | class BCEBlurWithLogitsLoss(nn.Module): 16 | # BCEwithLogitLoss() with reduced missing label effects. 
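# Illustrative usage (a sketch, not code from this repo): criterion = BCEBlurWithLogitsLoss(alpha=0.05); loss = criterion(pred_logits, targets)  # drop-in replacement for nn.BCEWithLogitsLoss() with mean reduction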
17 | def __init__(self, alpha=0.05): 18 | super(BCEBlurWithLogitsLoss, self).__init__() 19 | self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none') # must be nn.BCEWithLogitsLoss() 20 | self.alpha = alpha 21 | 22 | def forward(self, pred, true): 23 | loss = self.loss_fcn(pred, true) 24 | pred = torch.sigmoid(pred) # prob from logits 25 | dx = pred - true # reduce only missing label effects 26 | # dx = (pred - true).abs() # reduce missing label and false label effects 27 | alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4)) 28 | loss *= alpha_factor 29 | return loss.mean() 30 | 31 | 32 | class FocalLoss(nn.Module): 33 | # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) 34 | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): 35 | super(FocalLoss, self).__init__() 36 | self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() 37 | self.gamma = gamma 38 | self.alpha = alpha 39 | self.reduction = loss_fcn.reduction 40 | self.loss_fcn.reduction = 'none' # required to apply FL to each element 41 | 42 | def forward(self, pred, true): 43 | loss = self.loss_fcn(pred, true) 44 | # p_t = torch.exp(-loss) 45 | # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability 46 | 47 | # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py 48 | pred_prob = torch.sigmoid(pred) # prob from logits 49 | p_t = true * pred_prob + (1 - true) * (1 - pred_prob) 50 | alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) 51 | modulating_factor = (1.0 - p_t) ** self.gamma 52 | loss *= alpha_factor * modulating_factor 53 | 54 | if self.reduction == 'mean': 55 | return loss.mean() 56 | elif self.reduction == 'sum': 57 | return loss.sum() 58 | else: # 'none' 59 | return loss 60 | 61 | 62 | class QFocalLoss(nn.Module): 63 | # Wraps Quality focal loss around existing loss_fcn(), i.e. 
criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) 64 | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): 65 | super(QFocalLoss, self).__init__() 66 | self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() 67 | self.gamma = gamma 68 | self.alpha = alpha 69 | self.reduction = loss_fcn.reduction 70 | self.loss_fcn.reduction = 'none' # required to apply FL to each element 71 | 72 | def forward(self, pred, true): 73 | loss = self.loss_fcn(pred, true) 74 | 75 | pred_prob = torch.sigmoid(pred) # prob from logits 76 | alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) 77 | modulating_factor = torch.abs(true - pred_prob) ** self.gamma 78 | loss *= alpha_factor * modulating_factor 79 | 80 | if self.reduction == 'mean': 81 | return loss.mean() 82 | elif self.reduction == 'sum': 83 | return loss.sum() 84 | else: # 'none' 85 | return loss 86 | 87 | 88 | class ComputeLoss: 89 | # Compute losses 90 | def __init__(self, model, autobalance=False): 91 | super(ComputeLoss, self).__init__() 92 | device = next(model.parameters()).device # get model device 93 | h = model.hyp # hyperparameters 94 | 95 | # Define criteria 96 | BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) 97 | BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) 98 | 99 | # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 100 | self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0)) # positive, negative BCE targets 101 | 102 | # Focal loss 103 | g = h['fl_gamma'] # focal loss gamma 104 | if g > 0: 105 | BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) 106 | 107 | det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module 108 | self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02]) # P3-P7 109 | self.ssi = list(det.stride).index(16) if autobalance else 0 # stride 16 index 110 | self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, model.gr, h, autobalance 111 | for k in 'na', 'nc', 'nl', 'anchors': 112 | setattr(self, k, getattr(det, k)) 113 | 114 | def __call__(self, p, targets): # predictions, targets, model 115 | device = targets.device 116 | lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device) 117 | tcls, tbox, indices, anchors = self.build_targets(p, targets) # targets 118 | 119 | # Losses 120 | for i, pi in enumerate(p): # layer index, layer predictions 121 | b, a, gj, gi = indices[i] # image, anchor, gridy, gridx 122 | tobj = torch.zeros_like(pi[..., 0], device=device) # target obj 123 | 124 | n = b.shape[0] # number of targets 125 | if n: 126 | ps = pi[b, a, gj, gi] # prediction subset corresponding to targets 127 | 128 | # Regression 129 | pxy = ps[:, :2].sigmoid() * 2. 
- 0.5 130 | pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] 131 | pbox = torch.cat((pxy, pwh), 1) # predicted box 132 | iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target) 133 | lbox += (1.0 - iou).mean() # iou loss 134 | 135 | # Objectness 136 | tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio 137 | 138 | # Classification 139 | if self.nc > 1: # cls loss (only if multiple classes) 140 | t = torch.full_like(ps[:, 5:], self.cn, device=device) # targets 141 | t[range(n), tcls[i]] = self.cp 142 | lcls += self.BCEcls(ps[:, 5:], t) # BCE 143 | 144 | # Append targets to text file 145 | # with open('targets.txt', 'a') as file: 146 | # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] 147 | 148 | obji = self.BCEobj(pi[..., 4], tobj) 149 | lobj += obji * self.balance[i] # obj loss 150 | if self.autobalance: 151 | self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item() 152 | 153 | if self.autobalance: 154 | self.balance = [x / self.balance[self.ssi] for x in self.balance] 155 | lbox *= self.hyp['box'] 156 | lobj *= self.hyp['obj'] 157 | lcls *= self.hyp['cls'] 158 | bs = tobj.shape[0] # batch size 159 | 160 | loss = lbox + lobj + lcls 161 | return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach() 162 | 163 | def build_targets(self, p, targets): 164 | # Build targets for compute_loss(), input targets(image,class,x,y,w,h) 165 | na, nt = self.na, targets.shape[0] # number of anchors, targets 166 | tcls, tbox, indices, anch = [], [], [], [] 167 | gain = torch.ones(7, device=targets.device) # normalized to gridspace gain 168 | ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) 169 | targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices 170 | 171 | g = 0.5 # bias 172 | off = torch.tensor([[0, 0], 173 | # [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m 174 | # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm 175 | ], device=targets.device).float() * g # offsets 176 | 177 | for i in range(self.nl): 178 | anchors = self.anchors[i] 179 | gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain 180 | 181 | # Match targets to anchors 182 | t = targets * gain 183 | if nt: 184 | # Matches 185 | r = t[:, :, 4:6] / anchors[:, None] # wh ratio 186 | j = torch.max(r, 1. / r).max(2)[0] < self.hyp['anchor_t'] # compare 187 | # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) 188 | t = t[j] # filter 189 | 190 | # Offsets 191 | gxy = t[:, 2:4] # grid xy 192 | gxi = gain[[2, 3]] - gxy # inverse 193 | j, k = ((gxy % 1. < g) & (gxy > 1.)).T 194 | l, m = ((gxi % 1. 
< g) & (gxi > 1.)).T 195 | j = torch.stack((torch.ones_like(j),)) 196 | t = t.repeat((off.shape[0], 1, 1))[j] 197 | offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] 198 | else: 199 | t = targets[0] 200 | offsets = 0 201 | 202 | # Define 203 | b, c = t[:, :2].long().T # image, class 204 | gxy = t[:, 2:4] # grid xy 205 | gwh = t[:, 4:6] # grid wh 206 | gij = (gxy - offsets).long() 207 | gi, gj = gij.T # grid xy indices 208 | 209 | # Append 210 | a = t[:, 6].long() # anchor indices 211 | indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices 212 | tbox.append(torch.cat((gxy - gij, gwh), 1)) # box 213 | anch.append(anchors[a]) # anchors 214 | tcls.append(c) # class 215 | 216 | return tcls, tbox, indices, anch 217 | -------------------------------------------------------------------------------- /yolov3_fsnet/utils/metrics.py: -------------------------------------------------------------------------------- 1 | # Model validation metrics 2 | 3 | from pathlib import Path 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import torch 8 | 9 | from . import general 10 | 11 | 12 | def fitness(x): 13 | # Model fitness as a weighted combination of metrics 14 | w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] 15 | return (x[:, :4] * w).sum(1) 16 | 17 | 18 | def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=()): 19 | """ Compute the average precision, given the recall and precision curves. 20 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 21 | # Arguments 22 | tp: True positives (nparray, nx1 or nx10). 23 | conf: Objectness value from 0-1 (nparray). 24 | pred_cls: Predicted object classes (nparray). 25 | target_cls: True object classes (nparray). 26 | plot: Plot precision-recall curve at mAP@0.5 27 | save_dir: Plot save directory 28 | # Returns 29 | The average precision as computed in py-faster-rcnn. 
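Concretely: per-class precision, recall and F1 evaluated at the confidence that maximises mean F1, the full AP array (one column per IoU threshold), and the unique class ids, matching the return statement below.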
30 | """ 31 | 32 | # Sort by objectness 33 | i = np.argsort(-conf) 34 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 35 | 36 | # Find unique classes 37 | unique_classes = np.unique(target_cls) 38 | nc = unique_classes.shape[0] # number of classes, number of detections 39 | 40 | # Create Precision-Recall curve and compute AP for each class 41 | px, py = np.linspace(0, 1, 1000), [] # for plotting 42 | ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) 43 | for ci, c in enumerate(unique_classes): 44 | i = pred_cls == c 45 | n_l = (target_cls == c).sum() # number of labels 46 | n_p = i.sum() # number of predictions 47 | 48 | if n_p == 0 or n_l == 0: 49 | continue 50 | else: 51 | # Accumulate FPs and TPs 52 | fpc = (1 - tp[i]).cumsum(0) 53 | tpc = tp[i].cumsum(0) 54 | 55 | # Recall 56 | recall = tpc / (n_l + 1e-16) # recall curve 57 | r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases 58 | 59 | # Precision 60 | precision = tpc / (tpc + fpc) # precision curve 61 | p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score 62 | 63 | # AP from recall-precision curve 64 | for j in range(tp.shape[1]): 65 | ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) 66 | if plot and j == 0: 67 | py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 68 | 69 | # Compute F1 (harmonic mean of precision and recall) 70 | f1 = 2 * p * r / (p + r + 1e-16) 71 | if plot: 72 | plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names) 73 | plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1') 74 | plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision') 75 | plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall') 76 | 77 | i = f1.mean(0).argmax() # max F1 index 78 | return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype('int32') 79 | 80 | 81 | def compute_ap(recall, precision): 82 | """ Compute the average precision, given the recall and precision curves 83 | # Arguments 84 | recall: The recall curve (list) 85 | precision: The precision curve (list) 86 | # Returns 87 | Average precision, precision curve, recall curve 88 | """ 89 | 90 | # Append sentinel values to beginning and end 91 | mrec = np.concatenate(([0.], recall, [recall[-1] + 0.01])) 92 | mpre = np.concatenate(([1.], precision, [0.])) 93 | 94 | # Compute the precision envelope 95 | mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) 96 | 97 | # Integrate area under curve 98 | method = 'interp' # methods: 'continuous', 'interp' 99 | if method == 'interp': 100 | x = np.linspace(0, 1, 101) # 101-point interp (COCO) 101 | ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate 102 | else: # 'continuous' 103 | i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes 104 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve 105 | 106 | return ap, mpre, mrec 107 | 108 | 109 | class ConfusionMatrix: 110 | # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix 111 | def __init__(self, nc, conf=0.25, iou_thres=0.45): 112 | self.matrix = np.zeros((nc + 1, nc + 1)) 113 | self.nc = nc # number of classes 114 | self.conf = conf 115 | self.iou_thres = iou_thres 116 | 117 | def process_batch(self, detections, labels): 118 | """ 119 | Return intersection-over-union (Jaccard index) of boxes. 120 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
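Typical use (an illustrative sketch): cm = ConfusionMatrix(nc); cm.process_batch(detections, labels) per batch, then cm.plot(save_dir, names) or cm.print().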
121 | Arguments: 122 | detections (Array[N, 6]), x1, y1, x2, y2, conf, class 123 | labels (Array[M, 5]), class, x1, y1, x2, y2 124 | Returns: 125 | None, updates confusion matrix accordingly 126 | """ 127 | detections = detections[detections[:, 4] > self.conf] 128 | gt_classes = labels[:, 0].int() 129 | detection_classes = detections[:, 5].int() 130 | iou = general.box_iou(labels[:, 1:], detections[:, :4]) 131 | 132 | x = torch.where(iou > self.iou_thres) 133 | if x[0].shape[0]: 134 | matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() 135 | if x[0].shape[0] > 1: 136 | matches = matches[matches[:, 2].argsort()[::-1]] 137 | matches = matches[np.unique(matches[:, 1], return_index=True)[1]] 138 | matches = matches[matches[:, 2].argsort()[::-1]] 139 | matches = matches[np.unique(matches[:, 0], return_index=True)[1]] 140 | else: 141 | matches = np.zeros((0, 3)) 142 | 143 | n = matches.shape[0] > 0 144 | m0, m1, _ = matches.transpose().astype(np.int16) 145 | for i, gc in enumerate(gt_classes): 146 | j = m0 == i 147 | if n and sum(j) == 1: 148 | self.matrix[gc, detection_classes[m1[j]]] += 1 # correct 149 | else: 150 | self.matrix[self.nc, gc] += 1 # background FP 151 | 152 | if n: 153 | for i, dc in enumerate(detection_classes): 154 | if not any(m1 == i): 155 | self.matrix[dc, self.nc] += 1 # background FN 156 | 157 | def matrix(self): 158 | return self.matrix 159 | 160 | def plot(self, save_dir='', names=()): 161 | try: 162 | import seaborn as sn 163 | 164 | array = self.matrix / (self.matrix.sum(0).reshape(1, self.nc + 1) + 1E-6) # normalize 165 | array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) 166 | 167 | fig = plt.figure(figsize=(12, 9), tight_layout=True) 168 | sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size 169 | labels = (0 < len(names) < 99) and len(names) == self.nc # apply names to ticklabels 170 | sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True, 171 | xticklabels=names + ['background FP'] if labels else "auto", 172 | yticklabels=names + ['background FN'] if labels else "auto").set_facecolor((1, 1, 1)) 173 | fig.axes[0].set_xlabel('True') 174 | fig.axes[0].set_ylabel('Predicted') 175 | fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250) 176 | except Exception as e: 177 | pass 178 | 179 | def print(self): 180 | for i in range(self.nc + 1): 181 | print(' '.join(map(str, self.matrix[i]))) 182 | 183 | 184 | # Plots ---------------------------------------------------------------------------------------------------------------- 185 | 186 | def plot_pr_curve(px, py, ap, save_dir='pr_curve.png', names=()): 187 | # Precision-recall curve 188 | fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) 189 | py = np.stack(py, axis=1) 190 | 191 | if 0 < len(names) < 21: # display per-class legend if < 21 classes 192 | for i, y in enumerate(py.T): 193 | ax.plot(px, y, linewidth=1, label=f'{names[i]} {ap[i, 0]:.3f}') # plot(recall, precision) 194 | else: 195 | ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision) 196 | 197 | ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) 198 | ax.set_xlabel('Recall') 199 | ax.set_ylabel('Precision') 200 | ax.set_xlim(0, 1) 201 | ax.set_ylim(0, 1) 202 | plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") 203 | fig.savefig(Path(save_dir), dpi=250) 204 | 205 | 206 | def plot_mc_curve(px, py, save_dir='mc_curve.png', names=(), xlabel='Confidence', 
ylabel='Metric'): 207 | # Metric-confidence curve 208 | fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) 209 | 210 | if 0 < len(names) < 21: # display per-class legend if < 21 classes 211 | for i, y in enumerate(py): 212 | ax.plot(px, y, linewidth=1, label=f'{names[i]}') # plot(confidence, metric) 213 | else: 214 | ax.plot(px, py.T, linewidth=1, color='grey') # plot(confidence, metric) 215 | 216 | y = py.mean(0) 217 | ax.plot(px, y, linewidth=3, color='blue', label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}') 218 | ax.set_xlabel(xlabel) 219 | ax.set_ylabel(ylabel) 220 | ax.set_xlim(0, 1) 221 | ax.set_ylim(0, 1) 222 | plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") 223 | fig.savefig(Path(save_dir), dpi=250) 224 | -------------------------------------------------------------------------------- /yolov3_fsnet/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | # YOLOv3 PyTorch utils 2 | 3 | import datetime 4 | import logging 5 | import math 6 | import os 7 | import platform 8 | import subprocess 9 | import time 10 | from contextlib import contextmanager 11 | from copy import deepcopy 12 | from pathlib import Path 13 | 14 | import torch 15 | import torch.backends.cudnn as cudnn 16 | import torch.nn as nn 17 | import torch.nn.functional as F 18 | import torchvision 19 | 20 | try: 21 | import thop # for FLOPS computation 22 | except ImportError: 23 | thop = None 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | @contextmanager 28 | def torch_distributed_zero_first(local_rank: int): 29 | """ 30 | Decorator to make all processes in distributed training wait for each local_master to do something. 31 | """ 32 | if local_rank not in [-1, 0]: 33 | torch.distributed.barrier() 34 | yield 35 | if local_rank == 0: 36 | torch.distributed.barrier() 37 | 38 | 39 | def init_torch_seeds(seed=0): 40 | # Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html 41 | torch.manual_seed(seed) 42 | if seed == 0: # slower, more reproducible 43 | cudnn.benchmark, cudnn.deterministic = False, True 44 | else: # faster, less reproducible 45 | cudnn.benchmark, cudnn.deterministic = True, False 46 | 47 | 48 | def date_modified(path=__file__): 49 | # return human-readable file modification date, i.e. '2021-3-26' 50 | t = datetime.datetime.fromtimestamp(Path(path).stat().st_mtime) 51 | return f'{t.year}-{t.month}-{t.day}' 52 | 53 | 54 | def git_describe(path=Path(__file__).parent): # path must be a directory 55 | # return human-readable git description, i.e. 
v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe 56 | s = f'git -C {path} describe --tags --long --always' 57 | try: 58 | return subprocess.check_output(s, shell=True, stderr=subprocess.STDOUT).decode()[:-1] 59 | except subprocess.CalledProcessError as e: 60 | return '' # not a git repository 61 | 62 | 63 | def select_device(device='', batch_size=None): 64 | # device = 'cpu' or '0' or '0,1,2,3' 65 | s = f'YOLOv3 🚀 {git_describe() or date_modified()} torch {torch.__version__} ' # string 66 | cpu = device.lower() == 'cpu' 67 | if cpu: 68 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False 69 | elif device: # non-cpu device requested 70 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 71 | assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' # check availability 72 | 73 | cuda = not cpu and torch.cuda.is_available() 74 | if cuda: 75 | n = torch.cuda.device_count() 76 | if n > 1 and batch_size: # check that batch_size is compatible with device_count 77 | assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}' 78 | space = ' ' * len(s) 79 | for i, d in enumerate(device.split(',') if device else range(n)): 80 | p = torch.cuda.get_device_properties(i) 81 | s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / 1024 ** 2}MB)\n" # bytes to MB 82 | else: 83 | s += 'CPU\n' 84 | 85 | logger.info(s.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else s) # emoji-safe 86 | return torch.device('cuda:0' if cuda else 'cpu') 87 | 88 | 89 | def time_synchronized(): 90 | # pytorch-accurate time 91 | if torch.cuda.is_available(): 92 | torch.cuda.synchronize() 93 | return time.time() 94 | 95 | 96 | def profile(x, ops, n=100, device=None): 97 | # profile a pytorch module or list of modules. Example usage: 98 | # x = torch.randn(16, 3, 640, 640) # input 99 | # m1 = lambda x: x * torch.sigmoid(x) 100 | # m2 = nn.SiLU() 101 | # profile(x, [m1, m2], n=100) # profile speed over 100 iterations 102 | 103 | device = device or torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 104 | x = x.to(device) 105 | x.requires_grad = True 106 | print(torch.__version__, device.type, torch.cuda.get_device_properties(0) if device.type == 'cuda' else '') 107 | print(f"\n{'Params':>12s}{'GFLOPS':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}") 108 | for m in ops if isinstance(ops, list) else [ops]: 109 | m = m.to(device) if hasattr(m, 'to') else m # device 110 | m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m # type 111 | dtf, dtb, t = 0., 0., [0., 0., 0.] 
# dt forward, backward 112 | try: 113 | flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPS 114 | except: 115 | flops = 0 116 | 117 | for _ in range(n): 118 | t[0] = time_synchronized() 119 | y = m(x) 120 | t[1] = time_synchronized() 121 | try: 122 | _ = y.sum().backward() 123 | t[2] = time_synchronized() 124 | except: # no backward method 125 | t[2] = float('nan') 126 | dtf += (t[1] - t[0]) * 1000 / n # ms per op forward 127 | dtb += (t[2] - t[1]) * 1000 / n # ms per op backward 128 | 129 | s_in = tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' 130 | s_out = tuple(y.shape) if isinstance(y, torch.Tensor) else 'list' 131 | p = sum(list(x.numel() for x in m.parameters())) if isinstance(m, nn.Module) else 0 # parameters 132 | print(f'{p:12}{flops:12.4g}{dtf:16.4g}{dtb:16.4g}{str(s_in):>24s}{str(s_out):>24s}') 133 | 134 | 135 | def is_parallel(model): 136 | return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) 137 | 138 | 139 | def intersect_dicts(da, db, exclude=()): 140 | # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values 141 | return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape} 142 | 143 | 144 | def initialize_weights(model): 145 | for m in model.modules(): 146 | t = type(m) 147 | if t is nn.Conv2d: 148 | pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 149 | elif t is nn.BatchNorm2d: 150 | m.eps = 1e-3 151 | m.momentum = 0.03 152 | elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]: 153 | m.inplace = True 154 | 155 | 156 | def find_modules(model, mclass=nn.Conv2d): 157 | # Finds layer indices matching module class 'mclass' 158 | return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)] 159 | 160 | 161 | def sparsity(model): 162 | # Return global model sparsity 163 | a, b = 0., 0. 164 | for p in model.parameters(): 165 | a += p.numel() 166 | b += (p == 0).sum() 167 | return b / a 168 | 169 | 170 | def prune(model, amount=0.3): 171 | # Prune model to requested global sparsity 172 | import torch.nn.utils.prune as prune 173 | print('Pruning model... 
', end='') 174 | for name, m in model.named_modules(): 175 | if isinstance(m, nn.Conv2d): 176 | prune.l1_unstructured(m, name='weight', amount=amount) # prune 177 | prune.remove(m, 'weight') # make permanent 178 | print(' %.3g global sparsity' % sparsity(model)) 179 | 180 | 181 | def fuse_conv_and_bn(conv, bn): 182 | # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 183 | fusedconv = nn.Conv2d(conv.in_channels, 184 | conv.out_channels, 185 | kernel_size=conv.kernel_size, 186 | stride=conv.stride, 187 | padding=conv.padding, 188 | groups=conv.groups, 189 | bias=True).requires_grad_(False).to(conv.weight.device) 190 | 191 | # prepare filters 192 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 193 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 194 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape)) 195 | 196 | # prepare spatial bias 197 | b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias 198 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 199 | fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) 200 | 201 | return fusedconv 202 | 203 | 204 | def model_info(model, verbose=False, img_size=640): 205 | # Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320] 206 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 207 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 208 | if verbose: 209 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 210 | for i, (name, p) in enumerate(model.named_parameters()): 211 | name = name.replace('module_list.', '') 212 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 213 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 214 | 215 | try: # FLOPS 216 | from thop import profile 217 | stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32 218 | img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device) # input 219 | flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2 # stride GFLOPS 220 | img_size = img_size if isinstance(img_size, list) else [img_size, img_size] # expand if int/float 221 | fs = ', %.1f GFLOPS' % (flops * img_size[0] / stride * img_size[1] / stride) # 640x640 GFLOPS 222 | except (ImportError, Exception): 223 | fs = '' 224 | 225 | logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}") 226 | 227 | 228 | def load_classifier(name='resnet101', n=2): 229 | # Loads a pretrained model reshaped to n-class output 230 | model = torchvision.models.__dict__[name](pretrained=True) 231 | 232 | # ResNet model properties 233 | # input_size = [3, 224, 224] 234 | # input_space = 'RGB' 235 | # input_range = [0, 1] 236 | # mean = [0.485, 0.456, 0.406] 237 | # std = [0.229, 0.224, 0.225] 238 | 239 | # Reshape output to n classes 240 | filters = model.fc.weight.shape[1] 241 | model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True) 242 | model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True) 243 | model.fc.out_features = n 244 | return model 245 | 246 | 247 | def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416) 248 | # scales img(bs,3,y,x) by ratio constrained to gs-multiple 249 | if 
ratio == 1.0: 250 | return img 251 | else: 252 | h, w = img.shape[2:] 253 | s = (int(h * ratio), int(w * ratio)) # new size 254 | img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize 255 | if not same_shape: # pad/crop img 256 | h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)] 257 | return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean 258 | 259 | 260 | def copy_attr(a, b, include=(), exclude=()): 261 | # Copy attributes from b to a, options to only include [...] and to exclude [...] 262 | for k, v in b.__dict__.items(): 263 | if (len(include) and k not in include) or k.startswith('_') or k in exclude: 264 | continue 265 | else: 266 | setattr(a, k, v) 267 | 268 | 269 | class ModelEMA: 270 | """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models 271 | Keep a moving average of everything in the model state_dict (parameters and buffers). 272 | This is intended to allow functionality like 273 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage 274 | A smoothed version of the weights is necessary for some training schemes to perform well. 275 | This class is sensitive where it is initialized in the sequence of model init, 276 | GPU assignment and distributed training wrappers. 277 | """ 278 | 279 | def __init__(self, model, decay=0.9999, updates=0): 280 | # Create EMA 281 | self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA 282 | # if next(model.parameters()).device.type != 'cpu': 283 | # self.ema.half() # FP16 EMA 284 | self.updates = updates # number of EMA updates 285 | self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs) 286 | for p in self.ema.parameters(): 287 | p.requires_grad_(False) 288 | 289 | def update(self, model): 290 | # Update EMA parameters 291 | with torch.no_grad(): 292 | self.updates += 1 293 | d = self.decay(self.updates) 294 | 295 | msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict 296 | for k, v in self.ema.state_dict().items(): 297 | if v.dtype.is_floating_point: 298 | v *= d 299 | v += (1. 
- d) * msd[k].detach() 300 | 301 | def update_attr(self, model, include=(), exclude=('process_group', 'reducer')): 302 | # Update EMA attributes 303 | copy_attr(self.ema, model, include, exclude) 304 | -------------------------------------------------------------------------------- /yolov3_fsnet/utils/wandb_logging/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC1991/FS_Net/6bd838db38279cfdc390dcdb62d7caf6ee0054a3/yolov3_fsnet/utils/wandb_logging/__init__.py -------------------------------------------------------------------------------- /yolov3_fsnet/utils/wandb_logging/log_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import yaml 4 | 5 | from wandb_utils import WandbLogger 6 | 7 | WANDB_ARTIFACT_PREFIX = 'wandb-artifact://' 8 | 9 | 10 | def create_dataset_artifact(opt): 11 | with open(opt.data) as f: 12 | data = yaml.load(f, Loader=yaml.SafeLoader) # data dict 13 | logger = WandbLogger(opt, '', None, data, job_type='Dataset Creation') 14 | 15 | 16 | if __name__ == '__main__': 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path') 19 | parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') 20 | parser.add_argument('--project', type=str, default='YOLOv5', help='name of W&B Project') 21 | opt = parser.parse_args() 22 | opt.resume = False # Explicitly disallow resume check for dataset upload job 23 | 24 | create_dataset_artifact(opt) 25 | -------------------------------------------------------------------------------- /yolov3_fsnet/utils/wandb_logging/wandb_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | from pathlib import Path 4 | 5 | import torch 6 | import yaml 7 | from tqdm import tqdm 8 | 9 | sys.path.append(str(Path(__file__).parent.parent.parent)) # add utils/ to path 10 | from utils.datasets import LoadImagesAndLabels 11 | from utils.datasets import img2label_paths 12 | from utils.general import colorstr, xywh2xyxy, check_dataset 13 | 14 | try: 15 | import wandb 16 | from wandb import init, finish 17 | except ImportError: 18 | wandb = None 19 | 20 | WANDB_ARTIFACT_PREFIX = 'wandb-artifact://' 21 | 22 | 23 | def remove_prefix(from_string, prefix=WANDB_ARTIFACT_PREFIX): 24 | return from_string[len(prefix):] 25 | 26 | 27 | def check_wandb_config_file(data_config_file): 28 | wandb_config = '_wandb.'.join(data_config_file.rsplit('.', 1)) # updated data.yaml path 29 | if Path(wandb_config).is_file(): 30 | return wandb_config 31 | return data_config_file 32 | 33 | 34 | def get_run_info(run_path): 35 | run_path = Path(remove_prefix(run_path, WANDB_ARTIFACT_PREFIX)) 36 | run_id = run_path.stem 37 | project = run_path.parent.stem 38 | model_artifact_name = 'run_' + run_id + '_model' 39 | return run_id, project, model_artifact_name 40 | 41 | 42 | def check_wandb_resume(opt): 43 | process_wandb_config_ddp_mode(opt) if opt.global_rank not in [-1, 0] else None 44 | if isinstance(opt.resume, str): 45 | if opt.resume.startswith(WANDB_ARTIFACT_PREFIX): 46 | if opt.global_rank not in [-1, 0]: # For resuming DDP runs 47 | run_id, project, model_artifact_name = get_run_info(opt.resume) 48 | api = wandb.Api() 49 | artifact = api.artifact(project + '/' + model_artifact_name + ':latest') 50 | modeldir = artifact.download() 51 | opt.weights = 
str(Path(modeldir) / "last.pt") 52 | return True 53 | return None 54 | 55 | 56 | def process_wandb_config_ddp_mode(opt): 57 | with open(opt.data) as f: 58 | data_dict = yaml.load(f, Loader=yaml.SafeLoader) # data dict 59 | train_dir, val_dir = None, None 60 | if isinstance(data_dict['train'], str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX): 61 | api = wandb.Api() 62 | train_artifact = api.artifact(remove_prefix(data_dict['train']) + ':' + opt.artifact_alias) 63 | train_dir = train_artifact.download() 64 | train_path = Path(train_dir) / 'data/images/' 65 | data_dict['train'] = str(train_path) 66 | 67 | if isinstance(data_dict['val'], str) and data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX): 68 | api = wandb.Api() 69 | val_artifact = api.artifact(remove_prefix(data_dict['val']) + ':' + opt.artifact_alias) 70 | val_dir = val_artifact.download() 71 | val_path = Path(val_dir) / 'data/images/' 72 | data_dict['val'] = str(val_path) 73 | if train_dir or val_dir: 74 | ddp_data_path = str(Path(val_dir) / 'wandb_local_data.yaml') 75 | with open(ddp_data_path, 'w') as f: 76 | yaml.dump(data_dict, f) 77 | opt.data = ddp_data_path 78 | 79 | 80 | class WandbLogger(): 81 | def __init__(self, opt, name, run_id, data_dict, job_type='Training'): 82 | # Pre-training routine -- 83 | self.job_type = job_type 84 | self.wandb, self.wandb_run, self.data_dict = wandb, None if not wandb else wandb.run, data_dict 85 | # It's more elegant to stick to 1 wandb.init call, but useful config data is overwritten in the WandbLogger's wandb.init call 86 | if isinstance(opt.resume, str): # checks resume from artifact 87 | if opt.resume.startswith(WANDB_ARTIFACT_PREFIX): 88 | run_id, project, model_artifact_name = get_run_info(opt.resume) 89 | model_artifact_name = WANDB_ARTIFACT_PREFIX + model_artifact_name 90 | assert wandb, 'install wandb to resume wandb runs' 91 | # Resume wandb-artifact:// runs here| workaround for not overwriting wandb.config 92 | self.wandb_run = wandb.init(id=run_id, project=project, resume='allow') 93 | opt.resume = model_artifact_name 94 | elif self.wandb: 95 | self.wandb_run = wandb.init(config=opt, 96 | resume="allow", 97 | project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem, 98 | name=name, 99 | job_type=job_type, 100 | id=run_id) if not wandb.run else wandb.run 101 | if self.wandb_run: 102 | if self.job_type == 'Training': 103 | if not opt.resume: 104 | wandb_data_dict = self.check_and_upload_dataset(opt) if opt.upload_dataset else data_dict 105 | # Info useful for resuming from artifacts 106 | self.wandb_run.config.opt = vars(opt) 107 | self.wandb_run.config.data_dict = wandb_data_dict 108 | self.data_dict = self.setup_training(opt, data_dict) 109 | if self.job_type == 'Dataset Creation': 110 | self.data_dict = self.check_and_upload_dataset(opt) 111 | else: 112 | prefix = colorstr('wandb: ') 113 | print(f"{prefix}Install Weights & Biases for YOLOv5 logging with 'pip install wandb' (recommended)") 114 | 115 | def check_and_upload_dataset(self, opt): 116 | assert wandb, 'Install wandb to upload dataset' 117 | check_dataset(self.data_dict) 118 | config_path = self.log_dataset_artifact(opt.data, 119 | opt.single_cls, 120 | 'YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem) 121 | print("Created dataset config file ", config_path) 122 | with open(config_path) as f: 123 | wandb_data_dict = yaml.load(f, Loader=yaml.SafeLoader) 124 | return wandb_data_dict 125 | 126 | def setup_training(self, opt, data_dict): 127 | self.log_dict, 
self.current_epoch, self.log_imgs = {}, 0, 16 # Logging Constants 128 | self.bbox_interval = opt.bbox_interval 129 | if isinstance(opt.resume, str): 130 | modeldir, _ = self.download_model_artifact(opt) 131 | if modeldir: 132 | self.weights = Path(modeldir) / "last.pt" 133 | config = self.wandb_run.config 134 | opt.weights, opt.save_period, opt.batch_size, opt.bbox_interval, opt.epochs, opt.hyp = str( 135 | self.weights), config.save_period, config.total_batch_size, config.bbox_interval, config.epochs, \ 136 | config.opt['hyp'] 137 | data_dict = dict(self.wandb_run.config.data_dict) # eliminates the need for config file to resume 138 | if 'val_artifact' not in self.__dict__: # If --upload_dataset is set, use the existing artifact, don't download 139 | self.train_artifact_path, self.train_artifact = self.download_dataset_artifact(data_dict.get('train'), 140 | opt.artifact_alias) 141 | self.val_artifact_path, self.val_artifact = self.download_dataset_artifact(data_dict.get('val'), 142 | opt.artifact_alias) 143 | self.result_artifact, self.result_table, self.val_table, self.weights = None, None, None, None 144 | if self.train_artifact_path is not None: 145 | train_path = Path(self.train_artifact_path) / 'data/images/' 146 | data_dict['train'] = str(train_path) 147 | if self.val_artifact_path is not None: 148 | val_path = Path(self.val_artifact_path) / 'data/images/' 149 | data_dict['val'] = str(val_path) 150 | self.val_table = self.val_artifact.get("val") 151 | self.map_val_table_path() 152 | if self.val_artifact is not None: 153 | self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation") 154 | self.result_table = wandb.Table(["epoch", "id", "prediction", "avg_confidence"]) 155 | if opt.bbox_interval == -1: 156 | self.bbox_interval = opt.bbox_interval = (opt.epochs // 10) if opt.epochs > 10 else 1 157 | return data_dict 158 | 159 | def download_dataset_artifact(self, path, alias): 160 | if isinstance(path, str) and path.startswith(WANDB_ARTIFACT_PREFIX): 161 | dataset_artifact = wandb.use_artifact(remove_prefix(path, WANDB_ARTIFACT_PREFIX) + ":" + alias) 162 | assert dataset_artifact is not None, "'Error: W&B dataset artifact doesn\'t exist'" 163 | datadir = dataset_artifact.download() 164 | return datadir, dataset_artifact 165 | return None, None 166 | 167 | def download_model_artifact(self, opt): 168 | if opt.resume.startswith(WANDB_ARTIFACT_PREFIX): 169 | model_artifact = wandb.use_artifact(remove_prefix(opt.resume, WANDB_ARTIFACT_PREFIX) + ":latest") 170 | assert model_artifact is not None, 'Error: W&B model artifact doesn\'t exist' 171 | modeldir = model_artifact.download() 172 | epochs_trained = model_artifact.metadata.get('epochs_trained') 173 | total_epochs = model_artifact.metadata.get('total_epochs') 174 | assert epochs_trained < total_epochs, 'training to %g epochs is finished, nothing to resume.' 
% ( 175 | total_epochs) 176 | return modeldir, model_artifact 177 | return None, None 178 | 179 | def log_model(self, path, opt, epoch, fitness_score, best_model=False): 180 | model_artifact = wandb.Artifact('run_' + wandb.run.id + '_model', type='model', metadata={ 181 | 'original_url': str(path), 182 | 'epochs_trained': epoch + 1, 183 | 'save period': opt.save_period, 184 | 'project': opt.project, 185 | 'total_epochs': opt.epochs, 186 | 'fitness_score': fitness_score 187 | }) 188 | model_artifact.add_file(str(path / 'last.pt'), name='last.pt') 189 | wandb.log_artifact(model_artifact, 190 | aliases=['latest', 'epoch ' + str(self.current_epoch), 'best' if best_model else '']) 191 | print("Saving model artifact on epoch ", epoch + 1) 192 | 193 | def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config=False): 194 | with open(data_file) as f: 195 | data = yaml.load(f, Loader=yaml.SafeLoader) # data dict 196 | nc, names = (1, ['item']) if single_cls else (int(data['nc']), data['names']) 197 | names = {k: v for k, v in enumerate(names)} # to index dictionary 198 | self.train_artifact = self.create_dataset_table(LoadImagesAndLabels( 199 | data['train']), names, name='train') if data.get('train') else None 200 | self.val_artifact = self.create_dataset_table(LoadImagesAndLabels( 201 | data['val']), names, name='val') if data.get('val') else None 202 | if data.get('train'): 203 | data['train'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'train') 204 | if data.get('val'): 205 | data['val'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'val') 206 | path = data_file if overwrite_config else '_wandb.'.join(data_file.rsplit('.', 1)) # updated data.yaml path 207 | data.pop('download', None) 208 | with open(path, 'w') as f: 209 | yaml.dump(data, f) 210 | 211 | if self.job_type == 'Training': # builds correct artifact pipeline graph 212 | self.wandb_run.use_artifact(self.val_artifact) 213 | self.wandb_run.use_artifact(self.train_artifact) 214 | self.val_artifact.wait() 215 | self.val_table = self.val_artifact.get('val') 216 | self.map_val_table_path() 217 | else: 218 | self.wandb_run.log_artifact(self.train_artifact) 219 | self.wandb_run.log_artifact(self.val_artifact) 220 | return path 221 | 222 | def map_val_table_path(self): 223 | self.val_table_map = {} 224 | print("Mapping dataset") 225 | for i, data in enumerate(tqdm(self.val_table.data)): 226 | self.val_table_map[data[3]] = data[0] 227 | 228 | def create_dataset_table(self, dataset, class_to_id, name='dataset'): 229 | # TODO: Explore multiprocessing to slpit this loop parallely| This is essential for speeding up the the logging 230 | artifact = wandb.Artifact(name=name, type="dataset") 231 | img_files = tqdm([dataset.path]) if isinstance(dataset.path, str) and Path(dataset.path).is_dir() else None 232 | img_files = tqdm(dataset.img_files) if not img_files else img_files 233 | for img_file in img_files: 234 | if Path(img_file).is_dir(): 235 | artifact.add_dir(img_file, name='data/images') 236 | labels_path = 'labels'.join(dataset.path.rsplit('images', 1)) 237 | artifact.add_dir(labels_path, name='data/labels') 238 | else: 239 | artifact.add_file(img_file, name='data/images/' + Path(img_file).name) 240 | label_file = Path(img2label_paths([img_file])[0]) 241 | artifact.add_file(str(label_file), 242 | name='data/labels/' + label_file.name) if label_file.exists() else None 243 | table = wandb.Table(columns=["id", "train_image", "Classes", "name"]) 244 | class_set = wandb.Classes([{'id': id, 'name': name} for id, name in 
class_to_id.items()]) 245 | for si, (img, labels, paths, shapes) in enumerate(tqdm(dataset)): 246 | height, width = shapes[0] 247 | labels[:, 2:] = (xywh2xyxy(labels[:, 2:].view(-1, 4))) * torch.Tensor([width, height, width, height]) 248 | box_data, img_classes = [], {} 249 | for cls, *xyxy in labels[:, 1:].tolist(): 250 | cls = int(cls) 251 | box_data.append({"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]}, 252 | "class_id": cls, 253 | "box_caption": "%s" % (class_to_id[cls]), 254 | "scores": {"acc": 1}, 255 | "domain": "pixel"}) 256 | img_classes[cls] = class_to_id[cls] 257 | boxes = {"ground_truth": {"box_data": box_data, "class_labels": class_to_id}} # inference-space 258 | table.add_data(si, wandb.Image(paths, classes=class_set, boxes=boxes), json.dumps(img_classes), 259 | Path(paths).name) 260 | artifact.add(table, name) 261 | return artifact 262 | 263 | def log_training_progress(self, predn, path, names): 264 | if self.val_table and self.result_table: 265 | class_set = wandb.Classes([{'id': id, 'name': name} for id, name in names.items()]) 266 | box_data = [] 267 | total_conf = 0 268 | for *xyxy, conf, cls in predn.tolist(): 269 | if conf >= 0.25: 270 | box_data.append( 271 | {"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]}, 272 | "class_id": int(cls), 273 | "box_caption": "%s %.3f" % (names[cls], conf), 274 | "scores": {"class_score": conf}, 275 | "domain": "pixel"}) 276 | total_conf = total_conf + conf 277 | boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space 278 | id = self.val_table_map[Path(path).name] 279 | self.result_table.add_data(self.current_epoch, 280 | id, 281 | wandb.Image(self.val_table.data[id][1], boxes=boxes, classes=class_set), 282 | total_conf / max(1, len(box_data)) 283 | ) 284 | 285 | def log(self, log_dict): 286 | if self.wandb_run: 287 | for key, value in log_dict.items(): 288 | self.log_dict[key] = value 289 | 290 | def end_epoch(self, best_result=False): 291 | if self.wandb_run: 292 | wandb.log(self.log_dict) 293 | self.log_dict = {} 294 | if self.result_artifact: 295 | train_results = wandb.JoinedTable(self.val_table, self.result_table, "id") 296 | self.result_artifact.add(train_results, 'result') 297 | wandb.log_artifact(self.result_artifact, aliases=['latest', 'epoch ' + str(self.current_epoch), 298 | ('best' if best_result else '')]) 299 | self.result_table = wandb.Table(["epoch", "id", "prediction", "avg_confidence"]) 300 | self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation") 301 | 302 | def finish_run(self): 303 | if self.wandb_run: 304 | if self.log_dict: 305 | wandb.log(self.log_dict) 306 | wandb.run.finish() 307 | --------------------------------------------------------------------------------
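A minimal sketch of driving the two utility modules above (yolov3_fsnet/utils/metrics.py and yolov3_fsnet/utils/torch_utils.py) directly. The toy arrays, the stand-in model, and the assumption that this is run from inside yolov3_fsnet/ so that the utils package is importable are illustrative only, not taken from the repo's own training or evaluation scripts:

import numpy as np
import torch
import torch.nn as nn

from utils.metrics import ap_per_class                   # yolov3_fsnet/utils/metrics.py
from utils.torch_utils import ModelEMA, select_device    # yolov3_fsnet/utils/torch_utils.py

# ap_per_class on toy detections for a 2-class problem. tp has one row per
# prediction and one column per IoU threshold (n x 10); sorting by confidence,
# the PR curves and the AP integration all happen inside the function.
tp = np.array([[1] * 10, [1] * 10, [0] * 10, [1] * 10, [0] * 10], dtype=float)
conf = np.array([0.9, 0.8, 0.7, 0.6, 0.5])    # objectness score of each prediction
pred_cls = np.array([0, 0, 1, 1, 1])          # predicted class ids
target_cls = np.array([0, 0, 1, 1])           # ground-truth class ids
p, r, ap, f1, classes = ap_per_class(tp, conf, pred_cls, target_cls)
print('mAP@0.5 =', ap[:, 0].mean(), ' mAP@0.5:0.95 =', ap.mean())

# ModelEMA: per its docstring, create the EMA right after the model is built
# and moved to its device, before any (Distributed)DataParallel wrapping,
# then call update() once per optimizer step.
device = select_device('')                    # '' -> first GPU if available, else CPU
model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.ReLU()).to(device)  # stand-in model
ema = ModelEMA(model)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for _ in range(3):                            # dummy training steps
    x = torch.randn(2, 3, 64, 64, device=device)
    loss = model(x).mean()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    ema.update(model)                         # fold the new weights into the EMA copy

# ema.ema now holds the exponentially averaged weights used for eval/checkpointing.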