├── README.md ├── anchors.py ├── assets ├── 1.jpg ├── 2.jpg └── 3.jpg ├── dataloader.py ├── detect.py ├── down.py ├── eval_widerface.py ├── img_tester.py ├── losses.py ├── magic_convert.py ├── mnas.py ├── mobile.py ├── mobile_testing.py ├── model.py ├── network.torch ├── out └── stage_5_68_full_model_epoch_121.pt ├── requirements.txt ├── test_argu.py ├── torchvision_model.py ├── train.py ├── utils.py └── video_detect.py /README.md: -------------------------------------------------------------------------------- 1 | # Retinaface-Pytorch-version 2 | ### It's not the best version of my model, due to confidentiality 3 | Thanks to Alvin Yang (https://github.com/supernotman/RetinaFace_Pytorch) 4 | 5 | This is the branch for 68-landmark detection; the pre-trained model is in ./out 6 | 7 | Working on 96-landmark detection (refer to the other branch) 8 | 9 |

10 |

11 |

12 | The model also predicts the occluded landmarks; they can be hidden if you don't want them to show up. 13 | 14 | 15 | 16 | 17 | Based on RetinaFace 18 | ### Current model 19 | MobileNet V1 + FPN + context module + regressor, 1.6 MB 20 | CPU ~10 FPS, GPU 50 FPS 21 | 22 | 23 | 24 | ### Train (please refer to dataloader.py to change the file location): 25 | python3 train.py -train 26 | This model uses the LS3D-W dataset; alternatively, convert your dataset to the format of demo.pt / demo.jpg (a 68*2 tensor) 27 | 28 | 29 | ### Use a local camera: 30 | python3 video_detect.py (delete all 'cuda()' calls to run locally on a CPU) 31 | 32 | 33 | ### Evaluate the model: 34 | python3 train.py -train False 35 | 36 | ## Todo: 37 | - [ ] Use SBR and BFLD to improve performance 38 | 39 | If you have trained a model with this code, you are welcome to discuss it with me at elvishelvis6@gmail.com 40 | -------------------------------------------------------------------------------- /anchors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | class Anchors(nn.Module): 7 | def __init__(self, pyramid_levels=None, strides=None, sizes=None, ratios=None, scales=None): 8 | super(Anchors, self).__init__() 9 | 10 | if pyramid_levels is None: 11 | # self.pyramid_levels = [2, 3, 4, 5, 6] 12 | self.pyramid_levels = [3, 4, 5] 13 | if strides is None: 14 | self.strides = [2 ** x for x in self.pyramid_levels] 15 | if sizes is None: 16 | # self.sizes = [2 ** (x + 2) for x in self.pyramid_levels] 17 | self.sizes = [2 ** 4.0, 2 ** 6.0, 2 ** 8.0] 18 | if ratios is None: 19 | self.ratios = np.array([1, 1, 1]) 20 | if scales is None: 21 | # self.scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) 22 | self.scales = np.array([2 ** 0, 2 ** (1/2.0) , 2 ** 1.0 ]) 23 | 24 | def forward(self, image): 25 | 26 | image_shape = image.shape[2:] 27 | image_shape = np.array(image_shape) 28 | image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels] 29 | 30 | # compute anchors over all pyramid levels 31 | all_anchors = np.zeros((0, 4)).astype(np.float32) 32 | 33 | for idx, p in enumerate(self.pyramid_levels): 34 | anchors = generate_anchors(base_size=self.sizes[idx], ratios=self.ratios, scales=self.scales) 35 | shifted_anchors = shift(image_shapes[idx], self.strides[idx], anchors) 36 | all_anchors = np.append(all_anchors, shifted_anchors, axis=0) 37 | 38 | all_anchors = np.expand_dims(all_anchors, axis=0) 39 | 40 | return torch.from_numpy(all_anchors.astype(np.float32)).cuda() 41 | 42 | def generate_anchors(base_size=16, ratios=None, scales=None): 43 | """ 44 | Generate anchor (reference) windows by enumerating aspect ratios X 45 | scales w.r.t. a reference window.
46 | """ 47 | 48 | if ratios is None: 49 | ratios = np.array([1, 1, 1]) 50 | 51 | if scales is None: 52 | scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) 53 | 54 | num_anchors = len(scales) 55 | 56 | # initialize output anchors 57 | anchors = np.zeros((num_anchors, 4)) 58 | 59 | # scale base_size 60 | anchors[:, 2:] = base_size * np.tile(scales, (2, 1)).T 61 | 62 | # transform from (x_ctr, y_ctr, w, h) -> (x1, y1, x2, y2) 63 | anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T 64 | anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T 65 | 66 | return anchors 67 | 68 | def shift(shape, stride, anchors): 69 | shift_x = (np.arange(0, shape[1]) + 0.5) * stride 70 | shift_y = (np.arange(0, shape[0]) + 0.5) * stride 71 | 72 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 73 | 74 | shifts = np.vstack(( 75 | shift_x.ravel(), shift_y.ravel(), 76 | shift_x.ravel(), shift_y.ravel() 77 | )).transpose() 78 | 79 | # add A anchors (1, A, 4) to 80 | # cell K shifts (K, 1, 4) to get 81 | # shift anchors (K, A, 4) 82 | # reshape to (K * A, 4) shifted anchors 83 | A = anchors.shape[0] 84 | K = shifts.shape[0] 85 | all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))) 86 | all_anchors = all_anchors.reshape((K * A, 4)) 87 | 88 | return all_anchors -------------------------------------------------------------------------------- /assets/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ElvishElvis/68-Retinaface-Pytorch-version/18471d90c24753324c84aa415adef605f3866031/assets/1.jpg -------------------------------------------------------------------------------- /assets/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ElvishElvis/68-Retinaface-Pytorch-version/18471d90c24753324c84aa415adef605f3866031/assets/2.jpg -------------------------------------------------------------------------------- /assets/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ElvishElvis/68-Retinaface-Pytorch-version/18471d90c24753324c84aa415adef605f3866031/assets/3.jpg -------------------------------------------------------------------------------- /dataloader.py: -------------------------------------------------------------------------------- 1 | import torchvision.transforms as transforms 2 | from torch.utils.data.sampler import Sampler 3 | from torch.utils.data import Dataset 4 | import torch.nn.functional as F 5 | from skimage.util import crop 6 | import skimage.transform 7 | from PIL import Image 8 | import skimage.color 9 | import torch.nn as nn 10 | import numpy as np 11 | import skimage.io 12 | import skimage 13 | import random 14 | import torch 15 | import math 16 | import os 17 | import cv2 18 | from scipy import misc 19 | 20 | class TrainDataset(Dataset): 21 | def __init__(self,txt_path=None,transform=None,flip=False): 22 | self.words = [] 23 | self.transform = transform 24 | self.flip = flip 25 | self.batch_count = 0 26 | self.img_size = 640 27 | 28 | def __len__(self): 29 | # return len(self.name_list) 30 | # return 10 31 | # return 22995 32 | return 1000 33 | # return 10 34 | 35 | def __getitem__(self,index): 36 | img = cv2.imread("/versa/elvishelvis/landmarks56/new_dataset/{}.jpg".format(index)) 37 | try: 38 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 39 | except: 40 | import random 41 | rad=random.randint(1,22995) 42 | return 
self.__getitem__(rad) 43 | 44 | #img = img.astype(np.float32)/255.0 45 | 46 | annotations = np.zeros((0, 4+136)) 47 | annotation = np.zeros((1,140)) 48 | landmark=[] 49 | minx=float('inf') 50 | miny=float('inf') 51 | maxx=0 52 | maxy=0 53 | path="/versa/elvishelvis/landmarks56/new_dataset/{}.pth".format(index) 54 | data=np.array(torch.load(path)) 55 | for da in data: 56 | if(da[0]maxx): 59 | maxx=da[0] 60 | if(da[1]maxy): 63 | maxy=da[1] 64 | landmark.append(da[0]) 65 | landmark.append(da[1]) 66 | # bbox 67 | annotation[0,0] = minx -int((maxx-minx)/10) # x1 68 | annotation[0,1] = miny -int((maxy-miny)/10) # y1 69 | annotation[0,2] = maxx +int((maxx-minx)/10) 70 | annotation[0,3] = maxy +int((maxy-miny)/10) 71 | 72 | for i in range(4,140): 73 | annotation[0,i] = landmark[i-4] 74 | annotations = np.append(annotations,annotation,axis=0) 75 | sample = {'img':torch.tensor(img), 'annot':torch.tensor(annotations)} 76 | if self.transform is not None: 77 | sample = self.transform(sample) 78 | return sample 79 | 80 | 81 | 82 | def collater(data): 83 | batch_size = len(data) 84 | 85 | imgs = [s['img'] for s in data] 86 | annots = [s['annot'] for s in data] 87 | 88 | # batch images 89 | height = imgs[0].shape[0] 90 | width = imgs[0].shape[1] 91 | assert height==width ,'Input width must eqs height' 92 | 93 | input_size = width 94 | batched_imgs = torch.zeros(batch_size, height, width, 3) 95 | 96 | for i in range(batch_size): 97 | img = imgs[i] 98 | batched_imgs[i,:] = img 99 | 100 | # batch annotations 101 | max_num_annots = max(annot.shape[0] for annot in annots) 102 | 103 | if max_num_annots > 0: 104 | if annots[0].shape[1] > 4: 105 | annot_padded = torch.ones((len(annots), max_num_annots, 140)) * -1 106 | for idx, annot in enumerate(annots): 107 | if annot.shape[0] > 0: 108 | annot_padded[idx, :annot.shape[0], :] = annot 109 | else: 110 | annot_padded = torch.ones((len(annots), max_num_annots, 4)) * -1 111 | #print('annot~~~~~~~~~~~~~~~~~~,',annots) 112 | for idx, annot in enumerate(annots): 113 | if annot.shape[0] > 0: 114 | annot_padded[idx, :annot.shape[0], :] = annot 115 | else: 116 | if annots[0].shape[1] > 4: 117 | annot_padded = torch.ones((len(annots), 1, 140)) * -1 118 | else: 119 | annot_padded = torch.ones((len(annots), 1, 4)) * -1 120 | 121 | batched_imgs = batched_imgs.permute(0, 3, 1, 2) 122 | 123 | return {'img': batched_imgs, 'annot': annot_padded} 124 | 125 | 126 | class RandomFlip(object): 127 | def __call__(self, sample, input_size=320, flip_x=0.4): 128 | aaa=np.random.rand() 129 | if aaa < flip_x: 130 | image, annots = sample['img'], sample['annot'] 131 | c,w,h=image.shape 132 | # flip image 133 | image = torch.flip(image,[1]) 134 | 135 | image = image.numpy() 136 | annots = annots.numpy() 137 | # relocate bboxes 138 | for i in range(0,140): 139 | if i%2==0: 140 | annots[0, i] = w - annots[0, i] 141 | annots[0, 0],annots[0, 2]=annots[0, 2],annots[0, 0] 142 | for k in range(4,20): 143 | if(k%2==0): 144 | annots[0, k],annots[0, (40-k)]=annots[0, (40-k)],annots[0, k] 145 | else: 146 | annots[0, k],annots[0, (42-k)]=annots[0, (42-k)],annots[0, k] 147 | for b in range(38,48): 148 | if(b%2==0): 149 | annots[0, b],annots[0, (94-b)]=annots[0, (94-b)],annots[0, b] 150 | else: 151 | annots[0, b],annots[0, (96-b)]=annots[0, (96-b)],annots[0, b] 152 | for a in range(76,84): 153 | if(a%2==0): 154 | annots[0, a],annots[0, (170-a)]=annots[0, (170-a)],annots[0, a] 155 | else: 156 | annots[0, a],annots[0, (172-a)]=annots[0, (172-a)],annots[0, a] 157 | 158 | annots[0, 86],annots[0, 96]=annots[0, 
96],annots[0, 86] 159 | annots[0, 84],annots[0, 98]=annots[0, 98],annots[0, 84] 160 | 161 | annots[0, 66],annots[0, 74]=annots[0, 74],annots[0, 66] 162 | annots[0, 67],annots[0, 75]=annots[0, 75],annots[0, 67] 163 | annots[0, 68],annots[0, 72]=annots[0, 72],annots[0, 68] 164 | annots[0, 69],annots[0, 73]=annots[0, 73],annots[0, 69] 165 | 166 | annots[0, 100],annots[0, 112]=annots[0, 112],annots[0, 100] 167 | annots[0, 102],annots[0, 110]=annots[0, 110],annots[0, 102] 168 | annots[0, 104],annots[0, 108]=annots[0, 108],annots[0, 104] 169 | annots[0, 126],annots[0, 130]=annots[0, 130],annots[0, 126] 170 | annots[0, 138],annots[0, 134]=annots[0, 134],annots[0, 138] 171 | annots[0, 116],annots[0, 120]=annots[0, 120],annots[0, 116] 172 | annots[0, 114],annots[0, 122]=annots[0, 122],annots[0, 114] 173 | annots[0, 124],annots[0, 132]=annots[0, 132],annots[0, 124] 174 | 175 | 176 | 177 | image = torch.from_numpy(image) 178 | annots = torch.from_numpy(annots) 179 | 180 | sample = {'img': image, 'annot': annots} 181 | 182 | return sample 183 | 184 | 185 | class Rotate(object): 186 | def __init__(self,angle=[-45,45],p=0.3): 187 | self.angle=angle 188 | self.p=p 189 | def __call__(self,sample): 190 | if(np.random.rand()maxx): 222 | maxx=box[i] 223 | if(box[i+1]maxy): 226 | maxy=box[i+1] 227 | 228 | box[0] = minx -int((maxx-minx)/10) # x1 229 | box[1] = miny -int((maxy-miny)/10) # y1 230 | box[2] = maxx +int((maxx-minx)/10) 231 | box[3] = maxy +int((maxy-miny)/10) 232 | 233 | return {'img': torch.tensor(img), 'annot': torch.tensor(box[np.newaxis,:])} 234 | return sample 235 | 236 | class RandomErasing(object): 237 | def __init__(self,p=0.3): 238 | self.p=p 239 | def __call__(self, sample): 240 | if(np.random.rand() 4 : 297 | annots = annots * scale 298 | else : 299 | annots[:,:4] = annots[:,:4] * scale 300 | 301 | return {'img': torch.tensor(resized_image), 'annot': annots} 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | class PadToSquare(object): 310 | def __call__(self, sample, input_size=640): 311 | image, annots = sample['img'], sample['annot'] 312 | rows, cols, _ = image.shape 313 | dim_diff = np.abs(rows - cols) 314 | 315 | # relocate bbox annotations 316 | if rows == input_size: 317 | diff = input_size - cols 318 | annots[:,0] = annots[:,0] + diff/2 319 | annots[:,2] = annots[:,2] + diff/2 320 | elif cols == input_size: 321 | diff = input_size - rows 322 | annots[:,1] = annots[:,1] + diff/2 323 | annots[:,3] = annots[:,3] + diff/2 324 | if annots.shape[1] > 4 : 325 | ldm_mask = annots[:,4] > 0 326 | if rows == input_size: 327 | diff = input_size - cols 328 | annots[ldm_mask,4::2] = annots[ldm_mask,4::2] + diff/2 329 | elif cols == input_size: 330 | diff = input_size - rows 331 | annots[ldm_mask,5::2] = annots[ldm_mask,5::2] + diff/2 332 | 333 | # pad image to square 334 | img = image 335 | img = img.permute(2,0,1) 336 | pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2 337 | pad = (0, 0, pad1, pad2) if rows <= cols else (pad1, pad2, 0, 0) 338 | 339 | padded_img = F.pad(img, pad, "constant", value=0) 340 | 341 | # # pad to input size 342 | pad_=input_size-padded_img.shape[1] 343 | num1= random.randint(0,pad_) 344 | num2= random.randint(0,pad_) 345 | 346 | pading = (num1, pad_-num1,num2,pad_-num2) 347 | padded_img = F.pad(padded_img, pading, "constant", value=0) 348 | for i in range(0,140): 349 | if i%2==0: 350 | annots[0,i]+=num1 351 | else: 352 | annots[0,i]+=num2 353 | padded_img = padded_img.permute(1,2,0) 354 | 355 | return {'img': padded_img, 'annot': annots} 356 | 357 | 358 | class 
ValDataset(Dataset): 359 | def __init__(self,txt_path=None,transform=None,flip=False): 360 | self.words = [] 361 | self.transform = transform 362 | self.flip = flip 363 | self.batch_count = 0 364 | self.img_size = 640 365 | 366 | def __len__(self): 367 | # return len(self.name_list) 368 | return 299 369 | # return 50 370 | # return 10 371 | 372 | def __getitem__(self,index): 373 | index+=1 374 | img = cv2.imread("/versa/elvishelvis/landmarks56/300w/{}.jpg".format(index)) 375 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 376 | #img = img.astype(np.float32)/255.0 377 | 378 | annotations = np.zeros((0, 4+136)) 379 | annotation = np.zeros((1,140)) 380 | landmark=[] 381 | minx=float('inf') 382 | miny=float('inf') 383 | maxx=0 384 | maxy=0 385 | label=[] 386 | with open("/versa/elvishelvis/landmarks56/300w/{}.pts".format(index),'r') as f: 387 | f.readline() 388 | f.readline() 389 | f.readline() 390 | while(True): 391 | try: 392 | item=f.readline() 393 | label.append([float(item[0:7]),float(item[8:15])]) 394 | item[2] 395 | except: 396 | break 397 | # label=torch.tensor(label) 398 | for da in label: 399 | if(da[0]maxx): 402 | maxx=da[0] 403 | if(da[1]maxy): 406 | maxy=da[1] 407 | landmark.append(da[0]) 408 | landmark.append(da[1]) 409 | # bbox 410 | annotation[0,0] = minx -int((maxx-minx)/5) # x1 411 | annotation[0,1] = miny -int((maxy-miny)/5) # y1 412 | annotation[0,2] = maxx +int((maxx-minx)/5) 413 | annotation[0,3] = maxy +int((maxy-miny)/5) 414 | if(len(landmark)!=136): 415 | return self.__getitem__(index+1) 416 | for i in range(4,140): 417 | annotation[0,i] = landmark[i-4] 418 | annotations = np.append(annotations,annotation,axis=0) 419 | sample = {'img':torch.tensor(img), 'annot':torch.tensor(annotations)} 420 | if self.transform is not None: 421 | sample = self.transform(sample) 422 | return sample 423 | 424 | 425 | 426 | 427 | class ValDataset_CeleB(Dataset): 428 | def __init__(self,txt_path=None,transform=None,flip=False): 429 | self.words = [] 430 | self.transform = transform 431 | self.flip = flip 432 | self.batch_count = 0 433 | self.img_size = 640 434 | self.name_list=[] 435 | self.bbox = [] 436 | self.landmarks=[] 437 | path1="/versa/elvishelvis/RetinaFace_Pytorch/CelebA/Anno/list_bbox_celeba.txt" 438 | # for the bbox 439 | f = open(path1,'r') 440 | f.readline() 441 | f.readline() 442 | lines = f.readlines() 443 | for line in lines: 444 | self.name_list.append(line[0:10]) 445 | count=0 446 | begin=11 447 | temp=[] 448 | is_first=False 449 | while (count<4): 450 | while(line[begin]==" "): 451 | begin+=1 452 | cur=begin 453 | while(line[cur]!=" " and line[cur]!='\n'): 454 | cur+=1 455 | temp.append(line[begin:cur]) 456 | is_first=True 457 | begin=cur 458 | count+=1 459 | self.bbox.append(temp) 460 | 461 | path2="/versa/elvishelvis/RetinaFace_Pytorch/CelebA/Anno/list_landmarks_celeba.txt" 462 | k = open(path2,'r') 463 | k.readline() 464 | k.readline() 465 | lines = k.readlines() 466 | for line in lines: 467 | count=0 468 | begin=11 469 | temp=[] 470 | is_first=False 471 | while (count<10): 472 | while(line[begin]==" "): 473 | begin+=1 474 | cur=begin 475 | while(line[cur]!=" " and line[cur]!='\n'): 476 | cur+=1 477 | temp.append(line[begin:cur]) 478 | is_first=True 479 | begin=cur 480 | count+=1 481 | self.landmarks.append(temp) 482 | 483 | def __len__(self): 484 | # return len(self.name_list) 485 | return 20 486 | # return 30 487 | 488 | def __getitem__(self,index): 489 | img = skimage.io.imread("/versa/elvishelvis/RetinaFace_Pytorch/\ 490 | 
CelebA/Img/img_celeba.7z/img_celeba/"+str(self.name_list[int(index)])) 491 | #img = img.astype(np.float32)/255.0 492 | 493 | box_ = self.bbox[int(index)] 494 | land_=self.landmarks[int(index)] 495 | annotations = np.zeros((0, 14)) 496 | if len(box_) == 0: 497 | return annotations 498 | annotation = np.zeros((1,14)) 499 | # bbox 500 | annotation[0,0] = box_[0] # x1 501 | annotation[0,1] = box_[1] # y1 502 | annotation[0,2] = str(int(box_[0]) + int(box_[2])) # x2 503 | annotation[0,3] = str(int(box_[1]) + int(box_[3])) # y2 504 | 505 | # landmarks 506 | annotation[0,4] = land_[0] # l0_x 507 | annotation[0,5] = land_[1] # l0_y 508 | annotation[0,6] = land_[2] # l1_x 509 | annotation[0,7] = land_[3] # l1_y 510 | annotation[0,8] = land_[4] # l2_x 511 | annotation[0,9] = land_[5] # l2_y 512 | annotation[0,10] = land_[6] # l3_x 513 | annotation[0,11] = land_[7] # l3_y 514 | annotation[0,12] = land_[8] # l4_x 515 | annotation[0,13] = land_[9] # l4_y 516 | 517 | annotations = np.append(annotations,annotation,axis=0) 518 | sample = {'img':img, 'annot':torch.tensor(annotations)} 519 | if self.transform is not None: 520 | sample = self.transform(sample) 521 | return sample 522 | 523 | ''' 524 | class ValDataset(Dataset): 525 | def __init__(self,txt_path,transform=None,flip=False): 526 | self.imgs_path = [] 527 | self.words = [] 528 | self.transform = transform 529 | self.flip = flip 530 | self.batch_count = 0 531 | self.img_size = 320 532 | 533 | f = open(txt_path,'r') 534 | lines = f.readlines() 535 | isFirst = True 536 | bbox = [] 537 | for line in lines: 538 | line = line.rstrip() 539 | if line.startswith('#'): 540 | if isFirst is True: 541 | isFirst = False 542 | else: 543 | labels_copy = labels.copy() 544 | self.words.append(labels_copy) 545 | labels.clear() 546 | path = line[2:] 547 | path = txt_path.replace('label.txt','images/') + path 548 | self.imgs_path.append(path) 549 | else: 550 | line = line.split(' ') 551 | label = [float(x) for x in line] 552 | labels.append(label) 553 | 554 | self.words.append(labels) 555 | 556 | def __getitem__(self,index): 557 | img = skimage.io.imread(self.imgs_path[index]) 558 | 559 | labels = self.words[index] 560 | annotations = np.zeros((0, 4)) 561 | if len(labels) == 0: 562 | return annotations 563 | for idx, label in enumerate(labels): 564 | annotation = np.zeros((1,4)) 565 | # bbox 566 | annotation[0,0] = label[0] # x1 567 | annotation[0,1] = label[1] # y1 568 | annotation[0,2] = label[2] # x2 569 | annotation[0,3] = label[3] # y2 570 | 571 | annotations = np.append(annotations,annotation,axis=0) 572 | 573 | sample = {'img':img, 'annot':annotations} 574 | if self.transform is not None: 575 | sample = self.transform(sample) 576 | 577 | return sample 578 | 579 | def __len__(self): 580 | return len(self.imgs_path) 581 | 582 | def _load_annotations(self,index): 583 | labels = self.words[index] 584 | annotations = np.zeros((0,4)) 585 | 586 | if len(labels) == 0: 587 | return annotations 588 | 589 | for idx, label in enumerate(labels): 590 | annotation = np.zeros((1,4)) 591 | annotation[0,0] = label[0] # x1 592 | annotation[0,1] = label[1] # y1 593 | annotation[0,2] = label[0] + label[2] # x2 594 | annotation[0,3] = label[1] + label[3] # y2 595 | 596 | annotations = np.append(annotations, annotation, axis=0) 597 | 598 | return annotations 599 | ''' 600 | 601 | 602 | 603 | ''' 604 | class RandomCroper(object): 605 | def __call__(self, sample, input_size=640): 606 | image, annots = sample['img'], sample['annot'] 607 | rows, cols, _ = image.shape 608 | 609 | 
smallest_side = min(rows, cols) 610 | longest_side = max(rows,cols) 611 | scale = random.uniform(0.3,1) 612 | short_size = int(smallest_side * scale) 613 | start_short_upscale = smallest_side - short_size 614 | start_long_upscale = longest_side - short_size 615 | crop_short = random.randint(0,start_short_upscale) 616 | crop_long = random.randint(0,start_long_upscale) 617 | crop_y = 0 618 | crop_x = 0 619 | if smallest_side == rows: 620 | crop_y = crop_short 621 | crop_x = crop_long 622 | else: 623 | crop_x = crop_short 624 | crop_y = crop_long 625 | # crop 626 | cropped_img = image[crop_y:crop_y + short_size,crop_x:crop_x + short_size] 627 | # resize 628 | new_image = skimage.transform.resize(cropped_img, (input_size, input_size)) 629 | 630 | # why normalized from 255 to 1 after skimage.transform????????? 631 | new_image = new_image * 255 632 | 633 | # relocate bbox 634 | annots[:,0] -= crop_x 635 | annots[:,1] -= crop_y 636 | annots[:,2] -= crop_x 637 | annots[:,3] -= crop_y 638 | 639 | # relocate landmarks56 640 | if annots.shape[1] > 4: 641 | # l_mask = annots[:,4]!=-1 642 | l_mask = annots[:,4] > 0 643 | annots[l_mask,4] -= crop_x 644 | annots[l_mask,5] -= crop_y 645 | annots[l_mask,6] -= crop_x 646 | annots[l_mask,7] -= crop_y 647 | annots[l_mask,8] -= crop_x 648 | annots[l_mask,9] -= crop_y 649 | annots[l_mask,10] -= crop_x 650 | annots[l_mask,11] -= crop_y 651 | annots[l_mask,12] -= crop_x 652 | annots[l_mask,13] -= crop_y 653 | 654 | # scale annotations 655 | resize_scale = input_size/short_size 656 | annots[:,:4] = annots[:,:4] * resize_scale 657 | if annots.shape[1] > 4: 658 | annots[l_mask,4:] = annots[l_mask,4:] * resize_scale 659 | 660 | # remove faces center not in image afer crop 661 | center_x = (annots[:,0] + annots[:,2]) / 2 662 | center_y = (annots[:,1] + annots[:,3]) / 2 663 | 664 | mask_x = (center_x[:,]>0)&(center_x[:,]0)&(center_y[:,] 4: 674 | annots[l_mask,4:] = annots[l_mask,4:].clip(0, input_size) 675 | 676 | annots = annots[mask] 677 | 678 | return {'img': torch.from_numpy(new_image), 'annot': torch.from_numpy(annots)} 679 | ''' -------------------------------------------------------------------------------- /detect.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import numpy as np 6 | import skimage 7 | from skimage import io 8 | from PIL import Image 9 | import cv2 10 | import torchvision 11 | import eval_widerface 12 | import torchvision_model 13 | import model 14 | import os 15 | 16 | def pad_to_square(img, pad_value): 17 | _, h, w = img.shape 18 | dim_diff = np.abs(h - w) 19 | # (upper / left) padding and (lower / right) padding 20 | pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2 21 | # Determine padding 22 | pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0) 23 | # Add padding 24 | img = F.pad(img, pad, "constant", value=pad_value) 25 | 26 | return img, pad 27 | 28 | def resize(image, size): 29 | image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0) 30 | return image 31 | 32 | def get_args(): 33 | parser = argparse.ArgumentParser(description="Detect program for retinaface.") 34 | parser.add_argument('--image_path', type=str, default='test.jpg', help='Path for image to detect') 35 | parser.add_argument('--model_path', type=str, help='Path for model') 36 | parser.add_argument('--save_path', type=str, default='./out', help='Path for result image') 37 | parser.add_argument('--depth', 
help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50) 38 | parser.add_argument('--scale', type=float, default=1.0, help='Image resize scale', ) 39 | args = parser.parse_args() 40 | 41 | return args 42 | 43 | def main(): 44 | args = get_args() 45 | # Create torchvision model 46 | return_layers = {'layer2':1,'layer3':2,'layer4':3} 47 | RetinaFace = torchvision_model.create_retinaface(return_layers) 48 | 49 | # Load trained model 50 | retina_dict = RetinaFace.state_dict() 51 | pre_state_dict = torch.load(args.model_path) 52 | pretrained_dict = {k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict} 53 | RetinaFace.load_state_dict(pretrained_dict) 54 | 55 | RetinaFace = RetinaFace.cuda() 56 | RetinaFace.eval() 57 | 58 | # Read image 59 | img = skimage.io.imread(args.image_path) 60 | img = torch.from_numpy(img) 61 | img = img.permute(2,0,1) 62 | 63 | if not args.scale == 1.0: 64 | size1 = int(img.shape[1]/args.scale) 65 | size2 = int(img.shape[2]/args.scale) 66 | img = resize(img.float(),(size1,size2)) 67 | 68 | input_img = img.unsqueeze(0).float().cuda() 69 | picked_boxes, picked_landmarks = eval_widerface.get_detections(input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3) 70 | 71 | # np_img = resized_img.cpu().permute(1,2,0).numpy() 72 | np_img = img.cpu().permute(1,2,0).numpy() 73 | np_img.astype(int) 74 | img = cv2.cvtColor(np_img.astype(np.uint8),cv2.COLOR_BGR2RGB) 75 | 76 | for j, boxes in enumerate(picked_boxes): 77 | if boxes is not None: 78 | for box,landmark in zip(boxes,picked_landmarks[j]): 79 | cv2.rectangle(img,(box[0],box[1]),(box[2],box[3]),(0,0,255),thickness=2) 80 | cv2.circle(img,(landmark[0],landmark[1]),radius=1,color=(0,0,255),thickness=2) 81 | cv2.circle(img,(landmark[2],landmark[3]),radius=1,color=(0,255,0),thickness=2) 82 | cv2.circle(img,(landmark[4],landmark[5]),radius=1,color=(255,0,0),thickness=2) 83 | cv2.circle(img,(landmark[6],landmark[7]),radius=1,color=(0,255,255),thickness=2) 84 | cv2.circle(img,(landmark[8],landmark[9]),radius=1,color=(255,255,0),thickness=2) 85 | 86 | image_name = args.image_path.split('/')[-1] 87 | save_path = os.path.join(args.save_path,image_name) 88 | cv2.imwrite(save_path, img) 89 | cv2.imshow('RetinaFace-Pytorch',img) 90 | cv2.waitKey() 91 | 92 | if __name__=='__main__': 93 | main() 94 | -------------------------------------------------------------------------------- /down.py: -------------------------------------------------------------------------------- 1 | from requests import get # to make GET request 2 | 3 | 4 | def download(url, file_name): 5 | # open in binary mode 6 | with open(file_name, "wb") as file: 7 | print("runinng!!!!!") 8 | # get request 9 | response = get(url) 10 | # write to file 11 | print("get {}".format(file_name)) 12 | 13 | file.write(response.content) 14 | 15 | download("https://www.adrianbulat.com/downloads/FaceAlignment/LS3D-W-balanced-20-03-2017.zip ",'sample.zip') 16 | download('https://uniofnottm-my.sharepoint.com/personal/adrian_bulat_nottingham_ac_uk/_layouts/15/download.aspx?SourceUrl=%2Fpersonal%2Fadrian%5Fbulat%5Fnottingham%5Fac%5Fuk%2FDocuments%2FUoN%20Box%20Migration%2FPublic%2FLS3D%2DW%2FLS3D%2DW%2Etar%2Egz','all.zip') 17 | 18 | 19 | -------------------------------------------------------------------------------- /eval_widerface.py: -------------------------------------------------------------------------------- 1 | import utils 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import os 6 | from tqdm import tqdm 7 | import 
torchvision.ops as ops 8 | import cv2 9 | import time 10 | def get_detections(img_batch, model,score_threshold=0.5, iou_threshold=0.5): 11 | start=time.time() 12 | model.eval() 13 | model.cuda() 14 | img_batch.cuda() 15 | with torch.no_grad(): 16 | #[1,16800,2] 17 | classifications, bboxes, landmarks = model(img_batch) 18 | batch_size = classifications.shape[0] 19 | picked_boxes = [] 20 | picked_landmarks = [] 21 | 22 | for i in range(batch_size): 23 | #[16800,2] 24 | classification = torch.exp(classifications[i,:,:]) 25 | bbox = bboxes[i,:,:] 26 | landmark = landmarks[i,:,:] 27 | 28 | # choose positive and scores > score_threshold 29 | scores, argmax = torch.max(classification, dim=1) 30 | argmax_indice = argmax==0 31 | scores_indice = scores > score_threshold 32 | positive_indices = argmax_indice & scores_indice 33 | 34 | scores = scores[positive_indices] 35 | 36 | if scores.shape[0] == 0: 37 | picked_boxes.append(None) 38 | picked_landmarks.append(None) 39 | continue 40 | 41 | bbox = bbox[positive_indices] 42 | landmark = landmark[positive_indices] 43 | keep = ops.boxes.nms(bbox, scores, iou_threshold) 44 | keep_boxes = bbox[keep] 45 | keep_landmarks = landmark[keep] 46 | picked_boxes.append(keep_boxes) 47 | picked_landmarks.append(keep_landmarks) 48 | # print(time.time()-start) 49 | return picked_boxes, picked_landmarks 50 | 51 | def compute_overlap(a,b): 52 | area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]) 53 | 54 | iw = np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], 1), b[:, 0]) 55 | ih = np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], 1), b[:, 1]) 56 | 57 | iw = np.maximum(iw, 0) 58 | ih = np.maximum(ih, 0) 59 | 60 | ua = np.expand_dims((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), axis=1) + area - iw * ih 61 | 62 | ua = np.maximum(ua, np.finfo(float).eps) 63 | 64 | intersection = iw * ih 65 | 66 | # (N, K) ndarray of overlap between boxes and query_boxes 67 | return torch.from_numpy(intersection / ua) 68 | 69 | 70 | def evaluate(val_data,retinaFace,threshold=0.5): 71 | recall = 0. 72 | precision = 0. 73 | landmark_loss=0 74 | miss=0 75 | #for i, data in tqdm(enumerate(val_data)): 76 | resssss=[] 77 | count=0 78 | for data in tqdm(iter(val_data)): 79 | img_batch = data['img'].cuda() 80 | annots = data['annot'].cuda() 81 | 82 | 83 | picked_boxes,picked_landmarks = get_detections(img_batch,retinaFace) 84 | recall_iter = 0. 85 | precision_iter = 0. 86 | for j, boxes in enumerate(picked_boxes): 87 | annot_boxes = annots[j] 88 | annot_boxes = annot_boxes[annot_boxes[:,0]!=-1] 89 | annot_boxes=annot_boxes[:,:4] 90 | annot_land=annot_boxes[:,4:] 91 | if boxes is None and annot_boxes.shape[0] == 0: 92 | continue 93 | elif boxes is None and annot_boxes.shape[0] != 0: 94 | recall_iter += 0. 95 | precision_iter += 1. 96 | continue 97 | elif boxes is not None and annot_boxes.shape[0] == 0: 98 | recall_iter += 1. 99 | precision_iter += 0. 
100 | continue 101 | overlap = ops.boxes.box_iou(annot_boxes, boxes) 102 | 103 | # compute recall 104 | max_overlap, _ = torch.max(overlap,dim=1) 105 | mask = max_overlap > threshold 106 | detected_num = mask.sum().item() 107 | recall_iter += detected_num/annot_boxes.shape[0] 108 | 109 | # compute precision 110 | max_overlap, _ = torch.max(overlap,dim=0) 111 | mask = max_overlap > threshold 112 | true_positives = mask.sum().item() 113 | precision_iter += true_positives/boxes.shape[0] 114 | if (picked_landmarks==None): 115 | continue 116 | for i, land in enumerate(picked_landmarks): 117 | 118 | annot_land = annots[i] 119 | annot_land=annot_land[:,4:] 120 | # img_batch=np.array(img_batch[0].cpu()).transpose(1,2,0) 121 | try: 122 | 123 | land=land[0,:] 124 | landmark_loss=torch.mean(torch.sqrt(torch.sum((annot_land - land)**2))) 125 | offset=abs(int(annot_land[0][4])-int(annot_land[0][68])) 126 | # landmark_loss=nn.SmoothL1Loss()(annot_land,land) 127 | landmark_loss=float(landmark_loss/offset) 128 | if landmark_loss<1: 129 | resssss.append(landmark_loss) 130 | # annot_land=np.array(annot_land[0].cpu()) 131 | # land=np.array(land.cpu()) 132 | # for kkk in range(0,136,2): 133 | # img_batch=cv2.circle(img_batch,(annot_land[kkk],annot_land[kkk+1]),radius=1,color=(0,0,255),thickness=2) 134 | # img_batch=cv2.circle(img_batch,(land[kkk],land[kkk+1]),radius=1,color=(0,255,0),thickness=2) 135 | # cv2.imwrite('{}.jpg'.format(count),img_batch) 136 | # count+=1 137 | # landmark_loss+=torch.mean((annot_land-land)**2).item() 138 | except: 139 | # print('miss') 140 | miss+=1 141 | 142 | recall += recall_iter/len(picked_boxes) 143 | precision += precision_iter/len(picked_boxes) 144 | print(sorted(resssss)) 145 | return recall/len(val_data),precision/len(val_data), np.mean(resssss) ,miss 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | -------------------------------------------------------------------------------- /img_tester.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import numpy as np 6 | import skimage 7 | from skimage import io 8 | from PIL import Image 9 | import cv2 10 | import torchvision 11 | import eval_widerface 12 | import torchvision_model 13 | import model 14 | import os 15 | import skimage 16 | from dataloader import ValDataset, Resizer, PadToSquare,ValDataset_CeleB, TrainDataset 17 | from torchvision import datasets, models, transforms 18 | def pad_to_square(img, pad_value): 19 | _, h, w = img.shape 20 | dim_diff = np.abs(h - w) 21 | # (upper / left) padding and (lower / right) padding 22 | pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2 23 | # Determine padding 24 | pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0) 25 | # Add padding 26 | img = F.pad(img, pad, "constant", value=pad_value) 27 | 28 | return img, pad 29 | 30 | def resize(image, size): 31 | image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0) 32 | return image 33 | 34 | def get_args(): 35 | parser = argparse.ArgumentParser(description="Detect program for retinaface.") 36 | parser.add_argument('--image_path', type=str, default='WechatIMG10.jpeg', help='Path for image to detect') 37 | parser.add_argument('--model_path', type=str, help='Path for model',default="/versa/elvishelvis/RetinaYang/out/68_full_model_epoch_10.pt") 38 | parser.add_argument('--save_path', type=str, default='./out', help='Path for result image') 39 | 
parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50) 40 | args = parser.parse_args() 41 | 42 | return args 43 | 44 | def main(nummmmmm): 45 | args = get_args() 46 | 47 | # Create the model 48 | # if args.depth == 18: 49 | # RetinaFace = model.resnet18(num_classes=2, pretrained=True) 50 | # elif args.depth == 34: 51 | # RetinaFace = model.resnet34(num_classes=2, pretrained=True) 52 | # elif args.depth == 50: 53 | # RetinaFace = model.resnet50(num_classes=2, pretrained=True) 54 | # elif args.depth == 101: 55 | # RetinaFace = model.resnet101(num_classes=2, pretrained=True) 56 | # elif args.depth == 152: 57 | # RetinaFace = model.resnet152(num_classes=2, pretrained=True) 58 | # else: 59 | # raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152') 60 | 61 | # Create torchvision model 62 | 63 | return_layers = {'layer2':1,'layer3':2,'layer4':3} 64 | RetinaFace = torchvision_model.create_retinaface(return_layers) 65 | device= torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 66 | 67 | # Load trained model 68 | retina_dict = RetinaFace.state_dict() 69 | pre_state_dict = torch.load('/versa/elvishelvis/RetinaYang/out/stage_5_68_full_model_epoch_51.pt') 70 | pretrained_dict = {k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict} 71 | RetinaFace.load_state_dict(pretrained_dict) 72 | RetinaFace.to(device) 73 | 74 | import time 75 | 76 | dataset_val = TrainDataset('./widerface/train/label.txt',transform=transforms.Compose([Resizer(640),PadToSquare()])) 77 | # dataset_val = ValDataset('./widerface/train/label.txt') 78 | for qq in range(100,150): 79 | img=dataset_val[qq]['img'] 80 | # img=cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 81 | # img=skimage.io.imread("/versa/elvishelvis/RetinaFace_Pytorch/CelebA/Img/img_celeba.7z/img_celeba/118{}.jpg".format(str(qq))) 82 | img = img.permute(2,0,1) 83 | resized_img = img.float() 84 | input_img = resized_img.unsqueeze(0).to(device) 85 | start=time.time() 86 | picked_boxes, picked_landmarks = eval_widerface.get_detections(input_img, RetinaFace, score_threshold=0.9, iou_threshold=0.2) 87 | print(time.time()-start) 88 | # print(picked_boxes) 89 | np_img = resized_img.cpu().permute(1,2,0).numpy() 90 | np_img.astype(int) 91 | img = cv2.cvtColor(np_img.astype(np.uint8),cv2.COLOR_BGR2RGB) 92 | 93 | for j, boxes in enumerate(picked_boxes): 94 | if boxes is not None: 95 | for box,landmark in zip(boxes,picked_landmarks[j]): 96 | cv2.rectangle(img,(box[0],box[1]),(box[2],box[3]),(0,0,255),thickness=2) 97 | for i in range(0,136,2): 98 | cv2.circle(img,(landmark[i],landmark[i+1]),radius=1,color=(0,0,255),thickness=2) 99 | 100 | image_name = args.image_path.split('/')[-1] 101 | save_path = os.path.join(args.save_path,image_name) 102 | cv2.imwrite('./RetinaFace-Pytorch{}.jpg'.format(qq),cv2.resize(img,(640,640))) 103 | if __name__=='__main__': 104 | main(10) 105 | 106 | 107 | -------------------------------------------------------------------------------- /losses.py: -------------------------------------------------------------------------------- 1 | import numpy as numpy 2 | import torch.nn as nn 3 | import torch 4 | import math 5 | # torch.log and math.log is e based 6 | class WingLoss(nn.Module): 7 | def __init__(self, omega=3, epsilon=2): 8 | super(WingLoss, self).__init__() 9 | self.omega = omega 10 | self.epsilon = epsilon 11 | 12 | def forward(self, pred, target): 13 | y = target 14 | y_hat = pred 15 | delta_y = (y - y_hat).abs() 16 | delta_y1 = delta_y[delta_y < 
self.omega] 17 | delta_y2 = delta_y[delta_y >= self.omega] 18 | loss1 = self.omega * torch.log(1 + delta_y1 / self.epsilon) 19 | C = self.omega - self.omega * math.log(1 + self.omega / self.epsilon) 20 | loss2 = delta_y2 - C 21 | return (loss1.sum() + loss2.sum()) / (len(loss1) + len(loss2)) 22 | class AdaptiveWingLoss(nn.Module): 23 | def __init__(self, omega=14, theta=0.5, epsilon=1, alpha=2.1): 24 | super(AdaptiveWingLoss, self).__init__() 25 | self.omega = omega 26 | self.theta = theta 27 | self.epsilon = epsilon 28 | self.alpha = alpha 29 | 30 | def forward(self, pred, target): 31 | ''' 32 | :param pred: BxNxHxH 33 | :param target: BxNxHxH 34 | :return: 35 | ''' 36 | 37 | y = target 38 | y_hat = pred 39 | delta_y = (y - y_hat).abs() 40 | delta_y1 = delta_y[delta_y < self.theta] 41 | delta_y2 = delta_y[delta_y >= self.theta] 42 | y1 = y[delta_y < self.theta] 43 | y2 = y[delta_y >= self.theta] 44 | loss1 = self.omega * torch.log(1 + torch.pow(delta_y1 / self.omega, self.alpha - y1)) 45 | A = self.omega * (1 / (1 + torch.pow(self.theta / self.epsilon, self.alpha - y2))) * (self.alpha - y2) * ( 46 | torch.pow(self.theta / self.epsilon, self.alpha - y2 - 1)) * (1 / self.epsilon) 47 | C = self.theta * A - self.omega * torch.log(1 + torch.pow(self.theta / self.epsilon, self.alpha - y2)) 48 | loss2 = A * delta_y2 - C 49 | return (loss1.sum() + loss2.sum()) / (len(loss1) + len(loss2)) 50 | def calc_iou(a, b): 51 | area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]) 52 | 53 | iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 0], 1), b[:, 0]) 54 | ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 1], 1), b[:, 1]) 55 | 56 | iw = torch.clamp(iw, min=0) 57 | ih = torch.clamp(ih, min=0) 58 | 59 | ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area - iw * ih 60 | 61 | ua = torch.clamp(ua, min=1e-8) 62 | 63 | intersection = iw * ih 64 | 65 | IoU = intersection / ua 66 | 67 | return IoU 68 | 69 | def filt_IoU(a, b, l): 70 | area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]) 71 | 72 | iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 0], 1), b[:, 0]) 73 | ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 1], 1), b[:, 1]) 74 | 75 | iw = torch.clamp(iw, min=0) 76 | ih = torch.clamp(ih, min=0) 77 | 78 | ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area - iw * ih 79 | 80 | ua = torch.clamp(ua, min=1e-8) 81 | 82 | intersection = iw * ih 83 | 84 | IoU = intersection / ua 85 | 86 | ldm_sum = l.sum(dim=1) 87 | mask = ldm_sum<0 88 | ldm_mask = torch.ones_like(mask) 89 | ldm_mask[mask] = -1 90 | filted_IoU = IoU * ldm_mask.float() 91 | 92 | return IoU, filted_IoU 93 | 94 | class LossLayer(nn.Module): 95 | def __init__(self): 96 | super(LossLayer, self).__init__() 97 | self.smoothl1 = nn.SmoothL1Loss() 98 | 99 | def forward(self,classifications,bbox_regressions,ldm_regressions,anchors,annotations): 100 | batch_size = classifications.shape[0] 101 | classification_losses = [] 102 | bbox_regression_losses = [] 103 | ldm_regression_losses = [] 104 | 105 | anchor = anchors[0, :, :] 106 | anchor_widths = anchor[:, 2] - anchor[:, 0] 107 | anchor_heights = anchor[:, 3] - anchor[:, 1] 108 | anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths 109 | anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights 110 | 111 | #temp 112 | positive_indices_list = [] 113 | 114 | for j in range(batch_size): 115 | classification = 
classifications[j,:,:] 116 | bbox_regression = bbox_regressions[j,:,:] 117 | ldm_regression = ldm_regressions[j,:,:] 118 | 119 | annotation = annotations[j,:,:] 120 | # annotation = annotation[annotation[:,0] != -1] 121 | annotation = annotation[annotation[:,0] > 0] 122 | bbox_annotation = annotation[:,:4] 123 | ldm_annotation = annotation[:,4:] 124 | 125 | if bbox_annotation.shape[0] == 0: 126 | bbox_regression_losses.append(torch.tensor(0.,requires_grad=True).cuda()) 127 | classification_losses.append(torch.tensor(0.,requires_grad=True).cuda()) 128 | ldm_regression_losses.append(torch.tensor(0.,requires_grad=True).cuda()) 129 | 130 | # temp 131 | positive_indices_list.append([]) 132 | 133 | continue 134 | 135 | IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4]) 136 | #IoU, filt_iou = filt_IoU(anchors[0, :, :], bbox_annotation, ldm_annotation) 137 | 138 | IoU_max, IoU_argmax = torch.max(IoU, dim=1) 139 | 140 | targets = torch.ones(classification.shape)*-1 141 | targets = targets.cuda() 142 | 143 | # those whose iou<0.3 have no object 144 | negative_indices = torch.lt(IoU_max, 0.3) 145 | targets[negative_indices, :] = 0 146 | targets[negative_indices, 1] = 1 147 | 148 | # those whose iou>0.5 have object 149 | positive_indices = torch.ge(IoU_max, 0.7) 150 | 151 | #temp 152 | positive_indices_list.append(positive_indices) 153 | 154 | num_positive_anchors = positive_indices.sum() 155 | 156 | #keep positive and negative ratios with 1:3 157 | keep_negative_anchors = num_positive_anchors * 3 158 | 159 | bbox_assigned_annotations = bbox_annotation[IoU_argmax, :] 160 | ldm_assigned_annotations = ldm_annotation[IoU_argmax, :] 161 | 162 | targets[positive_indices, :] = 0 163 | targets[positive_indices, 0] = 1 164 | 165 | # ignore targets with no landmarks 166 | # f_IoU_max ,f_IoU_argmax = torch.max(filt_iou, dim=1) 167 | # ldm_positive_indices = torch.ge(f_IoU_max, 0.5) 168 | 169 | ldm_sum = ldm_assigned_annotations.sum(dim=1) 170 | ge0_mask = ldm_sum > 0 171 | ldm_positive_indices = ge0_mask & positive_indices 172 | 173 | # OHEM 174 | negative_losses = classification[negative_indices,1] * -1 175 | sorted_losses, _ = torch.sort(negative_losses, descending=True) 176 | if sorted_losses.numel() > keep_negative_anchors: 177 | sorted_losses = sorted_losses[:keep_negative_anchors] 178 | positive_losses = classification[positive_indices,0] * -1 179 | 180 | focal_loss = False 181 | # focal loss 182 | if focal_loss: 183 | alpha = 0.25 184 | gamma = 2.0 185 | alpha_factor = torch.ones(targets.shape).cuda() * alpha 186 | 187 | alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor) 188 | focal_weight = torch.where(torch.eq(targets, 1.), 1. 
- classification, classification) 189 | focal_weight = alpha_factor * torch.pow(focal_weight, gamma) 190 | 191 | bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification)) 192 | 193 | cls_loss = focal_weight * bce 194 | 195 | cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda()) 196 | 197 | classification_losses.append(cls_loss.sum()/torch.clamp(num_positive_anchors.float(), min=1.0)) 198 | else: 199 | if positive_indices.sum() > 0: 200 | classification_losses.append(positive_losses.mean() + sorted_losses.mean()) 201 | else: 202 | classification_losses.append(torch.tensor(0.,requires_grad=True).cuda()) 203 | 204 | 205 | # compute bboxes loss 206 | if positive_indices.sum() > 0: 207 | # bbox 208 | bbox_assigned_annotations = bbox_assigned_annotations[positive_indices, :] 209 | 210 | anchor_widths_pi = anchor_widths[positive_indices] 211 | anchor_heights_pi = anchor_heights[positive_indices] 212 | anchor_ctr_x_pi = anchor_ctr_x[positive_indices] 213 | anchor_ctr_y_pi = anchor_ctr_y[positive_indices] 214 | 215 | gt_widths = bbox_assigned_annotations[:, 2] - bbox_assigned_annotations[:, 0] 216 | gt_heights = bbox_assigned_annotations[:, 3] - bbox_assigned_annotations[:, 1] 217 | gt_ctr_x = bbox_assigned_annotations[:, 0] + 0.5 * gt_widths 218 | gt_ctr_y = bbox_assigned_annotations[:, 1] + 0.5 * gt_heights 219 | 220 | targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / (anchor_widths_pi + 1e-14) 221 | targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / (anchor_heights_pi + 1e-14) 222 | targets_dw = torch.log(gt_widths / anchor_widths_pi) 223 | targets_dh = torch.log(gt_heights / anchor_heights_pi) 224 | 225 | bbox_targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh)) 226 | bbox_targets = bbox_targets.t() 227 | 228 | # Rescale 229 | bbox_targets = bbox_targets/torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda() 230 | 231 | # smooth L1 232 | # box losses 233 | bbox_regression_loss = self.smoothl1(bbox_targets,bbox_regression[positive_indices, :]) 234 | bbox_regression_losses.append(bbox_regression_loss) 235 | else: 236 | bbox_regression_losses.append(torch.tensor(0.,requires_grad=True).cuda()) 237 | 238 | # compute landmarks loss 239 | if ldm_positive_indices.sum() > 0 : 240 | ldm_assigned_annotations = ldm_assigned_annotations[ldm_positive_indices, :] 241 | 242 | anchor_widths_l = anchor_widths[ldm_positive_indices] 243 | anchor_heights_l = anchor_heights[ldm_positive_indices] 244 | anchor_ctr_x_l = anchor_ctr_x[ldm_positive_indices] 245 | anchor_ctr_y_l = anchor_ctr_y[ldm_positive_indices] 246 | ldm_targets=[] 247 | for i in range(0,136): 248 | if i %2==0: 249 | candidate=(ldm_assigned_annotations[:,i] - anchor_ctr_x_l) / (anchor_widths_l + 1e-14) 250 | else: 251 | candidate=(ldm_assigned_annotations[:,i] - anchor_ctr_y_l) / (anchor_heights_l + 1e-14) 252 | ldm_targets.append(candidate) 253 | ldm_targets=torch.stack((ldm_targets)) 254 | ldm_targets = ldm_targets.t() 255 | 256 | # Rescale 257 | scale = torch.ones(1,136)*0.1 258 | ldm_targets = ldm_targets/scale.cuda() 259 | # increase the weight for lips 260 | s1 = torch.ones(1,99) 261 | s2 = torch.ones(1,37)*3 262 | s=torch.cat([s1,s2],dim=-1).cuda() 263 | aaaaaaa=WingLoss() 264 | ldm_regression_loss = self.smoothl1(ldm_targets*s, ldm_regression[ldm_positive_indices, :]*s) 265 | ldm_regression_losses.append(ldm_regression_loss) 266 | else: 267 | ldm_regression_losses.append(torch.tensor(0.,requires_grad=True).cuda()) 268 | 269 | return torch.stack(classification_losses), 
torch.stack(bbox_regression_losses),torch.stack(ldm_regression_losses) 270 | -------------------------------------------------------------------------------- /magic_convert.py: -------------------------------------------------------------------------------- 1 | # # import numpy as np 2 | # # import torch 3 | # # import numpy as np 4 | # # from collections import OrderedDict 5 | # # b=torch.load('./out/mobile_model_epoch_1.pt') 6 | # # # a=torch.load('network.torch') 7 | # # # key_a=a.keys() 8 | # # # key_b=b.keys() 9 | # # # result=OrderedDict() 10 | # # # for ka in key_a: 11 | # # # for kb in key_b: 12 | # # # if(ka in kb): 13 | # # # result[kb]=a[ka] 14 | # # # print(len(result.keys())) 15 | 16 | # # # torch.save(result,"pretrained.torch") 17 | 18 | # # c=torch.load("pretrained.torch") 19 | # # print(b.keys()) 20 | 21 | 22 | # import torch 23 | # import torch.nn as nn 24 | # import torch.nn.functional as F 25 | # import math 26 | # import datetime 27 | # from collections import OrderedDict 28 | 29 | # def Conv_3x3(in_channels, out_channels, stride): 30 | # return nn.Sequential( 31 | # nn.Conv2d(in_channels, out_channels, 3, stride, 1, bias=False), 32 | # nn.BatchNorm2d(out_channels), 33 | # nn.ReLU6() 34 | # ) 35 | 36 | # def Conv_1x1(in_channels, out_channels, stride): 37 | # return nn.Sequential( 38 | # nn.Conv2d(in_channels, out_channels, 1, stride, 0, bias=False), 39 | # nn.BatchNorm2d(out_channels), 40 | # nn.ReLU6() 41 | # ) 42 | 43 | # def SepConv_3x3(in_channels, out_channels, stride): 44 | # return nn.Sequential( 45 | # nn.Conv2d(in_channels, in_channels, 3, stride, 1, groups=in_channels, bias=False), 46 | # nn.BatchNorm2d(in_channels), 47 | # nn.ReLU6(), 48 | 49 | # nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False), 50 | # nn.BatchNorm2d(out_channels) 51 | # ) 52 | 53 | # class MBConv3_3x3(nn.Module): 54 | # def __init__(self, in_channels, out_channels, stride): 55 | # super(MBConv3_3x3, self).__init__() 56 | # mid_channels = int(3 * in_channels) 57 | 58 | # self.block = nn.Sequential( 59 | # nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False), 60 | # nn.BatchNorm2d(mid_channels), 61 | # nn.ReLU6(), 62 | 63 | # nn.Conv2d(mid_channels, mid_channels, 3, stride, 1, groups=mid_channels, bias=False), 64 | # nn.BatchNorm2d(mid_channels), 65 | # nn.ReLU6(), 66 | 67 | # nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False), 68 | # nn.BatchNorm2d(out_channels) 69 | # ) 70 | 71 | # self.use_skip_connect = (1 == stride and in_channels == out_channels) 72 | 73 | # def forward(self, x): 74 | # if self.use_skip_connect: 75 | # return self.block(x) + x 76 | # else: 77 | # return self.block(x) 78 | 79 | # class MBConv3_5x5(nn.Module): 80 | # def __init__(self, in_channels, out_channels, stride): 81 | # super(MBConv3_5x5, self).__init__() 82 | # mid_channels = int(3 * in_channels) 83 | 84 | # self.block = nn.Sequential( 85 | # nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False), 86 | # nn.BatchNorm2d(mid_channels), 87 | # nn.ReLU6(), 88 | 89 | # nn.Conv2d(mid_channels, mid_channels, 5, stride, 2, groups=mid_channels, bias=False), 90 | # nn.BatchNorm2d(mid_channels), 91 | # nn.ReLU6(), 92 | 93 | # nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False), 94 | # nn.BatchNorm2d(out_channels) 95 | # ) 96 | 97 | # self.use_skip_connect = (1 == stride and in_channels == out_channels) 98 | 99 | # def forward(self, x): 100 | # if self.use_skip_connect: 101 | # return self.block(x) + x 102 | # else: 103 | # return self.block(x) 104 | 105 | # class MBConv6_3x3(nn.Module): 106 | 
# def __init__(self, in_channels, out_channels, stride): 107 | # super(MBConv6_3x3, self).__init__() 108 | # mid_channels = int(6 * in_channels) 109 | 110 | # self.block = nn.Sequential( 111 | # nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False), 112 | # nn.BatchNorm2d(mid_channels), 113 | # nn.ReLU6(), 114 | 115 | # nn.Conv2d(mid_channels, mid_channels, 3, stride, 1, groups=mid_channels, bias=False), 116 | # nn.BatchNorm2d(mid_channels), 117 | # nn.ReLU6(), 118 | 119 | # nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False), 120 | # nn.BatchNorm2d(out_channels) 121 | # ) 122 | 123 | # self.use_skip_connect = (1 == stride and in_channels == out_channels) 124 | 125 | # def forward(self, x): 126 | # if self.use_skip_connect: 127 | # return self.block(x) + x 128 | # else: 129 | # return self.block(x) 130 | 131 | # class MBConv6_5x5(nn.Module): 132 | # def __init__(self, in_channels, out_channels, stride): 133 | # super(MBConv6_5x5, self).__init__() 134 | # mid_channels = int(6 * in_channels) 135 | 136 | # self.block = nn.Sequential( 137 | # nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False), 138 | # nn.BatchNorm2d(mid_channels), 139 | # nn.ReLU6(), 140 | 141 | # nn.Conv2d(mid_channels, mid_channels, 5, stride, 2, groups=mid_channels, bias=False), 142 | # nn.BatchNorm2d(mid_channels), 143 | # nn.ReLU6(), 144 | 145 | # nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False), 146 | # nn.BatchNorm2d(out_channels) 147 | # ) 148 | 149 | # self.use_skip_connect = (1 == stride and in_channels == out_channels) 150 | 151 | # def forward(self, x): 152 | # if self.use_skip_connect: 153 | # return self.block(x) + x 154 | # else: 155 | # return self.block(x) 156 | 157 | # class MnasNet(nn.Module): 158 | # def __init__(self, num_classes=1000, width_mult=1.): 159 | # super(MnasNet, self).__init__() 160 | 161 | # self.out_channels = int(1280 * width_mult) 162 | 163 | # self.conv1 = Conv_3x3(3, int(32 * width_mult), 2) 164 | # self.conv2 = SepConv_3x3(int(32 * width_mult), int(16 * width_mult), 1) 165 | 166 | # self.feature = nn.Sequential( 167 | # self._make_layer(MBConv3_3x3, 3, int(16 * width_mult), int(24 * width_mult), 2), 168 | # self._make_layer(MBConv3_5x5, 3, int(24 * width_mult), int(64 * width_mult), 2) 169 | # ) 170 | # self.feature1=nn.Sequential( 171 | # self._make_layer(MBConv6_5x5, 3, int(64 * width_mult), int(80 * width_mult), 2), 172 | 173 | # ) 174 | # self.feature2=nn.Sequential( 175 | # self._make_layer(MBConv6_3x3, 2, int(80 * width_mult), int(128 * width_mult), 1) 176 | # ) 177 | # self.feature3=nn.Sequential( 178 | # self._make_layer(MBConv6_5x5, 4, int(128 * width_mult), int(192 * width_mult), 2) 179 | # ) 180 | # self.feature4=nn.Sequential( 181 | # self._make_layer(MBConv6_3x3, 1, int(192 * width_mult), int(256 * width_mult), 1) 182 | # ) 183 | 184 | # # self.conv3 = Conv_1x1(int(256 * width_mult), int(1280 * width_mult), 1) 185 | # # self.gap = nn.AdaptiveAvgPool2d(1) 186 | # # self.classifier = nn.Linear(int(1280 * width_mult), num_classes) 187 | 188 | # self._initialize_weights() 189 | 190 | # def _make_layer(self, block, blocks, in_channels, out_channels, stride=1): 191 | # strides = [stride] + [1] * (blocks - 1) 192 | # layers = [] 193 | # for _stride in strides: 194 | # layers.append(block(in_channels, out_channels, _stride)) 195 | # in_channels = out_channels 196 | 197 | # return nn.Sequential(*layers) 198 | 199 | # def _initialize_weights(self): 200 | # for m in self.modules(): 201 | # if isinstance(m, nn.Conv2d): 202 | # n = m.kernel_size[0] * 
m.kernel_size[1] * m.out_channels 203 | # m.weight.data.normal_(0, math.sqrt(2. / n)) 204 | # if m.bias is not None: 205 | # m.bias.data.zero_() 206 | # elif isinstance(m, nn.BatchNorm2d): 207 | # m.weight.data.fill_(1) 208 | # m.bias.data.zero_() 209 | # elif isinstance(m, nn.Linear): 210 | # n = m.weight.size(1) 211 | # m.weight.data.normal_(0, 0.01) 212 | # m.bias.data.zero_() 213 | 214 | # def forward(self, x): 215 | # result=OrderedDict() 216 | # x = self.conv2(self.conv1(x)) 217 | # x1 = self.feature(x) 218 | # result[1]=x1 219 | # x=self.feature1(x1) 220 | # x2=self.feature2(x) 221 | # result[2]=x2 222 | # x=self.feature3(x2) 223 | # x3=self.feature4(x) 224 | # result[3]=x3 225 | # return result 226 | 227 | # if __name__ == '__main__': 228 | # net = MnasNet() 229 | # x = torch.randn(1,3,320,320) 230 | # net(x) 231 | # # for i in range(15): 232 | # # time1 = datetime.datetime.now() 233 | # # y = net(x) 234 | # # print('Time Cost: ', (datetime.datetime.now() - time1).microseconds) 235 | # #y = net(x) 236 | # #print(y) 237 | 238 | import torch 239 | from torch.utils.serialization import load_lua 240 | for i in range(7201,22999): 241 | try: 242 | x = load_lua('/versa/elvishelvis/landmarks56/data55/{}.t7'.format(i)) 243 | torch.save(x,'/versa/elvishelvis/landmarks56/data55/{}.pth'.format(i)) 244 | except: 245 | print(i) 246 | -------------------------------------------------------------------------------- /mnas.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import math 6 | 7 | # class list_(object): 8 | # def __init__(self,li=None): 9 | # if(li!=None): 10 | # self.li=[] 11 | # else: 12 | # self.li=li 13 | # def ret(self): 14 | # return self.li 15 | 16 | result_list=[] 17 | last_fm_list=[] 18 | def Conv_3x3(in_channels, out_channels, stride): 19 | return nn.Sequential( 20 | nn.Conv2d(in_channels, out_channels, 3, stride, 1, bias=False), 21 | nn.BatchNorm2d(out_channels), 22 | nn.ReLU6() 23 | ) 24 | 25 | def Conv_1x1(in_channels, out_channels, stride): 26 | return nn.Sequential( 27 | nn.Conv2d(in_channels, out_channels, 1, stride, 0, bias=False), 28 | nn.BatchNorm2d(out_channels), 29 | nn.ReLU6() 30 | ) 31 | 32 | def SepConv_3x3(in_channels, out_channels, stride): 33 | return nn.Sequential( 34 | nn.Conv2d(in_channels, in_channels, 3, stride, 1, groups=in_channels, bias=False), 35 | nn.BatchNorm2d(in_channels), 36 | nn.ReLU6(), 37 | 38 | nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False), 39 | nn.BatchNorm2d(out_channels) 40 | ) 41 | 42 | class MBConv3_3x3(nn.Module): 43 | def __init__(self, in_channels, out_channels, stride): 44 | super(MBConv3_3x3, self).__init__() 45 | mid_channels = int(3 * in_channels) 46 | 47 | self.block = nn.Sequential( 48 | nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False), 49 | nn.BatchNorm2d(mid_channels), 50 | nn.ReLU6(), 51 | 52 | nn.Conv2d(mid_channels, mid_channels, 3, stride, 1, groups=mid_channels, bias=False), 53 | nn.BatchNorm2d(mid_channels), 54 | nn.ReLU6(), 55 | 56 | nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False), 57 | nn.BatchNorm2d(out_channels) 58 | ) 59 | 60 | self.use_skip_connect = (1 == stride and in_channels == out_channels) 61 | 62 | def forward(self, x): 63 | if self.use_skip_connect: 64 | return self.block(x) + x 65 | else: 66 | return self.block(x) 67 | 68 | class MBConv3_5x5(nn.Module): 69 | def __init__(self, in_channels, out_channels, stride): 70 | 
super(MBConv3_5x5, self).__init__() 71 | mid_channels = int(3 * in_channels) 72 | 73 | self.block = nn.Sequential( 74 | nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False), 75 | nn.BatchNorm2d(mid_channels), 76 | nn.ReLU6(), 77 | 78 | nn.Conv2d(mid_channels, mid_channels, 5, stride, 2, groups=mid_channels, bias=False), 79 | nn.BatchNorm2d(mid_channels), 80 | nn.ReLU6(), 81 | 82 | nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False), 83 | nn.BatchNorm2d(out_channels) 84 | ) 85 | 86 | self.use_skip_connect = (1 == stride and in_channels == out_channels) 87 | 88 | def forward(self, x): 89 | if self.use_skip_connect: 90 | return self.block(x) + x 91 | else: 92 | return self.block(x) 93 | 94 | class MBConv6_3x3(nn.Module): 95 | def __init__(self, in_channels, out_channels, stride): 96 | super(MBConv6_3x3, self).__init__() 97 | mid_channels = int(6 * in_channels) 98 | 99 | self.block = nn.Sequential( 100 | nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False), 101 | nn.BatchNorm2d(mid_channels), 102 | nn.ReLU6(), 103 | 104 | nn.Conv2d(mid_channels, mid_channels, 3, stride, 1, groups=mid_channels, bias=False), 105 | nn.BatchNorm2d(mid_channels), 106 | nn.ReLU6(), 107 | 108 | nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False), 109 | nn.BatchNorm2d(out_channels) 110 | ) 111 | 112 | self.use_skip_connect = (1 == stride and in_channels == out_channels) 113 | 114 | def forward(self, x): 115 | if self.use_skip_connect: 116 | return self.block(x) + x 117 | else: 118 | return self.block(x) 119 | 120 | class MBConv6_5x5(nn.Module): 121 | def __init__(self, in_channels, out_channels, stride): 122 | super(MBConv6_5x5, self).__init__() 123 | mid_channels = int(6 * in_channels/1.125) 124 | 125 | self.block1 = nn.Sequential( 126 | nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False), 127 | nn.BatchNorm2d(mid_channels), 128 | nn.ReLU6(), 129 | ) 130 | self.block2 = nn.Sequential( 131 | nn.Conv2d(mid_channels, mid_channels, 5, stride, 2, groups=mid_channels, bias=False), 132 | nn.BatchNorm2d(mid_channels), 133 | nn.ReLU6() 134 | ) 135 | self.block3 = nn.Sequential( 136 | nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False), 137 | nn.BatchNorm2d(out_channels) 138 | ) 139 | 140 | self.use_skip_connect = (1 == stride and in_channels == out_channels) 141 | 142 | def forward(self, x): 143 | if self.use_skip_connect: 144 | x1=self.block1(x) 145 | x1=self.block2(x1) 146 | last_fm_list.append(x1) 147 | x1=self.block3(x1) 148 | 149 | return x1 + x 150 | else: 151 | x1=self.block1(x) 152 | result_list.append(x1) 153 | x1=self.block2(x1) 154 | 155 | 156 | x1=self.block3(x1) 157 | 158 | return x1 159 | class MnasNet(nn.Module): 160 | def __init__(self, num_classes=1000, width_mult=1.): 161 | super(MnasNet, self).__init__() 162 | 163 | self.out_channels = int(1280 * width_mult) 164 | 165 | self.conv1 = Conv_3x3(3, int(32 * width_mult), 2) 166 | self.conv2 = SepConv_3x3(int(32 * width_mult), int(16 * width_mult), 1) 167 | 168 | self.feature = nn.Sequential( 169 | self._make_layer(MBConv3_3x3, 3, int(16 * width_mult), int(24 * width_mult), 2), 170 | self._make_layer(MBConv3_5x5, 3, int(24 * width_mult), int(48 * width_mult), 2), 171 | self._make_layer(MBConv6_5x5, 3, int(48 * width_mult), int(80 * width_mult), 2), 172 | self._make_layer(MBConv6_3x3, 2, int(80 * width_mult), int(96 * width_mult), 1), 173 | self._make_layer(MBConv6_5x5, 4, int(96 * width_mult), int(192 * width_mult), 2) 174 | # self._make_layer(MBConv6_3x3, 1, int(192 * width_mult), int(320 * width_mult), 1) 175 | ) 176 | 177 | 
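# Note: MnasNet.forward does not hand the pyramid features over directly; they are
# collected through the module-level result_list / last_fm_list globals that
# MBConv6_5x5.forward appends to, so every extra forward pass keeps growing those
# lists (clear them between calls if the network is run more than once).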
178 | self._initialize_weights() 179 | 180 | def _make_layer(self, block, blocks, in_channels, out_channels, stride=1): 181 | strides = [stride] + [1] * (blocks - 1) 182 | layers = [] 183 | for _stride in strides: 184 | layers.append(block(in_channels, out_channels, _stride)) 185 | in_channels = out_channels 186 | 187 | return nn.Sequential(*layers) 188 | 189 | def _initialize_weights(self): 190 | for m in self.modules(): 191 | if isinstance(m, nn.Conv2d): 192 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 193 | m.weight.data.normal_(0, math.sqrt(2. / n)) 194 | if m.bias is not None: 195 | m.bias.data.zero_() 196 | elif isinstance(m, nn.BatchNorm2d): 197 | m.weight.data.fill_(1) 198 | m.bias.data.zero_() 199 | elif isinstance(m, nn.Linear): 200 | n = m.weight.size(1) 201 | m.weight.data.normal_(0, 0.01) 202 | m.bias.data.zero_() 203 | 204 | def forward(self, x): 205 | # global result_list 206 | x = self.conv2(self.conv1(x)) 207 | x = self.feature(x) 208 | result=OrderedDict() 209 | result_list.append(last_fm_list[-1]) 210 | result[0]=result_list[0] 211 | result[1]=result_list[1] 212 | result[2]=result_list[2] 213 | return result 214 | 215 | if __name__ == '__main__': 216 | net = MnasNet(width_mult=0.25) 217 | x = torch.randn(1,3,320,320) 218 | net(x) 219 | 220 | -------------------------------------------------------------------------------- /mobile.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import torch.nn as nn 3 | import torch 4 | class mobileV1(nn.Module): 5 | def __init__(self): 6 | super(mobileV1, self).__init__() 7 | 8 | self.mobilenet0_conv0 = nn.Sequential( 9 | nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3, stride=2, padding=1, bias=False), 10 | nn.BatchNorm2d(num_features=8, momentum=0.9), 11 | nn.ReLU(inplace=True)) 12 | 13 | self.mobilenet0_conv1 = nn.Sequential( 14 | nn.Conv2d(in_channels=8, out_channels=8, kernel_size=3, stride=1, padding=1, groups=8, bias=False), 15 | nn.BatchNorm2d(num_features=8, momentum=0.9), 16 | nn.ReLU(inplace=True)) 17 | 18 | self.mobilenet0_conv2 = nn.Sequential( 19 | nn.Conv2d(in_channels=8, out_channels=16, kernel_size=1, stride=1, padding=0, bias=False), 20 | nn.BatchNorm2d(num_features=16, momentum=0.9), 21 | nn.ReLU(inplace=True)) 22 | 23 | self.mobilenet0_conv3 = nn.Sequential( 24 | nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, stride=2, padding=1, groups=16, bias=False), 25 | nn.BatchNorm2d(num_features=16, momentum=0.9), 26 | nn.ReLU(inplace=True)) 27 | 28 | self.mobilenet0_conv4 = nn.Sequential( 29 | nn.Conv2d(in_channels=16, out_channels=32, kernel_size=1, stride=1, padding=0, bias=False), 30 | nn.BatchNorm2d(num_features=32, momentum=0.9), 31 | nn.ReLU(inplace=True)) 32 | 33 | self.mobilenet0_conv5 = nn.Sequential( 34 | nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1, groups=32, bias=False), 35 | nn.BatchNorm2d(num_features=32, momentum=0.9), 36 | nn.ReLU(inplace=True)) 37 | 38 | self.mobilenet0_conv6 = nn.Sequential( 39 | nn.Conv2d(in_channels=32, out_channels=32, kernel_size=1, stride=1, padding=0, bias=False), 40 | nn.BatchNorm2d(num_features=32, momentum=0.9), 41 | nn.ReLU(inplace=True)) 42 | 43 | self.mobilenet0_conv7 = nn.Sequential( 44 | nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=2, padding=1, groups=32, bias=False), 45 | nn.BatchNorm2d(num_features=32, momentum=0.9), 46 | nn.ReLU(inplace=True)) 47 | 48 | self.mobilenet0_conv8 = nn.Sequential( 49 | 
nn.Conv2d(in_channels=32, out_channels=64, kernel_size=1, stride=1, padding=0, bias=False), 50 | nn.BatchNorm2d(num_features=64, momentum=0.9), 51 | nn.ReLU(inplace=True)) 52 | 53 | self.mobilenet0_conv9 = nn.Sequential( 54 | nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, groups=64, bias=False), 55 | nn.BatchNorm2d(num_features=64, momentum=0.9), 56 | nn.ReLU(inplace=True)) 57 | 58 | self.mobilenet0_conv10 = nn.Sequential( 59 | nn.Conv2d(in_channels=64, out_channels=64, kernel_size=1, stride=1, padding=0, bias=False), 60 | nn.BatchNorm2d(num_features=64, momentum=0.9), 61 | nn.ReLU(inplace=True)) 62 | 63 | self.mobilenet0_conv11 = nn.Sequential( 64 | nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=2, padding=1, groups=64, bias=False), 65 | nn.BatchNorm2d(num_features=64, momentum=0.9), 66 | nn.ReLU(inplace=True)) 67 | 68 | self.mobilenet0_conv12 = nn.Sequential( 69 | nn.Conv2d(in_channels=64, out_channels=128, kernel_size=1, stride=1, padding=0, bias=False), 70 | nn.BatchNorm2d(num_features=128, momentum=0.9), 71 | nn.ReLU(inplace=True)) 72 | 73 | self.mobilenet0_conv13 = nn.Sequential( 74 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1, groups=128, bias=False), 75 | nn.BatchNorm2d(num_features=128, momentum=0.9), 76 | nn.ReLU(inplace=True)) 77 | 78 | self.mobilenet0_conv14 = nn.Sequential( 79 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=1, stride=1, padding=0, bias=False), 80 | nn.BatchNorm2d(num_features=128, momentum=0.9), 81 | nn.ReLU(inplace=True)) 82 | 83 | self.mobilenet0_conv15 = nn.Sequential( 84 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1, groups=128, bias=False), 85 | nn.BatchNorm2d(num_features=128), 86 | nn.ReLU(inplace=True)) 87 | 88 | self.mobilenet0_conv16 = nn.Sequential( 89 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=1, stride=1, padding=0, bias=False), 90 | nn.BatchNorm2d(num_features=128, momentum=0.9), 91 | nn.ReLU(inplace=True)) 92 | 93 | self.mobilenet0_conv17 = nn.Sequential( 94 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1, groups=128, bias=False), 95 | nn.BatchNorm2d(num_features=128, momentum=0.9), 96 | nn.ReLU(inplace=True)) 97 | 98 | self.mobilenet0_conv18 = nn.Sequential( 99 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=1, stride=1, padding=0, bias=False), 100 | nn.BatchNorm2d(num_features=128, momentum=0.9), 101 | nn.ReLU(inplace=True)) 102 | 103 | self.mobilenet0_conv19 = nn.Sequential( 104 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1, groups=128, bias=False), 105 | nn.BatchNorm2d(num_features=128, momentum=0.9), 106 | nn.ReLU(inplace=True)) 107 | 108 | self.mobilenet0_conv20 = nn.Sequential( 109 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=1, stride=1, padding=0, bias=False), 110 | nn.BatchNorm2d(num_features=128, momentum=0.9), 111 | nn.ReLU(inplace=True)) 112 | 113 | self.mobilenet0_conv21 = nn.Sequential( 114 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1, groups=128, bias=False), 115 | nn.BatchNorm2d(num_features=128, momentum=0.9), 116 | nn.ReLU(inplace=True)) 117 | 118 | self.mobilenet0_conv22 = nn.Sequential( 119 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=1, stride=1, padding=0, bias=False), 120 | nn.BatchNorm2d(num_features=128, momentum=0.9), 121 | nn.ReLU(inplace=True)) 122 | 123 | self.mobilenet0_conv23 = nn.Sequential( 124 | 
nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=2, padding=1, groups=128, bias=False), 125 | nn.BatchNorm2d(num_features=128, momentum=0.9), 126 | nn.ReLU(inplace=True)) 127 | 128 | self.mobilenet0_conv24 = nn.Sequential( 129 | nn.Conv2d(in_channels=128, out_channels=256, kernel_size=1, stride=1, padding=0, bias=False), 130 | nn.BatchNorm2d(num_features=256, momentum=0.9), 131 | nn.ReLU(inplace=True)) 132 | 133 | self.mobilenet0_conv25 = nn.Sequential( 134 | nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1, groups=256, bias=False), 135 | nn.BatchNorm2d(num_features=256, momentum=0.9), 136 | nn.ReLU(inplace=True)) 137 | 138 | self.mobilenet0_conv26 = nn.Sequential( 139 | nn.Conv2d(in_channels=256, out_channels=256, kernel_size=1, stride=1, padding=0, bias=False), 140 | nn.BatchNorm2d(num_features=256, momentum=0.9), 141 | nn.ReLU(inplace=True)) 142 | def forward(self, x): 143 | result_=OrderedDict() 144 | batchsize = x.shape[0] 145 | # k1=F.interpolate(k,(512,512),mode='nearest') 146 | x = self.mobilenet0_conv0(x) 147 | x = self.mobilenet0_conv1(x) 148 | x = self.mobilenet0_conv2(x) 149 | x = self.mobilenet0_conv3(x) 150 | x = self.mobilenet0_conv4(x) 151 | x = self.mobilenet0_conv5(x) 152 | x = self.mobilenet0_conv6(x) 153 | x = self.mobilenet0_conv7(x) 154 | x = self.mobilenet0_conv8(x) 155 | x = self.mobilenet0_conv9(x) 156 | x10 = self.mobilenet0_conv10(x) 157 | x = self.mobilenet0_conv11(x10) 158 | x = self.mobilenet0_conv12(x) 159 | x = self.mobilenet0_conv13(x) 160 | x = self.mobilenet0_conv14(x) 161 | x = self.mobilenet0_conv15(x) 162 | x = self.mobilenet0_conv16(x) 163 | x = self.mobilenet0_conv17(x) 164 | x = self.mobilenet0_conv18(x) 165 | x = self.mobilenet0_conv19(x) 166 | x = self.mobilenet0_conv20(x) 167 | x = self.mobilenet0_conv21(x) 168 | x22 = self.mobilenet0_conv22(x) 169 | x = self.mobilenet0_conv23(x22) 170 | x = self.mobilenet0_conv24(x) 171 | x = self.mobilenet0_conv25(x) 172 | x26 = self.mobilenet0_conv26(x) 173 | result_[1]=x10 174 | result_[2]=x22 175 | result_[3]=x26 176 | return result_ 177 | if __name__ == "__main__": 178 | from thop import profile 179 | net = mobileV1() 180 | from thop import profile 181 | 182 | from thop import clever_format 183 | # x = torch.randn(1,3,320,320) 184 | input = torch.randn(1, 3, 224, 224) 185 | flops, params = profile(net, inputs=(input, )) 186 | flops, params = clever_format([flops, params], "%.3f") 187 | print(params) 188 | print(flops) 189 | -------------------------------------------------------------------------------- /mobile_testing.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import torch.nn as nn 3 | import torch 4 | class mobileV1(nn.Module): 5 | def __init__(self): 6 | super(mobileV1, self).__init__() 7 | 8 | self.mmm = nn.Sequential( 9 | nn.Conv2d(in_channels=3, out_channels=32*4, kernel_size=7, stride=4, padding=2, bias=False), 10 | nn.BatchNorm2d(num_features=32*4, momentum=0.9), 11 | nn.ReLU(inplace=True)) 12 | 13 | 14 | self.mmm1 = nn.Sequential( 15 | nn.Conv2d(in_channels=3, out_channels=3, kernel_size=7, stride=4, padding=2, bias=False), 16 | nn.BatchNorm2d(num_features=3, momentum=0.9), 17 | nn.ReLU(inplace=True)) 18 | self.mmm2 = nn.Sequential( 19 | nn.Conv2d(in_channels=3, out_channels=32*4, kernel_size=1, stride=1, padding=0, bias=False), 20 | nn.BatchNorm2d(num_features=32*4, momentum=0.9), 21 | nn.ReLU(inplace=True)) 22 | 23 | self.mobilenet0_conv0 = nn.Sequential( 24 | 
nn.Conv2d(in_channels=3, out_channels=8*4, kernel_size=3, stride=2, padding=1, bias=False), 25 | nn.BatchNorm2d(num_features=8*4, momentum=0.9), 26 | nn.ReLU(inplace=True)) 27 | 28 | self.mobilenet0_conv1 = nn.Sequential( 29 | nn.Conv2d(in_channels=8*4, out_channels=8*4, kernel_size=3, stride=1, padding=1, groups=8*4, bias=False), 30 | nn.BatchNorm2d(num_features=8*4, momentum=0.9), 31 | nn.ReLU(inplace=True)) 32 | 33 | self.mobilenet0_conv2 = nn.Sequential( 34 | nn.Conv2d(in_channels=8*4, out_channels=16*4, kernel_size=1, stride=1, padding=0, bias=False), 35 | nn.BatchNorm2d(num_features=16*4, momentum=0.9), 36 | nn.ReLU(inplace=True)) 37 | 38 | self.mobilenet0_conv3 = nn.Sequential( 39 | nn.Conv2d(in_channels=16*4, out_channels=16*4, kernel_size=3, stride=2, padding=1, groups=16*4, bias=False), 40 | nn.BatchNorm2d(num_features=16*4, momentum=0.9), 41 | nn.ReLU(inplace=True)) 42 | 43 | self.mobilenet0_conv4 = nn.Sequential( 44 | nn.Conv2d(in_channels=16*4, out_channels=32*4, kernel_size=1, stride=1, padding=0, bias=False), 45 | nn.BatchNorm2d(num_features=32*4, momentum=0.9), 46 | nn.ReLU(inplace=True)) 47 | 48 | self.mobilenet0_conv5 = nn.Sequential( 49 | nn.Conv2d(in_channels=32*4, out_channels=32*4, kernel_size=3, stride=1, padding=1, groups=32*4, bias=False), 50 | nn.BatchNorm2d(num_features=32*4, momentum=0.9), 51 | nn.ReLU(inplace=True)) 52 | 53 | self.mobilenet0_conv6 = nn.Sequential( 54 | nn.Conv2d(in_channels=32*4, out_channels=32*4, kernel_size=1, stride=1, padding=0, bias=False), 55 | nn.BatchNorm2d(num_features=32*4, momentum=0.9), 56 | nn.ReLU(inplace=True)) 57 | 58 | self.mobilenet0_conv7 = nn.Sequential( 59 | nn.Conv2d(in_channels=32*4, out_channels=32*4, kernel_size=3, stride=2, padding=1, groups=32*4, bias=False), 60 | nn.BatchNorm2d(num_features=32*4, momentum=0.9), 61 | nn.ReLU(inplace=True)) 62 | 63 | self.mobilenet0_conv8 = nn.Sequential( 64 | nn.Conv2d(in_channels=32*4, out_channels=64*4, kernel_size=1, stride=1, padding=0, bias=False), 65 | nn.BatchNorm2d(num_features=64*4, momentum=0.9), 66 | nn.ReLU(inplace=True)) 67 | 68 | self.mobilenet0_conv9 = nn.Sequential( 69 | nn.Conv2d(in_channels=64*4, out_channels=64*4, kernel_size=3, stride=1, padding=1, groups=64*4, bias=False), 70 | nn.BatchNorm2d(num_features=64*4, momentum=0.9), 71 | nn.ReLU(inplace=True)) 72 | 73 | self.mobilenet0_conv10 = nn.Sequential( 74 | nn.Conv2d(in_channels=64*4, out_channels=64*4, kernel_size=1, stride=1, padding=0, bias=False), 75 | nn.BatchNorm2d(num_features=64*4, momentum=0.9), 76 | nn.ReLU(inplace=True)) 77 | 78 | self.mobilenet0_conv11 = nn.Sequential( 79 | nn.Conv2d(in_channels=64*4, out_channels=64*4, kernel_size=3, stride=2, padding=1, groups=64*4, bias=False), 80 | nn.BatchNorm2d(num_features=64*4, momentum=0.9), 81 | nn.ReLU(inplace=True)) 82 | 83 | self.mobilenet0_conv12 = nn.Sequential( 84 | nn.Conv2d(in_channels=64*4, out_channels=128*4, kernel_size=1, stride=1, padding=0, bias=False), 85 | nn.BatchNorm2d(num_features=128*4, momentum=0.9), 86 | nn.ReLU(inplace=True)) 87 | 88 | self.mobilenet0_conv13 = nn.Sequential( 89 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=3, stride=1, padding=1, groups=128*4, bias=False), 90 | nn.BatchNorm2d(num_features=128*4, momentum=0.9), 91 | nn.ReLU(inplace=True)) 92 | 93 | self.mobilenet0_conv14 = nn.Sequential( 94 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=1, stride=1, padding=0, bias=False), 95 | nn.BatchNorm2d(num_features=128*4, momentum=0.9), 96 | nn.ReLU(inplace=True)) 97 | 98 | self.mobilenet0_conv15 
= nn.Sequential( 99 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=3, stride=1, padding=1, groups=128*4, bias=False), 100 | nn.BatchNorm2d(num_features=128*4), 101 | nn.ReLU(inplace=True)) 102 | 103 | self.mobilenet0_conv16 = nn.Sequential( 104 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=1, stride=1, padding=0, bias=False), 105 | nn.BatchNorm2d(num_features=128*4, momentum=0.9), 106 | nn.ReLU(inplace=True)) 107 | 108 | self.mobilenet0_conv17 = nn.Sequential( 109 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=3, stride=1, padding=1, groups=128*4, bias=False), 110 | nn.BatchNorm2d(num_features=128*4, momentum=0.9), 111 | nn.ReLU(inplace=True)) 112 | 113 | self.mobilenet0_conv18 = nn.Sequential( 114 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=1, stride=1, padding=0, bias=False), 115 | nn.BatchNorm2d(num_features=128*4, momentum=0.9), 116 | nn.ReLU(inplace=True)) 117 | 118 | self.mobilenet0_conv19 = nn.Sequential( 119 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=3, stride=1, padding=1, groups=128*4, bias=False), 120 | nn.BatchNorm2d(num_features=128*4, momentum=0.9), 121 | nn.ReLU(inplace=True)) 122 | 123 | self.mobilenet0_conv20 = nn.Sequential( 124 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=1, stride=1, padding=0, bias=False), 125 | nn.BatchNorm2d(num_features=128*4, momentum=0.9), 126 | nn.ReLU(inplace=True)) 127 | 128 | self.mobilenet0_conv21 = nn.Sequential( 129 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=3, stride=1, padding=1, groups=128*4, bias=False), 130 | nn.BatchNorm2d(num_features=128*4, momentum=0.9), 131 | nn.ReLU(inplace=True)) 132 | 133 | self.mobilenet0_conv22 = nn.Sequential( 134 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=1, stride=1, padding=0, bias=False), 135 | nn.BatchNorm2d(num_features=128*4, momentum=0.9), 136 | nn.ReLU(inplace=True)) 137 | 138 | self.mobilenet0_conv23 = nn.Sequential( 139 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=3, stride=2, padding=1, groups=128*4, bias=False), 140 | nn.BatchNorm2d(num_features=128*4, momentum=0.9), 141 | nn.ReLU(inplace=True)) 142 | 143 | self.mobilenet0_conv24 = nn.Sequential( 144 | nn.Conv2d(in_channels=128*4, out_channels=256*4, kernel_size=1, stride=1, padding=0, bias=False), 145 | nn.BatchNorm2d(num_features=256*4, momentum=0.9), 146 | nn.ReLU(inplace=True)) 147 | 148 | self.mobilenet0_conv25 = nn.Sequential( 149 | nn.Conv2d(in_channels=256*4, out_channels=256*4, kernel_size=3, stride=1, padding=1, groups=256*4, bias=False), 150 | nn.BatchNorm2d(num_features=256*4, momentum=0.9), 151 | nn.ReLU(inplace=True)) 152 | 153 | self.mobilenet0_conv26 = nn.Sequential( 154 | nn.Conv2d(in_channels=256*4, out_channels=256*4, kernel_size=1, stride=1, padding=0, bias=False), 155 | nn.BatchNorm2d(num_features=256*4, momentum=0.9), 156 | nn.ReLU(inplace=True)) 157 | def forward(self, x): 158 | result=OrderedDict() 159 | batchsize = x.shape[0] 160 | # k1=F.interpolate(k,(512,512),mode='nearest') 161 | # x = self.mobilenet0_conv0(x) 162 | # x = self.mobilenet0_conv1(x) 163 | # x = self.mobilenet0_conv2(x) 164 | 165 | # x = self.mobilenet0_conv3(x) 166 | # x = self.mobilenet0_conv4(x) 167 | # x=self.mmm1(x) 168 | x=self.mmm(x) 169 | # print(x.shape) 170 | x = self.mobilenet0_conv5(x) 171 | x = self.mobilenet0_conv6(x) 172 | x = self.mobilenet0_conv7(x) 173 | x = self.mobilenet0_conv8(x) 174 | x = self.mobilenet0_conv9(x) 175 | x10 = self.mobilenet0_conv10(x) 176 | x = 
self.mobilenet0_conv11(x10) 177 | x = self.mobilenet0_conv12(x) 178 | x = self.mobilenet0_conv13(x) 179 | x = self.mobilenet0_conv14(x) 180 | x = self.mobilenet0_conv15(x) 181 | x = self.mobilenet0_conv16(x) 182 | x = self.mobilenet0_conv17(x) 183 | x = self.mobilenet0_conv18(x) 184 | x = self.mobilenet0_conv19(x) 185 | x = self.mobilenet0_conv20(x) 186 | x = self.mobilenet0_conv21(x) 187 | x22 = self.mobilenet0_conv22(x) 188 | x = self.mobilenet0_conv23(x22) 189 | x = self.mobilenet0_conv24(x) 190 | x = self.mobilenet0_conv25(x) 191 | x26 = self.mobilenet0_conv26(x) 192 | result[1]=x10 193 | result[2]=x22 194 | result[3]=x26 195 | return result 196 | if __name__ == '__main__': 197 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 198 | net = mobileV1().to(device) 199 | 200 | #print(net) 201 | import time 202 | x = torch.randn(1,3,640,640).to(device) 203 | torch.cuda.synchronize() 204 | start=time.time() 205 | for i in range(10): 206 | net(x) 207 | torch.cuda.synchronize() 208 | print(time.time()-start) 209 | torch.save(net.state_dict(),'aaa.torch') 210 | 211 | 212 | # import torch 213 | # import torch.nn as nn 214 | # import torch.nn.functional as F 215 | # import math 216 | # import datetime 217 | 218 | # def Conv_3x3(in_channels, out_channels, stride): 219 | # return nn.Sequential( 220 | # nn.Conv2d(in_channels, out_channels, 3, stride, 1, bias=False), 221 | # nn.BatchNorm2d(out_channels), 222 | # nn.ReLU6() 223 | # ) 224 | 225 | # def Conv_1x1(in_channels, out_channels, stride): 226 | # return nn.Sequential( 227 | # nn.Conv2d(in_channels, out_channels, 1, stride, 0, bias=False), 228 | # nn.BatchNorm2d(out_channels), 229 | # nn.ReLU6() 230 | # ) 231 | 232 | # def SepConv_3x3(in_channels, out_channels, stride): 233 | # return nn.Sequential( 234 | # nn.Conv2d(in_channels, in_channels, 3, stride, 1, groups=in_channels, bias=False), 235 | # nn.BatchNorm2d(in_channels), 236 | # nn.ReLU6(), 237 | 238 | # nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False), 239 | # nn.BatchNorm2d(out_channels) 240 | # ) 241 | 242 | # class MBConv3_3x3(nn.Module): 243 | # def __init__(self, in_channels, out_channels, stride): 244 | # super(MBConv3_3x3, self).__init__() 245 | # mid_channels = int(3 * in_channels) 246 | 247 | # self.block = nn.Sequential( 248 | # nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False), 249 | # nn.BatchNorm2d(mid_channels), 250 | # nn.ReLU6(), 251 | 252 | # nn.Conv2d(mid_channels, mid_channels, 3, stride, 1, groups=mid_channels, bias=False), 253 | # nn.BatchNorm2d(mid_channels), 254 | # nn.ReLU6(), 255 | 256 | # nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False), 257 | # nn.BatchNorm2d(out_channels) 258 | # ) 259 | 260 | # self.use_skip_connect = (1 == stride and in_channels == out_channels) 261 | 262 | # def forward(self, x): 263 | # if self.use_skip_connect: 264 | # return self.block(x) + x 265 | # else: 266 | # return self.block(x) 267 | 268 | # class MBConv3_5x5(nn.Module): 269 | # def __init__(self, in_channels, out_channels, stride): 270 | # super(MBConv3_5x5, self).__init__() 271 | # mid_channels = int(3 * in_channels) 272 | 273 | # self.block = nn.Sequential( 274 | # nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False), 275 | # nn.BatchNorm2d(mid_channels), 276 | # nn.ReLU6(), 277 | 278 | # nn.Conv2d(mid_channels, mid_channels, 5, stride, 2, groups=mid_channels, bias=False), 279 | # nn.BatchNorm2d(mid_channels), 280 | # nn.ReLU6(), 281 | 282 | # nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False), 283 | # 
nn.BatchNorm2d(out_channels) 284 | # ) 285 | 286 | # self.use_skip_connect = (1 == stride and in_channels == out_channels) 287 | 288 | # def forward(self, x): 289 | # if self.use_skip_connect: 290 | # return self.block(x) + x 291 | # else: 292 | # return self.block(x) 293 | 294 | # class MBConv6_3x3(nn.Module): 295 | # def __init__(self, in_channels, out_channels, stride): 296 | # super(MBConv6_3x3, self).__init__() 297 | # mid_channels = int(6 * in_channels) 298 | 299 | # self.block = nn.Sequential( 300 | # nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False), 301 | # nn.BatchNorm2d(mid_channels), 302 | # nn.ReLU6(), 303 | 304 | # nn.Conv2d(mid_channels, mid_channels, 3, stride, 1, groups=mid_channels, bias=False), 305 | # nn.BatchNorm2d(mid_channels), 306 | # nn.ReLU6(), 307 | 308 | # nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False), 309 | # nn.BatchNorm2d(out_channels) 310 | # ) 311 | 312 | # self.use_skip_connect = (1 == stride and in_channels == out_channels) 313 | 314 | # def forward(self, x): 315 | # if self.use_skip_connect: 316 | # return self.block(x) + x 317 | # else: 318 | # return self.block(x) 319 | 320 | # class MBConv6_5x5(nn.Module): 321 | # def __init__(self, in_channels, out_channels, stride): 322 | # super(MBConv6_5x5, self).__init__() 323 | # mid_channels = int(6 * in_channels) 324 | 325 | # self.block = nn.Sequential( 326 | # nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False), 327 | # nn.BatchNorm2d(mid_channels), 328 | # nn.ReLU6(), 329 | 330 | # nn.Conv2d(mid_channels, mid_channels, 5, stride, 2, groups=mid_channels, bias=False), 331 | # nn.BatchNorm2d(mid_channels), 332 | # nn.ReLU6(), 333 | 334 | # nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False), 335 | # nn.BatchNorm2d(out_channels) 336 | # ) 337 | 338 | # self.use_skip_connect = (1 == stride and in_channels == out_channels) 339 | 340 | # def forward(self, x): 341 | # if self.use_skip_connect: 342 | # return self.block(x) + x 343 | # else: 344 | # return self.block(x) 345 | 346 | # class MnasNet(nn.Module): 347 | # def __init__(self, width_mult=1.): 348 | # super(MnasNet, self).__init__() 349 | 350 | # self.out_channels = int(1280 * width_mult) 351 | 352 | # self.conv1 = Conv_3x3(3, int(32 * width_mult), 2) 353 | # self.conv2 = SepConv_3x3(int(32 * width_mult), int(16 * width_mult), 1) 354 | 355 | # self.feature1 = nn.Sequential( 356 | # self._make_layer(MBConv3_3x3, 3, int(16 * width_mult), int(24 * width_mult), 2), 357 | # self._make_layer(MBConv3_5x5, 3, int(24 * width_mult), int(64 * width_mult), 2) 358 | 359 | # ) 360 | # self.feature2=nn.Sequential( 361 | 362 | # self._make_layer(MBConv6_5x5, 3, int(64 * width_mult), int(80 * width_mult), 2), 363 | # self._make_layer(MBConv6_3x3, 2, int(80 * width_mult), int(128 * width_mult), 1) 364 | 365 | # ) 366 | # self.feature3=nn.Sequential( 367 | 368 | # self._make_layer(MBConv6_5x5, 4, int(128 * width_mult), int(192 * width_mult), 2), 369 | # self._make_layer(MBConv6_3x3, 1, int(192 * width_mult), int(256 * width_mult), 1)) 370 | 371 | # self._initialize_weights() 372 | 373 | # def _make_layer(self, block, blocks, in_channels, out_channels, stride=1): 374 | # strides = [stride] + [1] * (blocks - 1) 375 | # layers = [] 376 | # for _stride in strides: 377 | # layers.append(block(in_channels, out_channels, _stride)) 378 | # in_channels = out_channels 379 | 380 | # return nn.Sequential(*layers) 381 | 382 | # def _initialize_weights(self): 383 | # for m in self.modules(): 384 | # if isinstance(m, nn.Conv2d): 385 | # n = m.kernel_size[0] * 
m.kernel_size[1] * m.out_channels 386 | # m.weight.data.normal_(0, math.sqrt(2. / n)) 387 | # if m.bias is not None: 388 | # m.bias.data.zero_() 389 | # elif isinstance(m, nn.BatchNorm2d): 390 | # m.weight.data.fill_(1) 391 | # m.bias.data.zero_() 392 | # elif isinstance(m, nn.Linear): 393 | # n = m.weight.size(1) 394 | # m.weight.data.normal_(0, 0.01) 395 | # m.bias.data.zero_() 396 | 397 | # def forward(self, x): 398 | # x = self.conv2(self.conv1(x)) 399 | # # print(x.shape) 400 | # x = self.feature1(x) 401 | # # print(x.shape) 402 | # x = self.feature2(x) 403 | # # print(x.shape) 404 | # x = self.feature3(x) 405 | # # print(x.shape) 406 | 407 | # return x 408 | 409 | # if __name__ == '__main__': 410 | # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 411 | # net = MnasNet().to(device) 412 | # #print(net) 413 | # import time 414 | # torch.cuda.synchronize() 415 | # x = torch.randn(1,3,640,640).to(device) 416 | # start=time.time() 417 | # for i in range(10): 418 | # net(x) 419 | # torch.cuda.synchronize() 420 | # print(time.time()-start) 421 | 422 | # torch.save(net.state_dict(),'aaa.torch') 423 | # # print(net) 424 | # #print(y) 425 | 426 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import math 4 | import time 5 | import torch.utils.model_zoo as model_zoo 6 | from utils import BasicBlock, Bottleneck, RegressionTransform 7 | from anchors import Anchors 8 | import losses 9 | 10 | model_urls = { 11 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 12 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 13 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 14 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 15 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 16 | } 17 | 18 | class PyramidFeatures(nn.Module): 19 | def __init__(self, C2_size, C3_size, C4_size, C5_size, feature_size=256): 20 | super(PyramidFeatures, self).__init__() 21 | 22 | # upsample C5 to get P5 from the FPN paper 23 | self.P5_1 = nn.Conv2d(C5_size, feature_size, kernel_size=1, stride=1, padding=0) 24 | self.P5_upsampled = nn.Upsample(scale_factor=2, mode='nearest') 25 | self.P5_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1) 26 | 27 | # add P5 elementwise to C4 28 | self.P4_1 = nn.Conv2d(C4_size, feature_size, kernel_size=1, stride=1, padding=0) 29 | self.P4_upsampled = nn.Upsample(scale_factor=2, mode='nearest') 30 | self.P4_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1) 31 | 32 | # add P4 elementwise to C3 33 | self.P3_1 = nn.Conv2d(C3_size, feature_size, kernel_size=1, stride=1, padding=0) 34 | self.P3_upsampled = nn.Upsample(scale_factor=2, mode='nearest') 35 | self.P3_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1) 36 | 37 | # "P6 is obtained via a 3x3 stride-2 conv on C5" 38 | self.P6 = nn.Conv2d(C5_size, feature_size, kernel_size=3, stride=2, padding=1) 39 | 40 | # "P7 is computed by applying ReLU followed by a 3x3 stride-2 conv on P6" 41 | # Retinaface does not need P7 42 | # self.P7_1 = nn.ReLU() 43 | # self.P7_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=2, padding=1) 44 | 45 | # solve C2 46 | self.P2_1 = nn.Conv2d(C2_size, feature_size, kernel_size=1, stride=1, padding=0) 47 | 
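# P2_1 projects C2 laterally; P2_2 smooths the sum of that projection and the
# upsampled P3 (see forward below), giving one extra high-resolution pyramid level.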
self.P2_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1) 48 | 49 | def forward(self, inputs): 50 | 51 | C2, C3, C4, C5 = inputs 52 | 53 | P5_x = self.P5_1(C5) 54 | P5_upsampled_x = self.P5_upsampled(P5_x) 55 | P5_x = self.P5_2(P5_x) 56 | 57 | P4_x = self.P4_1(C4) 58 | P4_x = P5_upsampled_x + P4_x 59 | P4_upsampled_x = self.P4_upsampled(P4_x) 60 | P4_x = self.P4_2(P4_x) 61 | 62 | P3_x = self.P3_1(C3) 63 | P3_x = P3_x + P4_upsampled_x 64 | P3_upsampled_x = self.P3_upsampled(P3_x) 65 | P3_x = self.P3_2(P3_x) 66 | 67 | P2_x = self.P2_1(C2) 68 | P2_x = P2_x + P3_upsampled_x 69 | P2_x = self.P2_2(P2_x) 70 | 71 | P6_x = self.P6(C5) 72 | 73 | return [P2_x, P3_x, P4_x, P5_x, P6_x] 74 | 75 | class ClassHead(nn.Module): 76 | def __init__(self,inchannels=512,num_anchors=3): 77 | super(ClassHead,self).__init__() 78 | self.num_anchors = num_anchors 79 | self.conv1x1 = nn.Conv2d(inchannels,self.num_anchors*2,kernel_size=(1,1),stride=1) 80 | 81 | # if use focal loss instead of OHEM 82 | #self.output_act = nn.Sigmoid() 83 | 84 | # if use OHEM 85 | self.output_act = nn.LogSoftmax(dim=-1) 86 | 87 | 88 | def forward(self,x): 89 | out = self.conv1x1(x) 90 | out = out.permute(0,2,3,1) 91 | b, h, w, c = out.shape 92 | out = out.view(b, h, w, self.num_anchors, 2) 93 | #out = out.permute(0,2,3,1).contiguous().view(out.shape[0], -1, 2) 94 | out = self.output_act(out) 95 | 96 | return out.contiguous().view(out.shape[0], -1, 2) 97 | 98 | class BboxHead(nn.Module): 99 | def __init__(self,inchannels=512,num_anchors=3): 100 | super(BboxHead,self).__init__() 101 | self.conv1x1 = nn.Conv2d(inchannels,num_anchors*4,kernel_size=(1,1),stride=1) 102 | 103 | def forward(self,x): 104 | out = self.conv1x1(x) 105 | out = out.permute(0,2,3,1) 106 | 107 | return out.contiguous().view(out.shape[0], -1, 4) 108 | 109 | class LandmarkHead(nn.Module): 110 | def __init__(self,inchannels=512,num_anchors=3): 111 | super(LandmarkHead,self).__init__() 112 | self.conv1x1 = nn.Conv2d(inchannels,num_anchors*10,kernel_size=(1,1),stride=1) 113 | 114 | def forward(self,x): 115 | out = self.conv1x1(x) 116 | out = out.permute(0,2,3,1) 117 | 118 | return out.contiguous().view(out.shape[0], -1, 10) 119 | 120 | 121 | class ClassHead_(nn.Module): 122 | def __init__(self,inchannels=256,num_anchors=3): 123 | super(ClassHead_,self).__init__() 124 | self.num_anchors = num_anchors 125 | self.feature_head = self._make_head(self.num_anchors*2) 126 | self.output_act = nn.LogSoftmax(dim=-1) 127 | 128 | def _make_head(self,out_size): 129 | layers = [] 130 | for _ in range(4): 131 | layers += [nn.Conv2d(256, 256, 3, padding=1), nn.ReLU(inplace=True)] 132 | layers += [nn.Conv2d(256, out_size, 3, padding=1)] 133 | return nn.Sequential(*layers) 134 | 135 | def forward(self,x): 136 | out = self.feature_head(x) 137 | out = out.permute(0,2,3,1) 138 | b, h, w, c = out.shape 139 | out = out.view(b, h, w, self.num_anchors, 2) 140 | #out = out.permute(0,2,3,1).contiguous().view(out.shape[0], -1, 2) 141 | out = self.output_act(out) 142 | 143 | return out.contiguous().view(out.shape[0], -1, 2) 144 | 145 | class BboxHead_(nn.Module): 146 | def __init__(self,inchannels=256,num_anchors=3): 147 | super(BboxHead_,self).__init__() 148 | self.num_anchors = num_anchors 149 | self.feature_head = self._make_head(self.num_anchors*4) 150 | 151 | def _make_head(self,out_size): 152 | layers = [] 153 | for _ in range(4): 154 | layers += [nn.Conv2d(256, 256, 3, padding=1), nn.ReLU(inplace=True)] 155 | layers += [nn.Conv2d(256, out_size, 3, padding=1)] 156 | 
return nn.Sequential(*layers) 157 | 158 | def forward(self,x): 159 | out = self.feature_head(x) 160 | out = out.permute(0,2,3,1) 161 | 162 | return out.contiguous().view(out.shape[0], -1, 4) 163 | 164 | class LandmarkHead_(nn.Module): 165 | def __init__(self,inchannels=256,num_anchors=3): 166 | super(LandmarkHead_,self).__init__() 167 | self.num_anchors = num_anchors 168 | self.feature_head = self._make_head(self.num_anchors*10) 169 | 170 | def _make_head(self,out_size): 171 | layers = [] 172 | for _ in range(4): 173 | layers += [nn.Conv2d(256, 256, 3, padding=1), nn.ReLU(inplace=True)] 174 | layers += [nn.Conv2d(256, out_size, 3, padding=1)] 175 | return nn.Sequential(*layers) 176 | 177 | def forward(self,x): 178 | out = self.feature_head(x) 179 | out = out.permute(0,2,3,1) 180 | 181 | return out.contiguous().view(out.shape[0], -1, 10) 182 | 183 | 184 | class CBR(nn.Module): 185 | def __init__(self,inchannels,outchannels): 186 | super(CBR,self).__init__() 187 | self.conv3x3 = nn.Conv2d(inchannels,outchannels,kernel_size=3,stride=1,padding=1,bias=False) 188 | self.bn = nn.BatchNorm2d(outchannels) 189 | self.relu = nn.ReLU(inplace=True) 190 | 191 | for m in self.modules(): 192 | if isinstance(m, nn.BatchNorm2d): 193 | nn.init.constant_(m.weight, 1) 194 | nn.init.constant_(m.bias, 0) 195 | if isinstance(m, nn.Conv2d): 196 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 197 | #nn.init.normal_(m.weight, std=0.01) 198 | 199 | def forward(self,x): 200 | x = self.conv3x3(x) 201 | x = self.bn(x) 202 | x = self.relu(x) 203 | 204 | return x 205 | 206 | class CB(nn.Module): 207 | def __init__(self,inchannels): 208 | super(CB,self).__init__() 209 | self.conv3x3 = nn.Conv2d(inchannels,inchannels,kernel_size=3,stride=1,padding=1,bias=False) 210 | self.bn = nn.BatchNorm2d(inchannels) 211 | 212 | for m in self.modules(): 213 | if isinstance(m, nn.BatchNorm2d): 214 | nn.init.constant_(m.weight, 1) 215 | nn.init.constant_(m.bias, 0) 216 | if isinstance(m, nn.Conv2d): 217 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 218 | #nn.init.normal_(m.weight, std=0.01) 219 | 220 | def forward(self,x): 221 | x = self.conv3x3(x) 222 | x = self.bn(x) 223 | 224 | return x 225 | 226 | class Concat(nn.Module): 227 | def forward(self,*feature): 228 | out = torch.cat(feature,dim=1) 229 | return out 230 | 231 | class Context(nn.Module): 232 | def __init__(self,inchannels=256): 233 | super(Context,self).__init__() 234 | self.context_plain = inchannels//2 235 | self.conv1 = CB(inchannels) 236 | self.conv2 = CBR(inchannels,self.context_plain) 237 | self.conv2_1 = CB(self.context_plain) 238 | self.conv2_2_1 = CBR(self.context_plain,self.context_plain) 239 | self.conv2_2_2 = CB(self.context_plain) 240 | self.concat = Concat() 241 | self.relu = nn.ReLU(inplace=True) 242 | 243 | def forward(self,x): 244 | f1 = self.conv1(x) 245 | f2_ = self.conv2(x) 246 | f2 = self.conv2_1(f2_) 247 | f3 = self.conv2_2_1(f2_) 248 | f3 = self.conv2_2_2(f3) 249 | 250 | #out = torch.cat([f1,f2,f3],dim=1) 251 | out = self.concat(f1,f2,f3) 252 | out = self.relu(out) 253 | 254 | return out 255 | 256 | def initialize_layer(layer): 257 | if isinstance(layer, nn.Conv2d): 258 | nn.init.normal_(layer.weight, std=0.01) 259 | if layer.bias is not None: 260 | nn.init.constant_(layer.bias, val=0) 261 | 262 | class ResNet(nn.Module): 263 | 264 | def __init__(self, num_classes, block, layers, num_anchors=3): 265 | self.inplanes = 64 266 | super(ResNet, self).__init__() 267 | self.conv1 = nn.Conv2d(3, 64, 
kernel_size=7, stride=2, padding=3, bias=False) 268 | self.bn1 = nn.BatchNorm2d(64) 269 | self.relu = nn.ReLU(inplace=True) 270 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 271 | self.layer1 = self._make_layer(block, 64, layers[0]) 272 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 273 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 274 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 275 | 276 | if block == BasicBlock: 277 | fpn_sizes = [self.layer1[layers[0]-1].conv2.out_channels, self.layer2[layers[1]-1].conv2.out_channels, 278 | self.layer3[layers[2]-1].conv2.out_channels, self.layer4[layers[3]-1].conv2.out_channels] 279 | elif block == Bottleneck: 280 | fpn_sizes = [self.layer1[layers[0]-1].conv3.out_channels, self.layer2[layers[1]-1].conv3.out_channels, 281 | self.layer3[layers[2]-1].conv3.out_channels, self.layer4[layers[3]-1].conv3.out_channels] 282 | 283 | self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2],fpn_sizes[3]) 284 | 285 | self.context = self._make_contextlayer() 286 | 287 | self.clsHead = ClassHead_() 288 | self.bboxHead = BboxHead_() 289 | self.ldmHead = LandmarkHead_() 290 | 291 | # self.clsHead = self._make_class_head() 292 | # self.bboxHead = self._make_bbox_head() 293 | # self.ldmHead = self._make_landmark_head() 294 | 295 | self.anchors = Anchors() 296 | 297 | self.regressBoxes = RegressionTransform() 298 | 299 | self.losslayer = losses.LossLayer() 300 | 301 | self.freeze_bn() 302 | 303 | # initialize head 304 | # self.clsHead.apply(initialize_layer) 305 | # self.bboxHead.apply(initialize_layer) 306 | # self.ldmHead.apply(initialize_layer) 307 | 308 | # initialize context 309 | for layer in self.context: 310 | for m in layer.modules(): 311 | if isinstance(m, nn.Conv2d): 312 | nn.init.normal_(m.weight, std=0.01) 313 | if m.bias is not None: 314 | nn.init.constant_(m.bias, 0) 315 | if isinstance(m, nn.BatchNorm2d): 316 | nn.init.constant_(m.weight, 1) 317 | nn.init.constant_(m.bias, 0) 318 | 319 | def _make_contextlayer(self,fpn_num=5,inchannels=256): 320 | context = nn.ModuleList() 321 | for i in range(fpn_num): 322 | context.append(Context()) 323 | 324 | return context 325 | 326 | def _make_class_head(self,fpn_num=5,inchannels=512,anchor_num=3): 327 | classhead = nn.ModuleList() 328 | for i in range(fpn_num): 329 | classhead.append(ClassHead(inchannels,anchor_num)) 330 | return classhead 331 | 332 | def _make_bbox_head(self,fpn_num=5,inchannels=512,anchor_num=3): 333 | bboxhead = nn.ModuleList() 334 | for i in range(fpn_num): 335 | bboxhead.append(BboxHead(inchannels,anchor_num)) 336 | return bboxhead 337 | 338 | def _make_landmark_head(self,fpn_num=5,inchannels=512,anchor_num=3): 339 | landmarkhead = nn.ModuleList() 340 | for i in range(fpn_num): 341 | landmarkhead.append(LandmarkHead(inchannels,anchor_num)) 342 | return landmarkhead 343 | 344 | 345 | def _make_layer(self, block, planes, blocks, stride=1): 346 | downsample = None 347 | if stride != 1 or self.inplanes != planes * block.expansion: 348 | downsample = nn.Sequential( 349 | nn.Conv2d(self.inplanes, planes * block.expansion, 350 | kernel_size=1, stride=stride, bias=False), 351 | nn.BatchNorm2d(planes * block.expansion), 352 | ) 353 | 354 | layers = [] 355 | layers.append(block(self.inplanes, planes, stride, downsample)) 356 | self.inplanes = planes * block.expansion 357 | for i in range(1, blocks): 358 | layers.append(block(self.inplanes, planes)) 359 | 360 | return nn.Sequential(*layers) 361 | 362 | def 
freeze_bn(self): 363 | '''Freeze BatchNorm layers.''' 364 | for layer in self.modules(): 365 | if isinstance(layer, nn.BatchNorm2d): 366 | layer.eval() 367 | 368 | def freeze_first_layer(self): 369 | '''Freeze First layer''' 370 | for param in self.conv1.parameters(): 371 | param.requires_grad = False 372 | 373 | 374 | def forward(self, inputs): 375 | 376 | if self.training: 377 | img_batch, annotations = inputs 378 | else: 379 | img_batch = inputs 380 | 381 | x = self.conv1(img_batch) 382 | x = self.bn1(x) 383 | x = self.relu(x) 384 | x = self.maxpool(x) 385 | 386 | x1 = self.layer1(x) 387 | x2 = self.layer2(x1) 388 | x3 = self.layer3(x2) 389 | x4 = self.layer4(x3) 390 | 391 | features = self.fpn([x1, x2, x3, x4]) 392 | #context_features = [self.context[i](feature) for i,feature in enumerate(features)] 393 | 394 | # bbox_regressions = torch.cat([self.bboxHead[i](feature) for i,feature in enumerate(context_features)], dim=1) 395 | # ldm_regressions = torch.cat([self.ldmHead[i](feature) for i,feature in enumerate(context_features)], dim=1) 396 | # classifications = torch.cat([self.clsHead[i](feature) for i,feature in enumerate(context_features)],dim=1) 397 | 398 | bbox_regressions = torch.cat([self.bboxHead(feature) for feature in features], dim=1) 399 | ldm_regressions = torch.cat([self.ldmHead(feature) for feature in features], dim=1) 400 | classifications = torch.cat([self.clsHead(feature) for feature in features],dim=1) 401 | 402 | anchors = self.anchors(img_batch) 403 | 404 | if self.training: 405 | return self.losslayer(classifications, bbox_regressions,ldm_regressions, anchors, annotations) 406 | else: 407 | bboxes, landmarks = self.regressBoxes(anchors, bbox_regressions, ldm_regressions, img_batch) 408 | 409 | return classifications, bboxes, landmarks 410 | 411 | def resnet18(num_classes, pretrained=False, **kwargs): 412 | """Constructs a ResNet-18 model. 413 | Args: 414 | pretrained (bool): If True, returns a model pre-trained on ImageNet 415 | """ 416 | model = ResNet(num_classes, BasicBlock, [2, 2, 2, 2], **kwargs) 417 | if pretrained: 418 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'], model_dir='.'), strict=False) 419 | return model 420 | 421 | 422 | def resnet34(num_classes, pretrained=False, **kwargs): 423 | """Constructs a ResNet-34 model. 424 | Args: 425 | pretrained (bool): If True, returns a model pre-trained on ImageNet 426 | """ 427 | model = ResNet(num_classes, BasicBlock, [3, 4, 6, 3], **kwargs) 428 | if pretrained: 429 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'], model_dir='.'), strict=False) 430 | return model 431 | 432 | 433 | def resnet50(num_classes, pretrained=False, **kwargs): 434 | """Constructs a ResNet-50 model. 435 | Args: 436 | pretrained (bool): If True, returns a model pre-trained on ImageNet 437 | """ 438 | model = ResNet(num_classes, Bottleneck, [3, 4, 6, 3], **kwargs) 439 | if pretrained: 440 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'], model_dir='.'), strict=False) 441 | return model 442 | 443 | def resnet101(num_classes, pretrained=False, **kwargs): 444 | """Constructs a ResNet-101 model. 
445 | Args: 446 | pretrained (bool): If True, returns a model pre-trained on ImageNet 447 | """ 448 | model = ResNet(num_classes, Bottleneck, [3, 4, 23, 3], **kwargs) 449 | if pretrained: 450 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'], model_dir='.'), strict=False) 451 | return model 452 | 453 | 454 | def resnet152(num_classes, pretrained=False, **kwargs): 455 | """Constructs a ResNet-152 model. 456 | Args: 457 | pretrained (bool): If True, returns a model pre-trained on ImageNet 458 | """ 459 | model = ResNet(num_classes, Bottleneck, [3, 8, 36, 3], **kwargs) 460 | if pretrained: 461 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'], model_dir='.'), strict=False) 462 | return model -------------------------------------------------------------------------------- /network.torch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ElvishElvis/68-Retinaface-Pytorch-version/18471d90c24753324c84aa415adef605f3866031/network.torch -------------------------------------------------------------------------------- /out/stage_5_68_full_model_epoch_121.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ElvishElvis/68-Retinaface-Pytorch-version/18471d90c24753324c84aa415adef605f3866031/out/stage_5_68_full_model_epoch_121.pt -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scikit_image==0.15.0 2 | numpy==1.16.4 3 | terminaltables==3.1.0 4 | torch==1.1.0 5 | tqdm==4.32.1 6 | opencv_python==4.1.0.25 7 | torchvision==0.3.0 8 | Pillow==6.2.0 9 | skimage==0.0 10 | tensorboardX==1.8 11 | -------------------------------------------------------------------------------- /test_argu.py: -------------------------------------------------------------------------------- 1 | # from dataloader import TrainDataset, collater, Resizer, PadToSquare,Color,Rotate,RandomErasing,RandomFlip,ValDataset 2 | # import torchvision.transforms as transforms 3 | # import cv2 4 | # import copy 5 | # import torch.nn.functional as F 6 | # import torch 7 | # from PIL import Image 8 | # import numpy as np 9 | # import os 10 | # import skimage 11 | 12 | 13 | 14 | # dataset_train = ValDataset('./widerface/train/label.txt',transform=transforms.Compose([Resizer(640),PadToSquare()])) 15 | # list__=dataset_train[99] 16 | # img=np.array(list__['img']) 17 | # print(img.shape) 18 | # # img = skimage.io.imread("/versa/elvishelvis/RetinaFace_Pytorch/\ 19 | # # CelebA/Img/img_celeba.7z/img_celeba/101299.jpg") 20 | 21 | # box=np.array(list__['annot'])[0] 22 | 23 | 24 | # img=cv2.circle(img,(int(box[0]),int(box[1])),radius=1,color=(0,255,0),thickness=10) 25 | # img=cv2.circle(img,(int(box[2]),int(box[3])),radius=1,color=(255,0,0),thickness=10) 26 | # img=cv2.rectangle(img,(int(box[0]),int(box[1])),(int(box[2]),int(box[3])),(0,0,255),thickness=2) 27 | 28 | # for i in range(4,140,2): 29 | # try: 30 | # if(i>=100): 31 | # img=cv2.circle(img,(int(box[i]),int(box[i+1])),radius=1,color=(255,255,255),thickness=2) 32 | # else: 33 | # img=cv2.circle(img,(int(box[i]),int(box[i+1])),radius=1,color=(0,0,255),thickness=2) 34 | # # img=cv2.circle(img,(int(box[i+2]),int(box[i+3])),radius=1,color=(255,0,0),thickness=2) 35 | # # img=cv2.circle(img,(int(box[i+4]),int(box[i+5])),radius=1,color=(0,255,0),thickness=2) 36 | # # 
img=cv2.circle(img,(int(box[i+6]),int(box[i+7])),radius=1,color=(255,255,0),thickness=2) 37 | # except: 38 | # break 39 | # # img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) 40 | # cv2.imwrite('sdfas33df.jpg',img) 41 | 42 | 43 | # import torch 44 | # from torch import nn 45 | 46 | 47 | # # torch.log and math.log are both natural (base-e) logarithms 48 | # class AdaptiveWingLoss(nn.Module): 49 | # def __init__(self, omega=14, theta=0.5, epsilon=1, alpha=2.1): 50 | # super(AdaptiveWingLoss, self).__init__() 51 | # self.omega = omega 52 | # self.theta = theta 53 | # self.epsilon = epsilon 54 | # self.alpha = alpha 55 | 56 | # def forward(self, pred, target): 57 | # ''' 58 | # :param pred: BxNxHxH 59 | # :param target: BxNxHxH 60 | # :return: 61 | # ''' 62 | 63 | # y = target 64 | # y_hat = pred 65 | # delta_y = (y - y_hat).abs() 66 | # delta_y1 = delta_y[delta_y < self.theta] 67 | # delta_y2 = delta_y[delta_y >= self.theta] 68 | # y1 = y[delta_y < self.theta] 69 | # y2 = y[delta_y >= self.theta] 70 | # loss1 = self.omega * torch.log(1 + torch.pow(delta_y1 / self.omega, self.alpha - y1)) 71 | # A = self.omega * (1 / (1 + torch.pow(self.theta / self.epsilon, self.alpha - y2))) * (self.alpha - y2) * ( 72 | # torch.pow(self.theta / self.epsilon, self.alpha - y2 - 1)) * (1 / self.epsilon) 73 | # C = self.theta * A - self.omega * torch.log(1 + torch.pow(self.theta / self.epsilon, self.alpha - y2)) 74 | # loss2 = A * delta_y2 - C 75 | # return (loss1.sum() + loss2.sum()) / (len(loss1) + len(loss2)) 76 | 77 | # if __name__ == "__main__": 78 | # loss_func = AdaptiveWingLoss() 79 | # y = torch.rand(3,136) 80 | # y_hat = torch.rand(3,136) 81 | # print(y_hat) 82 | # y_hat.requires_grad_(True) 83 | # loss = loss_func(y_hat, y) 84 | # loss.backward() 85 | # print(loss) 86 | 87 | 88 | import torch 89 | import math 90 | import torch.nn as nn 91 | class WingLoss(nn.Module): 92 | def __init__(self, omega=1, epsilon=2): 93 | super(WingLoss, self).__init__() 94 | self.omega = omega 95 | self.epsilon = epsilon 96 | 97 | def forward(self, pred, target): 98 | y = target 99 | y_hat = pred 100 | delta_y = (y - y_hat).abs() 101 | # print(delta_y.shape) 102 | delta_y1 = delta_y[delta_y < self.omega] 103 | delta_y2 = delta_y[delta_y >= self.omega] 104 | # print(delta_y2) 105 | 106 | loss1 = self.omega * torch.log(1 + delta_y1 / self.epsilon) 107 | C = self.omega - self.omega * math.log(1 + self.omega / self.epsilon) 108 | loss2 = delta_y2 - C 109 | return (loss1.sum() + loss2.sum()) / (len(loss1) + len(loss2)) 110 | aaa=WingLoss() 111 | a=torch.rand(1,136)*3 112 | b=torch.rand(1,136) 113 | print(aaa(a,b)) 114 | -------------------------------------------------------------------------------- /torchvision_model.py: -------------------------------------------------------------------------------- 1 | # import torch 2 | # import torch.nn as nn 3 | # import torchvision.models.detection.backbone_utils as backbone_utils 4 | # import torchvision.models.resnet as resnet 5 | # import torchvision.models._utils as _utils 6 | # import torch.nn.functional as F 7 | # from collections import OrderedDict 8 | # from anchors import Anchors 9 | # from utils import RegressionTransform 10 | # import losses 11 | # from mobile import mobileV1 12 | 13 | # class ContextModule(nn.Module): 14 | # def __init__(self,in_channels=256): 15 | # super(ContextModule,self).__init__() 16 | # self.det_conv1 = nn.Sequential( 17 | # nn.Conv2d(in_channels,in_channels,kernel_size=3,stride=1,padding=1), 18 | # nn.BatchNorm2d(in_channels) 19 | # ) 20 | # self.det_context_conv1 = nn.Sequential(
21 | # nn.Conv2d(in_channels,in_channels//2,kernel_size=3,stride=1,padding=1), 22 | # nn.BatchNorm2d(in_channels//2), 23 | # nn.ReLU(inplace=True) 24 | # ) 25 | # self.det_context_conv2 = nn.Sequential( 26 | # nn.Conv2d(in_channels//2,in_channels//2,kernel_size=3,stride=1,padding=1), 27 | # nn.BatchNorm2d(in_channels//2) 28 | # ) 29 | # self.det_context_conv3_1 = nn.Sequential( 30 | # nn.Conv2d(in_channels//2,in_channels//2,kernel_size=3,stride=1,padding=1), 31 | # nn.BatchNorm2d(in_channels//2), 32 | # nn.ReLU(inplace=True) 33 | # ) 34 | # self.det_context_conv3_2 = nn.Sequential( 35 | # nn.Conv2d(in_channels//2,in_channels//2,kernel_size=3,stride=1,padding=1), 36 | # nn.BatchNorm2d(in_channels//2) 37 | # ) 38 | # self.det_concat_relu = nn.ReLU(inplace=True) 39 | 40 | # def forward(self,x): 41 | # x1 = self.det_conv1(x) 42 | # x_ = self.det_context_conv1(x) 43 | # x2 = self.det_context_conv2(x_) 44 | # x3_ = self.det_context_conv3_1(x_) 45 | # x3 = self.det_context_conv3_2(x3_) 46 | 47 | # out = torch.cat((x1,x2,x3),1) 48 | # act_out = self.det_concat_relu(out) 49 | 50 | # return act_out 51 | 52 | # class FeaturePyramidNetwork(nn.Module): 53 | # def __init__(self,in_channels_list,out_channels): 54 | # super(FeaturePyramidNetwork,self).__init__() 55 | # self.lateral_blocks = nn.ModuleList() 56 | # self.context_blocks = nn.ModuleList() 57 | # self.aggr_blocks = nn.ModuleList() 58 | # for i, in_channels in enumerate(in_channels_list): 59 | # if in_channels == 0: 60 | # continue 61 | # lateral_block_module = nn.Sequential( 62 | # nn.Conv2d(in_channels,out_channels,kernel_size=1,stride=1,padding=0), 63 | # nn.BatchNorm2d(out_channels), 64 | # nn.ReLU(inplace=True) 65 | # ) 66 | # aggr_block_module = nn.Sequential( 67 | # nn.Conv2d(out_channels,out_channels,kernel_size=3,stride=1,padding=1), 68 | # nn.BatchNorm2d(out_channels), 69 | # nn.ReLU(inplace=True) 70 | # ) 71 | # context_block_module = ContextModule(out_channels) 72 | # self.lateral_blocks.append(lateral_block_module) 73 | # self.context_blocks.append(context_block_module) 74 | # if i > 0 : 75 | # self.aggr_blocks.append(aggr_block_module) 76 | 77 | # # initialize params of fpn layers 78 | # for m in self.modules(): 79 | # if isinstance(m,nn.Conv2d): 80 | # nn.init.kaiming_uniform_(m.weight, a=1) 81 | # nn.init.constant_(m.bias, 0) 82 | 83 | # def forward(self,x): 84 | # names = list(x.keys()) 85 | # x = list(x.values()) 86 | 87 | # last_inner = self.lateral_blocks[-1](x[-1]) 88 | # results = [] 89 | # results.append(self.context_blocks[-1](last_inner)) 90 | # for feature, lateral_block, context_block, aggr_block in zip( 91 | # x[:-1][::-1], self.lateral_blocks[:-1][::-1], self.context_blocks[:-1][::-1], self.aggr_blocks[::-1] 92 | # ): 93 | # if not lateral_block: 94 | # continue 95 | # lateral_feature = lateral_block(feature) 96 | # feat_shape = lateral_feature.shape[-2:] 97 | # inner_top_down = F.interpolate(last_inner, size=feat_shape, mode="nearest") 98 | # last_inner = lateral_feature + inner_top_down 99 | # last_inner = aggr_block(last_inner) 100 | # results.insert(0, context_block(last_inner)) 101 | 102 | # # make it back an OrderedDict 103 | # out = OrderedDict([(k, v) for k, v in zip(names, results)]) 104 | 105 | # return out 106 | 107 | # class ClassHead(nn.Module): 108 | # def __init__(self,inchannels=64,num_anchors=3): 109 | # super(ClassHead,self).__init__() 110 | # self.num_anchors = num_anchors 111 | # self.conv1x1 = nn.Conv2d(inchannels,self.num_anchors*2,kernel_size=(1,1),stride=1,padding=0) 112 | # self.output_act = 
nn.LogSoftmax(dim=-1) 113 | 114 | # def forward(self,x): 115 | # out = self.conv1x1(x) 116 | # out = out.permute(0,2,3,1) 117 | # b, h, w, c = out.shape 118 | # out = out.view(b, h, w, self.num_anchors, 2) 119 | # out = self.output_act(out) 120 | 121 | # return out.contiguous().view(out.shape[0], -1, 2) 122 | 123 | # class BboxHead(nn.Module): 124 | # def __init__(self,inchannels=64,num_anchors=3): 125 | # super(BboxHead,self).__init__() 126 | # self.conv1x1 = nn.Conv2d(inchannels,num_anchors*4,kernel_size=(1,1),stride=1,padding=0) 127 | 128 | # def forward(self,x): 129 | # out = self.conv1x1(x) 130 | # out = out.permute(0,2,3,1) 131 | 132 | # return out.contiguous().view(out.shape[0], -1, 4) 133 | 134 | # class LandmarkHead(nn.Module): 135 | # def __init__(self,inchannels=64,num_anchors=3): 136 | # super(LandmarkHead,self).__init__() 137 | # self.conv1x1 = nn.Conv2d(inchannels,num_anchors*136,kernel_size=(1,1),stride=1,padding=0) 138 | 139 | # def forward(self,x): 140 | # out = self.conv1x1(x) 141 | # out = out.permute(0,2,3,1) 142 | 143 | # return out.contiguous().view(out.shape[0], -1, 136) 144 | 145 | # class RetinaFace(nn.Module): 146 | # def __init__(self,backbone,return_layers,anchor_nums=3): 147 | # super(RetinaFace,self).__init__() 148 | # # if backbone_name == 'resnet50': 149 | # # self.backbone = resnet.resnet50(pretrained) 150 | # # self.backbone = resnet.__dict__[backbone_name](pretrained=pretrained) 151 | # # self.return_layers = {'layer1': 0, 'layer2': 1, 'layer3': 2, 'layer4': 3} 152 | # assert backbone,'Backbone can not be none!' 153 | # assert len(return_layers)>0,'There must be at least one return layers' 154 | # self.body = mobileV1() 155 | # in_channels_stage2 = 32 156 | # # in_channels_stage2 = 64 157 | # in_channels_list = [ 158 | # #in_channels_stage2, 159 | # in_channels_stage2 * 2, 160 | # in_channels_stage2 * 4, 161 | # in_channels_stage2 * 8, 162 | # ] 163 | # out_channels = 32 164 | # self.fpn = FeaturePyramidNetwork(in_channels_list,out_channels) 165 | # # self.ClassHead = ClassHead() 166 | # # self.BboxHead = BboxHead() 167 | # # self.LandmarkHead = LandmarkHead() 168 | # self.ClassHead = self._make_class_head() 169 | # self.BboxHead = self._make_bbox_head() 170 | # self.LandmarkHead = self._make_landmark_head() 171 | # self.anchors = Anchors() 172 | # self.regressBoxes = RegressionTransform() 173 | # self.losslayer = losses.LossLayer() 174 | 175 | # def _make_class_head(self,fpn_num=3,inchannels=64,anchor_num=3): 176 | # classhead = nn.ModuleList() 177 | # for i in range(fpn_num): 178 | # classhead.append(ClassHead(inchannels,anchor_num)) 179 | # return classhead 180 | 181 | # def _make_bbox_head(self,fpn_num=3,inchannels=64,anchor_num=3): 182 | # bboxhead = nn.ModuleList() 183 | # for i in range(fpn_num): 184 | # bboxhead.append(BboxHead(inchannels,anchor_num)) 185 | # return bboxhead 186 | 187 | # def _make_landmark_head(self,fpn_num=3,inchannels=64,anchor_num=3): 188 | # landmarkhead = nn.ModuleList() 189 | # for i in range(fpn_num): 190 | # landmarkhead.append(LandmarkHead(inchannels,anchor_num)) 191 | # return landmarkhead 192 | 193 | # def freeze_bn(self): 194 | # '''Freeze BatchNorm layers.''' 195 | # for layer in self.modules(): 196 | # if isinstance(layer, nn.BatchNorm2d): 197 | # layer.eval() 198 | 199 | # def forward(self,inputs): 200 | # if self.training: 201 | # img_batch, annotations = inputs 202 | # else: 203 | # img_batch = inputs 204 | 205 | # out = self.body(img_batch) 206 | # features = self.fpn(out) 207 | 208 | # # bbox_regressions = 
torch.cat([self.BboxHead(feature) for feature in features.values()], dim=1) 209 | # # ldm_regressions = torch.cat([self.LandmarkHead(feature) for feature in features.values()], dim=1) 210 | # # classifications = torch.cat([self.ClassHead(feature) for feature in features.values()],dim=1) 211 | # bbox_regressions = torch.cat([self.BboxHead[i](feature) for i, feature in enumerate(features.values())], dim=1) 212 | # # ldm_regressions = torch.cat([self.LandmarkHead[i](feature) for i, feature in enumerate(features.values())], dim=1) 213 | # classifications = torch.cat([self.ClassHead[i](feature) for i, feature in enumerate(features.values())],dim=1) 214 | 215 | # anchors = self.anchors(img_batch) 216 | 217 | # if self.training: 218 | # return self.losslayer(classifications, bbox_regressions, anchors, annotations) 219 | # else: 220 | # bboxes = self.regressBoxes(anchors, bbox_regressions, img_batch) 221 | 222 | # return classifications, bboxes 223 | 224 | 225 | # def create_retinaface(return_layers,backbone_name='resnet50',anchors_num=3,pretrained=True): 226 | # # backbone = resnet.__dict__[backbone_name](pretrained=pretrained) 227 | # backbone=1 228 | # # freeze layer1 229 | # # for name, parameter in backbone.named_parameters(): 230 | # # # if 'layer2' not in name and 'layer3' not in name and 'layer4' not in name: 231 | # # # parameter.requires_grad_(False) 232 | # # if name == 'conv1.weight': 233 | # # # print('freeze first conv layer...') 234 | # # parameter.requires_grad_(False) 235 | 236 | # model = RetinaFace(backbone,return_layers,anchor_nums=3) 237 | 238 | # return model 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | import torch 256 | import torch.nn as nn 257 | import torchvision.models.detection.backbone_utils as backbone_utils 258 | import torchvision.models.resnet as resnet 259 | import torchvision.models._utils as _utils 260 | import torch.nn.functional as F 261 | from collections import OrderedDict 262 | from anchors import Anchors 263 | from utils import RegressionTransform 264 | import losses 265 | from mobile import mobileV1 266 | 267 | class ContextModule(nn.Module): 268 | def __init__(self,in_channels=256): 269 | super(ContextModule,self).__init__() 270 | self.det_conv1 = nn.Sequential( 271 | nn.Conv2d(in_channels,in_channels,kernel_size=3,stride=1,padding=1), 272 | nn.BatchNorm2d(in_channels) 273 | ) 274 | self.det_context_conv1 = nn.Sequential( 275 | nn.Conv2d(in_channels,in_channels//2,kernel_size=3,stride=1,padding=1), 276 | nn.BatchNorm2d(in_channels//2), 277 | nn.ReLU(inplace=True) 278 | ) 279 | self.det_context_conv2 = nn.Sequential( 280 | nn.Conv2d(in_channels//2,in_channels//2,kernel_size=3,stride=1,padding=1), 281 | nn.BatchNorm2d(in_channels//2) 282 | ) 283 | self.det_context_conv3_1 = nn.Sequential( 284 | nn.Conv2d(in_channels//2,in_channels//2,kernel_size=3,stride=1,padding=1), 285 | nn.BatchNorm2d(in_channels//2), 286 | nn.ReLU(inplace=True) 287 | ) 288 | self.det_context_conv3_2 = nn.Sequential( 289 | nn.Conv2d(in_channels//2,in_channels//2,kernel_size=3,stride=1,padding=1), 290 | nn.BatchNorm2d(in_channels//2) 291 | ) 292 | self.det_concat_relu = nn.ReLU(inplace=True) 293 | 294 | def forward(self,x): 295 | x1 = self.det_conv1(x) 296 | x_ = self.det_context_conv1(x) 297 | x2 = self.det_context_conv2(x_) 298 | x3_ = self.det_context_conv3_1(x_) 299 | x3 = self.det_context_conv3_2(x3_) 300 | 301 | out = torch.cat((x1,x2,x3),1) 302 | act_out = self.det_concat_relu(out) 303 | 304 | return act_out 305 | 306 | 
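# Note: ContextModule above follows the SSH-style context design used in RetinaFace: a plain 3x3 branch
# plus two stacked 3x3 branches (5x5 / 7x7 effective receptive fields) whose outputs are concatenated,
# so the output has 2*in_channels channels (in_channels + in_channels//2 + in_channels//2). With
# out_channels=32 in the FPN below, each pyramid level therefore carries 64 channels, matching the
# inchannels=64 default of the detection heads.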
class FeaturePyramidNetwork(nn.Module): 307 | def __init__(self,in_channels_list,out_channels): 308 | super(FeaturePyramidNetwork,self).__init__() 309 | self.lateral_blocks = nn.ModuleList() 310 | self.context_blocks = nn.ModuleList() 311 | self.aggr_blocks = nn.ModuleList() 312 | for i, in_channels in enumerate(in_channels_list): 313 | if in_channels == 0: 314 | continue 315 | lateral_block_module = nn.Sequential( 316 | nn.Conv2d(in_channels,out_channels,kernel_size=1,stride=1,padding=0), 317 | nn.BatchNorm2d(out_channels), 318 | nn.ReLU(inplace=True) 319 | ) 320 | aggr_block_module = nn.Sequential( 321 | nn.Conv2d(out_channels,out_channels,kernel_size=3,stride=1,padding=1), 322 | nn.BatchNorm2d(out_channels), 323 | nn.ReLU(inplace=True) 324 | ) 325 | context_block_module = ContextModule(out_channels) 326 | self.lateral_blocks.append(lateral_block_module) 327 | self.context_blocks.append(context_block_module) 328 | if i > 0 : 329 | self.aggr_blocks.append(aggr_block_module) 330 | 331 | # initialize params of fpn layers 332 | for m in self.modules(): 333 | if isinstance(m,nn.Conv2d): 334 | nn.init.kaiming_uniform_(m.weight, a=1) 335 | nn.init.constant_(m.bias, 0) 336 | 337 | def forward(self,x): 338 | names = list(x.keys()) 339 | x = list(x.values()) 340 | 341 | last_inner = self.lateral_blocks[-1](x[-1]) 342 | results = [] 343 | results.append(self.context_blocks[-1](last_inner)) 344 | for feature, lateral_block, context_block, aggr_block in zip( 345 | x[:-1][::-1], self.lateral_blocks[:-1][::-1], self.context_blocks[:-1][::-1], self.aggr_blocks[::-1] 346 | ): 347 | if not lateral_block: 348 | continue 349 | lateral_feature = lateral_block(feature) 350 | feat_shape = lateral_feature.shape[-2:] 351 | inner_top_down = F.interpolate(last_inner, size=feat_shape, mode="nearest") 352 | last_inner = lateral_feature + inner_top_down 353 | last_inner = aggr_block(last_inner) 354 | results.insert(0, context_block(last_inner)) 355 | 356 | # make it back an OrderedDict 357 | out = OrderedDict([(k, v) for k, v in zip(names, results)]) 358 | 359 | return out 360 | 361 | class ClassHead(nn.Module): 362 | def __init__(self,inchannels=64,num_anchors=3): 363 | super(ClassHead,self).__init__() 364 | self.num_anchors = num_anchors 365 | self.conv1x1 = nn.Conv2d(inchannels,self.num_anchors*2,kernel_size=(1,1),stride=1,padding=0) 366 | self.output_act = nn.LogSoftmax(dim=-1) 367 | 368 | def forward(self,x): 369 | out = self.conv1x1(x) 370 | out = out.permute(0,2,3,1) 371 | b, h, w, c = out.shape 372 | out = out.view(b, h, w, self.num_anchors, 2) 373 | out = self.output_act(out) 374 | 375 | return out.contiguous().view(out.shape[0], -1, 2) 376 | 377 | class BboxHead(nn.Module): 378 | def __init__(self,inchannels=64,num_anchors=3): 379 | super(BboxHead,self).__init__() 380 | self.conv1x1 = nn.Conv2d(inchannels,num_anchors*4,kernel_size=(1,1),stride=1,padding=0) 381 | 382 | def forward(self,x): 383 | out = self.conv1x1(x) 384 | out = out.permute(0,2,3,1) 385 | 386 | return out.contiguous().view(out.shape[0], -1, 4) 387 | 388 | class LandmarkHead(nn.Module): 389 | def __init__(self,inchannels=64,num_anchors=3): 390 | super(LandmarkHead,self).__init__() 391 | self.conv1x1 = nn.Conv2d(inchannels,num_anchors*136,kernel_size=(1,1),stride=1,padding=0) 392 | 393 | def forward(self,x): 394 | out = self.conv1x1(x) 395 | out = out.permute(0,2,3,1) 396 | 397 | return out.contiguous().view(out.shape[0], -1, 136) 398 | 399 | class RetinaFace(nn.Module): 400 | def __init__(self,backbone,return_layers,anchor_nums=3): 401 | 
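# RetinaFace here wires together: mobileV1 backbone -> FeaturePyramidNetwork (32-channel laterals,
# 64-channel context output) -> one ClassHead / BboxHead / LandmarkHead per pyramid level (3 anchors
# each; the landmark head regresses 68 points x 2 = 136 values per anchor). The anchor generator,
# box/landmark decoder and loss layer are attached to the module as well.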
super(RetinaFace,self).__init__() 402 | # if backbone_name == 'resnet50': 403 | # self.backbone = resnet.resnet50(pretrained) 404 | # self.backbone = resnet.__dict__[backbone_name](pretrained=pretrained) 405 | # self.return_layers = {'layer1': 0, 'layer2': 1, 'layer3': 2, 'layer4': 3} 406 | assert backbone,'Backbone can not be none!' 407 | assert len(return_layers)>0,'There must be at least one return layers' 408 | self.body = mobileV1() 409 | in_channels_stage2 = 32 410 | # in_channels_stage2 = 64 411 | in_channels_list = [ 412 | #in_channels_stage2, 413 | in_channels_stage2 * 2, 414 | in_channels_stage2 * 4, 415 | in_channels_stage2 * 8, 416 | ] 417 | out_channels = 32 418 | self.fpn = FeaturePyramidNetwork(in_channels_list,out_channels) 419 | # self.ClassHead = ClassHead() 420 | # self.BboxHead = BboxHead() 421 | # self.LandmarkHead = LandmarkHead() 422 | self.ClassHead = self._make_class_head() 423 | self.BboxHead = self._make_bbox_head() 424 | self.LandmarkHead = self._make_landmark_head() 425 | self.anchors = Anchors() 426 | self.regressBoxes = RegressionTransform() 427 | self.losslayer = losses.LossLayer() 428 | 429 | def _make_class_head(self,fpn_num=3,inchannels=64,anchor_num=3): 430 | classhead = nn.ModuleList() 431 | for i in range(fpn_num): 432 | classhead.append(ClassHead(inchannels,anchor_num)) 433 | return classhead 434 | 435 | def _make_bbox_head(self,fpn_num=3,inchannels=64,anchor_num=3): 436 | bboxhead = nn.ModuleList() 437 | for i in range(fpn_num): 438 | bboxhead.append(BboxHead(inchannels,anchor_num)) 439 | return bboxhead 440 | 441 | def _make_landmark_head(self,fpn_num=3,inchannels=64,anchor_num=3): 442 | landmarkhead = nn.ModuleList() 443 | for i in range(fpn_num): 444 | landmarkhead.append(LandmarkHead(inchannels,anchor_num)) 445 | return landmarkhead 446 | 447 | def freeze_bn(self): 448 | for layer in self.modules(): 449 | if isinstance(layer, nn.BatchNorm2d): 450 | layer.eval() 451 | 452 | def forward(self,inputs): 453 | if self.training: 454 | img_batch, annotations = inputs 455 | else: 456 | img_batch = inputs 457 | 458 | out = self.body(img_batch) 459 | features = self.fpn(out) 460 | 461 | # bbox_regressions = torch.cat([self.BboxHead(feature) for feature in features.values()], dim=1) 462 | # ldm_regressions = torch.cat([self.LandmarkHead(feature) for feature in features.values()], dim=1) 463 | # classifications = torch.cat([self.ClassHead(feature) for feature in features.values()],dim=1) 464 | bbox_regressions = torch.cat([self.BboxHead[i](feature) for i, feature in enumerate(features.values())], dim=1) 465 | ldm_regressions = torch.cat([self.LandmarkHead[i](feature) for i, feature in enumerate(features.values())], dim=1) 466 | classifications = torch.cat([self.ClassHead[i](feature) for i, feature in enumerate(features.values())],dim=1) 467 | 468 | anchors = self.anchors(img_batch) 469 | 470 | if self.training: 471 | return self.losslayer(classifications, bbox_regressions,ldm_regressions, anchors, annotations) 472 | else: 473 | bboxes, landmarks = self.regressBoxes(anchors, bbox_regressions, ldm_regressions, img_batch) 474 | 475 | return classifications, bboxes, landmarks 476 | 477 | 478 | def create_retinaface(return_layers,backbone_name='resnet50',anchors_num=3,pretrained=True): 479 | # backbone = resnet.__dict__[backbone_name](pretrained=pretrained) 480 | backbone=1 481 | # freeze layer1 482 | # for name, parameter in backbone.named_parameters(): 483 | # # if 'layer2' not in name and 'layer3' not in name and 'layer4' not in name: 484 | # # 
parameter.requires_grad_(False) 485 | # if name == 'conv1.weight': 486 | # # print('freeze first conv layer...') 487 | # parameter.requires_grad_(False) 488 | 489 | model = RetinaFace(backbone,return_layers,anchor_nums=3) 490 | 491 | return model 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optim 6 | from torchvision import datasets, models, transforms 7 | from dataloader import TrainDataset, collater, Resizer, PadToSquare,Color,Rotate,RandomErasing,RandomFlip, ValDataset 8 | from torch.utils.data import Dataset, DataLoader, random_split 9 | from terminaltables import AsciiTable, DoubleTable, SingleTable 10 | # from tensorboardX import SummaryWriter 11 | from torch.optim import lr_scheduler 12 | import torch.distributed as dist 13 | import eval_widerface 14 | import torchvision 15 | import model 16 | import os 17 | from torch.utils.data.distributed import DistributedSampler 18 | import torchvision_model 19 | 20 | 21 | def get_args(): 22 | parser = argparse.ArgumentParser(description="Train program for retinaface.") 23 | parser.add_argument('--data_path', type=str,default='./widerface' ,help='Path for dataset,default WIDERFACE') 24 | parser.add_argument('--batch', type=int, default=32, help='Batch size') 25 | parser.add_argument('--epochs', type=int, default=121, help='Max training epochs') 26 | parser.add_argument('--shuffle', type=bool, default=True, help='Shuffle dataset or not') 27 | parser.add_argument('--img_size', type=int, default=640, help='Input image size') 28 | parser.add_argument('--verbose', type=int, default=20, help='Log verbose') 29 | parser.add_argument('--save_step', type=int, default=10, help='Save every save_step epochs') 30 | parser.add_argument('--eval_step', type=int, default=10, help='Evaluate every eval_step epochs') 31 | parser.add_argument('--save_path', type=str, default='./out', help='Model save path') 32 | parser.add_argument('--training', help='the training mode or not ( True for Training, False for eval', type=bool, default=True) 33 | args = parser.parse_args() 34 | print(args) 35 | 36 | return args 37 | 38 | 39 | def main(): 40 | args = get_args() 41 | if not os.path.exists(args.save_path): 42 | os.mkdir(args.save_path) 43 | log_path = os.path.join(args.save_path,'log') 44 | if not os.path.exists(log_path): 45 | os.mkdir(log_path) 46 | 47 | 48 | data_path = args.data_path 49 | # dataset_train = TrainDataset(train_path,transform=transforms.Compose([RandomCroper(),()])) 50 | dataset_train = TrainDataset('./widerface/train/label.txt',transform=transforms.Compose([RandomErasing(),RandomFlip(),Rotate(),Color(),Resizer(),PadToSquare()])) 51 | # dataset_train = TrainDataset('./widerface/train/label.txt',transform=transforms.Compose([Resizer(),PadToSquare()])) 52 | dataloader_train = DataLoader(dataset_train, num_workers=8, batch_size=args.batch, collate_fn=collater,shuffle=True) 53 | # dataset_val = ValDataset(val_path,transform=transforms.Compose([RandomCroper()])) 54 | dataset_val = TrainDataset('./widerface/train/label.txt',transform=transforms.Compose([Resizer(640),PadToSquare()])) 55 | dataloader_val = DataLoader(dataset_val, num_workers=8, batch_size=args.batch, collate_fn=collater) 56 | 57 | total_batch = 
len(dataloader_train) 58 | 59 | 60 | 61 | # Create torchvision model 62 | return_layers = {'layer2':1,'layer3':2,'layer4':3} 63 | retinaface = torchvision_model.create_retinaface(return_layers) 64 | retinaface_ = retinaface.cuda() 65 | retinaface = torch.nn.DataParallel(retinaface_).cuda() 66 | retinaface.training = True 67 | base_lr=1e-7 68 | 69 | # pre_train = torch.load('network.torch') 70 | # cur=retinaface.state_dict() 71 | # for k, v in cur.items(): 72 | # if k[12:] in pre_train: 73 | # print(k[12:]) 74 | # cur[k]=pre_train[k[12:]] 75 | # retinaface.load_state_dict(cur) 76 | retinaface.load_state_dict(torch.load("/versa/elvishelvis/RetinaYang/out/stage_5_68_full_model_epoch_121.pt")) 77 | lr=base_lr 78 | # optimizer=torch.optim.Adam(retinaface.parameters(),lr=lr) 79 | # fix encoder 80 | for name, value in retinaface.named_parameters(): 81 | if 'Landmark' in name: 82 | value.requires_grad = False 83 | lr_cos = lambda n: 0.5 * (1 + np.cos((n) / (args.epochs) * np.pi)) * base_lr 84 | params = filter(lambda p: p.requires_grad==True, retinaface.parameters()) 85 | body=filter(lambda p: p.requires_grad==False, retinaface.parameters()) 86 | optimizer = torch.optim.Adam([ 87 | {'params': body, 'lr': lr*3}, 88 | {'params': params, 'lr': lr}] 89 | ) 90 | #evaluation the current model 91 | if (args.training==False): 92 | print("not pretrain") 93 | recall, precision, landmakr,miss= eval_widerface.evaluate(dataloader_val,retinaface) 94 | print('Recall:',recall) 95 | print('Precision:',precision) 96 | print("landmark: ",str(landmakr)) 97 | print("miss: "+ str(miss)) 98 | return 99 | ## 100 | print('Start to train.') 101 | 102 | epoch_loss = [] 103 | iteration = 0 104 | retinaface=retinaface.cuda() 105 | for epoch in range(args.epochs): 106 | lr=lr_cos(epoch) 107 | 108 | retinaface.train() 109 | 110 | # Training 111 | for iter_num,data in enumerate(dataloader_train): 112 | optimizer.zero_grad() 113 | classification_loss, bbox_regression_loss,ldm_regression_loss = retinaface([data['img'].cuda().float(), data['annot']]) 114 | classification_loss = classification_loss.mean() 115 | bbox_regression_loss = bbox_regression_loss.mean() 116 | ldm_regression_loss = ldm_regression_loss.mean() 117 | 118 | # loss = classification_loss + 1.0 * bbox_regression_loss + 0.5 * ldm_regression_loss 119 | loss = classification_loss + 0.15*bbox_regression_loss + 0.25*ldm_regression_loss 120 | 121 | loss.backward() 122 | optimizer.step() 123 | 124 | if iter_num % args.verbose == 0: 125 | log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (epoch, args.epochs, iter_num, total_batch) 126 | table_data = [ 127 | ['loss name','value'], 128 | ['total_loss',str(loss.item())], 129 | ['classification',str(classification_loss.item())], 130 | ['bbox',str(bbox_regression_loss.item())], 131 | ['landmarks',str(ldm_regression_loss.item())] 132 | ] 133 | table = AsciiTable(table_data) 134 | log_str +=table.table 135 | print(log_str) 136 | iteration +=1 137 | 138 | 139 | # Eval 140 | if epoch % args.eval_step == 0: 141 | with open("aaa.txt", 'a') as f: 142 | f.write('-------- RetinaFace Pytorch --------'+'\n') 143 | f.write ('Evaluating epoch {}'.format(epoch)+'\n') 144 | f.write('total_loss:'+str(loss.item())+'\n') 145 | f.write('classification'+str(classification_loss.item())+'\n') 146 | f.write('bbox'+str(bbox_regression_loss.item())+'\n') 147 | f.write('landmarks'+str(ldm_regression_loss.item())+'\n') 148 | 149 | f.close() 150 | print('-------- RetinaFace Pytorch --------') 151 | print ('Evaluating epoch {}'.format(epoch)) 152 | 
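# note: dataloader_val above was built from the same ./widerface/train/label.txt as the training set,
# so the recall/precision reported here track training fit rather than true held-out performance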
recall, precision, landmakr,miss= eval_widerface.evaluate(dataloader_val,retinaface) 153 | print('Recall:',recall) 154 | print('Precision:',precision) 155 | print("landmark: ",str(landmakr)) 156 | print("miss: "+ str(miss)) 157 | 158 | with open("aaa.txt", 'a') as f: 159 | f.write('-------- RetinaFace Pytorch --------(not pretrain)'+'\n') 160 | f.write ('Evaluating epoch {}'.format(epoch)+'\n') 161 | f.write('Recall:'+str(recall)+'\n') 162 | f.write('Precision:'+str(precision)+'\n') 163 | f.write("landmark: "+str(landmakr)+'\n') 164 | f.write("miss: "+ str(miss)+'\n') 165 | f.close() 166 | # Save model 167 | if (epoch) % args.save_step == 0: 168 | torch.save(retinaface.state_dict(), args.save_path + '/stage_5_68_full_model_epoch_{}.pt'.format(epoch + 1)) 169 | 170 | # writer.close() 171 | 172 | 173 | if __name__=='__main__': 174 | main() 175 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | # import torch 2 | # import torch.nn as nn 3 | # import numpy as np 4 | 5 | # def conv3x3(in_planes, out_planes, stride=1): 6 | # """3x3 convolution with padding""" 7 | # return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 8 | # padding=1, bias=False) 9 | 10 | # class BasicBlock(nn.Module): 11 | # expansion = 1 12 | 13 | # def __init__(self, inplanes, planes, stride=1, downsample=None): 14 | # super(BasicBlock, self).__init__() 15 | # self.conv1 = conv3x3(inplanes, planes, stride) 16 | # self.bn1 = nn.BatchNorm2d(planes) 17 | # self.relu = nn.ReLU(inplace=True) 18 | # self.conv2 = conv3x3(planes, planes) 19 | # self.bn2 = nn.BatchNorm2d(planes) 20 | # self.downsample = downsample 21 | # self.stride = stride 22 | 23 | # def forward(self, x): 24 | # residual = x 25 | 26 | # out = self.conv1(x) 27 | # out = self.bn1(out) 28 | # out = self.relu(out) 29 | 30 | # out = self.conv2(out) 31 | # out = self.bn2(out) 32 | 33 | # if self.downsample is not None: 34 | # residual = self.downsample(x) 35 | 36 | # out += residual 37 | # out = self.relu(out) 38 | 39 | # return out 40 | 41 | 42 | # class Bottleneck(nn.Module): 43 | # expansion = 4 44 | 45 | # def __init__(self, inplanes, planes, stride=1, downsample=None): 46 | # super(Bottleneck, self).__init__() 47 | # self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 48 | # self.bn1 = nn.BatchNorm2d(planes) 49 | # self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 50 | # padding=1, bias=False) 51 | # self.bn2 = nn.BatchNorm2d(planes) 52 | # self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 53 | # self.bn3 = nn.BatchNorm2d(planes * 4) 54 | # self.relu = nn.ReLU(inplace=True) 55 | # self.downsample = downsample 56 | # self.stride = stride 57 | 58 | # def forward(self, x): 59 | # residual = x 60 | 61 | # out = self.conv1(x) 62 | # out = self.bn1(out) 63 | # out = self.relu(out) 64 | 65 | # out = self.conv2(out) 66 | # out = self.bn2(out) 67 | # out = self.relu(out) 68 | 69 | # out = self.conv3(out) 70 | # out = self.bn3(out) 71 | 72 | # if self.downsample is not None: 73 | # residual = self.downsample(x) 74 | 75 | # out += residual 76 | # out = self.relu(out) 77 | 78 | # return out 79 | 80 | # class RegressionTransform(nn.Module): 81 | # def __init__(self,mean=None,std_box=None,std_ldm=None): 82 | # super(RegressionTransform, self).__init__() 83 | # if mean is None: 84 | # #self.mean = torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32)).cuda() 85 | # self.mean = 
torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32)) 86 | # else: 87 | # self.mean = mean 88 | # if std_box is None: 89 | # #self.std_box = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32)).cuda() 90 | # self.std_box = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32)) 91 | # else: 92 | # self.std_box = std_box 93 | # if std_ldm is None: 94 | # #self.std_ldm = (torch.ones(1,10) * 0.1).cuda() 95 | # self.std_ldm = (torch.ones(1,136) * 0.1) 96 | 97 | # def forward(self,anchors,bbox_deltas,img): 98 | # widths = anchors[:, :, 2] - anchors[:, :, 0] 99 | # heights = anchors[:, :, 3] - anchors[:, :, 1] 100 | # ctr_x = anchors[:, :, 0] + 0.5 * widths 101 | # ctr_y = anchors[:, :, 1] + 0.5 * heights 102 | 103 | # # Rescale 104 | # # ldm_deltas = ldm_deltas * self.std_ldm.cuda() 105 | # bbox_deltas = bbox_deltas * self.std_box.cuda() 106 | 107 | # bbox_dx = bbox_deltas[:, :, 0] 108 | # bbox_dy = bbox_deltas[:, :, 1] 109 | # bbox_dw = bbox_deltas[:, :, 2] 110 | # bbox_dh = bbox_deltas[:, :, 3] 111 | 112 | # # get predicted boxes 113 | # pred_ctr_x = ctr_x + bbox_dx * widths 114 | # pred_ctr_y = ctr_y + bbox_dy * heights 115 | # pred_w = torch.exp(bbox_dw) * widths 116 | # pred_h = torch.exp(bbox_dh) * heights 117 | 118 | # pred_boxes_x1 = pred_ctr_x - 0.5 * pred_w 119 | # pred_boxes_y1 = pred_ctr_y - 0.5 * pred_h 120 | # pred_boxes_x2 = pred_ctr_x + 0.5 * pred_w 121 | # pred_boxes_y2 = pred_ctr_y + 0.5 * pred_h 122 | 123 | 124 | # # pred_landmarks=[] 125 | 126 | # # for i in range(0,136): 127 | # # if i %2==0: 128 | # # candidate=ctr_x + ldm_deltas[:,:,i] * widths 129 | # # else: 130 | # # candidate=ctr_y + ldm_deltas[:,:,i] * heights 131 | # # pred_landmarks.append(candidate) 132 | 133 | # # # pred_landmarks=torch.stack((pred_landmarks),dim=2) 134 | # pred_boxes = torch.stack([pred_boxes_x1, pred_boxes_y1, pred_boxes_x2, pred_boxes_y2], dim=2) 135 | 136 | 137 | # # clip bboxes and landmarks 138 | # B,C,H,W = img.shape 139 | 140 | # pred_boxes[:,:,::2] = torch.clamp(pred_boxes[:,:,::2], min=0, max=W) 141 | # pred_boxes[:,:,1::2] = torch.clamp(pred_boxes[:,:,1::2], min=0, max=H) 142 | # # # pred_landmarks[:,:,::2] = torch.clamp(pred_landmarks[:,:,::2], min=0, max=W) 143 | # # # pred_landmarks[:,:,1::2] = torch.clamp(pred_landmarks[:,:,1::2], min=0, max=H) 144 | 145 | # # return pred_boxes, pred_landmarks 146 | # return pred_boxes 147 | 148 | 149 | # def nms(boxes,scores,iou_threshold): 150 | # boxes = boxes.cpu().numpy() 151 | # score = scores.cpu().numpy() 152 | 153 | # # coordinates of bounding boxes 154 | # start_x = boxes[:, 0] 155 | # start_y = boxes[:, 1] 156 | # end_x = boxes[:, 2] 157 | # end_y = boxes[:, 3] 158 | 159 | # # Picked bounding boxes 160 | # picked_boxes = [] 161 | # picked_score = [] 162 | 163 | # # Compute areas of bounding boxes 164 | # areas = (end_x - start_x + 1) * (end_y - start_y + 1) 165 | 166 | # # Sort by confidence score of bounding boxes 167 | # order = np.argsort(score) 168 | 169 | # # Iterate bounding boxes 170 | # while order.size > 0: 171 | # # The index of largest confidence score 172 | # index = order[-1] 173 | 174 | # # Pick the bounding box with largest confidence score 175 | # picked_boxes.append(boxes[index]) 176 | # picked_score.append(score[index]) 177 | # a=start_x[index] 178 | # b=order[:-1] 179 | # c=start_x[order[:-1]] 180 | # # Compute ordinates of intersection-over-union(IOU) 181 | # x1 = np.maximum(start_x[index], start_x[order[:-1]]) 182 | # x2 = np.minimum(end_x[index], end_x[order[:-1]]) 183 | # y1 = 
np.maximum(start_y[index], start_y[order[:-1]]) 184 | # y2 = np.minimum(end_y[index], end_y[order[:-1]]) 185 | 186 | # # Compute areas of intersection-over-union 187 | # w = np.maximum(0.0, x2 - x1 + 1) 188 | # h = np.maximum(0.0, y2 - y1 + 1) 189 | # intersection = w * h 190 | 191 | # # Compute the ratio between intersection and union 192 | # ratio = intersection / (areas[index] + areas[order[:-1]] - intersection) 193 | 194 | # left = np.where(ratio < iou_threshold) 195 | # order = order[left] 196 | 197 | # picked_boxes = torch.Tensor(picked_boxes) 198 | # picked_score = torch.Tensor(picked_score) 199 | # return picked_boxes, picked_score 200 | 201 | 202 | 203 | import torch 204 | import torch.nn as nn 205 | import numpy as np 206 | 207 | def conv3x3(in_planes, out_planes, stride=1): 208 | """3x3 convolution with padding""" 209 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 210 | padding=1, bias=False) 211 | 212 | class BasicBlock(nn.Module): 213 | expansion = 1 214 | 215 | def __init__(self, inplanes, planes, stride=1, downsample=None): 216 | super(BasicBlock, self).__init__() 217 | self.conv1 = conv3x3(inplanes, planes, stride) 218 | self.bn1 = nn.BatchNorm2d(planes) 219 | self.relu = nn.ReLU(inplace=True) 220 | self.conv2 = conv3x3(planes, planes) 221 | self.bn2 = nn.BatchNorm2d(planes) 222 | self.downsample = downsample 223 | self.stride = stride 224 | 225 | def forward(self, x): 226 | residual = x 227 | 228 | out = self.conv1(x) 229 | out = self.bn1(out) 230 | out = self.relu(out) 231 | 232 | out = self.conv2(out) 233 | out = self.bn2(out) 234 | 235 | if self.downsample is not None: 236 | residual = self.downsample(x) 237 | 238 | out += residual 239 | out = self.relu(out) 240 | 241 | return out 242 | 243 | 244 | class Bottleneck(nn.Module): 245 | expansion = 4 246 | 247 | def __init__(self, inplanes, planes, stride=1, downsample=None): 248 | super(Bottleneck, self).__init__() 249 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 250 | self.bn1 = nn.BatchNorm2d(planes) 251 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 252 | padding=1, bias=False) 253 | self.bn2 = nn.BatchNorm2d(planes) 254 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 255 | self.bn3 = nn.BatchNorm2d(planes * 4) 256 | self.relu = nn.ReLU(inplace=True) 257 | self.downsample = downsample 258 | self.stride = stride 259 | 260 | def forward(self, x): 261 | residual = x 262 | 263 | out = self.conv1(x) 264 | out = self.bn1(out) 265 | out = self.relu(out) 266 | 267 | out = self.conv2(out) 268 | out = self.bn2(out) 269 | out = self.relu(out) 270 | 271 | out = self.conv3(out) 272 | out = self.bn3(out) 273 | 274 | if self.downsample is not None: 275 | residual = self.downsample(x) 276 | 277 | out += residual 278 | out = self.relu(out) 279 | 280 | return out 281 | 282 | class RegressionTransform(nn.Module): 283 | def __init__(self,mean=None,std_box=None,std_ldm=None): 284 | super(RegressionTransform, self).__init__() 285 | if mean is None: 286 | #self.mean = torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32)).cuda() 287 | self.mean = torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32)) 288 | else: 289 | self.mean = mean 290 | if std_box is None: 291 | #self.std_box = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32)).cuda() 292 | self.std_box = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32)) 293 | else: 294 | self.std_box = std_box 295 | if std_ldm is None: 296 | #self.std_ldm = 
(torch.ones(1,10) * 0.1).cuda() 297 | self.std_ldm = (torch.ones(1,136) * 0.1) 298 | 299 | def forward(self,anchors,bbox_deltas,ldm_deltas,img): 300 | widths = anchors[:, :, 2] - anchors[:, :, 0] 301 | heights = anchors[:, :, 3] - anchors[:, :, 1] 302 | ctr_x = anchors[:, :, 0] + 0.5 * widths 303 | ctr_y = anchors[:, :, 1] + 0.5 * heights 304 | 305 | # Rescale 306 | ldm_deltas = ldm_deltas * self.std_ldm.cuda() 307 | bbox_deltas = bbox_deltas * self.std_box.cuda() 308 | 309 | bbox_dx = bbox_deltas[:, :, 0] 310 | bbox_dy = bbox_deltas[:, :, 1] 311 | bbox_dw = bbox_deltas[:, :, 2] 312 | bbox_dh = bbox_deltas[:, :, 3] 313 | 314 | # get predicted boxes 315 | pred_ctr_x = ctr_x + bbox_dx * widths 316 | pred_ctr_y = ctr_y + bbox_dy * heights 317 | pred_w = torch.exp(bbox_dw) * widths 318 | pred_h = torch.exp(bbox_dh) * heights 319 | 320 | pred_boxes_x1 = pred_ctr_x - 0.5 * pred_w 321 | pred_boxes_y1 = pred_ctr_y - 0.5 * pred_h 322 | pred_boxes_x2 = pred_ctr_x + 0.5 * pred_w 323 | pred_boxes_y2 = pred_ctr_y + 0.5 * pred_h 324 | 325 | 326 | pred_landmarks=[] 327 | 328 | for i in range(0,136): 329 | if i %2==0: 330 | candidate=ctr_x + ldm_deltas[:,:,i] * widths 331 | else: 332 | candidate=ctr_y + ldm_deltas[:,:,i] * heights 333 | pred_landmarks.append(candidate) 334 | 335 | pred_landmarks=torch.stack((pred_landmarks),dim=2) 336 | pred_boxes = torch.stack([pred_boxes_x1, pred_boxes_y1, pred_boxes_x2, pred_boxes_y2], dim=2) 337 | 338 | 339 | # clip bboxes and landmarks 340 | B,C,H,W = img.shape 341 | 342 | pred_boxes[:,:,::2] = torch.clamp(pred_boxes[:,:,::2], min=0, max=W) 343 | pred_boxes[:,:,1::2] = torch.clamp(pred_boxes[:,:,1::2], min=0, max=H) 344 | pred_landmarks[:,:,::2] = torch.clamp(pred_landmarks[:,:,::2], min=0, max=W) 345 | pred_landmarks[:,:,1::2] = torch.clamp(pred_landmarks[:,:,1::2], min=0, max=H) 346 | 347 | return pred_boxes, pred_landmarks 348 | 349 | 350 | def nms(boxes,scores,iou_threshold): 351 | boxes = boxes.cpu().numpy() 352 | score = scores.cpu().numpy() 353 | 354 | # coordinates of bounding boxes 355 | start_x = boxes[:, 0] 356 | start_y = boxes[:, 1] 357 | end_x = boxes[:, 2] 358 | end_y = boxes[:, 3] 359 | 360 | # Picked bounding boxes 361 | picked_boxes = [] 362 | picked_score = [] 363 | 364 | # Compute areas of bounding boxes 365 | areas = (end_x - start_x + 1) * (end_y - start_y + 1) 366 | 367 | # Sort by confidence score of bounding boxes 368 | order = np.argsort(score) 369 | 370 | # Iterate bounding boxes 371 | while order.size > 0: 372 | # The index of largest confidence score 373 | index = order[-1] 374 | 375 | # Pick the bounding box with largest confidence score 376 | picked_boxes.append(boxes[index]) 377 | picked_score.append(score[index]) 378 | a=start_x[index] 379 | b=order[:-1] 380 | c=start_x[order[:-1]] 381 | # Compute ordinates of intersection-over-union(IOU) 382 | x1 = np.maximum(start_x[index], start_x[order[:-1]]) 383 | x2 = np.minimum(end_x[index], end_x[order[:-1]]) 384 | y1 = np.maximum(start_y[index], start_y[order[:-1]]) 385 | y2 = np.minimum(end_y[index], end_y[order[:-1]]) 386 | 387 | # Compute areas of intersection-over-union 388 | w = np.maximum(0.0, x2 - x1 + 1) 389 | h = np.maximum(0.0, y2 - y1 + 1) 390 | intersection = w * h 391 | 392 | # Compute the ratio between intersection and union 393 | ratio = intersection / (areas[index] + areas[order[:-1]] - intersection) 394 | 395 | left = np.where(ratio < iou_threshold) 396 | order = order[left] 397 | 398 | picked_boxes = torch.Tensor(picked_boxes) 399 | picked_score = torch.Tensor(picked_score) 
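# the loop above is greedy NMS: keep the highest-scoring remaining box, discard every box whose IoU
# with it reaches iou_threshold, and repeat; boxes therefore come back sorted by descending confidence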
400 | return picked_boxes, picked_score 401 | 402 | 403 | -------------------------------------------------------------------------------- /video_detect.py: -------------------------------------------------------------------------------- 1 | 2 | import argparse 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import numpy as np 7 | import skimage 8 | from skimage import io 9 | from PIL import Image 10 | import cv2 11 | import torchvision 12 | import eval_widerface 13 | import torchvision_model 14 | import os 15 | import skimage 16 | from dataloader import ValDataset, Resizer, PadToSquare,ValDataset_CeleB 17 | from torchvision import datasets, models, transforms 18 | os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" 19 | def pad_to_square(img, pad_value): 20 | _, h, w = img.shape 21 | dim_diff = np.abs(h - w) 22 | # (upper / left) padding and (lower / right) padding 23 | pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2 24 | # Determine padding 25 | pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0) 26 | # Add padding 27 | img = F.pad(img, pad, "constant", value=pad_value) 28 | 29 | return img, pad 30 | 31 | def resize(image, size): 32 | image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0) 33 | return image 34 | 35 | def get_args(): 36 | parser = argparse.ArgumentParser(description="Detect program for retinaface.") 37 | parser.add_argument('--image_path', type=str, default='WechatIMG10.jpeg', help='Path for image to detect') 38 | parser.add_argument('--model_path', type=str, help='Path for model',default="/versa/elvishelvis/RetinaYang/out/stage_4_68_full_model_epoch_61.pt") 39 | parser.add_argument('--save_path', type=str, default='./out', help='Path for result image') 40 | parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50) 41 | args = parser.parse_args() 42 | 43 | return args 44 | 45 | def main(nummmmmm): 46 | args = get_args() 47 | 48 | # Create the model 49 | # if args.depth == 18: 50 | # RetinaFace = model.resnet18(num_classes=2, pretrained=True) 51 | # elif args.depth == 34: 52 | # RetinaFace = model.resnet34(num_classes=2, pretrained=True) 53 | # elif args.depth == 50: 54 | # RetinaFace = model.resnet50(num_classes=2, pretrained=True) 55 | # elif args.depth == 101: 56 | # RetinaFace = model.resnet101(num_classes=2, pretrained=True) 57 | # elif args.depth == 152: 58 | # RetinaFace = model.resnet152(num_classes=2, pretrained=True) 59 | # else: 60 | # raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152') 61 | 62 | # Create torchvision model 63 | 64 | return_layers = {'layer2':1,'layer3':2,'layer4':3} 65 | RetinaFace = torchvision_model.create_retinaface(return_layers) 66 | device= torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 67 | 68 | # Load trained model 69 | retina_dict = RetinaFace.state_dict() 70 | pre_state_dict = torch.load('stage_5_68_full_model_epoch_121.pt',map_location='cpu') 71 | pretrained_dict = {k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict} 72 | RetinaFace.load_state_dict(pretrained_dict) 73 | RetinaFace.to(device) 74 | 75 | import time 76 | 77 | video = cv2.VideoCapture(0) 78 | # Read image 79 | while True: 80 | start=time.time() 81 | ret, img = video.read() 82 | img=cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 83 | img = torch.from_numpy(img) 84 | img = img.permute(2,0,1) 85 | resized_img=img.float() 86 | # resized_img = resize(img.float(),(360,640)) 87 | # print(resized_img.shape) 88 | 
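# add a batch dimension before detection; the commented-out resize above can be re-enabled to shrink
# the frame (e.g. to 360x640) for faster inference at some accuracy cost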
input_img = resized_img.float().unsqueeze(0) 89 | 90 | picked_boxes, picked_landmarks = eval_widerface.get_detections(input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3) 91 | # print(picked_boxes) 92 | np_img = resized_img.cpu().permute(1,2,0).numpy() 93 | # np_img.astype(int)  # removed: astype returns a new array, so this statement had no effect 94 | img = cv2.cvtColor(np_img.astype(np.uint8),cv2.COLOR_RGB2BGR)  # back to BGR for OpenCV display 95 | 96 | for j, boxes in enumerate(picked_boxes): 97 | if boxes is not None: 98 | for box,landmark in zip(boxes,picked_landmarks[j]): 99 | cv2.rectangle(img,(int(box[0]),int(box[1])),(int(box[2]),int(box[3])),(0,0,255),thickness=2)  # OpenCV drawing needs int pixel coords 100 | for i in range(0,136,2): 101 | cv2.circle(img,(int(landmark[i]),int(landmark[i+1])),radius=1,color=(0,0,255),thickness=2) 102 | cv2.imshow('RetinaFace-Pytorch',img) 103 | print(time.time()-start) 104 | if cv2.waitKey(1) & 0xFF == ord('q'): 105 | break 106 | if __name__=='__main__': 107 | main(20) 108 | --------------------------------------------------------------------------------