├── README.md
├── anchors.py
├── assets
│   ├── 1.jpg
│   ├── 2.jpg
│   └── 3.jpg
├── dataloader.py
├── detect.py
├── down.py
├── eval_widerface.py
├── img_tester.py
├── losses.py
├── magic_convert.py
├── mnas.py
├── mobile.py
├── mobile_testing.py
├── model.py
├── network.torch
├── out
│   └── stage_5_68_full_model_epoch_121.pt
├── requirements.txt
├── test_argu.py
├── torchvision_model.py
├── train.py
├── utils.py
└── video_detect.py
/README.md:
--------------------------------------------------------------------------------
1 | # Retinaface-Pytorch-version
2 | ### It's not the best version of my model due to confidentiality
3 | Thanks to Alvin Yang (https://github.com/supernotman/RetinaFace_Pytorch)
4 |
5 | This is the branch for 68-landmark detection; the pre-trained model is in ./out (see the loading sketch below)
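A minimal loading sketch for that checkpoint, following the pattern used in detect.py / img_tester.py (the `map_location` argument is my addition for CPU-only machines; everything else mirrors detect.py):

```python
import torch
import torchvision_model

# Build the RetinaFace graph the same way detect.py does.
return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
retinaface = torchvision_model.create_retinaface(return_layers)

# The saved checkpoint keys carry a 'module.' prefix (presumably from
# DataParallel), so strip it with k[7:] before loading, as detect.py does.
state = torch.load('./out/stage_5_68_full_model_epoch_121.pt', map_location='cpu')
own_keys = retinaface.state_dict()
retinaface.load_state_dict({k[7:]: v for k, v in state.items() if k[7:] in own_keys})
retinaface.eval()
```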
6 |
7 | Working on 96-landmark detection (refer to the other branch)
8 |
9 |

10 | ![](assets/1.jpg)
11 | ![](assets/2.jpg)
12 | The model also predicts the occluded landmarks; you can hide them if you don't want them to show up.
13 |
14 |
15 |
16 |
17 | Based on RetinaFace
18 | ### Current model
19 | MobileNet V1 + FPN + context module + regressor, 1.6 MB
20 | CPU ~10 FPS, GPU ~50 FPS
21 |
22 |
23 |
24 | ### Train: (please refer to dataloader.py to change the file locations)
25 | python3 train.py -train
26 | This model uses the LS3D-W dataset; alternatively, convert your own data to the demo.pt / demo.jpg format (a 68*2 landmark tensor per image), as in the sketch below.
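For reference, TrainDataset in dataloader.py pairs each `<index>.jpg` with a `<index>.pth` file that torch.load()s into a 68x2 array of (x, y) pixel coordinates. A minimal conversion sketch (the path and the random landmarks below are placeholders, not real annotations):

```python
import numpy as np
import torch

# 68 (x, y) landmark coordinates in the image's pixel space.
# Random values stand in for real annotations here.
landmarks = torch.from_numpy((np.random.rand(68, 2) * 640).astype(np.float32))

# dataloader.TrainDataset reads "<index>.jpg" and torch.load()s "<index>.pth"
# with the same index, so save the tensor next to the matching image.
torch.save(landmarks, '0.pth')  # pairs with 0.jpg
```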
27 |
28 |
29 | ### Use local camera:
30 | python3 video_detect.py (to run locally on a CPU, delete all the '.cuda()' calls; see the sketch below for a device-agnostic alternative)
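As an alternative to deleting the '.cuda()' calls by hand, a device-agnostic pattern such as the one below could be used (a sketch only, not code from video_detect.py):

```python
import torch

# Pick the GPU when available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the checkpoint onto that device instead of assuming CUDA,
# then move the model and every input batch with .to(device).
state = torch.load('./out/stage_5_68_full_model_epoch_121.pt', map_location=device)
```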
31 |
32 |
33 | ### Eval Model:
34 | python3 train.py -train False
35 |
36 | ## Todo:
37 | - [ ] Use SBR and BFLD to improve performance
38 |
39 | If you have trained a model with this code, feel free to discuss it with me at elvishelvis6@gmail.com
40 |
--------------------------------------------------------------------------------
/anchors.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 |
5 |
6 | class Anchors(nn.Module):
7 | def __init__(self, pyramid_levels=None, strides=None, sizes=None, ratios=None, scales=None):
8 | super(Anchors, self).__init__()
9 |
10 | if pyramid_levels is None:
11 | # self.pyramid_levels = [2, 3, 4, 5, 6]
12 | self.pyramid_levels = [3, 4, 5]
13 | if strides is None:
14 | self.strides = [2 ** x for x in self.pyramid_levels]
15 | if sizes is None:
16 | # self.sizes = [2 ** (x + 2) for x in self.pyramid_levels]
17 | self.sizes = [2 ** 4.0, 2 ** 6.0, 2 ** 8.0]
18 | if ratios is None:
19 | self.ratios = np.array([1, 1, 1])
20 | if scales is None:
21 | # self.scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])
22 | self.scales = np.array([2 ** 0, 2 ** (1/2.0) , 2 ** 1.0 ])
23 |
24 | def forward(self, image):
25 |
26 | image_shape = image.shape[2:]
27 | image_shape = np.array(image_shape)
28 | image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels]
29 |
30 | # compute anchors over all pyramid levels
31 | all_anchors = np.zeros((0, 4)).astype(np.float32)
32 |
33 | for idx, p in enumerate(self.pyramid_levels):
34 | anchors = generate_anchors(base_size=self.sizes[idx], ratios=self.ratios, scales=self.scales)
35 | shifted_anchors = shift(image_shapes[idx], self.strides[idx], anchors)
36 | all_anchors = np.append(all_anchors, shifted_anchors, axis=0)
37 |
38 | all_anchors = np.expand_dims(all_anchors, axis=0)
39 |
40 | return torch.from_numpy(all_anchors.astype(np.float32)).cuda()
41 |
42 | def generate_anchors(base_size=16, ratios=None, scales=None):
43 | """
44 | Generate anchor (reference) windows by enumerating aspect ratios X
45 | scales w.r.t. a reference window.
46 | """
47 |
48 | if ratios is None:
49 | ratios = np.array([1, 1, 1])
50 |
51 | if scales is None:
52 | scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])
53 |
54 | num_anchors = len(scales)
55 |
56 | # initialize output anchors
57 | anchors = np.zeros((num_anchors, 4))
58 |
59 | # scale base_size
60 | anchors[:, 2:] = base_size * np.tile(scales, (2, 1)).T
61 |
62 | # transform from (x_ctr, y_ctr, w, h) -> (x1, y1, x2, y2)
63 | anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T
64 | anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T
65 |
66 | return anchors
67 |
68 | def shift(shape, stride, anchors):
69 | shift_x = (np.arange(0, shape[1]) + 0.5) * stride
70 | shift_y = (np.arange(0, shape[0]) + 0.5) * stride
71 |
72 | shift_x, shift_y = np.meshgrid(shift_x, shift_y)
73 |
74 | shifts = np.vstack((
75 | shift_x.ravel(), shift_y.ravel(),
76 | shift_x.ravel(), shift_y.ravel()
77 | )).transpose()
78 |
79 | # add A anchors (1, A, 4) to
80 | # cell K shifts (K, 1, 4) to get
81 | # shift anchors (K, A, 4)
82 | # reshape to (K * A, 4) shifted anchors
83 | A = anchors.shape[0]
84 | K = shifts.shape[0]
85 | all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
86 | all_anchors = all_anchors.reshape((K * A, 4))
87 |
88 | return all_anchors
--------------------------------------------------------------------------------
/assets/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ElvishElvis/68-Retinaface-Pytorch-version/18471d90c24753324c84aa415adef605f3866031/assets/1.jpg
--------------------------------------------------------------------------------
/assets/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ElvishElvis/68-Retinaface-Pytorch-version/18471d90c24753324c84aa415adef605f3866031/assets/2.jpg
--------------------------------------------------------------------------------
/assets/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ElvishElvis/68-Retinaface-Pytorch-version/18471d90c24753324c84aa415adef605f3866031/assets/3.jpg
--------------------------------------------------------------------------------
/dataloader.py:
--------------------------------------------------------------------------------
1 | import torchvision.transforms as transforms
2 | from torch.utils.data.sampler import Sampler
3 | from torch.utils.data import Dataset
4 | import torch.nn.functional as F
5 | from skimage.util import crop
6 | import skimage.transform
7 | from PIL import Image
8 | import skimage.color
9 | import torch.nn as nn
10 | import numpy as np
11 | import skimage.io
12 | import skimage
13 | import random
14 | import torch
15 | import math
16 | import os
17 | import cv2
18 | from scipy import misc
19 |
20 | class TrainDataset(Dataset):
21 | def __init__(self,txt_path=None,transform=None,flip=False):
22 | self.words = []
23 | self.transform = transform
24 | self.flip = flip
25 | self.batch_count = 0
26 | self.img_size = 640
27 |
28 | def __len__(self):
29 | # return len(self.name_list)
30 | # return 10
31 | # return 22995
32 | return 1000
33 | # return 10
34 |
35 | def __getitem__(self,index):
36 | img = cv2.imread("/versa/elvishelvis/landmarks56/new_dataset/{}.jpg".format(index))
37 | try:
38 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
39 | except:
40 | import random
41 | rad=random.randint(1,22995)
42 | return self.__getitem__(rad)
43 |
44 | #img = img.astype(np.float32)/255.0
45 |
46 | annotations = np.zeros((0, 4+136))
47 | annotation = np.zeros((1,140))
48 | landmark=[]
49 | minx=float('inf')
50 | miny=float('inf')
51 | maxx=0
52 | maxy=0
53 | path="/versa/elvishelvis/landmarks56/new_dataset/{}.pth".format(index)
54 | data=np.array(torch.load(path))
55 | for da in data:
56 | if(da[0]<minx):
57 | minx=da[0]
58 | if(da[0]>maxx):
59 | maxx=da[0]
60 | if(da[1]<miny):
61 | miny=da[1]
62 | if(da[1]>maxy):
63 | maxy=da[1]
64 | landmark.append(da[0])
65 | landmark.append(da[1])
66 | # bbox
67 | annotation[0,0] = minx -int((maxx-minx)/10) # x1
68 | annotation[0,1] = miny -int((maxy-miny)/10) # y1
69 | annotation[0,2] = maxx +int((maxx-minx)/10)
70 | annotation[0,3] = maxy +int((maxy-miny)/10)
71 |
72 | for i in range(4,140):
73 | annotation[0,i] = landmark[i-4]
74 | annotations = np.append(annotations,annotation,axis=0)
75 | sample = {'img':torch.tensor(img), 'annot':torch.tensor(annotations)}
76 | if self.transform is not None:
77 | sample = self.transform(sample)
78 | return sample
79 |
80 |
81 |
82 | def collater(data):
83 | batch_size = len(data)
84 |
85 | imgs = [s['img'] for s in data]
86 | annots = [s['annot'] for s in data]
87 |
88 | # batch images
89 | height = imgs[0].shape[0]
90 | width = imgs[0].shape[1]
91 | assert height==width ,'Input width must equal height'
92 |
93 | input_size = width
94 | batched_imgs = torch.zeros(batch_size, height, width, 3)
95 |
96 | for i in range(batch_size):
97 | img = imgs[i]
98 | batched_imgs[i,:] = img
99 |
100 | # batch annotations
101 | max_num_annots = max(annot.shape[0] for annot in annots)
102 |
103 | if max_num_annots > 0:
104 | if annots[0].shape[1] > 4:
105 | annot_padded = torch.ones((len(annots), max_num_annots, 140)) * -1
106 | for idx, annot in enumerate(annots):
107 | if annot.shape[0] > 0:
108 | annot_padded[idx, :annot.shape[0], :] = annot
109 | else:
110 | annot_padded = torch.ones((len(annots), max_num_annots, 4)) * -1
111 | #print('annot~~~~~~~~~~~~~~~~~~,',annots)
112 | for idx, annot in enumerate(annots):
113 | if annot.shape[0] > 0:
114 | annot_padded[idx, :annot.shape[0], :] = annot
115 | else:
116 | if annots[0].shape[1] > 4:
117 | annot_padded = torch.ones((len(annots), 1, 140)) * -1
118 | else:
119 | annot_padded = torch.ones((len(annots), 1, 4)) * -1
120 |
121 | batched_imgs = batched_imgs.permute(0, 3, 1, 2)
122 |
123 | return {'img': batched_imgs, 'annot': annot_padded}
124 |
125 |
126 | class RandomFlip(object):
127 | def __call__(self, sample, input_size=320, flip_x=0.4):
128 | aaa=np.random.rand()
129 | if aaa < flip_x:
130 | image, annots = sample['img'], sample['annot']
131 | c,w,h=image.shape
132 | # flip image
133 | image = torch.flip(image,[1])
134 |
135 | image = image.numpy()
136 | annots = annots.numpy()
137 | # relocate bboxes
138 | for i in range(0,140):
139 | if i%2==0:
140 | annots[0, i] = w - annots[0, i]
141 | annots[0, 0],annots[0, 2]=annots[0, 2],annots[0, 0]
142 | for k in range(4,20):
143 | if(k%2==0):
144 | annots[0, k],annots[0, (40-k)]=annots[0, (40-k)],annots[0, k]
145 | else:
146 | annots[0, k],annots[0, (42-k)]=annots[0, (42-k)],annots[0, k]
147 | for b in range(38,48):
148 | if(b%2==0):
149 | annots[0, b],annots[0, (94-b)]=annots[0, (94-b)],annots[0, b]
150 | else:
151 | annots[0, b],annots[0, (96-b)]=annots[0, (96-b)],annots[0, b]
152 | for a in range(76,84):
153 | if(a%2==0):
154 | annots[0, a],annots[0, (170-a)]=annots[0, (170-a)],annots[0, a]
155 | else:
156 | annots[0, a],annots[0, (172-a)]=annots[0, (172-a)],annots[0, a]
157 |
158 | annots[0, 86],annots[0, 96]=annots[0, 96],annots[0, 86]
159 | annots[0, 84],annots[0, 98]=annots[0, 98],annots[0, 84]
160 |
161 | annots[0, 66],annots[0, 74]=annots[0, 74],annots[0, 66]
162 | annots[0, 67],annots[0, 75]=annots[0, 75],annots[0, 67]
163 | annots[0, 68],annots[0, 72]=annots[0, 72],annots[0, 68]
164 | annots[0, 69],annots[0, 73]=annots[0, 73],annots[0, 69]
165 |
166 | annots[0, 100],annots[0, 112]=annots[0, 112],annots[0, 100]
167 | annots[0, 102],annots[0, 110]=annots[0, 110],annots[0, 102]
168 | annots[0, 104],annots[0, 108]=annots[0, 108],annots[0, 104]
169 | annots[0, 126],annots[0, 130]=annots[0, 130],annots[0, 126]
170 | annots[0, 138],annots[0, 134]=annots[0, 134],annots[0, 138]
171 | annots[0, 116],annots[0, 120]=annots[0, 120],annots[0, 116]
172 | annots[0, 114],annots[0, 122]=annots[0, 122],annots[0, 114]
173 | annots[0, 124],annots[0, 132]=annots[0, 132],annots[0, 124]
174 |
175 |
176 |
177 | image = torch.from_numpy(image)
178 | annots = torch.from_numpy(annots)
179 |
180 | sample = {'img': image, 'annot': annots}
181 |
182 | return sample
183 |
184 |
185 | class Rotate(object):
186 | def __init__(self,angle=[-45,45],p=0.3):
187 | self.angle=angle
188 | self.p=p
189 | def __call__(self,sample):
190 | if(np.random.rand()<self.p):
221 | if(box[i]>maxx):
222 | maxx=box[i]
223 | if(box[i+1]<miny):
224 | miny=box[i+1]
225 | if(box[i+1]>maxy):
226 | maxy=box[i+1]
227 |
228 | box[0] = minx -int((maxx-minx)/10) # x1
229 | box[1] = miny -int((maxy-miny)/10) # y1
230 | box[2] = maxx +int((maxx-minx)/10)
231 | box[3] = maxy +int((maxy-miny)/10)
232 |
233 | return {'img': torch.tensor(img), 'annot': torch.tensor(box[np.newaxis,:])}
234 | return sample
235 |
236 | class RandomErasing(object):
237 | def __init__(self,p=0.3):
238 | self.p=p
239 | def __call__(self, sample):
240 | if(np.random.rand()<self.p):
296 | if annots.shape[1] > 4 :
297 | annots = annots * scale
298 | else :
299 | annots[:,:4] = annots[:,:4] * scale
300 |
301 | return {'img': torch.tensor(resized_image), 'annot': annots}
302 |
303 |
304 |
305 |
306 |
307 |
308 |
309 | class PadToSquare(object):
310 | def __call__(self, sample, input_size=640):
311 | image, annots = sample['img'], sample['annot']
312 | rows, cols, _ = image.shape
313 | dim_diff = np.abs(rows - cols)
314 |
315 | # relocate bbox annotations
316 | if rows == input_size:
317 | diff = input_size - cols
318 | annots[:,0] = annots[:,0] + diff/2
319 | annots[:,2] = annots[:,2] + diff/2
320 | elif cols == input_size:
321 | diff = input_size - rows
322 | annots[:,1] = annots[:,1] + diff/2
323 | annots[:,3] = annots[:,3] + diff/2
324 | if annots.shape[1] > 4 :
325 | ldm_mask = annots[:,4] > 0
326 | if rows == input_size:
327 | diff = input_size - cols
328 | annots[ldm_mask,4::2] = annots[ldm_mask,4::2] + diff/2
329 | elif cols == input_size:
330 | diff = input_size - rows
331 | annots[ldm_mask,5::2] = annots[ldm_mask,5::2] + diff/2
332 |
333 | # pad image to square
334 | img = image
335 | img = img.permute(2,0,1)
336 | pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
337 | pad = (0, 0, pad1, pad2) if rows <= cols else (pad1, pad2, 0, 0)
338 |
339 | padded_img = F.pad(img, pad, "constant", value=0)
340 |
341 | # # pad to input size
342 | pad_=input_size-padded_img.shape[1]
343 | num1= random.randint(0,pad_)
344 | num2= random.randint(0,pad_)
345 |
346 | pading = (num1, pad_-num1,num2,pad_-num2)
347 | padded_img = F.pad(padded_img, pading, "constant", value=0)
348 | for i in range(0,140):
349 | if i%2==0:
350 | annots[0,i]+=num1
351 | else:
352 | annots[0,i]+=num2
353 | padded_img = padded_img.permute(1,2,0)
354 |
355 | return {'img': padded_img, 'annot': annots}
356 |
357 |
358 | class ValDataset(Dataset):
359 | def __init__(self,txt_path=None,transform=None,flip=False):
360 | self.words = []
361 | self.transform = transform
362 | self.flip = flip
363 | self.batch_count = 0
364 | self.img_size = 640
365 |
366 | def __len__(self):
367 | # return len(self.name_list)
368 | return 299
369 | # return 50
370 | # return 10
371 |
372 | def __getitem__(self,index):
373 | index+=1
374 | img = cv2.imread("/versa/elvishelvis/landmarks56/300w/{}.jpg".format(index))
375 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
376 | #img = img.astype(np.float32)/255.0
377 |
378 | annotations = np.zeros((0, 4+136))
379 | annotation = np.zeros((1,140))
380 | landmark=[]
381 | minx=float('inf')
382 | miny=float('inf')
383 | maxx=0
384 | maxy=0
385 | label=[]
386 | with open("/versa/elvishelvis/landmarks56/300w/{}.pts".format(index),'r') as f:
387 | f.readline()
388 | f.readline()
389 | f.readline()
390 | while(True):
391 | try:
392 | item=f.readline()
393 | label.append([float(item[0:7]),float(item[8:15])])
394 | item[2]
395 | except:
396 | break
397 | # label=torch.tensor(label)
398 | for da in label:
399 | if(da[0]<minx):
400 | minx=da[0]
401 | if(da[0]>maxx):
402 | maxx=da[0]
403 | if(da[1]<miny):
404 | miny=da[1]
405 | if(da[1]>maxy):
406 | maxy=da[1]
407 | landmark.append(da[0])
408 | landmark.append(da[1])
409 | # bbox
410 | annotation[0,0] = minx -int((maxx-minx)/5) # x1
411 | annotation[0,1] = miny -int((maxy-miny)/5) # y1
412 | annotation[0,2] = maxx +int((maxx-minx)/5)
413 | annotation[0,3] = maxy +int((maxy-miny)/5)
414 | if(len(landmark)!=136):
415 | return self.__getitem__(index+1)
416 | for i in range(4,140):
417 | annotation[0,i] = landmark[i-4]
418 | annotations = np.append(annotations,annotation,axis=0)
419 | sample = {'img':torch.tensor(img), 'annot':torch.tensor(annotations)}
420 | if self.transform is not None:
421 | sample = self.transform(sample)
422 | return sample
423 |
424 |
425 |
426 |
427 | class ValDataset_CeleB(Dataset):
428 | def __init__(self,txt_path=None,transform=None,flip=False):
429 | self.words = []
430 | self.transform = transform
431 | self.flip = flip
432 | self.batch_count = 0
433 | self.img_size = 640
434 | self.name_list=[]
435 | self.bbox = []
436 | self.landmarks=[]
437 | path1="/versa/elvishelvis/RetinaFace_Pytorch/CelebA/Anno/list_bbox_celeba.txt"
438 | # for the bbox
439 | f = open(path1,'r')
440 | f.readline()
441 | f.readline()
442 | lines = f.readlines()
443 | for line in lines:
444 | self.name_list.append(line[0:10])
445 | count=0
446 | begin=11
447 | temp=[]
448 | is_first=False
449 | while (count<4):
450 | while(line[begin]==" "):
451 | begin+=1
452 | cur=begin
453 | while(line[cur]!=" " and line[cur]!='\n'):
454 | cur+=1
455 | temp.append(line[begin:cur])
456 | is_first=True
457 | begin=cur
458 | count+=1
459 | self.bbox.append(temp)
460 |
461 | path2="/versa/elvishelvis/RetinaFace_Pytorch/CelebA/Anno/list_landmarks_celeba.txt"
462 | k = open(path2,'r')
463 | k.readline()
464 | k.readline()
465 | lines = k.readlines()
466 | for line in lines:
467 | count=0
468 | begin=11
469 | temp=[]
470 | is_first=False
471 | while (count<10):
472 | while(line[begin]==" "):
473 | begin+=1
474 | cur=begin
475 | while(line[cur]!=" " and line[cur]!='\n'):
476 | cur+=1
477 | temp.append(line[begin:cur])
478 | is_first=True
479 | begin=cur
480 | count+=1
481 | self.landmarks.append(temp)
482 |
483 | def __len__(self):
484 | # return len(self.name_list)
485 | return 20
486 | # return 30
487 |
488 | def __getitem__(self,index):
489 | img = skimage.io.imread("/versa/elvishelvis/RetinaFace_Pytorch/\
490 | CelebA/Img/img_celeba.7z/img_celeba/"+str(self.name_list[int(index)]))
491 | #img = img.astype(np.float32)/255.0
492 |
493 | box_ = self.bbox[int(index)]
494 | land_=self.landmarks[int(index)]
495 | annotations = np.zeros((0, 14))
496 | if len(box_) == 0:
497 | return annotations
498 | annotation = np.zeros((1,14))
499 | # bbox
500 | annotation[0,0] = box_[0] # x1
501 | annotation[0,1] = box_[1] # y1
502 | annotation[0,2] = str(int(box_[0]) + int(box_[2])) # x2
503 | annotation[0,3] = str(int(box_[1]) + int(box_[3])) # y2
504 |
505 | # landmarks
506 | annotation[0,4] = land_[0] # l0_x
507 | annotation[0,5] = land_[1] # l0_y
508 | annotation[0,6] = land_[2] # l1_x
509 | annotation[0,7] = land_[3] # l1_y
510 | annotation[0,8] = land_[4] # l2_x
511 | annotation[0,9] = land_[5] # l2_y
512 | annotation[0,10] = land_[6] # l3_x
513 | annotation[0,11] = land_[7] # l3_y
514 | annotation[0,12] = land_[8] # l4_x
515 | annotation[0,13] = land_[9] # l4_y
516 |
517 | annotations = np.append(annotations,annotation,axis=0)
518 | sample = {'img':img, 'annot':torch.tensor(annotations)}
519 | if self.transform is not None:
520 | sample = self.transform(sample)
521 | return sample
522 |
523 | '''
524 | class ValDataset(Dataset):
525 | def __init__(self,txt_path,transform=None,flip=False):
526 | self.imgs_path = []
527 | self.words = []
528 | self.transform = transform
529 | self.flip = flip
530 | self.batch_count = 0
531 | self.img_size = 320
532 |
533 | f = open(txt_path,'r')
534 | lines = f.readlines()
535 | isFirst = True
536 | bbox = []
537 | for line in lines:
538 | line = line.rstrip()
539 | if line.startswith('#'):
540 | if isFirst is True:
541 | isFirst = False
542 | else:
543 | labels_copy = labels.copy()
544 | self.words.append(labels_copy)
545 | labels.clear()
546 | path = line[2:]
547 | path = txt_path.replace('label.txt','images/') + path
548 | self.imgs_path.append(path)
549 | else:
550 | line = line.split(' ')
551 | label = [float(x) for x in line]
552 | labels.append(label)
553 |
554 | self.words.append(labels)
555 |
556 | def __getitem__(self,index):
557 | img = skimage.io.imread(self.imgs_path[index])
558 |
559 | labels = self.words[index]
560 | annotations = np.zeros((0, 4))
561 | if len(labels) == 0:
562 | return annotations
563 | for idx, label in enumerate(labels):
564 | annotation = np.zeros((1,4))
565 | # bbox
566 | annotation[0,0] = label[0] # x1
567 | annotation[0,1] = label[1] # y1
568 | annotation[0,2] = label[2] # x2
569 | annotation[0,3] = label[3] # y2
570 |
571 | annotations = np.append(annotations,annotation,axis=0)
572 |
573 | sample = {'img':img, 'annot':annotations}
574 | if self.transform is not None:
575 | sample = self.transform(sample)
576 |
577 | return sample
578 |
579 | def __len__(self):
580 | return len(self.imgs_path)
581 |
582 | def _load_annotations(self,index):
583 | labels = self.words[index]
584 | annotations = np.zeros((0,4))
585 |
586 | if len(labels) == 0:
587 | return annotations
588 |
589 | for idx, label in enumerate(labels):
590 | annotation = np.zeros((1,4))
591 | annotation[0,0] = label[0] # x1
592 | annotation[0,1] = label[1] # y1
593 | annotation[0,2] = label[0] + label[2] # x2
594 | annotation[0,3] = label[1] + label[3] # y2
595 |
596 | annotations = np.append(annotations, annotation, axis=0)
597 |
598 | return annotations
599 | '''
600 |
601 |
602 |
603 | '''
604 | class RandomCroper(object):
605 | def __call__(self, sample, input_size=640):
606 | image, annots = sample['img'], sample['annot']
607 | rows, cols, _ = image.shape
608 |
609 | smallest_side = min(rows, cols)
610 | longest_side = max(rows,cols)
611 | scale = random.uniform(0.3,1)
612 | short_size = int(smallest_side * scale)
613 | start_short_upscale = smallest_side - short_size
614 | start_long_upscale = longest_side - short_size
615 | crop_short = random.randint(0,start_short_upscale)
616 | crop_long = random.randint(0,start_long_upscale)
617 | crop_y = 0
618 | crop_x = 0
619 | if smallest_side == rows:
620 | crop_y = crop_short
621 | crop_x = crop_long
622 | else:
623 | crop_x = crop_short
624 | crop_y = crop_long
625 | # crop
626 | cropped_img = image[crop_y:crop_y + short_size,crop_x:crop_x + short_size]
627 | # resize
628 | new_image = skimage.transform.resize(cropped_img, (input_size, input_size))
629 |
630 | # why normalized from 255 to 1 after skimage.transform?????????
631 | new_image = new_image * 255
632 |
633 | # relocate bbox
634 | annots[:,0] -= crop_x
635 | annots[:,1] -= crop_y
636 | annots[:,2] -= crop_x
637 | annots[:,3] -= crop_y
638 |
639 | # relocate landmarks56
640 | if annots.shape[1] > 4:
641 | # l_mask = annots[:,4]!=-1
642 | l_mask = annots[:,4] > 0
643 | annots[l_mask,4] -= crop_x
644 | annots[l_mask,5] -= crop_y
645 | annots[l_mask,6] -= crop_x
646 | annots[l_mask,7] -= crop_y
647 | annots[l_mask,8] -= crop_x
648 | annots[l_mask,9] -= crop_y
649 | annots[l_mask,10] -= crop_x
650 | annots[l_mask,11] -= crop_y
651 | annots[l_mask,12] -= crop_x
652 | annots[l_mask,13] -= crop_y
653 |
654 | # scale annotations
655 | resize_scale = input_size/short_size
656 | annots[:,:4] = annots[:,:4] * resize_scale
657 | if annots.shape[1] > 4:
658 | annots[l_mask,4:] = annots[l_mask,4:] * resize_scale
659 |
660 | # remove faces center not in image afer crop
661 | center_x = (annots[:,0] + annots[:,2]) / 2
662 | center_y = (annots[:,1] + annots[:,3]) / 2
663 |
664 | mask_x = (center_x[:,]>0)&(center_x[:,]<input_size)
665 | mask_y = (center_y[:,]>0)&(center_y[:,]<input_size)
666 | mask = mask_x & mask_y
673 | if annots.shape[1] > 4:
674 | annots[l_mask,4:] = annots[l_mask,4:].clip(0, input_size)
675 |
676 | annots = annots[mask]
677 |
678 | return {'img': torch.from_numpy(new_image), 'annot': torch.from_numpy(annots)}
679 | '''
--------------------------------------------------------------------------------
/detect.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | import numpy as np
6 | import skimage
7 | from skimage import io
8 | from PIL import Image
9 | import cv2
10 | import torchvision
11 | import eval_widerface
12 | import torchvision_model
13 | import model
14 | import os
15 |
16 | def pad_to_square(img, pad_value):
17 | _, h, w = img.shape
18 | dim_diff = np.abs(h - w)
19 | # (upper / left) padding and (lower / right) padding
20 | pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
21 | # Determine padding
22 | pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
23 | # Add padding
24 | img = F.pad(img, pad, "constant", value=pad_value)
25 |
26 | return img, pad
27 |
28 | def resize(image, size):
29 | image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0)
30 | return image
31 |
32 | def get_args():
33 | parser = argparse.ArgumentParser(description="Detect program for retinaface.")
34 | parser.add_argument('--image_path', type=str, default='test.jpg', help='Path for image to detect')
35 | parser.add_argument('--model_path', type=str, help='Path for model')
36 | parser.add_argument('--save_path', type=str, default='./out', help='Path for result image')
37 | parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
38 | parser.add_argument('--scale', type=float, default=1.0, help='Image resize scale', )
39 | args = parser.parse_args()
40 |
41 | return args
42 |
43 | def main():
44 | args = get_args()
45 | # Create torchvision model
46 | return_layers = {'layer2':1,'layer3':2,'layer4':3}
47 | RetinaFace = torchvision_model.create_retinaface(return_layers)
48 |
49 | # Load trained model
50 | retina_dict = RetinaFace.state_dict()
51 | pre_state_dict = torch.load(args.model_path)
52 | pretrained_dict = {k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict}
53 | RetinaFace.load_state_dict(pretrained_dict)
54 |
55 | RetinaFace = RetinaFace.cuda()
56 | RetinaFace.eval()
57 |
58 | # Read image
59 | img = skimage.io.imread(args.image_path)
60 | img = torch.from_numpy(img)
61 | img = img.permute(2,0,1)
62 |
63 | if not args.scale == 1.0:
64 | size1 = int(img.shape[1]/args.scale)
65 | size2 = int(img.shape[2]/args.scale)
66 | img = resize(img.float(),(size1,size2))
67 |
68 | input_img = img.unsqueeze(0).float().cuda()
69 | picked_boxes, picked_landmarks = eval_widerface.get_detections(input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)
70 |
71 | # np_img = resized_img.cpu().permute(1,2,0).numpy()
72 | np_img = img.cpu().permute(1,2,0).numpy()
73 | np_img.astype(int)
74 | img = cv2.cvtColor(np_img.astype(np.uint8),cv2.COLOR_BGR2RGB)
75 |
76 | for j, boxes in enumerate(picked_boxes):
77 | if boxes is not None:
78 | for box,landmark in zip(boxes,picked_landmarks[j]):
79 | cv2.rectangle(img,(box[0],box[1]),(box[2],box[3]),(0,0,255),thickness=2)
80 | cv2.circle(img,(landmark[0],landmark[1]),radius=1,color=(0,0,255),thickness=2)
81 | cv2.circle(img,(landmark[2],landmark[3]),radius=1,color=(0,255,0),thickness=2)
82 | cv2.circle(img,(landmark[4],landmark[5]),radius=1,color=(255,0,0),thickness=2)
83 | cv2.circle(img,(landmark[6],landmark[7]),radius=1,color=(0,255,255),thickness=2)
84 | cv2.circle(img,(landmark[8],landmark[9]),radius=1,color=(255,255,0),thickness=2)
85 |
86 | image_name = args.image_path.split('/')[-1]
87 | save_path = os.path.join(args.save_path,image_name)
88 | cv2.imwrite(save_path, img)
89 | cv2.imshow('RetinaFace-Pytorch',img)
90 | cv2.waitKey()
91 |
92 | if __name__=='__main__':
93 | main()
94 |
--------------------------------------------------------------------------------
/down.py:
--------------------------------------------------------------------------------
1 | from requests import get # to make GET request
2 |
3 |
4 | def download(url, file_name):
5 | # open in binary mode
6 | with open(file_name, "wb") as file:
7 | print("runinng!!!!!")
8 | # get request
9 | response = get(url)
10 | # write to file
11 | print("get {}".format(file_name))
12 |
13 | file.write(response.content)
14 |
15 | download("https://www.adrianbulat.com/downloads/FaceAlignment/LS3D-W-balanced-20-03-2017.zip ",'sample.zip')
16 | download('https://uniofnottm-my.sharepoint.com/personal/adrian_bulat_nottingham_ac_uk/_layouts/15/download.aspx?SourceUrl=%2Fpersonal%2Fadrian%5Fbulat%5Fnottingham%5Fac%5Fuk%2FDocuments%2FUoN%20Box%20Migration%2FPublic%2FLS3D%2DW%2FLS3D%2DW%2Etar%2Egz','all.zip')
17 |
18 |
19 |
--------------------------------------------------------------------------------
/eval_widerface.py:
--------------------------------------------------------------------------------
1 | import utils
2 | import numpy as np
3 | import torch
4 | import torch.nn as nn
5 | import os
6 | from tqdm import tqdm
7 | import torchvision.ops as ops
8 | import cv2
9 | import time
10 | def get_detections(img_batch, model,score_threshold=0.5, iou_threshold=0.5):
11 | start=time.time()
12 | model.eval()
13 | model.cuda()
14 | img_batch = img_batch.cuda()
15 | with torch.no_grad():
16 | #[1,16800,2]
17 | classifications, bboxes, landmarks = model(img_batch)
18 | batch_size = classifications.shape[0]
19 | picked_boxes = []
20 | picked_landmarks = []
21 |
22 | for i in range(batch_size):
23 | #[16800,2]
24 | classification = torch.exp(classifications[i,:,:])
25 | bbox = bboxes[i,:,:]
26 | landmark = landmarks[i,:,:]
27 |
28 | # choose positive and scores > score_threshold
29 | scores, argmax = torch.max(classification, dim=1)
30 | argmax_indice = argmax==0
31 | scores_indice = scores > score_threshold
32 | positive_indices = argmax_indice & scores_indice
33 |
34 | scores = scores[positive_indices]
35 |
36 | if scores.shape[0] == 0:
37 | picked_boxes.append(None)
38 | picked_landmarks.append(None)
39 | continue
40 |
41 | bbox = bbox[positive_indices]
42 | landmark = landmark[positive_indices]
43 | keep = ops.boxes.nms(bbox, scores, iou_threshold)
44 | keep_boxes = bbox[keep]
45 | keep_landmarks = landmark[keep]
46 | picked_boxes.append(keep_boxes)
47 | picked_landmarks.append(keep_landmarks)
48 | # print(time.time()-start)
49 | return picked_boxes, picked_landmarks
50 |
51 | def compute_overlap(a,b):
52 | area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
53 |
54 | iw = np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], 1), b[:, 0])
55 | ih = np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], 1), b[:, 1])
56 |
57 | iw = np.maximum(iw, 0)
58 | ih = np.maximum(ih, 0)
59 |
60 | ua = np.expand_dims((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), axis=1) + area - iw * ih
61 |
62 | ua = np.maximum(ua, np.finfo(float).eps)
63 |
64 | intersection = iw * ih
65 |
66 | # (N, K) ndarray of overlap between boxes and query_boxes
67 | return torch.from_numpy(intersection / ua)
68 |
69 |
70 | def evaluate(val_data,retinaFace,threshold=0.5):
71 | recall = 0.
72 | precision = 0.
73 | landmark_loss=0
74 | miss=0
75 | #for i, data in tqdm(enumerate(val_data)):
76 | resssss=[]
77 | count=0
78 | for data in tqdm(iter(val_data)):
79 | img_batch = data['img'].cuda()
80 | annots = data['annot'].cuda()
81 |
82 |
83 | picked_boxes,picked_landmarks = get_detections(img_batch,retinaFace)
84 | recall_iter = 0.
85 | precision_iter = 0.
86 | for j, boxes in enumerate(picked_boxes):
87 | annot_boxes = annots[j]
88 | annot_boxes = annot_boxes[annot_boxes[:,0]!=-1]
89 | annot_land=annot_boxes[:,4:]
90 | annot_boxes=annot_boxes[:,:4]
91 | if boxes is None and annot_boxes.shape[0] == 0:
92 | continue
93 | elif boxes is None and annot_boxes.shape[0] != 0:
94 | recall_iter += 0.
95 | precision_iter += 1.
96 | continue
97 | elif boxes is not None and annot_boxes.shape[0] == 0:
98 | recall_iter += 1.
99 | precision_iter += 0.
100 | continue
101 | overlap = ops.boxes.box_iou(annot_boxes, boxes)
102 |
103 | # compute recall
104 | max_overlap, _ = torch.max(overlap,dim=1)
105 | mask = max_overlap > threshold
106 | detected_num = mask.sum().item()
107 | recall_iter += detected_num/annot_boxes.shape[0]
108 |
109 | # compute precision
110 | max_overlap, _ = torch.max(overlap,dim=0)
111 | mask = max_overlap > threshold
112 | true_positives = mask.sum().item()
113 | precision_iter += true_positives/boxes.shape[0]
114 | if (picked_landmarks==None):
115 | continue
116 | for i, land in enumerate(picked_landmarks):
117 |
118 | annot_land = annots[i]
119 | annot_land=annot_land[:,4:]
120 | # img_batch=np.array(img_batch[0].cpu()).transpose(1,2,0)
121 | try:
122 |
123 | land=land[0,:]
124 | landmark_loss=torch.mean(torch.sqrt(torch.sum((annot_land - land)**2)))
125 | offset=abs(int(annot_land[0][4])-int(annot_land[0][68]))
126 | # landmark_loss=nn.SmoothL1Loss()(annot_land,land)
127 | landmark_loss=float(landmark_loss/offset)
128 | if landmark_loss<1:
129 | resssss.append(landmark_loss)
130 | # annot_land=np.array(annot_land[0].cpu())
131 | # land=np.array(land.cpu())
132 | # for kkk in range(0,136,2):
133 | # img_batch=cv2.circle(img_batch,(annot_land[kkk],annot_land[kkk+1]),radius=1,color=(0,0,255),thickness=2)
134 | # img_batch=cv2.circle(img_batch,(land[kkk],land[kkk+1]),radius=1,color=(0,255,0),thickness=2)
135 | # cv2.imwrite('{}.jpg'.format(count),img_batch)
136 | # count+=1
137 | # landmark_loss+=torch.mean((annot_land-land)**2).item()
138 | except:
139 | # print('miss')
140 | miss+=1
141 |
142 | recall += recall_iter/len(picked_boxes)
143 | precision += precision_iter/len(picked_boxes)
144 | print(sorted(resssss))
145 | return recall/len(val_data),precision/len(val_data), np.mean(resssss) ,miss
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
--------------------------------------------------------------------------------
/img_tester.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | import numpy as np
6 | import skimage
7 | from skimage import io
8 | from PIL import Image
9 | import cv2
10 | import torchvision
11 | import eval_widerface
12 | import torchvision_model
13 | import model
14 | import os
15 | import skimage
16 | from dataloader import ValDataset, Resizer, PadToSquare,ValDataset_CeleB, TrainDataset
17 | from torchvision import datasets, models, transforms
18 | def pad_to_square(img, pad_value):
19 | _, h, w = img.shape
20 | dim_diff = np.abs(h - w)
21 | # (upper / left) padding and (lower / right) padding
22 | pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
23 | # Determine padding
24 | pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
25 | # Add padding
26 | img = F.pad(img, pad, "constant", value=pad_value)
27 |
28 | return img, pad
29 |
30 | def resize(image, size):
31 | image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0)
32 | return image
33 |
34 | def get_args():
35 | parser = argparse.ArgumentParser(description="Detect program for retinaface.")
36 | parser.add_argument('--image_path', type=str, default='WechatIMG10.jpeg', help='Path for image to detect')
37 | parser.add_argument('--model_path', type=str, help='Path for model',default="/versa/elvishelvis/RetinaYang/out/68_full_model_epoch_10.pt")
38 | parser.add_argument('--save_path', type=str, default='./out', help='Path for result image')
39 | parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
40 | args = parser.parse_args()
41 |
42 | return args
43 |
44 | def main(nummmmmm):
45 | args = get_args()
46 |
47 | # Create the model
48 | # if args.depth == 18:
49 | # RetinaFace = model.resnet18(num_classes=2, pretrained=True)
50 | # elif args.depth == 34:
51 | # RetinaFace = model.resnet34(num_classes=2, pretrained=True)
52 | # elif args.depth == 50:
53 | # RetinaFace = model.resnet50(num_classes=2, pretrained=True)
54 | # elif args.depth == 101:
55 | # RetinaFace = model.resnet101(num_classes=2, pretrained=True)
56 | # elif args.depth == 152:
57 | # RetinaFace = model.resnet152(num_classes=2, pretrained=True)
58 | # else:
59 | # raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')
60 |
61 | # Create torchvision model
62 |
63 | return_layers = {'layer2':1,'layer3':2,'layer4':3}
64 | RetinaFace = torchvision_model.create_retinaface(return_layers)
65 | device= torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
66 |
67 | # Load trained model
68 | retina_dict = RetinaFace.state_dict()
69 | pre_state_dict = torch.load('/versa/elvishelvis/RetinaYang/out/stage_5_68_full_model_epoch_51.pt')
70 | pretrained_dict = {k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict}
71 | RetinaFace.load_state_dict(pretrained_dict)
72 | RetinaFace.to(device)
73 |
74 | import time
75 |
76 | dataset_val = TrainDataset('./widerface/train/label.txt',transform=transforms.Compose([Resizer(640),PadToSquare()]))
77 | # dataset_val = ValDataset('./widerface/train/label.txt')
78 | for qq in range(100,150):
79 | img=dataset_val[qq]['img']
80 | # img=cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
81 | # img=skimage.io.imread("/versa/elvishelvis/RetinaFace_Pytorch/CelebA/Img/img_celeba.7z/img_celeba/118{}.jpg".format(str(qq)))
82 | img = img.permute(2,0,1)
83 | resized_img = img.float()
84 | input_img = resized_img.unsqueeze(0).to(device)
85 | start=time.time()
86 | picked_boxes, picked_landmarks = eval_widerface.get_detections(input_img, RetinaFace, score_threshold=0.9, iou_threshold=0.2)
87 | print(time.time()-start)
88 | # print(picked_boxes)
89 | np_img = resized_img.cpu().permute(1,2,0).numpy()
90 | np_img.astype(int)
91 | img = cv2.cvtColor(np_img.astype(np.uint8),cv2.COLOR_BGR2RGB)
92 |
93 | for j, boxes in enumerate(picked_boxes):
94 | if boxes is not None:
95 | for box,landmark in zip(boxes,picked_landmarks[j]):
96 | cv2.rectangle(img,(box[0],box[1]),(box[2],box[3]),(0,0,255),thickness=2)
97 | for i in range(0,136,2):
98 | cv2.circle(img,(landmark[i],landmark[i+1]),radius=1,color=(0,0,255),thickness=2)
99 |
100 | image_name = args.image_path.split('/')[-1]
101 | save_path = os.path.join(args.save_path,image_name)
102 | cv2.imwrite('./RetinaFace-Pytorch{}.jpg'.format(qq),cv2.resize(img,(640,640)))
103 | if __name__=='__main__':
104 | main(10)
105 |
106 |
107 |
--------------------------------------------------------------------------------
/losses.py:
--------------------------------------------------------------------------------
1 | import numpy as numpy
2 | import torch.nn as nn
3 | import torch
4 | import math
5 | # torch.log and math.log is e based
6 | class WingLoss(nn.Module):
7 | def __init__(self, omega=3, epsilon=2):
8 | super(WingLoss, self).__init__()
9 | self.omega = omega
10 | self.epsilon = epsilon
11 |
12 | def forward(self, pred, target):
13 | y = target
14 | y_hat = pred
15 | delta_y = (y - y_hat).abs()
16 | delta_y1 = delta_y[delta_y < self.omega]
17 | delta_y2 = delta_y[delta_y >= self.omega]
18 | loss1 = self.omega * torch.log(1 + delta_y1 / self.epsilon)
19 | C = self.omega - self.omega * math.log(1 + self.omega / self.epsilon)
20 | loss2 = delta_y2 - C
21 | return (loss1.sum() + loss2.sum()) / (len(loss1) + len(loss2))
22 | class AdaptiveWingLoss(nn.Module):
23 | def __init__(self, omega=14, theta=0.5, epsilon=1, alpha=2.1):
24 | super(AdaptiveWingLoss, self).__init__()
25 | self.omega = omega
26 | self.theta = theta
27 | self.epsilon = epsilon
28 | self.alpha = alpha
29 |
30 | def forward(self, pred, target):
31 | '''
32 | :param pred: BxNxHxH
33 | :param target: BxNxHxH
34 | :return:
35 | '''
36 |
37 | y = target
38 | y_hat = pred
39 | delta_y = (y - y_hat).abs()
40 | delta_y1 = delta_y[delta_y < self.theta]
41 | delta_y2 = delta_y[delta_y >= self.theta]
42 | y1 = y[delta_y < self.theta]
43 | y2 = y[delta_y >= self.theta]
44 | loss1 = self.omega * torch.log(1 + torch.pow(delta_y1 / self.omega, self.alpha - y1))
45 | A = self.omega * (1 / (1 + torch.pow(self.theta / self.epsilon, self.alpha - y2))) * (self.alpha - y2) * (
46 | torch.pow(self.theta / self.epsilon, self.alpha - y2 - 1)) * (1 / self.epsilon)
47 | C = self.theta * A - self.omega * torch.log(1 + torch.pow(self.theta / self.epsilon, self.alpha - y2))
48 | loss2 = A * delta_y2 - C
49 | return (loss1.sum() + loss2.sum()) / (len(loss1) + len(loss2))
50 | def calc_iou(a, b):
51 | area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
52 |
53 | iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 0], 1), b[:, 0])
54 | ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 1], 1), b[:, 1])
55 |
56 | iw = torch.clamp(iw, min=0)
57 | ih = torch.clamp(ih, min=0)
58 |
59 | ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area - iw * ih
60 |
61 | ua = torch.clamp(ua, min=1e-8)
62 |
63 | intersection = iw * ih
64 |
65 | IoU = intersection / ua
66 |
67 | return IoU
68 |
69 | def filt_IoU(a, b, l):
70 | area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
71 |
72 | iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 0], 1), b[:, 0])
73 | ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 1], 1), b[:, 1])
74 |
75 | iw = torch.clamp(iw, min=0)
76 | ih = torch.clamp(ih, min=0)
77 |
78 | ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area - iw * ih
79 |
80 | ua = torch.clamp(ua, min=1e-8)
81 |
82 | intersection = iw * ih
83 |
84 | IoU = intersection / ua
85 |
86 | ldm_sum = l.sum(dim=1)
87 | mask = ldm_sum<0
88 | ldm_mask = torch.ones_like(mask)
89 | ldm_mask[mask] = -1
90 | filted_IoU = IoU * ldm_mask.float()
91 |
92 | return IoU, filted_IoU
93 |
94 | class LossLayer(nn.Module):
95 | def __init__(self):
96 | super(LossLayer, self).__init__()
97 | self.smoothl1 = nn.SmoothL1Loss()
98 |
99 | def forward(self,classifications,bbox_regressions,ldm_regressions,anchors,annotations):
100 | batch_size = classifications.shape[0]
101 | classification_losses = []
102 | bbox_regression_losses = []
103 | ldm_regression_losses = []
104 |
105 | anchor = anchors[0, :, :]
106 | anchor_widths = anchor[:, 2] - anchor[:, 0]
107 | anchor_heights = anchor[:, 3] - anchor[:, 1]
108 | anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
109 | anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights
110 |
111 | #temp
112 | positive_indices_list = []
113 |
114 | for j in range(batch_size):
115 | classification = classifications[j,:,:]
116 | bbox_regression = bbox_regressions[j,:,:]
117 | ldm_regression = ldm_regressions[j,:,:]
118 |
119 | annotation = annotations[j,:,:]
120 | # annotation = annotation[annotation[:,0] != -1]
121 | annotation = annotation[annotation[:,0] > 0]
122 | bbox_annotation = annotation[:,:4]
123 | ldm_annotation = annotation[:,4:]
124 |
125 | if bbox_annotation.shape[0] == 0:
126 | bbox_regression_losses.append(torch.tensor(0.,requires_grad=True).cuda())
127 | classification_losses.append(torch.tensor(0.,requires_grad=True).cuda())
128 | ldm_regression_losses.append(torch.tensor(0.,requires_grad=True).cuda())
129 |
130 | # temp
131 | positive_indices_list.append([])
132 |
133 | continue
134 |
135 | IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4])
136 | #IoU, filt_iou = filt_IoU(anchors[0, :, :], bbox_annotation, ldm_annotation)
137 |
138 | IoU_max, IoU_argmax = torch.max(IoU, dim=1)
139 |
140 | targets = torch.ones(classification.shape)*-1
141 | targets = targets.cuda()
142 |
143 | # those whose iou<0.3 have no object
144 | negative_indices = torch.lt(IoU_max, 0.3)
145 | targets[negative_indices, :] = 0
146 | targets[negative_indices, 1] = 1
147 |
148 | # those whose iou>=0.7 have object
149 | positive_indices = torch.ge(IoU_max, 0.7)
150 |
151 | #temp
152 | positive_indices_list.append(positive_indices)
153 |
154 | num_positive_anchors = positive_indices.sum()
155 |
156 | #keep positive and negative ratios with 1:3
157 | keep_negative_anchors = num_positive_anchors * 3
158 |
159 | bbox_assigned_annotations = bbox_annotation[IoU_argmax, :]
160 | ldm_assigned_annotations = ldm_annotation[IoU_argmax, :]
161 |
162 | targets[positive_indices, :] = 0
163 | targets[positive_indices, 0] = 1
164 |
165 | # ignore targets with no landmarks
166 | # f_IoU_max ,f_IoU_argmax = torch.max(filt_iou, dim=1)
167 | # ldm_positive_indices = torch.ge(f_IoU_max, 0.5)
168 |
169 | ldm_sum = ldm_assigned_annotations.sum(dim=1)
170 | ge0_mask = ldm_sum > 0
171 | ldm_positive_indices = ge0_mask & positive_indices
172 |
173 | # OHEM
174 | negative_losses = classification[negative_indices,1] * -1
175 | sorted_losses, _ = torch.sort(negative_losses, descending=True)
176 | if sorted_losses.numel() > keep_negative_anchors:
177 | sorted_losses = sorted_losses[:keep_negative_anchors]
178 | positive_losses = classification[positive_indices,0] * -1
179 |
180 | focal_loss = False
181 | # focal loss
182 | if focal_loss:
183 | alpha = 0.25
184 | gamma = 2.0
185 | alpha_factor = torch.ones(targets.shape).cuda() * alpha
186 |
187 | alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
188 | focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
189 | focal_weight = alpha_factor * torch.pow(focal_weight, gamma)
190 |
191 | bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))
192 |
193 | cls_loss = focal_weight * bce
194 |
195 | cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda())
196 |
197 | classification_losses.append(cls_loss.sum()/torch.clamp(num_positive_anchors.float(), min=1.0))
198 | else:
199 | if positive_indices.sum() > 0:
200 | classification_losses.append(positive_losses.mean() + sorted_losses.mean())
201 | else:
202 | classification_losses.append(torch.tensor(0.,requires_grad=True).cuda())
203 |
204 |
205 | # compute bboxes loss
206 | if positive_indices.sum() > 0:
207 | # bbox
208 | bbox_assigned_annotations = bbox_assigned_annotations[positive_indices, :]
209 |
210 | anchor_widths_pi = anchor_widths[positive_indices]
211 | anchor_heights_pi = anchor_heights[positive_indices]
212 | anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
213 | anchor_ctr_y_pi = anchor_ctr_y[positive_indices]
214 |
215 | gt_widths = bbox_assigned_annotations[:, 2] - bbox_assigned_annotations[:, 0]
216 | gt_heights = bbox_assigned_annotations[:, 3] - bbox_assigned_annotations[:, 1]
217 | gt_ctr_x = bbox_assigned_annotations[:, 0] + 0.5 * gt_widths
218 | gt_ctr_y = bbox_assigned_annotations[:, 1] + 0.5 * gt_heights
219 |
220 | targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / (anchor_widths_pi + 1e-14)
221 | targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / (anchor_heights_pi + 1e-14)
222 | targets_dw = torch.log(gt_widths / anchor_widths_pi)
223 | targets_dh = torch.log(gt_heights / anchor_heights_pi)
224 |
225 | bbox_targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
226 | bbox_targets = bbox_targets.t()
227 |
228 | # Rescale
229 | bbox_targets = bbox_targets/torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda()
230 |
231 | # smooth L1
232 | # box losses
233 | bbox_regression_loss = self.smoothl1(bbox_targets,bbox_regression[positive_indices, :])
234 | bbox_regression_losses.append(bbox_regression_loss)
235 | else:
236 | bbox_regression_losses.append(torch.tensor(0.,requires_grad=True).cuda())
237 |
238 | # compute landmarks loss
239 | if ldm_positive_indices.sum() > 0 :
240 | ldm_assigned_annotations = ldm_assigned_annotations[ldm_positive_indices, :]
241 |
242 | anchor_widths_l = anchor_widths[ldm_positive_indices]
243 | anchor_heights_l = anchor_heights[ldm_positive_indices]
244 | anchor_ctr_x_l = anchor_ctr_x[ldm_positive_indices]
245 | anchor_ctr_y_l = anchor_ctr_y[ldm_positive_indices]
246 | ldm_targets=[]
247 | for i in range(0,136):
248 | if i %2==0:
249 | candidate=(ldm_assigned_annotations[:,i] - anchor_ctr_x_l) / (anchor_widths_l + 1e-14)
250 | else:
251 | candidate=(ldm_assigned_annotations[:,i] - anchor_ctr_y_l) / (anchor_heights_l + 1e-14)
252 | ldm_targets.append(candidate)
253 | ldm_targets=torch.stack((ldm_targets))
254 | ldm_targets = ldm_targets.t()
255 |
256 | # Rescale
257 | scale = torch.ones(1,136)*0.1
258 | ldm_targets = ldm_targets/scale.cuda()
259 | # increase the weight for lips
260 | s1 = torch.ones(1,99)
261 | s2 = torch.ones(1,37)*3
262 | s=torch.cat([s1,s2],dim=-1).cuda()
263 | aaaaaaa=WingLoss()
264 | ldm_regression_loss = self.smoothl1(ldm_targets*s, ldm_regression[ldm_positive_indices, :]*s)
265 | ldm_regression_losses.append(ldm_regression_loss)
266 | else:
267 | ldm_regression_losses.append(torch.tensor(0.,requires_grad=True).cuda())
268 |
269 | return torch.stack(classification_losses), torch.stack(bbox_regression_losses),torch.stack(ldm_regression_losses)
270 |
--------------------------------------------------------------------------------
/magic_convert.py:
--------------------------------------------------------------------------------
1 | # # import numpy as np
2 | # # import torch
3 | # # import numpy as np
4 | # # from collections import OrderedDict
5 | # # b=torch.load('./out/mobile_model_epoch_1.pt')
6 | # # # a=torch.load('network.torch')
7 | # # # key_a=a.keys()
8 | # # # key_b=b.keys()
9 | # # # result=OrderedDict()
10 | # # # for ka in key_a:
11 | # # # for kb in key_b:
12 | # # # if(ka in kb):
13 | # # # result[kb]=a[ka]
14 | # # # print(len(result.keys()))
15 |
16 | # # # torch.save(result,"pretrained.torch")
17 |
18 | # # c=torch.load("pretrained.torch")
19 | # # print(b.keys())
20 |
21 |
22 | # import torch
23 | # import torch.nn as nn
24 | # import torch.nn.functional as F
25 | # import math
26 | # import datetime
27 | # from collections import OrderedDict
28 |
29 | # def Conv_3x3(in_channels, out_channels, stride):
30 | # return nn.Sequential(
31 | # nn.Conv2d(in_channels, out_channels, 3, stride, 1, bias=False),
32 | # nn.BatchNorm2d(out_channels),
33 | # nn.ReLU6()
34 | # )
35 |
36 | # def Conv_1x1(in_channels, out_channels, stride):
37 | # return nn.Sequential(
38 | # nn.Conv2d(in_channels, out_channels, 1, stride, 0, bias=False),
39 | # nn.BatchNorm2d(out_channels),
40 | # nn.ReLU6()
41 | # )
42 |
43 | # def SepConv_3x3(in_channels, out_channels, stride):
44 | # return nn.Sequential(
45 | # nn.Conv2d(in_channels, in_channels, 3, stride, 1, groups=in_channels, bias=False),
46 | # nn.BatchNorm2d(in_channels),
47 | # nn.ReLU6(),
48 |
49 | # nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False),
50 | # nn.BatchNorm2d(out_channels)
51 | # )
52 |
53 | # class MBConv3_3x3(nn.Module):
54 | # def __init__(self, in_channels, out_channels, stride):
55 | # super(MBConv3_3x3, self).__init__()
56 | # mid_channels = int(3 * in_channels)
57 |
58 | # self.block = nn.Sequential(
59 | # nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False),
60 | # nn.BatchNorm2d(mid_channels),
61 | # nn.ReLU6(),
62 |
63 | # nn.Conv2d(mid_channels, mid_channels, 3, stride, 1, groups=mid_channels, bias=False),
64 | # nn.BatchNorm2d(mid_channels),
65 | # nn.ReLU6(),
66 |
67 | # nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False),
68 | # nn.BatchNorm2d(out_channels)
69 | # )
70 |
71 | # self.use_skip_connect = (1 == stride and in_channels == out_channels)
72 |
73 | # def forward(self, x):
74 | # if self.use_skip_connect:
75 | # return self.block(x) + x
76 | # else:
77 | # return self.block(x)
78 |
79 | # class MBConv3_5x5(nn.Module):
80 | # def __init__(self, in_channels, out_channels, stride):
81 | # super(MBConv3_5x5, self).__init__()
82 | # mid_channels = int(3 * in_channels)
83 |
84 | # self.block = nn.Sequential(
85 | # nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False),
86 | # nn.BatchNorm2d(mid_channels),
87 | # nn.ReLU6(),
88 |
89 | # nn.Conv2d(mid_channels, mid_channels, 5, stride, 2, groups=mid_channels, bias=False),
90 | # nn.BatchNorm2d(mid_channels),
91 | # nn.ReLU6(),
92 |
93 | # nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False),
94 | # nn.BatchNorm2d(out_channels)
95 | # )
96 |
97 | # self.use_skip_connect = (1 == stride and in_channels == out_channels)
98 |
99 | # def forward(self, x):
100 | # if self.use_skip_connect:
101 | # return self.block(x) + x
102 | # else:
103 | # return self.block(x)
104 |
105 | # class MBConv6_3x3(nn.Module):
106 | # def __init__(self, in_channels, out_channels, stride):
107 | # super(MBConv6_3x3, self).__init__()
108 | # mid_channels = int(6 * in_channels)
109 |
110 | # self.block = nn.Sequential(
111 | # nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False),
112 | # nn.BatchNorm2d(mid_channels),
113 | # nn.ReLU6(),
114 |
115 | # nn.Conv2d(mid_channels, mid_channels, 3, stride, 1, groups=mid_channels, bias=False),
116 | # nn.BatchNorm2d(mid_channels),
117 | # nn.ReLU6(),
118 |
119 | # nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False),
120 | # nn.BatchNorm2d(out_channels)
121 | # )
122 |
123 | # self.use_skip_connect = (1 == stride and in_channels == out_channels)
124 |
125 | # def forward(self, x):
126 | # if self.use_skip_connect:
127 | # return self.block(x) + x
128 | # else:
129 | # return self.block(x)
130 |
131 | # class MBConv6_5x5(nn.Module):
132 | # def __init__(self, in_channels, out_channels, stride):
133 | # super(MBConv6_5x5, self).__init__()
134 | # mid_channels = int(6 * in_channels)
135 |
136 | # self.block = nn.Sequential(
137 | # nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False),
138 | # nn.BatchNorm2d(mid_channels),
139 | # nn.ReLU6(),
140 |
141 | # nn.Conv2d(mid_channels, mid_channels, 5, stride, 2, groups=mid_channels, bias=False),
142 | # nn.BatchNorm2d(mid_channels),
143 | # nn.ReLU6(),
144 |
145 | # nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False),
146 | # nn.BatchNorm2d(out_channels)
147 | # )
148 |
149 | # self.use_skip_connect = (1 == stride and in_channels == out_channels)
150 |
151 | # def forward(self, x):
152 | # if self.use_skip_connect:
153 | # return self.block(x) + x
154 | # else:
155 | # return self.block(x)
156 |
157 | # class MnasNet(nn.Module):
158 | # def __init__(self, num_classes=1000, width_mult=1.):
159 | # super(MnasNet, self).__init__()
160 |
161 | # self.out_channels = int(1280 * width_mult)
162 |
163 | # self.conv1 = Conv_3x3(3, int(32 * width_mult), 2)
164 | # self.conv2 = SepConv_3x3(int(32 * width_mult), int(16 * width_mult), 1)
165 |
166 | # self.feature = nn.Sequential(
167 | # self._make_layer(MBConv3_3x3, 3, int(16 * width_mult), int(24 * width_mult), 2),
168 | # self._make_layer(MBConv3_5x5, 3, int(24 * width_mult), int(64 * width_mult), 2)
169 | # )
170 | # self.feature1=nn.Sequential(
171 | # self._make_layer(MBConv6_5x5, 3, int(64 * width_mult), int(80 * width_mult), 2),
172 |
173 | # )
174 | # self.feature2=nn.Sequential(
175 | # self._make_layer(MBConv6_3x3, 2, int(80 * width_mult), int(128 * width_mult), 1)
176 | # )
177 | # self.feature3=nn.Sequential(
178 | # self._make_layer(MBConv6_5x5, 4, int(128 * width_mult), int(192 * width_mult), 2)
179 | # )
180 | # self.feature4=nn.Sequential(
181 | # self._make_layer(MBConv6_3x3, 1, int(192 * width_mult), int(256 * width_mult), 1)
182 | # )
183 |
184 | # # self.conv3 = Conv_1x1(int(256 * width_mult), int(1280 * width_mult), 1)
185 | # # self.gap = nn.AdaptiveAvgPool2d(1)
186 | # # self.classifier = nn.Linear(int(1280 * width_mult), num_classes)
187 |
188 | # self._initialize_weights()
189 |
190 | # def _make_layer(self, block, blocks, in_channels, out_channels, stride=1):
191 | # strides = [stride] + [1] * (blocks - 1)
192 | # layers = []
193 | # for _stride in strides:
194 | # layers.append(block(in_channels, out_channels, _stride))
195 | # in_channels = out_channels
196 |
197 | # return nn.Sequential(*layers)
198 |
199 | # def _initialize_weights(self):
200 | # for m in self.modules():
201 | # if isinstance(m, nn.Conv2d):
202 | # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
203 | # m.weight.data.normal_(0, math.sqrt(2. / n))
204 | # if m.bias is not None:
205 | # m.bias.data.zero_()
206 | # elif isinstance(m, nn.BatchNorm2d):
207 | # m.weight.data.fill_(1)
208 | # m.bias.data.zero_()
209 | # elif isinstance(m, nn.Linear):
210 | # n = m.weight.size(1)
211 | # m.weight.data.normal_(0, 0.01)
212 | # m.bias.data.zero_()
213 |
214 | # def forward(self, x):
215 | # result=OrderedDict()
216 | # x = self.conv2(self.conv1(x))
217 | # x1 = self.feature(x)
218 | # result[1]=x1
219 | # x=self.feature1(x1)
220 | # x2=self.feature2(x)
221 | # result[2]=x2
222 | # x=self.feature3(x2)
223 | # x3=self.feature4(x)
224 | # result[3]=x3
225 | # return result
226 |
227 | # if __name__ == '__main__':
228 | # net = MnasNet()
229 | # x = torch.randn(1,3,320,320)
230 | # net(x)
231 | # # for i in range(15):
232 | # # time1 = datetime.datetime.now()
233 | # # y = net(x)
234 | # # print('Time Cost: ', (datetime.datetime.now() - time1).microseconds)
235 | # #y = net(x)
236 | # #print(y)
237 |
238 | import torch
239 | from torch.utils.serialization import load_lua
240 | for i in range(7201,22999):
241 | try:
242 | x = load_lua('/versa/elvishelvis/landmarks56/data55/{}.t7'.format(i))
243 | torch.save(x,'/versa/elvishelvis/landmarks56/data55/{}.pth'.format(i))
244 | except:
245 | print(i)
246 |
--------------------------------------------------------------------------------
/mnas.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | import math
6 |
7 | # class list_(object):
8 | # def __init__(self,li=None):
9 | # if(li!=None):
10 | # self.li=[]
11 | # else:
12 | # self.li=li
13 | # def ret(self):
14 | # return self.li
15 |
16 | result_list=[]
17 | last_fm_list=[]
18 | def Conv_3x3(in_channels, out_channels, stride):
19 | return nn.Sequential(
20 | nn.Conv2d(in_channels, out_channels, 3, stride, 1, bias=False),
21 | nn.BatchNorm2d(out_channels),
22 | nn.ReLU6()
23 | )
24 |
25 | def Conv_1x1(in_channels, out_channels, stride):
26 | return nn.Sequential(
27 | nn.Conv2d(in_channels, out_channels, 1, stride, 0, bias=False),
28 | nn.BatchNorm2d(out_channels),
29 | nn.ReLU6()
30 | )
31 |
32 | def SepConv_3x3(in_channels, out_channels, stride):
33 | return nn.Sequential(
34 | nn.Conv2d(in_channels, in_channels, 3, stride, 1, groups=in_channels, bias=False),
35 | nn.BatchNorm2d(in_channels),
36 | nn.ReLU6(),
37 |
38 | nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False),
39 | nn.BatchNorm2d(out_channels)
40 | )
41 |
42 | class MBConv3_3x3(nn.Module):
43 | def __init__(self, in_channels, out_channels, stride):
44 | super(MBConv3_3x3, self).__init__()
45 | mid_channels = int(3 * in_channels)
46 |
47 | self.block = nn.Sequential(
48 | nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False),
49 | nn.BatchNorm2d(mid_channels),
50 | nn.ReLU6(),
51 |
52 | nn.Conv2d(mid_channels, mid_channels, 3, stride, 1, groups=mid_channels, bias=False),
53 | nn.BatchNorm2d(mid_channels),
54 | nn.ReLU6(),
55 |
56 | nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False),
57 | nn.BatchNorm2d(out_channels)
58 | )
59 |
60 | self.use_skip_connect = (1 == stride and in_channels == out_channels)
61 |
62 | def forward(self, x):
63 | if self.use_skip_connect:
64 | return self.block(x) + x
65 | else:
66 | return self.block(x)
67 |
68 | class MBConv3_5x5(nn.Module):
69 | def __init__(self, in_channels, out_channels, stride):
70 | super(MBConv3_5x5, self).__init__()
71 | mid_channels = int(3 * in_channels)
72 |
73 | self.block = nn.Sequential(
74 | nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False),
75 | nn.BatchNorm2d(mid_channels),
76 | nn.ReLU6(),
77 |
78 | nn.Conv2d(mid_channels, mid_channels, 5, stride, 2, groups=mid_channels, bias=False),
79 | nn.BatchNorm2d(mid_channels),
80 | nn.ReLU6(),
81 |
82 | nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False),
83 | nn.BatchNorm2d(out_channels)
84 | )
85 |
86 | self.use_skip_connect = (1 == stride and in_channels == out_channels)
87 |
88 | def forward(self, x):
89 | if self.use_skip_connect:
90 | return self.block(x) + x
91 | else:
92 | return self.block(x)
93 |
94 | class MBConv6_3x3(nn.Module):
95 | def __init__(self, in_channels, out_channels, stride):
96 | super(MBConv6_3x3, self).__init__()
97 | mid_channels = int(6 * in_channels)
98 |
99 | self.block = nn.Sequential(
100 | nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False),
101 | nn.BatchNorm2d(mid_channels),
102 | nn.ReLU6(),
103 |
104 | nn.Conv2d(mid_channels, mid_channels, 3, stride, 1, groups=mid_channels, bias=False),
105 | nn.BatchNorm2d(mid_channels),
106 | nn.ReLU6(),
107 |
108 | nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False),
109 | nn.BatchNorm2d(out_channels)
110 | )
111 |
112 | self.use_skip_connect = (1 == stride and in_channels == out_channels)
113 |
114 | def forward(self, x):
115 | if self.use_skip_connect:
116 | return self.block(x) + x
117 | else:
118 | return self.block(x)
119 |
120 | class MBConv6_5x5(nn.Module):
121 | def __init__(self, in_channels, out_channels, stride):
122 | super(MBConv6_5x5, self).__init__()
123 | mid_channels = int(6 * in_channels/1.125)
124 |
125 | self.block1 = nn.Sequential(
126 | nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False),
127 | nn.BatchNorm2d(mid_channels),
128 | nn.ReLU6(),
129 | )
130 | self.block2 = nn.Sequential(
131 | nn.Conv2d(mid_channels, mid_channels, 5, stride, 2, groups=mid_channels, bias=False),
132 | nn.BatchNorm2d(mid_channels),
133 | nn.ReLU6()
134 | )
135 | self.block3 = nn.Sequential(
136 | nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False),
137 | nn.BatchNorm2d(out_channels)
138 | )
139 |
140 | self.use_skip_connect = (1 == stride and in_channels == out_channels)
141 |
142 | def forward(self, x):
143 | if self.use_skip_connect:
144 | x1=self.block1(x)
145 | x1=self.block2(x1)
146 | last_fm_list.append(x1)
147 | x1=self.block3(x1)
148 |
149 | return x1 + x
150 | else:
151 | x1=self.block1(x)
152 | result_list.append(x1)
153 | x1=self.block2(x1)
154 |
155 |
156 | x1=self.block3(x1)
157 |
158 | return x1
159 | class MnasNet(nn.Module):
160 | def __init__(self, num_classes=1000, width_mult=1.):
161 | super(MnasNet, self).__init__()
162 |
163 | self.out_channels = int(1280 * width_mult)
164 |
165 | self.conv1 = Conv_3x3(3, int(32 * width_mult), 2)
166 | self.conv2 = SepConv_3x3(int(32 * width_mult), int(16 * width_mult), 1)
167 |
168 | self.feature = nn.Sequential(
169 | self._make_layer(MBConv3_3x3, 3, int(16 * width_mult), int(24 * width_mult), 2),
170 | self._make_layer(MBConv3_5x5, 3, int(24 * width_mult), int(48 * width_mult), 2),
171 | self._make_layer(MBConv6_5x5, 3, int(48 * width_mult), int(80 * width_mult), 2),
172 | self._make_layer(MBConv6_3x3, 2, int(80 * width_mult), int(96 * width_mult), 1),
173 | self._make_layer(MBConv6_5x5, 4, int(96 * width_mult), int(192 * width_mult), 2)
174 | # self._make_layer(MBConv6_3x3, 1, int(192 * width_mult), int(320 * width_mult), 1)
175 | )
176 |
177 |
178 | self._initialize_weights()
179 |
180 | def _make_layer(self, block, blocks, in_channels, out_channels, stride=1):
181 | strides = [stride] + [1] * (blocks - 1)
182 | layers = []
183 | for _stride in strides:
184 | layers.append(block(in_channels, out_channels, _stride))
185 | in_channels = out_channels
186 |
187 | return nn.Sequential(*layers)
188 |
189 | def _initialize_weights(self):
190 | for m in self.modules():
191 | if isinstance(m, nn.Conv2d):
192 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
193 | m.weight.data.normal_(0, math.sqrt(2. / n))
194 | if m.bias is not None:
195 | m.bias.data.zero_()
196 | elif isinstance(m, nn.BatchNorm2d):
197 | m.weight.data.fill_(1)
198 | m.bias.data.zero_()
199 | elif isinstance(m, nn.Linear):
200 | n = m.weight.size(1)
201 | m.weight.data.normal_(0, 0.01)
202 | m.bias.data.zero_()
203 |
204 | def forward(self, x):
205 |         result_list.clear(); last_fm_list.clear()  # reset the shared caches so repeated forward passes don't return stale feature maps
206 | x = self.conv2(self.conv1(x))
207 | x = self.feature(x)
208 |         result = OrderedDict()
209 |         result_list.append(last_fm_list[-1])  # deepest feature map, cached by the final skip block
210 |         result[0] = result_list[0]  # shallowest pyramid level
211 |         result[1] = result_list[1]
212 |         result[2] = result_list[2]  # deepest pyramid level
213 |         return result
214 |
215 | if __name__ == '__main__':
216 | net = MnasNet(width_mult=0.25)
217 | x = torch.randn(1,3,320,320)
218 | net(x)
219 |
220 |
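221 |     # shape check (expected, not verified here): for a 320x320 input and width_mult=0.25 the
222 |     # three returned pyramid levels come out at strides 8/16/32 with 64/128/256 channels
223 |     out = net(x)
224 |     for k, v in out.items():
225 |         print(k, v.shape)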
--------------------------------------------------------------------------------
/mobile.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 | import torch.nn as nn
3 | import torch
4 | class mobileV1(nn.Module):
5 | def __init__(self):
6 | super(mobileV1, self).__init__()
7 |
8 | self.mobilenet0_conv0 = nn.Sequential(
9 | nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3, stride=2, padding=1, bias=False),
10 | nn.BatchNorm2d(num_features=8, momentum=0.9),
11 | nn.ReLU(inplace=True))
12 |
13 | self.mobilenet0_conv1 = nn.Sequential(
14 | nn.Conv2d(in_channels=8, out_channels=8, kernel_size=3, stride=1, padding=1, groups=8, bias=False),
15 | nn.BatchNorm2d(num_features=8, momentum=0.9),
16 | nn.ReLU(inplace=True))
17 |
18 | self.mobilenet0_conv2 = nn.Sequential(
19 | nn.Conv2d(in_channels=8, out_channels=16, kernel_size=1, stride=1, padding=0, bias=False),
20 | nn.BatchNorm2d(num_features=16, momentum=0.9),
21 | nn.ReLU(inplace=True))
22 |
23 | self.mobilenet0_conv3 = nn.Sequential(
24 | nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, stride=2, padding=1, groups=16, bias=False),
25 | nn.BatchNorm2d(num_features=16, momentum=0.9),
26 | nn.ReLU(inplace=True))
27 |
28 | self.mobilenet0_conv4 = nn.Sequential(
29 | nn.Conv2d(in_channels=16, out_channels=32, kernel_size=1, stride=1, padding=0, bias=False),
30 | nn.BatchNorm2d(num_features=32, momentum=0.9),
31 | nn.ReLU(inplace=True))
32 |
33 | self.mobilenet0_conv5 = nn.Sequential(
34 | nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1, groups=32, bias=False),
35 | nn.BatchNorm2d(num_features=32, momentum=0.9),
36 | nn.ReLU(inplace=True))
37 |
38 | self.mobilenet0_conv6 = nn.Sequential(
39 | nn.Conv2d(in_channels=32, out_channels=32, kernel_size=1, stride=1, padding=0, bias=False),
40 | nn.BatchNorm2d(num_features=32, momentum=0.9),
41 | nn.ReLU(inplace=True))
42 |
43 | self.mobilenet0_conv7 = nn.Sequential(
44 | nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=2, padding=1, groups=32, bias=False),
45 | nn.BatchNorm2d(num_features=32, momentum=0.9),
46 | nn.ReLU(inplace=True))
47 |
48 | self.mobilenet0_conv8 = nn.Sequential(
49 | nn.Conv2d(in_channels=32, out_channels=64, kernel_size=1, stride=1, padding=0, bias=False),
50 | nn.BatchNorm2d(num_features=64, momentum=0.9),
51 | nn.ReLU(inplace=True))
52 |
53 | self.mobilenet0_conv9 = nn.Sequential(
54 | nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, groups=64, bias=False),
55 | nn.BatchNorm2d(num_features=64, momentum=0.9),
56 | nn.ReLU(inplace=True))
57 |
58 | self.mobilenet0_conv10 = nn.Sequential(
59 | nn.Conv2d(in_channels=64, out_channels=64, kernel_size=1, stride=1, padding=0, bias=False),
60 | nn.BatchNorm2d(num_features=64, momentum=0.9),
61 | nn.ReLU(inplace=True))
62 |
63 | self.mobilenet0_conv11 = nn.Sequential(
64 | nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=2, padding=1, groups=64, bias=False),
65 | nn.BatchNorm2d(num_features=64, momentum=0.9),
66 | nn.ReLU(inplace=True))
67 |
68 | self.mobilenet0_conv12 = nn.Sequential(
69 | nn.Conv2d(in_channels=64, out_channels=128, kernel_size=1, stride=1, padding=0, bias=False),
70 | nn.BatchNorm2d(num_features=128, momentum=0.9),
71 | nn.ReLU(inplace=True))
72 |
73 | self.mobilenet0_conv13 = nn.Sequential(
74 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1, groups=128, bias=False),
75 | nn.BatchNorm2d(num_features=128, momentum=0.9),
76 | nn.ReLU(inplace=True))
77 |
78 | self.mobilenet0_conv14 = nn.Sequential(
79 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=1, stride=1, padding=0, bias=False),
80 | nn.BatchNorm2d(num_features=128, momentum=0.9),
81 | nn.ReLU(inplace=True))
82 |
83 | self.mobilenet0_conv15 = nn.Sequential(
84 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1, groups=128, bias=False),
85 | nn.BatchNorm2d(num_features=128),
86 | nn.ReLU(inplace=True))
87 |
88 | self.mobilenet0_conv16 = nn.Sequential(
89 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=1, stride=1, padding=0, bias=False),
90 | nn.BatchNorm2d(num_features=128, momentum=0.9),
91 | nn.ReLU(inplace=True))
92 |
93 | self.mobilenet0_conv17 = nn.Sequential(
94 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1, groups=128, bias=False),
95 | nn.BatchNorm2d(num_features=128, momentum=0.9),
96 | nn.ReLU(inplace=True))
97 |
98 | self.mobilenet0_conv18 = nn.Sequential(
99 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=1, stride=1, padding=0, bias=False),
100 | nn.BatchNorm2d(num_features=128, momentum=0.9),
101 | nn.ReLU(inplace=True))
102 |
103 | self.mobilenet0_conv19 = nn.Sequential(
104 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1, groups=128, bias=False),
105 | nn.BatchNorm2d(num_features=128, momentum=0.9),
106 | nn.ReLU(inplace=True))
107 |
108 | self.mobilenet0_conv20 = nn.Sequential(
109 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=1, stride=1, padding=0, bias=False),
110 | nn.BatchNorm2d(num_features=128, momentum=0.9),
111 | nn.ReLU(inplace=True))
112 |
113 | self.mobilenet0_conv21 = nn.Sequential(
114 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1, groups=128, bias=False),
115 | nn.BatchNorm2d(num_features=128, momentum=0.9),
116 | nn.ReLU(inplace=True))
117 |
118 | self.mobilenet0_conv22 = nn.Sequential(
119 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=1, stride=1, padding=0, bias=False),
120 | nn.BatchNorm2d(num_features=128, momentum=0.9),
121 | nn.ReLU(inplace=True))
122 |
123 | self.mobilenet0_conv23 = nn.Sequential(
124 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=2, padding=1, groups=128, bias=False),
125 | nn.BatchNorm2d(num_features=128, momentum=0.9),
126 | nn.ReLU(inplace=True))
127 |
128 | self.mobilenet0_conv24 = nn.Sequential(
129 | nn.Conv2d(in_channels=128, out_channels=256, kernel_size=1, stride=1, padding=0, bias=False),
130 | nn.BatchNorm2d(num_features=256, momentum=0.9),
131 | nn.ReLU(inplace=True))
132 |
133 | self.mobilenet0_conv25 = nn.Sequential(
134 | nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1, groups=256, bias=False),
135 | nn.BatchNorm2d(num_features=256, momentum=0.9),
136 | nn.ReLU(inplace=True))
137 |
138 | self.mobilenet0_conv26 = nn.Sequential(
139 | nn.Conv2d(in_channels=256, out_channels=256, kernel_size=1, stride=1, padding=0, bias=False),
140 | nn.BatchNorm2d(num_features=256, momentum=0.9),
141 | nn.ReLU(inplace=True))
142 | def forward(self, x):
143 | result_=OrderedDict()
144 | batchsize = x.shape[0]
145 | # k1=F.interpolate(k,(512,512),mode='nearest')
146 | x = self.mobilenet0_conv0(x)
147 | x = self.mobilenet0_conv1(x)
148 | x = self.mobilenet0_conv2(x)
149 | x = self.mobilenet0_conv3(x)
150 | x = self.mobilenet0_conv4(x)
151 | x = self.mobilenet0_conv5(x)
152 | x = self.mobilenet0_conv6(x)
153 | x = self.mobilenet0_conv7(x)
154 | x = self.mobilenet0_conv8(x)
155 | x = self.mobilenet0_conv9(x)
156 | x10 = self.mobilenet0_conv10(x)
157 | x = self.mobilenet0_conv11(x10)
158 | x = self.mobilenet0_conv12(x)
159 | x = self.mobilenet0_conv13(x)
160 | x = self.mobilenet0_conv14(x)
161 | x = self.mobilenet0_conv15(x)
162 | x = self.mobilenet0_conv16(x)
163 | x = self.mobilenet0_conv17(x)
164 | x = self.mobilenet0_conv18(x)
165 | x = self.mobilenet0_conv19(x)
166 | x = self.mobilenet0_conv20(x)
167 | x = self.mobilenet0_conv21(x)
168 | x22 = self.mobilenet0_conv22(x)
169 | x = self.mobilenet0_conv23(x22)
170 | x = self.mobilenet0_conv24(x)
171 | x = self.mobilenet0_conv25(x)
172 | x26 = self.mobilenet0_conv26(x)
173 |         result_[1] = x10   # stride-8 feature map, 64 channels
174 |         result_[2] = x22   # stride-16 feature map, 128 channels
175 |         result_[3] = x26   # stride-32 feature map, 256 channels
176 | return result_
177 | if __name__ == "__main__":
178 |     # quick parameter / FLOPs count with thop
179 |     from thop import profile
180 |     from thop import clever_format
181 | 
182 |     net = mobileV1()
183 |     # x = torch.randn(1,3,320,320)
184 | input = torch.randn(1, 3, 224, 224)
185 | flops, params = profile(net, inputs=(input, ))
186 | flops, params = clever_format([flops, params], "%.3f")
187 | print(params)
188 | print(flops)
189 |
--------------------------------------------------------------------------------
/mobile_testing.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 | import torch.nn as nn
3 | import torch
4 | class mobileV1(nn.Module):
5 | def __init__(self):
6 | super(mobileV1, self).__init__()
7 |
8 | self.mmm = nn.Sequential(
9 | nn.Conv2d(in_channels=3, out_channels=32*4, kernel_size=7, stride=4, padding=2, bias=False),
10 | nn.BatchNorm2d(num_features=32*4, momentum=0.9),
11 | nn.ReLU(inplace=True))
12 |
13 |
14 | self.mmm1 = nn.Sequential(
15 | nn.Conv2d(in_channels=3, out_channels=3, kernel_size=7, stride=4, padding=2, bias=False),
16 | nn.BatchNorm2d(num_features=3, momentum=0.9),
17 | nn.ReLU(inplace=True))
18 | self.mmm2 = nn.Sequential(
19 | nn.Conv2d(in_channels=3, out_channels=32*4, kernel_size=1, stride=1, padding=0, bias=False),
20 | nn.BatchNorm2d(num_features=32*4, momentum=0.9),
21 | nn.ReLU(inplace=True))
22 |
23 | self.mobilenet0_conv0 = nn.Sequential(
24 | nn.Conv2d(in_channels=3, out_channels=8*4, kernel_size=3, stride=2, padding=1, bias=False),
25 | nn.BatchNorm2d(num_features=8*4, momentum=0.9),
26 | nn.ReLU(inplace=True))
27 |
28 | self.mobilenet0_conv1 = nn.Sequential(
29 | nn.Conv2d(in_channels=8*4, out_channels=8*4, kernel_size=3, stride=1, padding=1, groups=8*4, bias=False),
30 | nn.BatchNorm2d(num_features=8*4, momentum=0.9),
31 | nn.ReLU(inplace=True))
32 |
33 | self.mobilenet0_conv2 = nn.Sequential(
34 | nn.Conv2d(in_channels=8*4, out_channels=16*4, kernel_size=1, stride=1, padding=0, bias=False),
35 | nn.BatchNorm2d(num_features=16*4, momentum=0.9),
36 | nn.ReLU(inplace=True))
37 |
38 | self.mobilenet0_conv3 = nn.Sequential(
39 | nn.Conv2d(in_channels=16*4, out_channels=16*4, kernel_size=3, stride=2, padding=1, groups=16*4, bias=False),
40 | nn.BatchNorm2d(num_features=16*4, momentum=0.9),
41 | nn.ReLU(inplace=True))
42 |
43 | self.mobilenet0_conv4 = nn.Sequential(
44 | nn.Conv2d(in_channels=16*4, out_channels=32*4, kernel_size=1, stride=1, padding=0, bias=False),
45 | nn.BatchNorm2d(num_features=32*4, momentum=0.9),
46 | nn.ReLU(inplace=True))
47 |
48 | self.mobilenet0_conv5 = nn.Sequential(
49 | nn.Conv2d(in_channels=32*4, out_channels=32*4, kernel_size=3, stride=1, padding=1, groups=32*4, bias=False),
50 | nn.BatchNorm2d(num_features=32*4, momentum=0.9),
51 | nn.ReLU(inplace=True))
52 |
53 | self.mobilenet0_conv6 = nn.Sequential(
54 | nn.Conv2d(in_channels=32*4, out_channels=32*4, kernel_size=1, stride=1, padding=0, bias=False),
55 | nn.BatchNorm2d(num_features=32*4, momentum=0.9),
56 | nn.ReLU(inplace=True))
57 |
58 | self.mobilenet0_conv7 = nn.Sequential(
59 | nn.Conv2d(in_channels=32*4, out_channels=32*4, kernel_size=3, stride=2, padding=1, groups=32*4, bias=False),
60 | nn.BatchNorm2d(num_features=32*4, momentum=0.9),
61 | nn.ReLU(inplace=True))
62 |
63 | self.mobilenet0_conv8 = nn.Sequential(
64 | nn.Conv2d(in_channels=32*4, out_channels=64*4, kernel_size=1, stride=1, padding=0, bias=False),
65 | nn.BatchNorm2d(num_features=64*4, momentum=0.9),
66 | nn.ReLU(inplace=True))
67 |
68 | self.mobilenet0_conv9 = nn.Sequential(
69 | nn.Conv2d(in_channels=64*4, out_channels=64*4, kernel_size=3, stride=1, padding=1, groups=64*4, bias=False),
70 | nn.BatchNorm2d(num_features=64*4, momentum=0.9),
71 | nn.ReLU(inplace=True))
72 |
73 | self.mobilenet0_conv10 = nn.Sequential(
74 | nn.Conv2d(in_channels=64*4, out_channels=64*4, kernel_size=1, stride=1, padding=0, bias=False),
75 | nn.BatchNorm2d(num_features=64*4, momentum=0.9),
76 | nn.ReLU(inplace=True))
77 |
78 | self.mobilenet0_conv11 = nn.Sequential(
79 | nn.Conv2d(in_channels=64*4, out_channels=64*4, kernel_size=3, stride=2, padding=1, groups=64*4, bias=False),
80 | nn.BatchNorm2d(num_features=64*4, momentum=0.9),
81 | nn.ReLU(inplace=True))
82 |
83 | self.mobilenet0_conv12 = nn.Sequential(
84 | nn.Conv2d(in_channels=64*4, out_channels=128*4, kernel_size=1, stride=1, padding=0, bias=False),
85 | nn.BatchNorm2d(num_features=128*4, momentum=0.9),
86 | nn.ReLU(inplace=True))
87 |
88 | self.mobilenet0_conv13 = nn.Sequential(
89 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=3, stride=1, padding=1, groups=128*4, bias=False),
90 | nn.BatchNorm2d(num_features=128*4, momentum=0.9),
91 | nn.ReLU(inplace=True))
92 |
93 | self.mobilenet0_conv14 = nn.Sequential(
94 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=1, stride=1, padding=0, bias=False),
95 | nn.BatchNorm2d(num_features=128*4, momentum=0.9),
96 | nn.ReLU(inplace=True))
97 |
98 | self.mobilenet0_conv15 = nn.Sequential(
99 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=3, stride=1, padding=1, groups=128*4, bias=False),
100 | nn.BatchNorm2d(num_features=128*4),
101 | nn.ReLU(inplace=True))
102 |
103 | self.mobilenet0_conv16 = nn.Sequential(
104 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=1, stride=1, padding=0, bias=False),
105 | nn.BatchNorm2d(num_features=128*4, momentum=0.9),
106 | nn.ReLU(inplace=True))
107 |
108 | self.mobilenet0_conv17 = nn.Sequential(
109 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=3, stride=1, padding=1, groups=128*4, bias=False),
110 | nn.BatchNorm2d(num_features=128*4, momentum=0.9),
111 | nn.ReLU(inplace=True))
112 |
113 | self.mobilenet0_conv18 = nn.Sequential(
114 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=1, stride=1, padding=0, bias=False),
115 | nn.BatchNorm2d(num_features=128*4, momentum=0.9),
116 | nn.ReLU(inplace=True))
117 |
118 | self.mobilenet0_conv19 = nn.Sequential(
119 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=3, stride=1, padding=1, groups=128*4, bias=False),
120 | nn.BatchNorm2d(num_features=128*4, momentum=0.9),
121 | nn.ReLU(inplace=True))
122 |
123 | self.mobilenet0_conv20 = nn.Sequential(
124 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=1, stride=1, padding=0, bias=False),
125 | nn.BatchNorm2d(num_features=128*4, momentum=0.9),
126 | nn.ReLU(inplace=True))
127 |
128 | self.mobilenet0_conv21 = nn.Sequential(
129 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=3, stride=1, padding=1, groups=128*4, bias=False),
130 | nn.BatchNorm2d(num_features=128*4, momentum=0.9),
131 | nn.ReLU(inplace=True))
132 |
133 | self.mobilenet0_conv22 = nn.Sequential(
134 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=1, stride=1, padding=0, bias=False),
135 | nn.BatchNorm2d(num_features=128*4, momentum=0.9),
136 | nn.ReLU(inplace=True))
137 |
138 | self.mobilenet0_conv23 = nn.Sequential(
139 | nn.Conv2d(in_channels=128*4, out_channels=128*4, kernel_size=3, stride=2, padding=1, groups=128*4, bias=False),
140 | nn.BatchNorm2d(num_features=128*4, momentum=0.9),
141 | nn.ReLU(inplace=True))
142 |
143 | self.mobilenet0_conv24 = nn.Sequential(
144 | nn.Conv2d(in_channels=128*4, out_channels=256*4, kernel_size=1, stride=1, padding=0, bias=False),
145 | nn.BatchNorm2d(num_features=256*4, momentum=0.9),
146 | nn.ReLU(inplace=True))
147 |
148 | self.mobilenet0_conv25 = nn.Sequential(
149 | nn.Conv2d(in_channels=256*4, out_channels=256*4, kernel_size=3, stride=1, padding=1, groups=256*4, bias=False),
150 | nn.BatchNorm2d(num_features=256*4, momentum=0.9),
151 | nn.ReLU(inplace=True))
152 |
153 | self.mobilenet0_conv26 = nn.Sequential(
154 | nn.Conv2d(in_channels=256*4, out_channels=256*4, kernel_size=1, stride=1, padding=0, bias=False),
155 | nn.BatchNorm2d(num_features=256*4, momentum=0.9),
156 | nn.ReLU(inplace=True))
157 | def forward(self, x):
158 | result=OrderedDict()
159 | batchsize = x.shape[0]
160 | # k1=F.interpolate(k,(512,512),mode='nearest')
161 | # x = self.mobilenet0_conv0(x)
162 | # x = self.mobilenet0_conv1(x)
163 | # x = self.mobilenet0_conv2(x)
164 |
165 | # x = self.mobilenet0_conv3(x)
166 | # x = self.mobilenet0_conv4(x)
167 | # x=self.mmm1(x)
168 | x=self.mmm(x)
169 | # print(x.shape)
170 | x = self.mobilenet0_conv5(x)
171 | x = self.mobilenet0_conv6(x)
172 | x = self.mobilenet0_conv7(x)
173 | x = self.mobilenet0_conv8(x)
174 | x = self.mobilenet0_conv9(x)
175 | x10 = self.mobilenet0_conv10(x)
176 | x = self.mobilenet0_conv11(x10)
177 | x = self.mobilenet0_conv12(x)
178 | x = self.mobilenet0_conv13(x)
179 | x = self.mobilenet0_conv14(x)
180 | x = self.mobilenet0_conv15(x)
181 | x = self.mobilenet0_conv16(x)
182 | x = self.mobilenet0_conv17(x)
183 | x = self.mobilenet0_conv18(x)
184 | x = self.mobilenet0_conv19(x)
185 | x = self.mobilenet0_conv20(x)
186 | x = self.mobilenet0_conv21(x)
187 | x22 = self.mobilenet0_conv22(x)
188 | x = self.mobilenet0_conv23(x22)
189 | x = self.mobilenet0_conv24(x)
190 | x = self.mobilenet0_conv25(x)
191 | x26 = self.mobilenet0_conv26(x)
192 | result[1]=x10
193 | result[2]=x22
194 | result[3]=x26
195 | return result
196 | if __name__ == '__main__':
197 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
198 | net = mobileV1().to(device)
199 |
200 | #print(net)
201 | import time
202 | x = torch.randn(1,3,640,640).to(device)
203 |     if torch.cuda.is_available(): torch.cuda.synchronize()  # sync only when timing on a GPU
204 | start=time.time()
205 | for i in range(10):
206 | net(x)
207 |     if torch.cuda.is_available(): torch.cuda.synchronize()
208 | print(time.time()-start)
209 | torch.save(net.state_dict(),'aaa.torch')
210 |
211 |
212 | # import torch
213 | # import torch.nn as nn
214 | # import torch.nn.functional as F
215 | # import math
216 | # import datetime
217 |
218 | # def Conv_3x3(in_channels, out_channels, stride):
219 | # return nn.Sequential(
220 | # nn.Conv2d(in_channels, out_channels, 3, stride, 1, bias=False),
221 | # nn.BatchNorm2d(out_channels),
222 | # nn.ReLU6()
223 | # )
224 |
225 | # def Conv_1x1(in_channels, out_channels, stride):
226 | # return nn.Sequential(
227 | # nn.Conv2d(in_channels, out_channels, 1, stride, 0, bias=False),
228 | # nn.BatchNorm2d(out_channels),
229 | # nn.ReLU6()
230 | # )
231 |
232 | # def SepConv_3x3(in_channels, out_channels, stride):
233 | # return nn.Sequential(
234 | # nn.Conv2d(in_channels, in_channels, 3, stride, 1, groups=in_channels, bias=False),
235 | # nn.BatchNorm2d(in_channels),
236 | # nn.ReLU6(),
237 |
238 | # nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False),
239 | # nn.BatchNorm2d(out_channels)
240 | # )
241 |
242 | # class MBConv3_3x3(nn.Module):
243 | # def __init__(self, in_channels, out_channels, stride):
244 | # super(MBConv3_3x3, self).__init__()
245 | # mid_channels = int(3 * in_channels)
246 |
247 | # self.block = nn.Sequential(
248 | # nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False),
249 | # nn.BatchNorm2d(mid_channels),
250 | # nn.ReLU6(),
251 |
252 | # nn.Conv2d(mid_channels, mid_channels, 3, stride, 1, groups=mid_channels, bias=False),
253 | # nn.BatchNorm2d(mid_channels),
254 | # nn.ReLU6(),
255 |
256 | # nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False),
257 | # nn.BatchNorm2d(out_channels)
258 | # )
259 |
260 | # self.use_skip_connect = (1 == stride and in_channels == out_channels)
261 |
262 | # def forward(self, x):
263 | # if self.use_skip_connect:
264 | # return self.block(x) + x
265 | # else:
266 | # return self.block(x)
267 |
268 | # class MBConv3_5x5(nn.Module):
269 | # def __init__(self, in_channels, out_channels, stride):
270 | # super(MBConv3_5x5, self).__init__()
271 | # mid_channels = int(3 * in_channels)
272 |
273 | # self.block = nn.Sequential(
274 | # nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False),
275 | # nn.BatchNorm2d(mid_channels),
276 | # nn.ReLU6(),
277 |
278 | # nn.Conv2d(mid_channels, mid_channels, 5, stride, 2, groups=mid_channels, bias=False),
279 | # nn.BatchNorm2d(mid_channels),
280 | # nn.ReLU6(),
281 |
282 | # nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False),
283 | # nn.BatchNorm2d(out_channels)
284 | # )
285 |
286 | # self.use_skip_connect = (1 == stride and in_channels == out_channels)
287 |
288 | # def forward(self, x):
289 | # if self.use_skip_connect:
290 | # return self.block(x) + x
291 | # else:
292 | # return self.block(x)
293 |
294 | # class MBConv6_3x3(nn.Module):
295 | # def __init__(self, in_channels, out_channels, stride):
296 | # super(MBConv6_3x3, self).__init__()
297 | # mid_channels = int(6 * in_channels)
298 |
299 | # self.block = nn.Sequential(
300 | # nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False),
301 | # nn.BatchNorm2d(mid_channels),
302 | # nn.ReLU6(),
303 |
304 | # nn.Conv2d(mid_channels, mid_channels, 3, stride, 1, groups=mid_channels, bias=False),
305 | # nn.BatchNorm2d(mid_channels),
306 | # nn.ReLU6(),
307 |
308 | # nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False),
309 | # nn.BatchNorm2d(out_channels)
310 | # )
311 |
312 | # self.use_skip_connect = (1 == stride and in_channels == out_channels)
313 |
314 | # def forward(self, x):
315 | # if self.use_skip_connect:
316 | # return self.block(x) + x
317 | # else:
318 | # return self.block(x)
319 |
320 | # class MBConv6_5x5(nn.Module):
321 | # def __init__(self, in_channels, out_channels, stride):
322 | # super(MBConv6_5x5, self).__init__()
323 | # mid_channels = int(6 * in_channels)
324 |
325 | # self.block = nn.Sequential(
326 | # nn.Conv2d(in_channels, mid_channels, 1, 1, 0, bias=False),
327 | # nn.BatchNorm2d(mid_channels),
328 | # nn.ReLU6(),
329 |
330 | # nn.Conv2d(mid_channels, mid_channels, 5, stride, 2, groups=mid_channels, bias=False),
331 | # nn.BatchNorm2d(mid_channels),
332 | # nn.ReLU6(),
333 |
334 | # nn.Conv2d(mid_channels, out_channels, 1, 1, 0, bias=False),
335 | # nn.BatchNorm2d(out_channels)
336 | # )
337 |
338 | # self.use_skip_connect = (1 == stride and in_channels == out_channels)
339 |
340 | # def forward(self, x):
341 | # if self.use_skip_connect:
342 | # return self.block(x) + x
343 | # else:
344 | # return self.block(x)
345 |
346 | # class MnasNet(nn.Module):
347 | # def __init__(self, width_mult=1.):
348 | # super(MnasNet, self).__init__()
349 |
350 | # self.out_channels = int(1280 * width_mult)
351 |
352 | # self.conv1 = Conv_3x3(3, int(32 * width_mult), 2)
353 | # self.conv2 = SepConv_3x3(int(32 * width_mult), int(16 * width_mult), 1)
354 |
355 | # self.feature1 = nn.Sequential(
356 | # self._make_layer(MBConv3_3x3, 3, int(16 * width_mult), int(24 * width_mult), 2),
357 | # self._make_layer(MBConv3_5x5, 3, int(24 * width_mult), int(64 * width_mult), 2)
358 |
359 | # )
360 | # self.feature2=nn.Sequential(
361 |
362 | # self._make_layer(MBConv6_5x5, 3, int(64 * width_mult), int(80 * width_mult), 2),
363 | # self._make_layer(MBConv6_3x3, 2, int(80 * width_mult), int(128 * width_mult), 1)
364 |
365 | # )
366 | # self.feature3=nn.Sequential(
367 |
368 | # self._make_layer(MBConv6_5x5, 4, int(128 * width_mult), int(192 * width_mult), 2),
369 | # self._make_layer(MBConv6_3x3, 1, int(192 * width_mult), int(256 * width_mult), 1))
370 |
371 | # self._initialize_weights()
372 |
373 | # def _make_layer(self, block, blocks, in_channels, out_channels, stride=1):
374 | # strides = [stride] + [1] * (blocks - 1)
375 | # layers = []
376 | # for _stride in strides:
377 | # layers.append(block(in_channels, out_channels, _stride))
378 | # in_channels = out_channels
379 |
380 | # return nn.Sequential(*layers)
381 |
382 | # def _initialize_weights(self):
383 | # for m in self.modules():
384 | # if isinstance(m, nn.Conv2d):
385 | # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
386 | # m.weight.data.normal_(0, math.sqrt(2. / n))
387 | # if m.bias is not None:
388 | # m.bias.data.zero_()
389 | # elif isinstance(m, nn.BatchNorm2d):
390 | # m.weight.data.fill_(1)
391 | # m.bias.data.zero_()
392 | # elif isinstance(m, nn.Linear):
393 | # n = m.weight.size(1)
394 | # m.weight.data.normal_(0, 0.01)
395 | # m.bias.data.zero_()
396 |
397 | # def forward(self, x):
398 | # x = self.conv2(self.conv1(x))
399 | # # print(x.shape)
400 | # x = self.feature1(x)
401 | # # print(x.shape)
402 | # x = self.feature2(x)
403 | # # print(x.shape)
404 | # x = self.feature3(x)
405 | # # print(x.shape)
406 |
407 | # return x
408 |
409 | # if __name__ == '__main__':
410 | # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
411 | # net = MnasNet().to(device)
412 | # #print(net)
413 | # import time
414 | # torch.cuda.synchronize()
415 | # x = torch.randn(1,3,640,640).to(device)
416 | # start=time.time()
417 | # for i in range(10):
418 | # net(x)
419 | # torch.cuda.synchronize()
420 | # print(time.time()-start)
421 |
422 | # torch.save(net.state_dict(),'aaa.torch')
423 | # # print(net)
424 | # #print(y)
425 |
426 |
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch
3 | import math
4 | import time
5 | import torch.utils.model_zoo as model_zoo
6 | from utils import BasicBlock, Bottleneck, RegressionTransform
7 | from anchors import Anchors
8 | import losses
9 |
10 | model_urls = {
11 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
12 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
13 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
14 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
15 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
16 | }
17 |
18 | class PyramidFeatures(nn.Module):
19 | def __init__(self, C2_size, C3_size, C4_size, C5_size, feature_size=256):
20 | super(PyramidFeatures, self).__init__()
21 |
22 | # upsample C5 to get P5 from the FPN paper
23 | self.P5_1 = nn.Conv2d(C5_size, feature_size, kernel_size=1, stride=1, padding=0)
24 | self.P5_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
25 | self.P5_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)
26 |
27 | # add P5 elementwise to C4
28 | self.P4_1 = nn.Conv2d(C4_size, feature_size, kernel_size=1, stride=1, padding=0)
29 | self.P4_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
30 | self.P4_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)
31 |
32 | # add P4 elementwise to C3
33 | self.P3_1 = nn.Conv2d(C3_size, feature_size, kernel_size=1, stride=1, padding=0)
34 | self.P3_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
35 | self.P3_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)
36 |
37 | # "P6 is obtained via a 3x3 stride-2 conv on C5"
38 | self.P6 = nn.Conv2d(C5_size, feature_size, kernel_size=3, stride=2, padding=1)
39 |
40 | # "P7 is computed by applying ReLU followed by a 3x3 stride-2 conv on P6"
41 | # Retinaface does not need P7
42 | # self.P7_1 = nn.ReLU()
43 | # self.P7_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=2, padding=1)
44 |
45 | # solve C2
46 | self.P2_1 = nn.Conv2d(C2_size, feature_size, kernel_size=1, stride=1, padding=0)
47 | self.P2_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)
48 |
49 | def forward(self, inputs):
50 |
51 | C2, C3, C4, C5 = inputs
52 |
53 | P5_x = self.P5_1(C5)
54 | P5_upsampled_x = self.P5_upsampled(P5_x)
55 | P5_x = self.P5_2(P5_x)
56 |
57 | P4_x = self.P4_1(C4)
58 | P4_x = P5_upsampled_x + P4_x
59 | P4_upsampled_x = self.P4_upsampled(P4_x)
60 | P4_x = self.P4_2(P4_x)
61 |
62 | P3_x = self.P3_1(C3)
63 | P3_x = P3_x + P4_upsampled_x
64 | P3_upsampled_x = self.P3_upsampled(P3_x)
65 | P3_x = self.P3_2(P3_x)
66 |
67 | P2_x = self.P2_1(C2)
68 | P2_x = P2_x + P3_upsampled_x
69 | P2_x = self.P2_2(P2_x)
70 |
71 | P6_x = self.P6(C5)
72 |
73 |         return [P2_x, P3_x, P4_x, P5_x, P6_x]  # pyramid levels at strides 4, 8, 16, 32, 64
74 |
75 | class ClassHead(nn.Module):
76 | def __init__(self,inchannels=512,num_anchors=3):
77 | super(ClassHead,self).__init__()
78 | self.num_anchors = num_anchors
79 | self.conv1x1 = nn.Conv2d(inchannels,self.num_anchors*2,kernel_size=(1,1),stride=1)
80 |
81 | # if use focal loss instead of OHEM
82 | #self.output_act = nn.Sigmoid()
83 |
84 | # if use OHEM
85 | self.output_act = nn.LogSoftmax(dim=-1)
86 |
87 |
88 | def forward(self,x):
89 | out = self.conv1x1(x)
90 | out = out.permute(0,2,3,1)
91 | b, h, w, c = out.shape
92 | out = out.view(b, h, w, self.num_anchors, 2)
93 | #out = out.permute(0,2,3,1).contiguous().view(out.shape[0], -1, 2)
94 | out = self.output_act(out)
95 |
96 | return out.contiguous().view(out.shape[0], -1, 2)
97 |
98 | class BboxHead(nn.Module):
99 | def __init__(self,inchannels=512,num_anchors=3):
100 | super(BboxHead,self).__init__()
101 | self.conv1x1 = nn.Conv2d(inchannels,num_anchors*4,kernel_size=(1,1),stride=1)
102 |
103 | def forward(self,x):
104 | out = self.conv1x1(x)
105 | out = out.permute(0,2,3,1)
106 |
107 | return out.contiguous().view(out.shape[0], -1, 4)
108 |
109 | class LandmarkHead(nn.Module):
110 | def __init__(self,inchannels=512,num_anchors=3):
111 | super(LandmarkHead,self).__init__()
112 |         self.conv1x1 = nn.Conv2d(inchannels,num_anchors*10,kernel_size=(1,1),stride=1)  # 10 outputs = 5 landmark points * 2 coords per anchor; the 68-point head in torchvision_model.py predicts 136
113 |
114 | def forward(self,x):
115 | out = self.conv1x1(x)
116 | out = out.permute(0,2,3,1)
117 |
118 | return out.contiguous().view(out.shape[0], -1, 10)
119 |
120 |
121 | class ClassHead_(nn.Module):
122 | def __init__(self,inchannels=256,num_anchors=3):
123 | super(ClassHead_,self).__init__()
124 | self.num_anchors = num_anchors
125 | self.feature_head = self._make_head(self.num_anchors*2)
126 | self.output_act = nn.LogSoftmax(dim=-1)
127 |
128 | def _make_head(self,out_size):
129 | layers = []
130 | for _ in range(4):
131 | layers += [nn.Conv2d(256, 256, 3, padding=1), nn.ReLU(inplace=True)]
132 | layers += [nn.Conv2d(256, out_size, 3, padding=1)]
133 | return nn.Sequential(*layers)
134 |
135 | def forward(self,x):
136 | out = self.feature_head(x)
137 | out = out.permute(0,2,3,1)
138 | b, h, w, c = out.shape
139 | out = out.view(b, h, w, self.num_anchors, 2)
140 | #out = out.permute(0,2,3,1).contiguous().view(out.shape[0], -1, 2)
141 | out = self.output_act(out)
142 |
143 | return out.contiguous().view(out.shape[0], -1, 2)
144 |
145 | class BboxHead_(nn.Module):
146 | def __init__(self,inchannels=256,num_anchors=3):
147 | super(BboxHead_,self).__init__()
148 | self.num_anchors = num_anchors
149 | self.feature_head = self._make_head(self.num_anchors*4)
150 |
151 | def _make_head(self,out_size):
152 | layers = []
153 | for _ in range(4):
154 | layers += [nn.Conv2d(256, 256, 3, padding=1), nn.ReLU(inplace=True)]
155 | layers += [nn.Conv2d(256, out_size, 3, padding=1)]
156 | return nn.Sequential(*layers)
157 |
158 | def forward(self,x):
159 | out = self.feature_head(x)
160 | out = out.permute(0,2,3,1)
161 |
162 | return out.contiguous().view(out.shape[0], -1, 4)
163 |
164 | class LandmarkHead_(nn.Module):
165 | def __init__(self,inchannels=256,num_anchors=3):
166 | super(LandmarkHead_,self).__init__()
167 | self.num_anchors = num_anchors
168 | self.feature_head = self._make_head(self.num_anchors*10)
169 |
170 | def _make_head(self,out_size):
171 | layers = []
172 | for _ in range(4):
173 | layers += [nn.Conv2d(256, 256, 3, padding=1), nn.ReLU(inplace=True)]
174 | layers += [nn.Conv2d(256, out_size, 3, padding=1)]
175 | return nn.Sequential(*layers)
176 |
177 | def forward(self,x):
178 | out = self.feature_head(x)
179 | out = out.permute(0,2,3,1)
180 |
181 | return out.contiguous().view(out.shape[0], -1, 10)
182 |
183 |
184 | class CBR(nn.Module):
185 | def __init__(self,inchannels,outchannels):
186 | super(CBR,self).__init__()
187 | self.conv3x3 = nn.Conv2d(inchannels,outchannels,kernel_size=3,stride=1,padding=1,bias=False)
188 | self.bn = nn.BatchNorm2d(outchannels)
189 | self.relu = nn.ReLU(inplace=True)
190 |
191 | for m in self.modules():
192 | if isinstance(m, nn.BatchNorm2d):
193 | nn.init.constant_(m.weight, 1)
194 | nn.init.constant_(m.bias, 0)
195 | if isinstance(m, nn.Conv2d):
196 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
197 | #nn.init.normal_(m.weight, std=0.01)
198 |
199 | def forward(self,x):
200 | x = self.conv3x3(x)
201 | x = self.bn(x)
202 | x = self.relu(x)
203 |
204 | return x
205 |
206 | class CB(nn.Module):
207 | def __init__(self,inchannels):
208 | super(CB,self).__init__()
209 | self.conv3x3 = nn.Conv2d(inchannels,inchannels,kernel_size=3,stride=1,padding=1,bias=False)
210 | self.bn = nn.BatchNorm2d(inchannels)
211 |
212 | for m in self.modules():
213 | if isinstance(m, nn.BatchNorm2d):
214 | nn.init.constant_(m.weight, 1)
215 | nn.init.constant_(m.bias, 0)
216 | if isinstance(m, nn.Conv2d):
217 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
218 | #nn.init.normal_(m.weight, std=0.01)
219 |
220 | def forward(self,x):
221 | x = self.conv3x3(x)
222 | x = self.bn(x)
223 |
224 | return x
225 |
226 | class Concat(nn.Module):
227 | def forward(self,*feature):
228 | out = torch.cat(feature,dim=1)
229 | return out
230 |
231 | class Context(nn.Module):
232 | def __init__(self,inchannels=256):
233 | super(Context,self).__init__()
234 | self.context_plain = inchannels//2
235 | self.conv1 = CB(inchannels)
236 | self.conv2 = CBR(inchannels,self.context_plain)
237 | self.conv2_1 = CB(self.context_plain)
238 | self.conv2_2_1 = CBR(self.context_plain,self.context_plain)
239 | self.conv2_2_2 = CB(self.context_plain)
240 | self.concat = Concat()
241 | self.relu = nn.ReLU(inplace=True)
242 |
243 | def forward(self,x):
244 | f1 = self.conv1(x)
245 | f2_ = self.conv2(x)
246 | f2 = self.conv2_1(f2_)
247 | f3 = self.conv2_2_1(f2_)
248 | f3 = self.conv2_2_2(f3)
249 |
250 | #out = torch.cat([f1,f2,f3],dim=1)
251 | out = self.concat(f1,f2,f3)
252 | out = self.relu(out)
253 |
254 |         return out  # concat of f1 (inchannels) with f2 and f3 (inchannels//2 each) -> 2*inchannels, i.e. 512 for the default 256
255 |
256 | def initialize_layer(layer):
257 | if isinstance(layer, nn.Conv2d):
258 | nn.init.normal_(layer.weight, std=0.01)
259 | if layer.bias is not None:
260 | nn.init.constant_(layer.bias, val=0)
261 |
262 | class ResNet(nn.Module):
263 |
264 | def __init__(self, num_classes, block, layers, num_anchors=3):
265 | self.inplanes = 64
266 | super(ResNet, self).__init__()
267 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
268 | self.bn1 = nn.BatchNorm2d(64)
269 | self.relu = nn.ReLU(inplace=True)
270 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
271 | self.layer1 = self._make_layer(block, 64, layers[0])
272 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
273 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
274 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
275 |
276 | if block == BasicBlock:
277 | fpn_sizes = [self.layer1[layers[0]-1].conv2.out_channels, self.layer2[layers[1]-1].conv2.out_channels,
278 | self.layer3[layers[2]-1].conv2.out_channels, self.layer4[layers[3]-1].conv2.out_channels]
279 | elif block == Bottleneck:
280 | fpn_sizes = [self.layer1[layers[0]-1].conv3.out_channels, self.layer2[layers[1]-1].conv3.out_channels,
281 | self.layer3[layers[2]-1].conv3.out_channels, self.layer4[layers[3]-1].conv3.out_channels]
282 |
283 | self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2],fpn_sizes[3])
284 |
285 | self.context = self._make_contextlayer()
286 |
287 | self.clsHead = ClassHead_()
288 | self.bboxHead = BboxHead_()
289 | self.ldmHead = LandmarkHead_()
290 |
291 | # self.clsHead = self._make_class_head()
292 | # self.bboxHead = self._make_bbox_head()
293 | # self.ldmHead = self._make_landmark_head()
294 |
295 | self.anchors = Anchors()
296 |
297 | self.regressBoxes = RegressionTransform()
298 |
299 | self.losslayer = losses.LossLayer()
300 |
301 | self.freeze_bn()
302 |
303 | # initialize head
304 | # self.clsHead.apply(initialize_layer)
305 | # self.bboxHead.apply(initialize_layer)
306 | # self.ldmHead.apply(initialize_layer)
307 |
308 | # initialize context
309 | for layer in self.context:
310 | for m in layer.modules():
311 | if isinstance(m, nn.Conv2d):
312 | nn.init.normal_(m.weight, std=0.01)
313 | if m.bias is not None:
314 | nn.init.constant_(m.bias, 0)
315 | if isinstance(m, nn.BatchNorm2d):
316 | nn.init.constant_(m.weight, 1)
317 | nn.init.constant_(m.bias, 0)
318 |
319 | def _make_contextlayer(self,fpn_num=5,inchannels=256):
320 | context = nn.ModuleList()
321 | for i in range(fpn_num):
322 | context.append(Context())
323 |
324 | return context
325 |
326 | def _make_class_head(self,fpn_num=5,inchannels=512,anchor_num=3):
327 | classhead = nn.ModuleList()
328 | for i in range(fpn_num):
329 | classhead.append(ClassHead(inchannels,anchor_num))
330 | return classhead
331 |
332 | def _make_bbox_head(self,fpn_num=5,inchannels=512,anchor_num=3):
333 | bboxhead = nn.ModuleList()
334 | for i in range(fpn_num):
335 | bboxhead.append(BboxHead(inchannels,anchor_num))
336 | return bboxhead
337 |
338 | def _make_landmark_head(self,fpn_num=5,inchannels=512,anchor_num=3):
339 | landmarkhead = nn.ModuleList()
340 | for i in range(fpn_num):
341 | landmarkhead.append(LandmarkHead(inchannels,anchor_num))
342 | return landmarkhead
343 |
344 |
345 | def _make_layer(self, block, planes, blocks, stride=1):
346 | downsample = None
347 | if stride != 1 or self.inplanes != planes * block.expansion:
348 | downsample = nn.Sequential(
349 | nn.Conv2d(self.inplanes, planes * block.expansion,
350 | kernel_size=1, stride=stride, bias=False),
351 | nn.BatchNorm2d(planes * block.expansion),
352 | )
353 |
354 | layers = []
355 | layers.append(block(self.inplanes, planes, stride, downsample))
356 | self.inplanes = planes * block.expansion
357 | for i in range(1, blocks):
358 | layers.append(block(self.inplanes, planes))
359 |
360 | return nn.Sequential(*layers)
361 |
362 | def freeze_bn(self):
363 | '''Freeze BatchNorm layers.'''
364 | for layer in self.modules():
365 | if isinstance(layer, nn.BatchNorm2d):
366 | layer.eval()
367 |
368 | def freeze_first_layer(self):
369 | '''Freeze First layer'''
370 | for param in self.conv1.parameters():
371 | param.requires_grad = False
372 |
373 |
374 | def forward(self, inputs):
375 |
376 | if self.training:
377 | img_batch, annotations = inputs
378 | else:
379 | img_batch = inputs
380 |
381 | x = self.conv1(img_batch)
382 | x = self.bn1(x)
383 | x = self.relu(x)
384 | x = self.maxpool(x)
385 |
386 | x1 = self.layer1(x)
387 | x2 = self.layer2(x1)
388 | x3 = self.layer3(x2)
389 | x4 = self.layer4(x3)
390 |
391 | features = self.fpn([x1, x2, x3, x4])
392 | #context_features = [self.context[i](feature) for i,feature in enumerate(features)]
393 |
394 | # bbox_regressions = torch.cat([self.bboxHead[i](feature) for i,feature in enumerate(context_features)], dim=1)
395 | # ldm_regressions = torch.cat([self.ldmHead[i](feature) for i,feature in enumerate(context_features)], dim=1)
396 | # classifications = torch.cat([self.clsHead[i](feature) for i,feature in enumerate(context_features)],dim=1)
397 |
398 | bbox_regressions = torch.cat([self.bboxHead(feature) for feature in features], dim=1)
399 | ldm_regressions = torch.cat([self.ldmHead(feature) for feature in features], dim=1)
400 | classifications = torch.cat([self.clsHead(feature) for feature in features],dim=1)
401 |
402 | anchors = self.anchors(img_batch)
403 |
404 | if self.training:
405 | return self.losslayer(classifications, bbox_regressions,ldm_regressions, anchors, annotations)
406 | else:
407 | bboxes, landmarks = self.regressBoxes(anchors, bbox_regressions, ldm_regressions, img_batch)
408 |
409 | return classifications, bboxes, landmarks
410 |
411 | def resnet18(num_classes, pretrained=False, **kwargs):
412 | """Constructs a ResNet-18 model.
413 | Args:
414 | pretrained (bool): If True, returns a model pre-trained on ImageNet
415 | """
416 | model = ResNet(num_classes, BasicBlock, [2, 2, 2, 2], **kwargs)
417 | if pretrained:
418 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'], model_dir='.'), strict=False)
419 | return model
420 |
421 |
422 | def resnet34(num_classes, pretrained=False, **kwargs):
423 | """Constructs a ResNet-34 model.
424 | Args:
425 | pretrained (bool): If True, returns a model pre-trained on ImageNet
426 | """
427 | model = ResNet(num_classes, BasicBlock, [3, 4, 6, 3], **kwargs)
428 | if pretrained:
429 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'], model_dir='.'), strict=False)
430 | return model
431 |
432 |
433 | def resnet50(num_classes, pretrained=False, **kwargs):
434 | """Constructs a ResNet-50 model.
435 | Args:
436 | pretrained (bool): If True, returns a model pre-trained on ImageNet
437 | """
438 | model = ResNet(num_classes, Bottleneck, [3, 4, 6, 3], **kwargs)
439 | if pretrained:
440 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'], model_dir='.'), strict=False)
441 | return model
442 |
443 | def resnet101(num_classes, pretrained=False, **kwargs):
444 | """Constructs a ResNet-101 model.
445 | Args:
446 | pretrained (bool): If True, returns a model pre-trained on ImageNet
447 | """
448 | model = ResNet(num_classes, Bottleneck, [3, 4, 23, 3], **kwargs)
449 | if pretrained:
450 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'], model_dir='.'), strict=False)
451 | return model
452 |
453 |
454 | def resnet152(num_classes, pretrained=False, **kwargs):
455 | """Constructs a ResNet-152 model.
456 | Args:
457 | pretrained (bool): If True, returns a model pre-trained on ImageNet
458 | """
459 | model = ResNet(num_classes, Bottleneck, [3, 8, 36, 3], **kwargs)
460 | if pretrained:
461 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'], model_dir='.'), strict=False)
462 | return model
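463 | 
464 | # Minimal usage sketch (untested; assumptions: a CUDA device, since Anchors moves its output
465 | # to the GPU, and two logit classes for face / background):
466 | #
467 | #   model = resnet50(num_classes=2, pretrained=True).cuda()
468 | #   model.eval()                      # eval mode -> forward returns (scores, boxes, landmarks)
469 | #   with torch.no_grad():
470 | #       scores, boxes, landmarks = model(torch.randn(1, 3, 640, 640).cuda())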
--------------------------------------------------------------------------------
/network.torch:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ElvishElvis/68-Retinaface-Pytorch-version/18471d90c24753324c84aa415adef605f3866031/network.torch
--------------------------------------------------------------------------------
/out/stage_5_68_full_model_epoch_121.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ElvishElvis/68-Retinaface-Pytorch-version/18471d90c24753324c84aa415adef605f3866031/out/stage_5_68_full_model_epoch_121.pt
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | scikit_image==0.15.0
2 | numpy==1.16.4
3 | terminaltables==3.1.0
4 | torch==1.1.0
5 | tqdm==4.32.1
6 | opencv_python==4.1.0.25
7 | torchvision==0.3.0
8 | Pillow==6.2.0
9 | tensorboardX==1.8
11 |
--------------------------------------------------------------------------------
/test_argu.py:
--------------------------------------------------------------------------------
1 | # from dataloader import TrainDataset, collater, Resizer, PadToSquare,Color,Rotate,RandomErasing,RandomFlip,ValDataset
2 | # import torchvision.transforms as transforms
3 | # import cv2
4 | # import copy
5 | # import torch.nn.functional as F
6 | # import torch
7 | # from PIL import Image
8 | # import numpy as np
9 | # import os
10 | # import skimage
11 |
12 |
13 |
14 | # dataset_train = ValDataset('./widerface/train/label.txt',transform=transforms.Compose([Resizer(640),PadToSquare()]))
15 | # list__=dataset_train[99]
16 | # img=np.array(list__['img'])
17 | # print(img.shape)
18 | # # img = skimage.io.imread("/versa/elvishelvis/RetinaFace_Pytorch/\
19 | # # CelebA/Img/img_celeba.7z/img_celeba/101299.jpg")
20 |
21 | # box=np.array(list__['annot'])[0]
22 |
23 |
24 | # img=cv2.circle(img,(int(box[0]),int(box[1])),radius=1,color=(0,255,0),thickness=10)
25 | # img=cv2.circle(img,(int(box[2]),int(box[3])),radius=1,color=(255,0,0),thickness=10)
26 | # img=cv2.rectangle(img,(int(box[0]),int(box[1])),(int(box[2]),int(box[3])),(0,0,255),thickness=2)
27 |
28 | # for i in range(4,140,2):
29 | # try:
30 | # if(i>=100):
31 | # img=cv2.circle(img,(int(box[i]),int(box[i+1])),radius=1,color=(255,255,255),thickness=2)
32 | # else:
33 | # img=cv2.circle(img,(int(box[i]),int(box[i+1])),radius=1,color=(0,0,255),thickness=2)
34 | # # img=cv2.circle(img,(int(box[i+2]),int(box[i+3])),radius=1,color=(255,0,0),thickness=2)
35 | # # img=cv2.circle(img,(int(box[i+4]),int(box[i+5])),radius=1,color=(0,255,0),thickness=2)
36 | # # img=cv2.circle(img,(int(box[i+6]),int(box[i+7])),radius=1,color=(255,255,0),thickness=2)
37 | # except:
38 | # break
39 | # # img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
40 | # cv2.imwrite('sdfas33df.jpg',img)
41 |
42 |
43 | # import torch
44 | # from torch import nn
45 |
46 |
47 | # # torch.log and math.log is e based
48 | # class AdaptiveWingLoss(nn.Module):
49 | # def __init__(self, omega=14, theta=0.5, epsilon=1, alpha=2.1):
50 | # super(AdaptiveWingLoss, self).__init__()
51 | # self.omega = omega
52 | # self.theta = theta
53 | # self.epsilon = epsilon
54 | # self.alpha = alpha
55 |
56 | # def forward(self, pred, target):
57 | # '''
58 | # :param pred: BxNxHxH
59 | # :param target: BxNxHxH
60 | # :return:
61 | # '''
62 |
63 | # y = target
64 | # y_hat = pred
65 | # delta_y = (y - y_hat).abs()
66 | # delta_y1 = delta_y[delta_y < self.theta]
67 | # delta_y2 = delta_y[delta_y >= self.theta]
68 | # y1 = y[delta_y < self.theta]
69 | # y2 = y[delta_y >= self.theta]
70 | # loss1 = self.omega * torch.log(1 + torch.pow(delta_y1 / self.omega, self.alpha - y1))
71 | # A = self.omega * (1 / (1 + torch.pow(self.theta / self.epsilon, self.alpha - y2))) * (self.alpha - y2) * (
72 | # torch.pow(self.theta / self.epsilon, self.alpha - y2 - 1)) * (1 / self.epsilon)
73 | # C = self.theta * A - self.omega * torch.log(1 + torch.pow(self.theta / self.epsilon, self.alpha - y2))
74 | # loss2 = A * delta_y2 - C
75 | # return (loss1.sum() + loss2.sum()) / (len(loss1) + len(loss2))
76 |
77 | # if __name__ == "__main__":
78 | # loss_func = AdaptiveWingLoss()
79 | # y = torch.rand(3,136)
80 | # y_hat = torch.rand(3,136)
81 | # print(y_hat)
82 | # y_hat.requires_grad_(True)
83 | # loss = loss_func(y_hat, y)
84 | # loss.backward()
85 | # print(loss)
86 |
87 |
88 | import torch
89 | import math
90 | import torch.nn as nn
91 | class WingLoss(nn.Module):
92 | def __init__(self, omega=1, epsilon=2):
93 | super(WingLoss, self).__init__()
94 | self.omega = omega
95 | self.epsilon = epsilon
96 |
97 |     def forward(self, pred, target):
98 |         y = target
99 |         y_hat = pred
100 |         delta_y = (y - y_hat).abs()
101 |         # split the errors at omega: small errors use the log branch, large ones the linear branch
102 |         delta_y1 = delta_y[delta_y < self.omega]
103 |         delta_y2 = delta_y[delta_y >= self.omega]
104 |         loss1 = self.omega * torch.log(1 + delta_y1 / self.epsilon)
105 |         # C keeps the two branches continuous at |error| == omega
106 |         C = self.omega - self.omega * math.log(1 + self.omega / self.epsilon)
107 |         loss2 = delta_y2 - C
108 |         return (loss1.sum() + loss2.sum()) / (len(loss1) + len(loss2))
109 | 
110 | # quick numerical sanity check on random 68-landmark (136-value) vectors
111 | criterion = WingLoss()
112 | pred = torch.rand(1, 136) * 3
113 | target = torch.rand(1, 136)
114 | print(criterion(pred, target))
--------------------------------------------------------------------------------
/torchvision_model.py:
--------------------------------------------------------------------------------
255 | import torch
256 | import torch.nn as nn
257 | import torchvision.models.detection.backbone_utils as backbone_utils
258 | import torchvision.models.resnet as resnet
259 | import torchvision.models._utils as _utils
260 | import torch.nn.functional as F
261 | from collections import OrderedDict
262 | from anchors import Anchors
263 | from utils import RegressionTransform
264 | import losses
265 | from mobile import mobileV1
266 |
267 | class ContextModule(nn.Module):
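    | # Context module (as in SSH / RetinaFace): a plain 3x3 branch plus two stacked-3x3
    | # branches (roughly 5x5 and 7x7 receptive fields); the branch outputs are concatenated and ReLU'd.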
268 | def __init__(self,in_channels=256):
269 | super(ContextModule,self).__init__()
270 | self.det_conv1 = nn.Sequential(
271 | nn.Conv2d(in_channels,in_channels,kernel_size=3,stride=1,padding=1),
272 | nn.BatchNorm2d(in_channels)
273 | )
274 | self.det_context_conv1 = nn.Sequential(
275 | nn.Conv2d(in_channels,in_channels//2,kernel_size=3,stride=1,padding=1),
276 | nn.BatchNorm2d(in_channels//2),
277 | nn.ReLU(inplace=True)
278 | )
279 | self.det_context_conv2 = nn.Sequential(
280 | nn.Conv2d(in_channels//2,in_channels//2,kernel_size=3,stride=1,padding=1),
281 | nn.BatchNorm2d(in_channels//2)
282 | )
283 | self.det_context_conv3_1 = nn.Sequential(
284 | nn.Conv2d(in_channels//2,in_channels//2,kernel_size=3,stride=1,padding=1),
285 | nn.BatchNorm2d(in_channels//2),
286 | nn.ReLU(inplace=True)
287 | )
288 | self.det_context_conv3_2 = nn.Sequential(
289 | nn.Conv2d(in_channels//2,in_channels//2,kernel_size=3,stride=1,padding=1),
290 | nn.BatchNorm2d(in_channels//2)
291 | )
292 | self.det_concat_relu = nn.ReLU(inplace=True)
293 |
294 | def forward(self,x):
295 | x1 = self.det_conv1(x)
296 | x_ = self.det_context_conv1(x)
297 | x2 = self.det_context_conv2(x_)
298 | x3_ = self.det_context_conv3_1(x_)
299 | x3 = self.det_context_conv3_2(x3_)
300 |
301 | out = torch.cat((x1,x2,x3),1)
302 | act_out = self.det_concat_relu(out)
303 |
304 | return act_out
305 |
306 | class FeaturePyramidNetwork(nn.Module):
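    | # FPN: 1x1 lateral convs project each backbone level to out_channels, the top-down path
    | # upsamples (nearest) and sums with the lateral feature, a 3x3 conv aggregates,
    | # and a ContextModule is applied to every output level.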
307 | def __init__(self,in_channels_list,out_channels):
308 | super(FeaturePyramidNetwork,self).__init__()
309 | self.lateral_blocks = nn.ModuleList()
310 | self.context_blocks = nn.ModuleList()
311 | self.aggr_blocks = nn.ModuleList()
312 | for i, in_channels in enumerate(in_channels_list):
313 | if in_channels == 0:
314 | continue
315 | lateral_block_module = nn.Sequential(
316 | nn.Conv2d(in_channels,out_channels,kernel_size=1,stride=1,padding=0),
317 | nn.BatchNorm2d(out_channels),
318 | nn.ReLU(inplace=True)
319 | )
320 | aggr_block_module = nn.Sequential(
321 | nn.Conv2d(out_channels,out_channels,kernel_size=3,stride=1,padding=1),
322 | nn.BatchNorm2d(out_channels),
323 | nn.ReLU(inplace=True)
324 | )
325 | context_block_module = ContextModule(out_channels)
326 | self.lateral_blocks.append(lateral_block_module)
327 | self.context_blocks.append(context_block_module)
328 | if i > 0 :
329 | self.aggr_blocks.append(aggr_block_module)
330 |
331 | # initialize params of fpn layers
332 | for m in self.modules():
333 | if isinstance(m,nn.Conv2d):
334 | nn.init.kaiming_uniform_(m.weight, a=1)
335 | nn.init.constant_(m.bias, 0)
336 |
337 | def forward(self,x):
338 | names = list(x.keys())
339 | x = list(x.values())
340 |
341 | last_inner = self.lateral_blocks[-1](x[-1])
342 | results = []
343 | results.append(self.context_blocks[-1](last_inner))
344 | for feature, lateral_block, context_block, aggr_block in zip(
345 | x[:-1][::-1], self.lateral_blocks[:-1][::-1], self.context_blocks[:-1][::-1], self.aggr_blocks[::-1]
346 | ):
347 | if not lateral_block:
348 | continue
349 | lateral_feature = lateral_block(feature)
350 | feat_shape = lateral_feature.shape[-2:]
351 | inner_top_down = F.interpolate(last_inner, size=feat_shape, mode="nearest")
352 | last_inner = lateral_feature + inner_top_down
353 | last_inner = aggr_block(last_inner)
354 | results.insert(0, context_block(last_inner))
355 |
356 | # make it back an OrderedDict
357 | out = OrderedDict([(k, v) for k, v in zip(names, results)])
358 |
359 | return out
360 |
361 | class ClassHead(nn.Module):
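    | # Per-anchor face/background classifier: 1x1 conv to num_anchors*2 channels,
    | # reshaped to (batch, H*W*num_anchors, 2) log-probabilities.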
362 | def __init__(self,inchannels=64,num_anchors=3):
363 | super(ClassHead,self).__init__()
364 | self.num_anchors = num_anchors
365 | self.conv1x1 = nn.Conv2d(inchannels,self.num_anchors*2,kernel_size=(1,1),stride=1,padding=0)
366 | self.output_act = nn.LogSoftmax(dim=-1)
367 |
368 | def forward(self,x):
369 | out = self.conv1x1(x)
370 | out = out.permute(0,2,3,1)
371 | b, h, w, c = out.shape
372 | out = out.view(b, h, w, self.num_anchors, 2)
373 | out = self.output_act(out)
374 |
375 | return out.contiguous().view(out.shape[0], -1, 2)
376 |
377 | class BboxHead(nn.Module):
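    | # Per-anchor box regressor: 4 values (dx, dy, dw, dh) per anchor.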
378 | def __init__(self,inchannels=64,num_anchors=3):
379 | super(BboxHead,self).__init__()
380 | self.conv1x1 = nn.Conv2d(inchannels,num_anchors*4,kernel_size=(1,1),stride=1,padding=0)
381 |
382 | def forward(self,x):
383 | out = self.conv1x1(x)
384 | out = out.permute(0,2,3,1)
385 |
386 | return out.contiguous().view(out.shape[0], -1, 4)
387 |
388 | class LandmarkHead(nn.Module):
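    | # Per-anchor landmark regressor: 136 values = 68 (x, y) landmark offsets per anchor.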
389 | def __init__(self,inchannels=64,num_anchors=3):
390 | super(LandmarkHead,self).__init__()
391 | self.conv1x1 = nn.Conv2d(inchannels,num_anchors*136,kernel_size=(1,1),stride=1,padding=0)
392 |
393 | def forward(self,x):
394 | out = self.conv1x1(x)
395 | out = out.permute(0,2,3,1)
396 |
397 | return out.contiguous().view(out.shape[0], -1, 136)
398 |
399 | class RetinaFace(nn.Module):
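    | # RetinaFace: mobileV1 backbone + FPN with context modules + per-level class/bbox/landmark heads;
    | # returns losses in training mode and decoded detections in eval mode.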
400 | def __init__(self,backbone,return_layers,anchor_nums=3):
401 | super(RetinaFace,self).__init__()
402 | # if backbone_name == 'resnet50':
403 | # self.backbone = resnet.resnet50(pretrained)
404 | # self.backbone = resnet.__dict__[backbone_name](pretrained=pretrained)
405 | # self.return_layers = {'layer1': 0, 'layer2': 1, 'layer3': 2, 'layer4': 3}
406 |         assert backbone, 'Backbone cannot be None!'
407 |         assert len(return_layers) > 0, 'There must be at least one return layer'
408 | self.body = mobileV1()
409 | in_channels_stage2 = 32
410 | # in_channels_stage2 = 64
411 | in_channels_list = [
412 | #in_channels_stage2,
413 | in_channels_stage2 * 2,
414 | in_channels_stage2 * 4,
415 | in_channels_stage2 * 8,
416 | ]
417 | out_channels = 32
418 | self.fpn = FeaturePyramidNetwork(in_channels_list,out_channels)
419 | # self.ClassHead = ClassHead()
420 | # self.BboxHead = BboxHead()
421 | # self.LandmarkHead = LandmarkHead()
422 | self.ClassHead = self._make_class_head()
423 | self.BboxHead = self._make_bbox_head()
424 | self.LandmarkHead = self._make_landmark_head()
425 | self.anchors = Anchors()
426 | self.regressBoxes = RegressionTransform()
427 | self.losslayer = losses.LossLayer()
428 |
429 | def _make_class_head(self,fpn_num=3,inchannels=64,anchor_num=3):
430 | classhead = nn.ModuleList()
431 | for i in range(fpn_num):
432 | classhead.append(ClassHead(inchannels,anchor_num))
433 | return classhead
434 |
435 | def _make_bbox_head(self,fpn_num=3,inchannels=64,anchor_num=3):
436 | bboxhead = nn.ModuleList()
437 | for i in range(fpn_num):
438 | bboxhead.append(BboxHead(inchannels,anchor_num))
439 | return bboxhead
440 |
441 | def _make_landmark_head(self,fpn_num=3,inchannels=64,anchor_num=3):
442 | landmarkhead = nn.ModuleList()
443 | for i in range(fpn_num):
444 | landmarkhead.append(LandmarkHead(inchannels,anchor_num))
445 | return landmarkhead
446 |
447 | def freeze_bn(self):
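    |         '''Freeze BatchNorm layers.'''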
448 | for layer in self.modules():
449 | if isinstance(layer, nn.BatchNorm2d):
450 | layer.eval()
451 |
452 | def forward(self,inputs):
453 | if self.training:
454 | img_batch, annotations = inputs
455 | else:
456 | img_batch = inputs
457 |
458 | out = self.body(img_batch)
459 | features = self.fpn(out)
460 |
461 | # bbox_regressions = torch.cat([self.BboxHead(feature) for feature in features.values()], dim=1)
462 | # ldm_regressions = torch.cat([self.LandmarkHead(feature) for feature in features.values()], dim=1)
463 | # classifications = torch.cat([self.ClassHead(feature) for feature in features.values()],dim=1)
464 | bbox_regressions = torch.cat([self.BboxHead[i](feature) for i, feature in enumerate(features.values())], dim=1)
465 | ldm_regressions = torch.cat([self.LandmarkHead[i](feature) for i, feature in enumerate(features.values())], dim=1)
466 | classifications = torch.cat([self.ClassHead[i](feature) for i, feature in enumerate(features.values())],dim=1)
467 |
468 | anchors = self.anchors(img_batch)
469 |
470 | if self.training:
471 | return self.losslayer(classifications, bbox_regressions,ldm_regressions, anchors, annotations)
472 | else:
473 | bboxes, landmarks = self.regressBoxes(anchors, bbox_regressions, ldm_regressions, img_batch)
474 |
475 | return classifications, bboxes, landmarks
476 |
477 |
478 | def create_retinaface(return_layers,backbone_name='resnet50',anchors_num=3,pretrained=True):
479 | # backbone = resnet.__dict__[backbone_name](pretrained=pretrained)
480 |     backbone = 1  # placeholder: the actual mobileV1 backbone is constructed inside RetinaFace
481 | # freeze layer1
482 | # for name, parameter in backbone.named_parameters():
483 | # # if 'layer2' not in name and 'layer3' not in name and 'layer4' not in name:
484 | # # parameter.requires_grad_(False)
485 | # if name == 'conv1.weight':
486 | # # print('freeze first conv layer...')
487 | # parameter.requires_grad_(False)
488 |
489 | model = RetinaFace(backbone,return_layers,anchor_nums=3)
490 |
491 | return model
492 |
493 |
494 |
495 |
496 |
497 |
498 |
499 |
500 |
501 |
502 |
503 |
504 |
505 |
506 |
507 |
508 |
509 |
510 |
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import numpy as np
3 | import torch
4 | import torch.nn as nn
5 | import torch.optim as optim
6 | from torchvision import datasets, models, transforms
7 | from dataloader import TrainDataset, collater, Resizer, PadToSquare,Color,Rotate,RandomErasing,RandomFlip, ValDataset
8 | from torch.utils.data import Dataset, DataLoader, random_split
9 | from terminaltables import AsciiTable, DoubleTable, SingleTable
10 | # from tensorboardX import SummaryWriter
11 | from torch.optim import lr_scheduler
12 | import torch.distributed as dist
13 | import eval_widerface
14 | import torchvision
15 | import model
16 | import os
17 | from torch.utils.data.distributed import DistributedSampler
18 | import torchvision_model
19 |
20 |
21 | def get_args():
22 | parser = argparse.ArgumentParser(description="Train program for retinaface.")
23 | parser.add_argument('--data_path', type=str,default='./widerface' ,help='Path for dataset,default WIDERFACE')
24 | parser.add_argument('--batch', type=int, default=32, help='Batch size')
25 | parser.add_argument('--epochs', type=int, default=121, help='Max training epochs')
26 | parser.add_argument('--shuffle', type=bool, default=True, help='Shuffle dataset or not')
27 | parser.add_argument('--img_size', type=int, default=640, help='Input image size')
28 |     parser.add_argument('--verbose', type=int, default=20, help='Print a log table every N training iterations')
29 | parser.add_argument('--save_step', type=int, default=10, help='Save every save_step epochs')
30 | parser.add_argument('--eval_step', type=int, default=10, help='Evaluate every eval_step epochs')
31 | parser.add_argument('--save_path', type=str, default='./out', help='Model save path')
32 |     parser.add_argument('--training', help='training mode or not (True for training, False for eval)', type=lambda s: str(s).lower() not in ('false', '0'), default=True)
33 | args = parser.parse_args()
34 | print(args)
35 |
36 | return args
37 |
38 |
39 | def main():
40 | args = get_args()
41 | if not os.path.exists(args.save_path):
42 | os.mkdir(args.save_path)
43 | log_path = os.path.join(args.save_path,'log')
44 | if not os.path.exists(log_path):
45 | os.mkdir(log_path)
46 |
47 |
48 | data_path = args.data_path
49 | # dataset_train = TrainDataset(train_path,transform=transforms.Compose([RandomCroper(),()]))
50 | dataset_train = TrainDataset('./widerface/train/label.txt',transform=transforms.Compose([RandomErasing(),RandomFlip(),Rotate(),Color(),Resizer(),PadToSquare()]))
51 | # dataset_train = TrainDataset('./widerface/train/label.txt',transform=transforms.Compose([Resizer(),PadToSquare()]))
52 | dataloader_train = DataLoader(dataset_train, num_workers=8, batch_size=args.batch, collate_fn=collater,shuffle=True)
53 | # dataset_val = ValDataset(val_path,transform=transforms.Compose([RandomCroper()]))
54 | dataset_val = TrainDataset('./widerface/train/label.txt',transform=transforms.Compose([Resizer(640),PadToSquare()]))
55 | dataloader_val = DataLoader(dataset_val, num_workers=8, batch_size=args.batch, collate_fn=collater)
56 |
57 | total_batch = len(dataloader_train)
58 |
59 |
60 |
61 | # Create torchvision model
62 | return_layers = {'layer2':1,'layer3':2,'layer4':3}
63 | retinaface = torchvision_model.create_retinaface(return_layers)
64 | retinaface_ = retinaface.cuda()
65 | retinaface = torch.nn.DataParallel(retinaface_).cuda()
66 | retinaface.training = True
67 | base_lr=1e-7
68 |
69 | # pre_train = torch.load('network.torch')
70 | # cur=retinaface.state_dict()
71 | # for k, v in cur.items():
72 | # if k[12:] in pre_train:
73 | # print(k[12:])
74 | # cur[k]=pre_train[k[12:]]
75 | # retinaface.load_state_dict(cur)
76 | retinaface.load_state_dict(torch.load("/versa/elvishelvis/RetinaYang/out/stage_5_68_full_model_epoch_121.pt"))
77 | lr=base_lr
78 | # optimizer=torch.optim.Adam(retinaface.parameters(),lr=lr)
79 | # fix encoder
80 | for name, value in retinaface.named_parameters():
81 | if 'Landmark' in name:
82 | value.requires_grad = False
83 | lr_cos = lambda n: 0.5 * (1 + np.cos((n) / (args.epochs) * np.pi)) * base_lr
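    |     # cosine-annealing schedule: lr decays from base_lr at epoch 0 towards 0 at args.epochs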
84 | params = filter(lambda p: p.requires_grad==True, retinaface.parameters())
85 | body=filter(lambda p: p.requires_grad==False, retinaface.parameters())
86 | optimizer = torch.optim.Adam([
87 | {'params': body, 'lr': lr*3},
88 | {'params': params, 'lr': lr}]
89 | )
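    |     # note: the 'body' group holds the frozen (requires_grad=False) Landmark parameters,
    |     # so only the 'params' group is effectively updated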
90 |     # evaluate the current model and exit when not in training mode
91 |     if not args.training:
92 |         print("Evaluation only (training disabled).")
93 |         recall, precision, landmark, miss = eval_widerface.evaluate(dataloader_val, retinaface)
94 |         print('Recall:', recall)
95 |         print('Precision:', precision)
96 |         print("landmark: " + str(landmark))
97 |         print("miss: " + str(miss))
98 |         return
99 | ##
100 | print('Start to train.')
101 |
102 | epoch_loss = []
103 | iteration = 0
104 | retinaface=retinaface.cuda()
105 | for epoch in range(args.epochs):
106 | lr=lr_cos(epoch)
107 |
108 | retinaface.train()
109 |
110 | # Training
111 | for iter_num,data in enumerate(dataloader_train):
112 | optimizer.zero_grad()
113 | classification_loss, bbox_regression_loss,ldm_regression_loss = retinaface([data['img'].cuda().float(), data['annot']])
114 | classification_loss = classification_loss.mean()
115 | bbox_regression_loss = bbox_regression_loss.mean()
116 | ldm_regression_loss = ldm_regression_loss.mean()
117 |
118 | # loss = classification_loss + 1.0 * bbox_regression_loss + 0.5 * ldm_regression_loss
119 | loss = classification_loss + 0.15*bbox_regression_loss + 0.25*ldm_regression_loss
120 |
121 | loss.backward()
122 | optimizer.step()
123 |
124 | if iter_num % args.verbose == 0:
125 | log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (epoch, args.epochs, iter_num, total_batch)
126 | table_data = [
127 | ['loss name','value'],
128 | ['total_loss',str(loss.item())],
129 | ['classification',str(classification_loss.item())],
130 | ['bbox',str(bbox_regression_loss.item())],
131 | ['landmarks',str(ldm_regression_loss.item())]
132 | ]
133 | table = AsciiTable(table_data)
134 | log_str +=table.table
135 | print(log_str)
136 | iteration +=1
137 |
138 |
139 | # Eval
140 | if epoch % args.eval_step == 0:
141 | with open("aaa.txt", 'a') as f:
142 | f.write('-------- RetinaFace Pytorch --------'+'\n')
143 | f.write ('Evaluating epoch {}'.format(epoch)+'\n')
144 | f.write('total_loss:'+str(loss.item())+'\n')
145 | f.write('classification'+str(classification_loss.item())+'\n')
146 | f.write('bbox'+str(bbox_regression_loss.item())+'\n')
147 | f.write('landmarks'+str(ldm_regression_loss.item())+'\n')
148 |
149 | f.close()
150 | print('-------- RetinaFace Pytorch --------')
151 | print ('Evaluating epoch {}'.format(epoch))
152 | recall, precision, landmakr,miss= eval_widerface.evaluate(dataloader_val,retinaface)
153 | print('Recall:',recall)
154 | print('Precision:',precision)
155 | print("landmark: ",str(landmakr))
156 | print("miss: "+ str(miss))
157 |
158 | with open("aaa.txt", 'a') as f:
159 | f.write('-------- RetinaFace Pytorch --------(not pretrain)'+'\n')
160 | f.write ('Evaluating epoch {}'.format(epoch)+'\n')
161 | f.write('Recall:'+str(recall)+'\n')
162 | f.write('Precision:'+str(precision)+'\n')
163 | f.write("landmark: "+str(landmakr)+'\n')
164 | f.write("miss: "+ str(miss)+'\n')
165 | f.close()
166 | # Save model
167 | if (epoch) % args.save_step == 0:
168 | torch.save(retinaface.state_dict(), args.save_path + '/stage_5_68_full_model_epoch_{}.pt'.format(epoch + 1))
169 |
170 | # writer.close()
171 |
172 |
173 | if __name__=='__main__':
174 | main()
175 |
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
203 | import torch
204 | import torch.nn as nn
205 | import numpy as np
206 |
207 | def conv3x3(in_planes, out_planes, stride=1):
208 | """3x3 convolution with padding"""
209 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
210 | padding=1, bias=False)
211 |
212 | class BasicBlock(nn.Module):
213 | expansion = 1
214 |
215 | def __init__(self, inplanes, planes, stride=1, downsample=None):
216 | super(BasicBlock, self).__init__()
217 | self.conv1 = conv3x3(inplanes, planes, stride)
218 | self.bn1 = nn.BatchNorm2d(planes)
219 | self.relu = nn.ReLU(inplace=True)
220 | self.conv2 = conv3x3(planes, planes)
221 | self.bn2 = nn.BatchNorm2d(planes)
222 | self.downsample = downsample
223 | self.stride = stride
224 |
225 | def forward(self, x):
226 | residual = x
227 |
228 | out = self.conv1(x)
229 | out = self.bn1(out)
230 | out = self.relu(out)
231 |
232 | out = self.conv2(out)
233 | out = self.bn2(out)
234 |
235 | if self.downsample is not None:
236 | residual = self.downsample(x)
237 |
238 | out += residual
239 | out = self.relu(out)
240 |
241 | return out
242 |
243 |
244 | class Bottleneck(nn.Module):
245 | expansion = 4
246 |
247 | def __init__(self, inplanes, planes, stride=1, downsample=None):
248 | super(Bottleneck, self).__init__()
249 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
250 | self.bn1 = nn.BatchNorm2d(planes)
251 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
252 | padding=1, bias=False)
253 | self.bn2 = nn.BatchNorm2d(planes)
254 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
255 | self.bn3 = nn.BatchNorm2d(planes * 4)
256 | self.relu = nn.ReLU(inplace=True)
257 | self.downsample = downsample
258 | self.stride = stride
259 |
260 | def forward(self, x):
261 | residual = x
262 |
263 | out = self.conv1(x)
264 | out = self.bn1(out)
265 | out = self.relu(out)
266 |
267 | out = self.conv2(out)
268 | out = self.bn2(out)
269 | out = self.relu(out)
270 |
271 | out = self.conv3(out)
272 | out = self.bn3(out)
273 |
274 | if self.downsample is not None:
275 | residual = self.downsample(x)
276 |
277 | out += residual
278 | out = self.relu(out)
279 |
280 | return out
281 |
282 | class RegressionTransform(nn.Module):
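    | # Decodes anchor-relative box and landmark regression outputs into absolute image
    | # coordinates, clipped to the image boundaries.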
283 | def __init__(self,mean=None,std_box=None,std_ldm=None):
284 | super(RegressionTransform, self).__init__()
285 | if mean is None:
286 | #self.mean = torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32)).cuda()
287 | self.mean = torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32))
288 | else:
289 | self.mean = mean
290 | if std_box is None:
291 | #self.std_box = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32)).cuda()
292 | self.std_box = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32))
293 | else:
294 | self.std_box = std_box
295 | if std_ldm is None:
296 | #self.std_ldm = (torch.ones(1,10) * 0.1).cuda()
297 | self.std_ldm = (torch.ones(1,136) * 0.1)
298 |
299 | def forward(self,anchors,bbox_deltas,ldm_deltas,img):
300 | widths = anchors[:, :, 2] - anchors[:, :, 0]
301 | heights = anchors[:, :, 3] - anchors[:, :, 1]
302 | ctr_x = anchors[:, :, 0] + 0.5 * widths
303 | ctr_y = anchors[:, :, 1] + 0.5 * heights
304 |
305 | # Rescale
306 | ldm_deltas = ldm_deltas * self.std_ldm.cuda()
307 | bbox_deltas = bbox_deltas * self.std_box.cuda()
308 |
309 | bbox_dx = bbox_deltas[:, :, 0]
310 | bbox_dy = bbox_deltas[:, :, 1]
311 | bbox_dw = bbox_deltas[:, :, 2]
312 | bbox_dh = bbox_deltas[:, :, 3]
313 |
314 | # get predicted boxes
315 | pred_ctr_x = ctr_x + bbox_dx * widths
316 | pred_ctr_y = ctr_y + bbox_dy * heights
317 | pred_w = torch.exp(bbox_dw) * widths
318 | pred_h = torch.exp(bbox_dh) * heights
319 |
320 | pred_boxes_x1 = pred_ctr_x - 0.5 * pred_w
321 | pred_boxes_y1 = pred_ctr_y - 0.5 * pred_h
322 | pred_boxes_x2 = pred_ctr_x + 0.5 * pred_w
323 | pred_boxes_y2 = pred_ctr_y + 0.5 * pred_h
324 |
325 |
326 | pred_landmarks=[]
327 |
328 | for i in range(0,136):
329 | if i %2==0:
330 | candidate=ctr_x + ldm_deltas[:,:,i] * widths
331 | else:
332 | candidate=ctr_y + ldm_deltas[:,:,i] * heights
333 | pred_landmarks.append(candidate)
334 |
335 | pred_landmarks=torch.stack((pred_landmarks),dim=2)
336 | pred_boxes = torch.stack([pred_boxes_x1, pred_boxes_y1, pred_boxes_x2, pred_boxes_y2], dim=2)
337 |
338 |
339 | # clip bboxes and landmarks
340 | B,C,H,W = img.shape
341 |
342 | pred_boxes[:,:,::2] = torch.clamp(pred_boxes[:,:,::2], min=0, max=W)
343 | pred_boxes[:,:,1::2] = torch.clamp(pred_boxes[:,:,1::2], min=0, max=H)
344 | pred_landmarks[:,:,::2] = torch.clamp(pred_landmarks[:,:,::2], min=0, max=W)
345 | pred_landmarks[:,:,1::2] = torch.clamp(pred_landmarks[:,:,1::2], min=0, max=H)
346 |
347 | return pred_boxes, pred_landmarks
348 |
349 |
350 | def nms(boxes,scores,iou_threshold):
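    |     # greedy NMS: repeatedly keep the highest-scoring box and discard boxes whose
    |     # IoU with it is >= iou_threshold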
351 | boxes = boxes.cpu().numpy()
352 | score = scores.cpu().numpy()
353 |
354 | # coordinates of bounding boxes
355 | start_x = boxes[:, 0]
356 | start_y = boxes[:, 1]
357 | end_x = boxes[:, 2]
358 | end_y = boxes[:, 3]
359 |
360 | # Picked bounding boxes
361 | picked_boxes = []
362 | picked_score = []
363 |
364 | # Compute areas of bounding boxes
365 | areas = (end_x - start_x + 1) * (end_y - start_y + 1)
366 |
367 | # Sort by confidence score of bounding boxes
368 | order = np.argsort(score)
369 |
370 | # Iterate bounding boxes
371 | while order.size > 0:
372 | # The index of largest confidence score
373 | index = order[-1]
374 |
375 | # Pick the bounding box with largest confidence score
376 | picked_boxes.append(boxes[index])
377 | picked_score.append(score[index])
381 | # Compute ordinates of intersection-over-union(IOU)
382 | x1 = np.maximum(start_x[index], start_x[order[:-1]])
383 | x2 = np.minimum(end_x[index], end_x[order[:-1]])
384 | y1 = np.maximum(start_y[index], start_y[order[:-1]])
385 | y2 = np.minimum(end_y[index], end_y[order[:-1]])
386 |
387 | # Compute areas of intersection-over-union
388 | w = np.maximum(0.0, x2 - x1 + 1)
389 | h = np.maximum(0.0, y2 - y1 + 1)
390 | intersection = w * h
391 |
392 | # Compute the ratio between intersection and union
393 | ratio = intersection / (areas[index] + areas[order[:-1]] - intersection)
394 |
395 | left = np.where(ratio < iou_threshold)
396 | order = order[left]
397 |
398 | picked_boxes = torch.Tensor(picked_boxes)
399 | picked_score = torch.Tensor(picked_score)
400 | return picked_boxes, picked_score
401 |
402 |
403 |
--------------------------------------------------------------------------------
/video_detect.py:
--------------------------------------------------------------------------------
1 |
2 | import argparse
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | import numpy as np
7 | import skimage
8 | from skimage import io
9 | from PIL import Image
10 | import cv2
11 | import torchvision
12 | import eval_widerface
13 | import torchvision_model
14 | import os
16 | from dataloader import ValDataset, Resizer, PadToSquare,ValDataset_CeleB
17 | from torchvision import datasets, models, transforms
18 | os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
19 | def pad_to_square(img, pad_value):
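    |     # pad the shorter side so the image becomes square; returns the padded image and the padding applied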
20 | _, h, w = img.shape
21 | dim_diff = np.abs(h - w)
22 | # (upper / left) padding and (lower / right) padding
23 | pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
24 | # Determine padding
25 | pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
26 | # Add padding
27 | img = F.pad(img, pad, "constant", value=pad_value)
28 |
29 | return img, pad
30 |
31 | def resize(image, size):
32 | image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0)
33 | return image
34 |
35 | def get_args():
36 | parser = argparse.ArgumentParser(description="Detect program for retinaface.")
37 | parser.add_argument('--image_path', type=str, default='WechatIMG10.jpeg', help='Path for image to detect')
38 | parser.add_argument('--model_path', type=str, help='Path for model',default="/versa/elvishelvis/RetinaYang/out/stage_4_68_full_model_epoch_61.pt")
39 | parser.add_argument('--save_path', type=str, default='./out', help='Path for result image')
40 | parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
41 | args = parser.parse_args()
42 |
43 | return args
44 |
45 | def main():
46 | args = get_args()
47 |
48 | # Create the model
49 | # if args.depth == 18:
50 | # RetinaFace = model.resnet18(num_classes=2, pretrained=True)
51 | # elif args.depth == 34:
52 | # RetinaFace = model.resnet34(num_classes=2, pretrained=True)
53 | # elif args.depth == 50:
54 | # RetinaFace = model.resnet50(num_classes=2, pretrained=True)
55 | # elif args.depth == 101:
56 | # RetinaFace = model.resnet101(num_classes=2, pretrained=True)
57 | # elif args.depth == 152:
58 | # RetinaFace = model.resnet152(num_classes=2, pretrained=True)
59 | # else:
60 | # raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')
61 |
62 | # Create torchvision model
63 |
64 | return_layers = {'layer2':1,'layer3':2,'layer4':3}
65 | RetinaFace = torchvision_model.create_retinaface(return_layers)
66 | device= torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
67 |
68 | # Load trained model
69 | retina_dict = RetinaFace.state_dict()
70 | pre_state_dict = torch.load('stage_5_68_full_model_epoch_121.pt',map_location='cpu')
71 | pretrained_dict = {k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict}
72 | RetinaFace.load_state_dict(pretrained_dict)
73 | RetinaFace.to(device)
74 |
75 | import time
76 |
77 | video = cv2.VideoCapture(0)
78 | # Read image
79 | while True:
80 | start=time.time()
81 | ret, img = video.read()
82 | img=cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
83 | img = torch.from_numpy(img)
84 | img = img.permute(2,0,1)
85 | resized_img=img.float()
86 | # resized_img = resize(img.float(),(360,640))
87 | # print(resized_img.shape)
88 | input_img = resized_img.float().unsqueeze(0)
89 |
90 | picked_boxes, picked_landmarks = eval_widerface.get_detections(input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)
91 | # print(picked_boxes)
92 | np_img = resized_img.cpu().permute(1,2,0).numpy()
94 | img = cv2.cvtColor(np_img.astype(np.uint8),cv2.COLOR_BGR2RGB)
95 |
96 | for j, boxes in enumerate(picked_boxes):
97 | if boxes is not None:
98 | for box,landmark in zip(boxes,picked_landmarks[j]):
99 |                     cv2.rectangle(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 0, 255), thickness=2)
100 |                     for i in range(0, 136, 2):
101 |                         cv2.circle(img, (int(landmark[i]), int(landmark[i+1])), radius=1, color=(0, 0, 255), thickness=2)
102 | cv2.imshow('RetinaFace-Pytorch',img)
103 | print(time.time()-start)
104 | if cv2.waitKey(1) & 0xFF == ord('q'):
105 | break
106 | if __name__=='__main__':
107 |     main()
108 |
--------------------------------------------------------------------------------