├── ckpt
│   └── faceboxes.pt
├── picture
│   ├── discROC_unpub.png
│   ├── img_416_result.jpg
│   └── img_463_result.jpg
├── .gitignore
├── createWiderFace.py
├── readme.md
├── multibox_layer.py
├── notes.md
├── trainvisdom.py
├── multibox_loss.py
├── networks.py
├── predict.py
├── dataset.py
└── encoderl.py

/ckpt/faceboxes.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxg2015/faceboxes/HEAD/ckpt/faceboxes.pt
--------------------------------------------------------------------------------

/picture/discROC_unpub.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxg2015/faceboxes/HEAD/picture/discROC_unpub.png
--------------------------------------------------------------------------------

/picture/img_416_result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxg2015/faceboxes/HEAD/picture/img_416_result.jpg
--------------------------------------------------------------------------------

/picture/img_463_result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxg2015/faceboxes/HEAD/picture/img_463_result.jpg
--------------------------------------------------------------------------------

/.gitignore:
--------------------------------------------------------------------------------
/*.jpg
weight/faceboxes.pt
weight/faceboxes200.pt
weight/faceboxes_300.pt
__pycache__/*
*.pyc
*.sh

--------------------------------------------------------------------------------

/createWiderFace.py:
--------------------------------------------------------------------------------
path = '/home/lxg/codedata/widerFace/wider_face_split/'

with open(path+'wider_face_train_bbx_gt.txt') as f:
    lines = f.readlines()
    nums = len(lines)

output = open(path+'box_label.txt', 'w')

for i in range(nums):
    # if i == 10:
    #     break

    line = lines[i]
    if 'jpg' not in line:
        continue

    im_name = line.strip()
    face_num = int(lines[i+1].strip())
    im_name = im_name + ' ' + str(face_num) + ' '

    # append "x y w h 1" for every face on this image
    for j in range(face_num):
        line = lines[i+2+j]
        splited = line.strip().split()
        im_name = im_name + splited[0] + ' '
        im_name = im_name + splited[1] + ' '
        im_name = im_name + splited[2] + ' '
        im_name = im_name + splited[3] + ' '
        im_name = im_name + '1' + ' '

    # print(i)
    output.write('widerFace/WIDER_train/images/' + im_name+'\n')

output.close()

# create aflw data
# with open('label.txt') as f:
#     lines = f.readlines()
#     output = open('label_path.txt', 'w')
#     for line in lines:
#         output.writelines('data/all/' + line)

--------------------------------------------------------------------------------

/readme.md:
--------------------------------------------------------------------------------
# Faceboxes
[FaceBoxes: A CPU Real-time Face Detector with High Accuracy](https://arxiv.org/abs/1708.05234)

Faceboxes is an SSD-style object detector designed for fast face detection, with a lightweight yet powerful network structure.

## update

1. Better network: the convolution modules are now conv-bn-relu blocks, not plain conv layers.

2. Doubled batch size; training uses about 7 GB of GPU memory.

3. Added a `use_gpu` flag and a `detect_gpu` function in predict.

Accuracy is better than before!

Speed still falls short of the official implementation: about 60 FPS on a 1080 Ti and much slower on the CPU; the decoder is a likely bottleneck.

## usage
- visdom
- pytorch 0.2
- torchvision

Our data annotation:
```
data/all/image01468.jpg 1 119 185 139 139 1
data/all/image01449.jpg 2 9 39 74 74 1 409 93 77 77 1
```
format:
```
path/image_name.jpg num_face x y w h 1 x y w h 1
```
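A minimal sketch of reading one such line back into corner boxes (`parse_line` is a hypothetical helper, mirroring what `dataset.py` does with each annotation line):
```python
# parse one annotation line: path, num_face, then 5 numbers (x y w h 1) per face
def parse_line(line):
    parts = line.strip().split()
    path, num_face = parts[0], int(parts[1])
    boxes = []
    for i in range(num_face):
        x, y, w, h = map(float, parts[2 + 5 * i:6 + 5 * i])
        boxes.append([x, y, x + w, y + h])  # convert (x, y, w, h) to (x1, y1, x2, y2)
    return path, boxes

path, boxes = parse_line('data/all/image01449.jpg 2 9 39 74 74 1 409 93 77 77 1')
print(path, boxes)  # two faces as corner boxes
```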
## Result
![face1](picture/img_416_result.jpg)
![face2](picture/img_463_result.jpg)

## Fddb
The results fall somewhat short of the original paper; the gap most likely comes from the data augmentation.

Tip: the second curve, DDFD (Multi-view Face Detection Using Deep Convolutional Neural Networks), is included only as a reference.

![fddb](picture/discROC_unpub.png)
--------------------------------------------------------------------------------

/multibox_layer.py:
--------------------------------------------------------------------------------
#encoding:utf-8
import math

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F

from torch.autograd import Variable

class MultiBoxLayer(nn.Module):
    num_classes = 2
    num_anchors = [21,1,1]
    in_planes = [128,256,256]

    def __init__(self):
        super(MultiBoxLayer,self).__init__()

        self.loc_layers = nn.ModuleList()
        self.conf_layers = nn.ModuleList()
        for i in range(len(self.in_planes)):
            self.loc_layers.append(nn.Conv2d(self.in_planes[i],self.num_anchors[i]*4, kernel_size=3,padding=1))
            self.conf_layers.append(nn.Conv2d(self.in_planes[i],self.num_anchors[i]*2,kernel_size=3,padding=1))

    def forward(self,xs):
        '''
        xs: list of feature maps from the preceding layers
        return: loc_preds: [N,21824,4]
                conf_preds: [N,21824,2]
        '''
        y_locs=[]
        y_confs = []
        for i,x in enumerate(xs):
            y_loc = self.loc_layers[i](x) # [N, anchors*4, H, W]
            N = y_loc.size(0)
            y_loc = y_loc.permute(0,2,3,1).contiguous()
            y_loc = y_loc.view(N,-1,4)
            y_locs.append(y_loc)

            y_conf = self.conf_layers[i](x)
            y_conf = y_conf.permute(0,2,3,1).contiguous()
            y_conf = y_conf.view(N,-1,2)
            y_confs.append(y_conf)

        loc_preds = torch.cat(y_locs,1)
        conf_preds = torch.cat(y_confs,1)
        return loc_preds,conf_preds
--------------------------------------------------------------------------------
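A quick shape sanity check for this layer (a sketch; the fake feature maps use the channel counts and resolutions the backbone in networks.py produces for a 1024x1024 input). It also shows where the 21824 anchor count comes from: 32*32*21 + 16*16*1 + 8*8*1 = 21824.
```python
import torch
from torch.autograd import Variable
from multibox_layer import MultiBoxLayer

layer = MultiBoxLayer()
# fake feature maps matching in_planes=[128,256,256] at 32x32, 16x16, 8x8
xs = [Variable(torch.randn(1, 128, 32, 32)),
      Variable(torch.randn(1, 256, 16, 16)),
      Variable(torch.randn(1, 256, 8, 8))]
loc, conf = layer(xs)
print(loc.size(), conf.size())  # (1, 21824, 4) (1, 21824, 2)
```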
/notes.md:
--------------------------------------------------------------------------------
## aflw

Epoch [20/20], Iter [1055/1057] Loss: 1.8820

loc_loss:0.984644 conf_loss:1.205544, pos_num:78
loc_loss:0.698231 conf_loss:1.439163, pos_num:163
loc_loss:0.644837 conf_loss:1.328891, pos_num:143
loc_loss:0.420935 conf_loss:1.332006, pos_num:149
loc_loss:0.246193 conf_loss:0.883853, pos_num:242
Epoch [20/20], Iter [1055/1057] Loss: 1.1300

## wider face, aflw

loc_loss:1.417856 conf_loss:2.061183, pos_num:152
loc_loss:0.655475 conf_loss:1.698712, pos_num:65
loc_loss:1.235528 conf_loss:2.012040, pos_num:144
loc_loss:0.798853 conf_loss:1.713999, pos_num:109
loc_loss:0.823239 conf_loss:1.953249, pos_num:318
Epoch [50/50], Iter [1700/1701] Loss: 2.7765, average_loss: 2.8849

loc_loss:0.802718 conf_loss:1.943955, pos_num:284
loc_loss:0.867129 conf_loss:1.820582, pos_num:420
loc_loss:0.885825 conf_loss:1.830107, pos_num:358
loc_loss:0.811850 conf_loss:1.881572, pos_num:501
loc_loss:0.975667 conf_loss:1.921641, pos_num:540
Epoch [50/50], Iter [680/681] Loss: 2.8973, average_loss: 2.5820

46 2.27359845486
47 2.27207741518
48 2.26043195595
49 2.26634732234

loc_loss:2.556887 conf_loss:2.489368, pos_num:68
loc_loss:2.234761 conf_loss:2.448641, pos_num:111
loc_loss:2.569000 conf_loss:2.495923, pos_num:105
loc_loss:2.542970 conf_loss:2.470961, pos_num:74
loc_loss:2.530408 conf_loss:2.485945, pos_num:79
Epoch [1/50], Iter [1045/1677] Loss: 5.0164, average_loss: 5.0478
loc_loss:2.033609 conf_loss:2.263520, pos_num:173
loc_loss:2.454619 conf_loss:2.263516, pos_num:109
loc_loss:2.247968 conf_loss:2.262204, pos_num:257
loc_loss:2.366087 conf_loss:2.263796, pos_num:50
loc_loss:2.315195 conf_loss:2.261710, pos_num:192
Epoch [10/50], Iter [735/1677] Loss: 4.5769, average_loss: 4.5744

# adjust
- Fix 1
  In the reimplementation, the 2x densification offset is 1/4 of the base scale, i.e. 1/8 of the 2x-scale anchor; it had been written as 1/4 and is now corrected.
- Fix 2
  Every box label is matched to the default box with the highest IoU, regardless of the IoU threshold. This produced inf loc loss, because some targets had zero width or height — i.e. random_crop in the dataset code was buggy. After requiring box labels to be at least 10 pixels wide and high, the problem no longer appears (see the sketch at the end of these notes).
- Fix 3
  Use Adam.

I don't understand why the loss suddenly exploded:
```s
loc_loss:115.657501 conf_loss:39.798553, pos_num:2528

Epoch [300/300], Iter [400/403] Loss: 3.4930, average_loss: 3.5764
loc_loss:1.732548 conf_loss:1.807370, pos_num:1120
loc_loss:1.832072 conf_loss:2.002608, pos_num:1711
loc_loss:1.265184 conf_loss:1.525407, pos_num:624
```
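The inf loc loss from Fix 2 can also be caught before training by scanning the annotation file — a minimal sketch (the `label/box_label.txt` path is the one trainvisdom.py uses):
```python
# flag annotation boxes narrower or shorter than 10 px (the limit from Fix 2)
with open('label/box_label.txt') as f:
    for line in f:
        parts = line.strip().split()
        for i in range(int(parts[1])):
            w, h = float(parts[4 + 5 * i]), float(parts[5 + 5 * i])
            if w < 10 or h < 10:
                print('degenerate box in', parts[0], w, h)
```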
--------------------------------------------------------------------------------

/trainvisdom.py:
--------------------------------------------------------------------------------
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import torch
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision import models
from torch.autograd import Variable

from networks import FaceBox
from multibox_loss import MultiBoxLoss
from dataset import ListDataset

import visdom
import numpy as np

use_gpu = torch.cuda.is_available()
file_root = '/home/lxg/codedata/'

learning_rate = 0.001
num_epochs = 300
batch_size = 64

net = FaceBox()
if use_gpu:
    net.cuda()

print('load model...')
# net.load_state_dict(torch.load('weight/faceboxes.pt'))

criterion = MultiBoxLoss()

# optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9, weight_decay=0.0003)
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate, weight_decay=1e-4)

train_dataset = ListDataset(root=file_root,list_file='label/box_label.txt',train=True,transform = [transforms.ToTensor()] )
train_loader = DataLoader(train_dataset,batch_size=batch_size,shuffle=True,num_workers=5)
print('the dataset has %d images' % (len(train_dataset)))
print('the batch_size is %d' % (batch_size))

num_iter = 0
vis = visdom.Visdom()
win = vis.line(Y=np.array([0]), X=np.array([0]))

net.train()
for epoch in range(num_epochs):
    # step the learning rate down by 10x at epochs 190 and 250
    if epoch == 190 or epoch == 250:
        learning_rate *= 0.1
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate

    print('\n\nStarting epoch %d / %d' % (epoch + 1, num_epochs))
    print('Learning Rate for this epoch: {}'.format(learning_rate))

    total_loss = 0.

    for i,(images,loc_targets,conf_targets) in enumerate(train_loader):
        images = Variable(images)
        loc_targets = Variable(loc_targets)
        conf_targets = Variable(conf_targets)
        if use_gpu:
            images,loc_targets,conf_targets = images.cuda(),loc_targets.cuda(),conf_targets.cuda()

        loc_preds, conf_preds = net(images)
        loss = criterion(loc_preds,loc_targets,conf_preds,conf_targets)
        total_loss += loss.data[0]

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i+1) % 5 == 0:
            print ('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f, average_loss: %.4f'
                %(epoch+1, num_epochs, i+1, len(train_loader), loss.data[0], total_loss / (i+1)))
            num_iter = num_iter + 1
            vis.line(Y=np.array([total_loss / (i+1)]), X=np.array([num_iter]),
                    win=win,
                    update='append')

    if not os.path.exists('weight/'):
        os.mkdir('weight')
    print('saving model ...')
    torch.save(net.state_dict(),'weight/faceboxes.pt')

--------------------------------------------------------------------------------
/multibox_loss.py:
--------------------------------------------------------------------------------
#encoding:utf-8
import math

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F

from torch.autograd import Variable

class MultiBoxLoss(nn.Module):
    num_classes = 2
    def __init__(self):
        super(MultiBoxLoss,self).__init__()

    def cross_entropy_loss(self, x, y):
        x = x.detach()
        y = y.detach()
        xmax = x.data.max()
        log_sum_exp = torch.log(torch.sum(torch.exp(x-xmax), 1, keepdim=True)) + xmax
        return log_sum_exp - x.gather(1, y.view(-1,1))

    def hard_negative_mining(self,conf_loss,pos):
        '''
        conf_loss [N*21824,]
        pos [N,21824]
        return negative indices
        '''
        batch_size,num_boxes = pos.size()
        conf_loss[pos.view(-1,1)] = 0 # zero out positives; the rest is negative conf_loss
        conf_loss = conf_loss.view(batch_size,-1)

        # rank trick: sort the losses, then sort the indices to get each anchor's rank
        _,idx = conf_loss.sort(1,descending=True)
        _,rank = idx.sort(1)

        num_pos = pos.long().sum(1,keepdim=True)
        num_neg = torch.clamp(3*num_pos, max=num_boxes-1) # keep a 3:1 negative:positive ratio

        neg = rank < num_neg.expand_as(rank)
        return neg

    def forward(self,loc_preds,loc_targets,conf_preds,conf_targets):
        '''
        loc_preds[batch,21824,4]
        loc_targets[batch,21824,4]
        conf_preds[batch,21824,2]
        conf_targets[batch,21824]
        '''
        batch_size,num_boxes, _ = loc_preds.size()
        pos = conf_targets>0 # entries > 0 are anchors matched to a face box
        num_pos = pos.long().sum(1, keepdim=True)
        num_matched_boxes = pos.data.long().sum()
        if num_matched_boxes == 0:
            return Variable(torch.Tensor([0]),requires_grad=True)

        pos_mask1 = pos.unsqueeze(2).expand_as(loc_preds)
        pos_loc_preds = loc_preds[pos_mask1].view(-1,4)
        pos_loc_targets = loc_targets[pos_mask1].view(-1,4)

        loc_loss = F.smooth_l1_loss(pos_loc_preds,pos_loc_targets,size_average=False)
        # if loc_loss.data[0] > 10000: # extreme preds make the loss huge, so this is expected
        #     print('preds', pos_loc_preds)
        #     print('targets', pos_loc_targets)

        conf_loss = self.cross_entropy_loss(conf_preds.view(-1,self.num_classes),
                                            conf_targets.view(-1,1))
        neg = self.hard_negative_mining(conf_loss, pos)
        pos_mask = pos.unsqueeze(2).expand_as(conf_preds)
        neg_mask = neg.unsqueeze(2).expand_as(conf_preds)
        mask = (pos_mask+neg_mask).gt(0)

        pos_and_neg = (pos+neg).gt(0)
        preds = conf_preds[mask].view(-1,self.num_classes)
        targets = conf_targets[pos_and_neg]
        conf_loss = F.cross_entropy(preds,targets,size_average=False)

        N = num_pos.data.sum()
        loc_loss /= N
        conf_loss /= N
        print('loc_loss:%f conf_loss:%f, pos_num:%d' % (loc_loss.data[0], conf_loss.data[0], N))
        return loc_loss+conf_loss
--------------------------------------------------------------------------------
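The double `sort` in `hard_negative_mining` is the usual rank trick: the first sort orders anchors by loss, the second recovers each anchor's rank in that order, and anchors ranked below `3*num_pos` are kept as hard negatives. A toy run (a sketch, independent of the training code):
```python
import torch

conf_loss = torch.Tensor([[0.1, 0.9, 0.4, 0.7, 0.2]])  # per-anchor losses, batch of 1
num_pos = torch.LongTensor([[1]])                      # one positive anchor

_, idx = conf_loss.sort(1, descending=True)  # idx: anchors ordered by descending loss
_, rank = idx.sort(1)                        # rank: each anchor's position in that order
neg = rank < (3 * num_pos).expand_as(rank)   # keep the 3*num_pos hardest negatives
print(rank)  # [[4, 0, 2, 1, 3]]
print(neg)   # [[0, 1, 1, 1, 0]] -> anchors 1, 2, 3 carry the largest losses
```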
/networks.py:
--------------------------------------------------------------------------------
#encoding:utf-8
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function
from torch.autograd import Variable

from multibox_layer import MultiBoxLayer


def conv_bn_relu(in_channels,out_channels,kernel_size,stride=1,padding=0):
    return nn.Sequential(
        nn.Conv2d(in_channels,out_channels,kernel_size=kernel_size,padding=padding,stride=stride),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(True)
    )

class Inception(nn.Module):
    def __init__(self):
        super(Inception,self).__init__()
        self.conv1 = conv_bn_relu(128,32,1)
        self.conv2 = conv_bn_relu(128,32,1)
        self.conv3 = conv_bn_relu(128,24,1)
        self.conv4 = conv_bn_relu(24,32,3,padding=1)
        self.conv5 = conv_bn_relu(128,24,1)
        self.conv6 = conv_bn_relu(24,32,3,padding=1)
        self.conv7 = conv_bn_relu(32,32,3,padding=1)

    def forward(self,x):
        x1 = self.conv1(x)

        x2 = F.max_pool2d(x,kernel_size=3,stride=1,padding=1)
        x2 = self.conv2(x2)

        x3 = self.conv3(x)
        x3 = self.conv4(x3)

        x4 = self.conv5(x)
        x4 = self.conv6(x4)
        x4 = self.conv7(x4)

        output = torch.cat([x1,x2,x3,x4],1) # four branches of 32 channels each -> 128
        return output


class FaceBox(nn.Module):
    input_size = 1024

    def __init__(self):
        super(FaceBox, self).__init__()

        # model
        self.conv1 = nn.Conv2d(3,24,kernel_size=7,stride=4,padding=3)
        self.bn1 = nn.BatchNorm2d(24)
        self.conv2 = nn.Conv2d(48,64,kernel_size=5,stride=2,padding=2)
        self.bn2 = nn.BatchNorm2d(64)

        self.inception1 = Inception()
        self.inception2 = Inception()
        self.inception3 = Inception()

        self.conv3_1 = conv_bn_relu(128,128,1)
        self.conv3_2 = conv_bn_relu(128,256,3,2,1)
        self.conv4_1 = conv_bn_relu(256,128,1)
        self.conv4_2 = conv_bn_relu(128,256,3,2,1)

        self.multilbox = MultiBoxLayer()

    def forward(self,x):
        hs = []

        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(torch.cat([x, -x], 1)) # CReLU

        x = F.max_pool2d(x,kernel_size=3,stride=2,padding=1)
        x = self.conv2(x)
        x = self.bn2(x)
        x = F.relu(torch.cat([x, -x], 1)) # CReLU

        x = F.max_pool2d(x,kernel_size=3,stride=2,padding=1)
        x = self.inception1(x)
        x = self.inception2(x)
        x = self.inception3(x)

        hs.append(x)        # 32x32 feature map, 128 channels
        x = self.conv3_1(x)
        x = self.conv3_2(x)
        hs.append(x)        # 16x16 feature map, 256 channels
        x = self.conv4_1(x)
        x = self.conv4_2(x)
        hs.append(x)        # 8x8 feature map, 256 channels
        loc_preds, conf_preds = self.multilbox(hs)

        return loc_preds, conf_preds

if __name__ == '__main__':
    model = FaceBox()
    data = Variable(torch.randn(1,3,1024,1024))
    loc, conf = model(data)
    print('loc', loc.size())
    print('conf', conf.size())
--------------------------------------------------------------------------------
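The `F.relu(torch.cat([x, -x], 1))` lines implement CReLU from the paper: one convolution's output is reused with both signs, doubling the channels (24 in, 48 out into conv2) without a second convolution. A minimal sketch:
```python
import torch
import torch.nn.functional as F
from torch.autograd import Variable

x = Variable(torch.randn(1, 24, 256, 256))  # conv1 output: 24 channels at stride 4
y = F.relu(torch.cat([x, -x], 1))           # CReLU: concatenate x and -x, then ReLU
print(y.size())                             # (1, 48, 256, 256) -- matches conv2's in_channels
```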
/predict.py:
--------------------------------------------------------------------------------
from networks import FaceBox
from encoderl import DataEncoder

import torch
from torch.autograd import Variable
import torch.nn.functional as F
import cv2
from tqdm import tqdm
print('opencv version', cv2.__version__)

use_gpu = True

def detect(im):
    im = cv2.resize(im, (1024,1024))
    im_tensor = torch.from_numpy(im.transpose((2,0,1)))
    im_tensor = im_tensor.float().div(255)
    loc, conf = net(Variable(torch.unsqueeze(im_tensor, 0), volatile=True))
    boxes, labels, probs = data_encoder.decode(loc.data.squeeze(0),
                                               F.softmax(conf.squeeze(0)).data)
    return boxes, probs

def detect_gpu(im):
    im = cv2.resize(im, (1024,1024))
    im_tensor = torch.from_numpy(im.transpose((2,0,1)))
    im_tensor = im_tensor.float().div(255)
    loc, conf = net(Variable(torch.unsqueeze(im_tensor, 0), volatile=True).cuda())
    loc, conf = loc.cpu(), conf.cpu()
    boxes, labels, probs = data_encoder.decode(loc.data.squeeze(0),
                                               F.softmax(conf.squeeze(0)).data)
    return boxes, probs

def testVideo(file):
    cap = cv2.VideoCapture(file)
    if not cap.isOpened():
        print("cannot open video")

    _, im = cap.read()
    h,w,_ = im.shape

    while True:
        _,im = cap.read()
        boxes,_ = detect(im)

        print(boxes)
        for box in boxes:
            # boxes are normalized to [0,1]; scale back to pixel coordinates
            x1 = int(box[0]*w)
            x2 = int(box[2]*w)
            y1 = int(box[1]*h)
            y2 = int(box[3]*h)
            print(x1, y1, x2, y2, w, h)
            cv2.rectangle(im,(x1,y1),(x2,y2),(0,255,0),2)

        cv2.imshow("video", im)
        cv2.waitKey(2)

def testIm(file):
    im = cv2.imread(file)
    if im is None:
        print("cannot open image:", file)
        return
    h,w,_ = im.shape
    boxes, probs = detect(im)
    print(boxes)
    for i, (box) in enumerate(boxes):
        print('i', i, 'box', box)
        x1 = int(box[0]*w)
        x2 = int(box[2]*w)
        y1 = int(box[1]*h)
        y2 = int(box[3]*h)
        print(x1, y1, x2, y2, w, h)
        cv2.rectangle(im,(x1,y1+4),(x2,y2),(0,0,255),2)
        cv2.putText(im, str(round(probs[i],2)), (x1,y1), font, 0.4, (0,0,255))
    cv2.imwrite('photo.jpg', im)
    return im

def testImList(path, file_name):
    with open(path+file_name) as f:
        file_list = f.readlines()

    for item in file_list:
        testIm(path+item.strip()+'.jpg')

def saveFddbData(path, file_name):
    '''
    Args:
        file_name: fddb image list
    '''
    with open(path+file_name) as f:
        file_list = f.readlines()
    f_write = open('predict.txt', 'w')

    image_num = 0
    for item in tqdm(file_list):
        item = item.strip()
        if not ('/' in item):
            continue
        image_num += 1
        im = cv2.imread(path+item+'.jpg')
        if im is None:
            print('cannot open image', item)
            return
        h,w,_ = im.shape
        if use_gpu:
            boxes, probs = detect_gpu(im)
        else:
            boxes, probs = detect(im)
        f_write.write(item+'\n')
        f_write.write(str(boxes.size(0))+'\n')
        for i, (box) in enumerate(boxes):
            x1 = box[0]*w
            x2 = box[2]*w
            y1 = box[1]*h
            y2 = box[3]*h
            f_write.write(str(x1)+'\t'+str(y1)+'\t'+str(x2-x1)+'\t'+str(y2-y1)+'\t'+str(probs[i])+'\t'+'1\n')
    f_write.close()

def getFddbList(path, file_name):
    with open(path+file_name) as f:
        file_list = f.readlines()
    f_write = open(path+'fddblist.txt', 'w')
    for item in file_list:
        if '/' in item:
            f_write.write(item)
    f_write.close()
    print('get fddb list done')

if __name__ == '__main__':
    net = FaceBox()
    net.load_state_dict(torch.load('weight/faceboxes.pt', map_location=lambda storage, loc:storage))

    if use_gpu:
        net.cuda()
    net.eval()
    data_encoder = DataEncoder()

    font = cv2.FONT_HERSHEY_SCRIPT_SIMPLEX

    # given a video path, predict and show
    path = "/home/lxg/codedata/faceVideo/1208.mp4"
    # testVideo(path)

    # given an image path, predict and show
    root_path = "/home/lxg/codedata/widerFace/WIDER_train/images/0--Parade/"
    picture = '0_Parade_marchingband_1_495.jpg'
    # testIm(root_path + picture)

    # given an image path, predict and show
    fddb_path = "/home/lxg/codedata/fddb/2002/07/19/big/"
    picture = 'img_463.jpg'
    im = testIm(fddb_path + picture)
    # cv2.imwrite('picture/'+picture, im)

    # given an image file list, predict and show
    path = '/home/lxg/codedata/fddb/'
    file_name = 'FDDB-folds/FDDB-fold-01.txt'
    # testImList(path, file_name)

    # get fddb predictions and write them to predict.txt
    path = '/home/lxg/codedata/fddb/'
    file_name = 'fddb.txt'
    # saveFddbData(path, file_name)
    # getFddbList(path, file_name)
--------------------------------------------------------------------------------

/dataset.py:
--------------------------------------------------------------------------------
#encoding:utf-8
'''
Annotation txt format: image_name.jpg num x y w h 1 x y w h 1
(a line like this describes an image containing two faces)
'''
import os
import sys
import os.path

import random
import numpy as np

import torch
import torch.utils.data as data
import torchvision.transforms as transforms

import cv2

from encoderl import DataEncoder

class ListDataset(data.Dataset):
    image_size=1024

    def __init__(self,root,list_file,train,transform):
        print('data init')
        self.root=root
        self.train = train
        self.transform=transform
        self.fnames = []
        self.boxes = []
        self.labels = []
        self.small_threshold = 10./self.image_size # faces smaller than this threshold are ignored
        self.data_encoder = DataEncoder()

        with open(list_file) as f:
            lines = f.readlines()

        for line in lines:
            splited = line.strip().split()
            self.fnames.append(splited[0])
            num_faces = int(splited[1])
            box=[]
            label=[]
            for i in range(num_faces):
                x = float(splited[2+5*i])
                y = float(splited[3+5*i])
                w = float(splited[4+5*i])
                h = float(splited[5+5*i])
                c = int(splited[6+5*i])
                box.append([x,y,x+w,y+h])
                label.append(c)
            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.LongTensor(label))
        self.num_samples = len(self.boxes)

    def __getitem__(self,idx):
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root+fname))
        assert img is not None

        boxes = self.boxes[idx].clone()
        labels = self.labels[idx].clone()

        if self.train:
            img, boxes, labels = self.random_crop(img, boxes, labels)
            img = self.random_bright(img)
            img, boxes = self.random_flip(img, boxes)
            boxwh = boxes[:,2:] - boxes[:,:2]

        h,w,_ = img.shape
        img = cv2.resize(img,(self.image_size,self.image_size))

        boxes /= torch.Tensor([w,h,w,h]).expand_as(boxes) # normalize boxes to [0,1]
        for t in self.transform:
            img = t(img)

        loc_target,conf_target = self.data_encoder.encode(boxes,labels)

        return img,loc_target,conf_target

    def random_getim(self):
        idx = random.randrange(0,self.num_samples)
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root+fname))
        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]

        return img, boxes, labels

    def __len__(self):
        return self.num_samples

    def random_flip(self, im, boxes):
        if random.random() < 0.5:
            im_lr = np.fliplr(im).copy()
            h,w,_ = im.shape
            xmin = w - boxes[:,2]
            xmax = w - boxes[:,0]
            boxes[:,0] = xmin
            boxes[:,2] = xmax
            return im_lr, boxes
        return im, boxes

    def random_crop(self, im, boxes, labels):
        imh, imw, _ = im.shape
        short_size = min(imw, imh)
        while True:
            mode = random.choice([None, 0.3, 0.5, 0.7, 0.9])
            if mode is None:
                boxes_uniform = boxes / torch.Tensor([imw,imh,imw,imh]).expand_as(boxes)
                boxwh = boxes_uniform[:,2:] - boxes_uniform[:,:2]
                mask = (boxwh[:,0] > self.small_threshold) & (boxwh[:,1] > self.small_threshold)
                if not mask.any():
                    print('default image has no box bigger than small_threshold')
                    im, boxes, labels = self.random_getim()
                    imh, imw, _ = im.shape
                    short_size = min(imw,imh)
                    continue
                selected_boxes = boxes.index_select(0, mask.nonzero().squeeze(1))
                selected_labels = labels.index_select(0, mask.nonzero().squeeze(1))
                return im, selected_boxes, selected_labels

            for _ in range(10):
                w = random.randrange(int(0.3*short_size), short_size)
                h = w

                x = random.randrange(imw - w)
                y = random.randrange(imh - h)
                roi = torch.Tensor([[x, y, x+w, y+h]])

                center = (boxes[:,:2] + boxes[:,2:]) / 2
                roi2 = roi.expand(len(center), 4)
                # keep only boxes whose center falls inside the crop
                mask = (center > roi2[:,:2]) & (center < roi2[:,2:])
                mask = mask[:,0] & mask[:,1]
                if not mask.any():
                    continue

                selected_boxes = boxes.index_select(0, mask.nonzero().squeeze(1))
                img = im[y:y+h,x:x+w,:]
                selected_boxes[:,0].add_(-x).clamp_(min=0, max=w)
                selected_boxes[:,1].add_(-y).clamp_(min=0, max=h)
                selected_boxes[:,2].add_(-x).clamp_(min=0, max=w)
                selected_boxes[:,3].add_(-y).clamp_(min=0, max=h)

                boxes_uniform = selected_boxes / torch.Tensor([w,h,w,h]).expand_as(selected_boxes)
                boxwh = boxes_uniform[:,2:] - boxes_uniform[:,:2]
                mask = (boxwh[:,0] > self.small_threshold) & (boxwh[:,1] > self.small_threshold)
                if not mask.any():
                    print('cropped image has no box bigger than small_threshold')
                    im, boxes, labels = self.random_getim()
                    imh, imw, _ = im.shape
                    short_size = min(imw,imh)
                    continue
                selected_boxes_selected = selected_boxes.index_select(0, mask.nonzero().squeeze(1))
                selected_labels = labels.index_select(0, mask.nonzero().squeeze(1))
                return img, selected_boxes_selected, selected_labels

    def random_bright(self, im, delta=16):
        alpha = random.random()
        if alpha > 0.3:
            im = im * alpha + random.randrange(-delta,delta)
            im = im.clip(min=0,max=255).astype(np.uint8)
        return im

    def testGet(self, idx):
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root,fname))
        cv2.imwrite('test_encoder_source.jpg', img)
        boxes = self.boxes[idx].clone()
        labels = self.labels[idx].clone()

        for box in boxes:
            cv2.rectangle(img, (int(box[0]),int(box[1])), (int(box[2]),int(box[3])), (0,0,255))
        cv2.imwrite(fname, img)

        if self.train:
            img, boxes, labels = self.random_crop(img, boxes, labels)
            img = self.random_bright(img)
            img, boxes = self.random_flip(img, boxes)

        h,w,_ = img.shape
        boxes /= torch.Tensor([w,h,w,h]).expand_as(boxes)

        img = cv2.resize(img,(self.image_size,self.image_size))
        for t in self.transform:
            img = t(img)

        print(idx, fname, boxes)

        return img, boxes, labels

if __name__ == '__main__':
    file_root = '/home/lxg/codedata/aflw/'
    train_dataset = ListDataset(root=file_root,list_file='box_label.txt',train=True,transform = [transforms.ToTensor()] )
    print('the dataset has %d image' % (len(train_dataset)))
    for i in range(len(train_dataset)):
        print(i)
        item = random.randrange(0, len(train_dataset))
        img, boxes, labels = train_dataset.testGet(item)
        img = img.numpy().transpose(1,2,0).copy()*255
        train_dataset.data_encoder.test_encode(boxes, img, labels)

        boxes = boxes.numpy().tolist()
        w,h,_ = img.shape
        for box in boxes:
            x1 = int(box[0]*w)
            y1 = int(box[1]*h)
            x2 = int(box[2]*w)
            y2 = int(box[3]*h)
            cv2.rectangle(img, (x1,y1), (x2,y2), (0,0,255))
            boxw = x2-x1
            boxh = y2-y1
            print(boxw,boxh, box)
            if boxw == 0 or boxh == 0:
                raise ValueError('zero width box')

        cv2.imwrite('test'+str(i)+'.jpg', img)
        if i == 0:
            break
--------------------------------------------------------------------------------

/encoderl.py:
--------------------------------------------------------------------------------
#encoding:utf-8

import torch
import math
import itertools
import cv2
import numpy as np

class DataEncoder:
    def __init__(self):
        '''
        compute default boxes
        '''
        scale = 1024.
        steps = [s / scale for s in (32, 64, 128)]
        sizes = [s / scale for s in (32, 256, 512)] # with 64 instead of 32, more positive anchors match the labels
        aspect_ratios = ((1,2,4), (1,), (1,))
        feature_map_sizes = (32, 16, 8)

        density = [[-3,-1,1,3],[-1,1],[0]] # density for output layer1
        # density = [[0],[0],[0]] # density for output layer1

        num_layers = len(feature_map_sizes)
        boxes = []
        for i in range(num_layers):
            fmsize = feature_map_sizes[i]
            for h,w in itertools.product(range(fmsize), repeat=2):
                cx = (w + 0.5)*steps[i]
                cy = (h + 0.5)*steps[i]

                s = sizes[i]
                for j,ar in enumerate(aspect_ratios[i]):
                    if i == 0:
                        # anchor densification on the first output layer
                        for dx,dy in itertools.product(density[j], repeat=2):
                            boxes.append((cx+dx/8.*s*ar, cy+dy/8.*s*ar, s*ar, s*ar))
                    else:
                        boxes.append((cx, cy, s*ar, s*ar))

        self.default_boxes = torch.Tensor(boxes)

    def test_iou(self):
        box1 = torch.Tensor([0,0,10,10])
        box1 = box1[None,:]
        box2 = torch.Tensor([[5,0,15,10],[5,0,15,10]])
        print('iou', self.iou(box1, box2))

    def iou(self, box1, box2):
        '''Compute the intersection over union of two sets of boxes, each box is [x1,y1,x2,y2].

        Args:
            box1: (tensor) bounding boxes, sized [N,4].
            box2: (tensor) bounding boxes, sized [M,4].

        Return:
            (tensor) iou, sized [N,M].
        '''
        N = box1.size(0)
        M = box2.size(0)

        lt = torch.max( # left top
            box1[:,:2].unsqueeze(1).expand(N,M,2), # [N,2] -> [N,1,2] -> [N,M,2]
            box2[:,:2].unsqueeze(0).expand(N,M,2), # [M,2] -> [1,M,2] -> [N,M,2]
        )

        rb = torch.min( # right bottom
            box1[:,2:].unsqueeze(1).expand(N,M,2), # [N,2] -> [N,1,2] -> [N,M,2]
            box2[:,2:].unsqueeze(0).expand(N,M,2), # [M,2] -> [1,M,2] -> [N,M,2]
        )

        wh = rb - lt # [N,M,2]
        wh[wh<0] = 0 # clip at 0
        inter = wh[:,:,0] * wh[:,:,1] # [N,M]

        area1 = (box1[:,2]-box1[:,0]) * (box1[:,3]-box1[:,1]) # [N,]
        area2 = (box2[:,2]-box2[:,0]) * (box2[:,3]-box2[:,1]) # [M,]
        area1 = area1.unsqueeze(1).expand_as(inter) # [N,] -> [N,1] -> [N,M]
        area2 = area2.unsqueeze(0).expand_as(inter) # [M,] -> [1,M] -> [N,M]

        iou = inter / (area1 + area2 - inter)
        return iou

    def test_encode(self, boxes, img, label):
        loc, conf = self.encode(boxes, label)
        print('conf', type(conf), conf.size(), conf.long().sum())
        print('loc', loc)
        w,h,_ = img.shape
        for box in boxes:
            cv2.rectangle(img, (int(box[0]*w),int(box[1]*w)), (int(box[2]*w), int(box[3]*w)), (0,255,0))

        print(type(conf))
        for i in range(len(self.default_boxes)):
            if conf[i] != 0:
                print(i)

        im = img.copy()
        # layer 1: 32*32*21 densified anchors
        for i in range(32*32*21):
            box_item = self.default_boxes[i]*w
            centerx, centery = int(box_item[0]), int(box_item[1])
            if conf[i] != 0:
                cv2.circle(im, (centerx, centery), 4, (0,255,0))
            else:
                cv2.circle(im, (centerx, centery), 1, (0,0,255))
        box = self.default_boxes[0]
        cv2.rectangle(im, (0,0), (int(box[2]*w), int(box[3]*w)), (0,255,0))
        box = self.default_boxes[16]
        cv2.rectangle(im, (0,0), (int(box[2]*w), int(box[3]*w)), (0,255,0))
        box = self.default_boxes[20]
        cv2.rectangle(im, (0,0), (int(box[2]*w), int(box[3]*w)), (0,255,0))
        cv2.imwrite('test_encoder_0.jpg', im)

        im = img.copy()
        # layer 2: 16*16 anchors
        for i in range(32*32*21, 32*32*21+16*16):
            box_item = self.default_boxes[i]*w
            centerx, centery = int(box_item[0]), int(box_item[1])
            if conf[i] != 0:
                cv2.circle(im, (centerx, centery), 4, (0,255,0))
            else:
                cv2.circle(im, (centerx, centery), 2, (0,0,255))
        box = self.default_boxes[32*32*21]
        cv2.rectangle(im, (0,0), (int(box[2]*w), int(box[3]*w)), (0,255,0))
        cv2.imwrite('test_encoder_1.jpg', im)

        im = img.copy()
        # layer 3: 8*8 anchors
        for i in range(32*32*21+16*16, len(self.default_boxes)):
            box_item = self.default_boxes[i]*w
            centerx, centery = int(box_item[0]), int(box_item[1])
            if conf[i] != 0:
                cv2.circle(im, (centerx, centery), 4, (0,255,0))
            else:
                cv2.circle(im, (centerx, centery), 2, (0,0,255))
        box = self.default_boxes[32*32*21+16*16]
        cv2.rectangle(im, (0,0), (int(box[2]*w), int(box[3]*w)), (0,255,0))
        cv2.imwrite('test_encoder_2.jpg', im)

    def encode(self,boxes,classes,threshold=0.35):
        '''
        boxes: (tensor) ground-truth boxes [num_obj,4], as (x1,y1,x2,y2)
        classes: (tensor) class labels [num_obj,]
        default boxes are stored as (cx,cy,w,h)
        return: loc [21824,4], conf [21824,]
        '''
        boxes_org = boxes

        default_boxes = self.default_boxes # [21824,4]
        num_default_boxes = default_boxes.size(0)
        num_obj = boxes.size(0) # number of faces

        iou = self.iou(
            boxes,
            torch.cat([default_boxes[:,:2] - default_boxes[:,2:]/2,
                       default_boxes[:,:2] + default_boxes[:,2:]/2], 1))
        # for every ground-truth box, record the default box with the highest IoU,
        # no matter how small that IoU is
        max_iou, max_iou_index = iou.max(1)
        # assign each default box to the ground-truth box it overlaps most
        iou, max_index = iou.max(0)

        max_index.squeeze_(0) # torch.LongTensor [21824]
        iou.squeeze_(0)

        max_index[max_iou_index] = torch.LongTensor(range(num_obj))

        boxes = boxes[max_index] # [21824,4] ground-truth box assigned to each anchor
        variances = [0.1, 0.2]
        cxcy = (boxes[:,:2] + boxes[:,2:])/2 - default_boxes[:,:2] # [21824,2]
        cxcy /= variances[0] * default_boxes[:,2:]
        wh = (boxes[:,2:] - boxes[:,:2]) / default_boxes[:,2:] # [21824,2] why do zero widths appear??
        wh = torch.log(wh) / variances[1] # Variable

        inf_flag = wh.abs() > 10000
        if inf_flag.long().sum() != 0:
            print('inf_flag has true', wh, boxes)
            print('org_boxes', boxes_org)
            print('max_iou', max_iou, 'max_iou_index', max_iou_index)
            raise ValueError('inf error')

        loc = torch.cat([cxcy, wh], 1) # [21824,4]
        conf = classes[max_index] # all ones in practice, [21824,]
        conf[iou < threshold] = 0 # anchors with low IoU become background
        conf[max_iou_index] = 1 # problematic: this can produce inf loc loss and disturb training;
                                # removing it makes the loss decrease more stably. The cause is that
                                # some widerFace labels have zero width but are not filtered out,
                                # because max(1) must pick a default box for every object --
                                # the dataset labels need fixing.
        # ('targets', Variable containing:
        # 318.7500  -1.2500     -inf     -inf
        # org_boxes  0.1338  0.3801  0.1338  0.3801

        return loc,conf

    def nms(self,bboxes,scores,threshold=0.5):
        '''
        bboxes (tensor) [N,4]
        scores (tensor) [N,]
        '''
        x1 = bboxes[:,0]
        y1 = bboxes[:,1]
        x2 = bboxes[:,2]
        y2 = bboxes[:,3]
        areas = (x2-x1) * (y2-y1)

        _,order = scores.sort(0,descending=True)
        keep = []
        while order.numel() > 0:
            i = order[0]
            keep.append(i)

            if order.numel() == 1:
                break

            xx1 = x1[order[1:]].clamp(min=x1[i])
            yy1 = y1[order[1:]].clamp(min=y1[i])
            xx2 = x2[order[1:]].clamp(max=x2[i])
            yy2 = y2[order[1:]].clamp(max=y2[i])

            w = (xx2-xx1).clamp(min=0)
            h = (yy2-yy1).clamp(min=0)
            inter = w*h

            ovr = inter / (areas[i] + areas[order[1:]] - inter)
            ids = (ovr<=threshold).nonzero().squeeze()
            if ids.numel() == 0:
                break
            order = order[ids+1]
        return torch.LongTensor(keep)

    def decode(self,loc,conf):
        '''
        Convert predicted loc/conf back to face boxes in the image.
        loc [21824,4]
        conf [21824,2]
        '''
        variances = [0.1, 0.2]
        cxcy = loc[:,:2] * variances[0] * self.default_boxes[:,2:] + self.default_boxes[:,:2]
        wh = torch.exp(loc[:,2:] * variances[1]) * self.default_boxes[:,2:]
        boxes = torch.cat([cxcy-wh/2,cxcy+wh/2],1) # [21824,4]

        conf[:,0] = 0.4 # confidence floor: only anchors whose face score beats 0.4 survive the max

        max_conf, labels = conf.max(1) # [21824,1]
        if labels.long().sum() == 0:
            # no anchor beat the floor; fall back to the highest-scoring anchors
            sconf, slabel = conf.max(0)
            max_conf[slabel[0:5]] = sconf[0:5]
            labels[slabel[0:5]] = 1

        ids = labels.nonzero().squeeze(1)

        keep = self.nms(boxes[ids],max_conf[ids])

        return boxes[ids][keep], labels[ids][keep], max_conf[ids][keep]

if __name__ == '__main__':
    dataencoder = DataEncoder()
    # dataencoder.test_iou()
    # dataencoder.test_encode(boxes, img, label) # needs boxes, an image and labels, e.g. from ListDataset.testGet
    # print(dataencoder.default_boxes)
    # boxes = torch.Tensor([[-8,-8,24,24],[400,400,500,500]])/1024
    # dataencoder.encode(boxes,torch.Tensor([1,1]))
--------------------------------------------------------------------------------
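A round-trip sanity check of the variance encoding in `encode`, inverting it by hand the same way `decode` does (a sketch; it skips NMS and the confidence floor):
```python
import torch
from encoderl import DataEncoder

encoder = DataEncoder()
boxes = torch.Tensor([[0.4, 0.4, 0.6, 0.6]])   # one face in normalized (x1,y1,x2,y2)
labels = torch.LongTensor([1])

loc, conf = encoder.encode(boxes, labels)      # per-anchor offsets, [21824,4]
i = conf.nonzero()[0][0]                       # pick any positive anchor

# invert the encoding for that anchor, exactly as decode does
variances = [0.1, 0.2]
d = encoder.default_boxes[i]                   # (cx, cy, w, h)
cxcy = loc[i, :2] * variances[0] * d[2:] + d[:2]
wh = torch.exp(loc[i, 2:] * variances[1]) * d[2:]
print(torch.cat([cxcy - wh / 2, cxcy + wh / 2]))  # ~ [0.4, 0.4, 0.6, 0.6]
```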