├── ckpt
│   └── faceboxes.pt
├── picture
│   ├── discROC_unpub.png
│   ├── img_416_result.jpg
│   └── img_463_result.jpg
├── .gitignore
├── createWiderFace.py
├── readme.md
├── multibox_layer.py
├── notes.md
├── trainvisdom.py
├── multibox_loss.py
├── networks.py
├── predict.py
├── dataset.py
└── encoderl.py

/ckpt/faceboxes.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxg2015/faceboxes/HEAD/ckpt/faceboxes.pt
--------------------------------------------------------------------------------

/picture/discROC_unpub.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxg2015/faceboxes/HEAD/picture/discROC_unpub.png
--------------------------------------------------------------------------------

/picture/img_416_result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxg2015/faceboxes/HEAD/picture/img_416_result.jpg
--------------------------------------------------------------------------------

/picture/img_463_result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxg2015/faceboxes/HEAD/picture/img_463_result.jpg
--------------------------------------------------------------------------------

/.gitignore:
--------------------------------------------------------------------------------
/*.jpg
weight/faceboxes.pt
weight/faceboxes200.pt
weight/faceboxes_300.pt
__pycache__/*
*.pyc
*.sh

--------------------------------------------------------------------------------

/createWiderFace.py:
--------------------------------------------------------------------------------
path = '/home/lxg/codedata/widerFace/wider_face_split/'

with open(path+'wider_face_train_bbx_gt.txt') as f:
    lines = f.readlines()
    nums = len(lines)

output = open(path+'box_label.txt', 'w')

for i in range(nums):
    # if i == 10:
    #     break

    line = lines[i]
    if 'jpg' not in line:
        continue

    im_name = line.strip()
    face_num = int(lines[i+1].strip())
    im_name = im_name + ' ' + str(face_num) + ' '

    # append "x y w h 1" for every face on this image
    for j in range(face_num):
        line = lines[i+2+j]
        splited = line.strip().split()
        im_name = im_name + splited[0] + ' '
        im_name = im_name + splited[1] + ' '
        im_name = im_name + splited[2] + ' '
        im_name = im_name + splited[3] + ' '
        im_name = im_name + '1' + ' '

    # print(i)
    output.write('widerFace/WIDER_train/images/' + im_name+'\n')

output.close()

# create aflw data
# with open('label.txt') as f:
#     lines = f.readlines()
#     output = open('label_path.txt', 'w')
#     for line in lines:
#         output.writelines('data/all/' + line)

--------------------------------------------------------------------------------

/readme.md:
--------------------------------------------------------------------------------
# Faceboxes
[FaceBoxes: A CPU Real-time Face Detector with High Accuracy](https://arxiv.org/abs/1708.05234)

Faceboxes is an SSD-style object detector designed for fast face detection, with a lightweight yet powerful network structure.

## update

1. Better network: the convolution modules are now conv-bn-relu blocks, not plain conv layers.

2. Doubled batch size; training uses about 7 GB of GPU memory.

3. Added a `use_gpu` flag and a `detect_gpu` function in predict.

Accuracy is better than before!

Speed still falls short of the official implementation: about 60 FPS on a 1080 Ti and much slower on the CPU; the decoder is a likely bottleneck.

## usage
- visdom
- pytorch 0.2
- torchvision

Our data annotation:
```
data/all/image01468.jpg 1 119 185 139 139 1
data/all/image01449.jpg 2 9 39 74 74 1 409 93 77 77 1
```
format:
```
path/image_name.jpg num_face x y w h 1 x y w h 1
```
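A minimal sketch of reading one such line back into corner boxes (`parse_line` is a hypothetical helper, mirroring what `dataset.py` does with each annotation line):
```python
# parse one annotation line: path, num_face, then 5 numbers (x y w h 1) per face
def parse_line(line):
    parts = line.strip().split()
    path, num_face = parts[0], int(parts[1])
    boxes = []
    for i in range(num_face):
        x, y, w, h = map(float, parts[2 + 5 * i:6 + 5 * i])
        boxes.append([x, y, x + w, y + h])  # convert (x, y, w, h) to (x1, y1, x2, y2)
    return path, boxes

path, boxes = parse_line('data/all/image01449.jpg 2 9 39 74 74 1 409 93 77 77 1')
print(path, boxes)  # two faces as corner boxes
```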
## Result
![face1](picture/img_416_result.jpg)
![face2](picture/img_463_result.jpg)

## Fddb
The results fall somewhat short of the original paper; the gap most likely comes from the data augmentation.

Tip: the second curve, DDFD (Multi-view Face Detection Using Deep Convolutional Neural Networks), is included only as a reference.

![fddb](picture/discROC_unpub.png)
--------------------------------------------------------------------------------

/multibox_layer.py:
--------------------------------------------------------------------------------
#encoding:utf-8
import math

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F

from torch.autograd import Variable

class MultiBoxLayer(nn.Module):
    num_classes = 2
    num_anchors = [21,1,1]
    in_planes = [128,256,256]

    def __init__(self):
        super(MultiBoxLayer,self).__init__()

        self.loc_layers = nn.ModuleList()
        self.conf_layers = nn.ModuleList()
        for i in range(len(self.in_planes)):
            self.loc_layers.append(nn.Conv2d(self.in_planes[i],self.num_anchors[i]*4, kernel_size=3,padding=1))
            self.conf_layers.append(nn.Conv2d(self.in_planes[i],self.num_anchors[i]*2,kernel_size=3,padding=1))

    def forward(self,xs):
        '''
        xs: list of feature maps from the preceding layers
        return: loc_preds: [N,21824,4]
                conf_preds: [N,21824,2]
        '''
        y_locs=[]
        y_confs = []
        for i,x in enumerate(xs):
            y_loc = self.loc_layers[i](x) # [N, anchors*4, H, W]
            N = y_loc.size(0)
            y_loc = y_loc.permute(0,2,3,1).contiguous()
            y_loc = y_loc.view(N,-1,4)
            y_locs.append(y_loc)

            y_conf = self.conf_layers[i](x)
            y_conf = y_conf.permute(0,2,3,1).contiguous()
            y_conf = y_conf.view(N,-1,2)
            y_confs.append(y_conf)

        loc_preds = torch.cat(y_locs,1)
        conf_preds = torch.cat(y_confs,1)
        return loc_preds,conf_preds
--------------------------------------------------------------------------------
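A quick shape sanity check for this layer (a sketch; the fake feature maps use the channel counts and resolutions the backbone in networks.py produces for a 1024x1024 input). It also shows where the 21824 anchor count comes from: 32*32*21 + 16*16*1 + 8*8*1 = 21824.
```python
import torch
from torch.autograd import Variable
from multibox_layer import MultiBoxLayer

layer = MultiBoxLayer()
# fake feature maps matching in_planes=[128,256,256] at 32x32, 16x16, 8x8
xs = [Variable(torch.randn(1, 128, 32, 32)),
      Variable(torch.randn(1, 256, 16, 16)),
      Variable(torch.randn(1, 256, 8, 8))]
loc, conf = layer(xs)
print(loc.size(), conf.size())  # (1, 21824, 4) (1, 21824, 2)
```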
/notes.md:
--------------------------------------------------------------------------------
## aflw

Epoch [20/20], Iter [1055/1057] Loss: 1.8820

loc_loss:0.984644 conf_loss:1.205544, pos_num:78
loc_loss:0.698231 conf_loss:1.439163, pos_num:163
loc_loss:0.644837 conf_loss:1.328891, pos_num:143
loc_loss:0.420935 conf_loss:1.332006, pos_num:149
loc_loss:0.246193 conf_loss:0.883853, pos_num:242
Epoch [20/20], Iter [1055/1057] Loss: 1.1300

## wider face, aflw

loc_loss:1.417856 conf_loss:2.061183, pos_num:152
loc_loss:0.655475 conf_loss:1.698712, pos_num:65
loc_loss:1.235528 conf_loss:2.012040, pos_num:144
loc_loss:0.798853 conf_loss:1.713999, pos_num:109
loc_loss:0.823239 conf_loss:1.953249, pos_num:318
Epoch [50/50], Iter [1700/1701] Loss: 2.7765, average_loss: 2.8849

loc_loss:0.802718 conf_loss:1.943955, pos_num:284
loc_loss:0.867129 conf_loss:1.820582, pos_num:420
loc_loss:0.885825 conf_loss:1.830107, pos_num:358
loc_loss:0.811850 conf_loss:1.881572, pos_num:501
loc_loss:0.975667 conf_loss:1.921641, pos_num:540
Epoch [50/50], Iter [680/681] Loss: 2.8973, average_loss: 2.5820

46 2.27359845486
47 2.27207741518
48 2.26043195595
49 2.26634732234

loc_loss:2.556887 conf_loss:2.489368, pos_num:68
loc_loss:2.234761 conf_loss:2.448641, pos_num:111
loc_loss:2.569000 conf_loss:2.495923, pos_num:105
loc_loss:2.542970 conf_loss:2.470961, pos_num:74
loc_loss:2.530408 conf_loss:2.485945, pos_num:79
Epoch [1/50], Iter [1045/1677] Loss: 5.0164, average_loss: 5.0478
loc_loss:2.033609 conf_loss:2.263520, pos_num:173
loc_loss:2.454619 conf_loss:2.263516, pos_num:109
loc_loss:2.247968 conf_loss:2.262204, pos_num:257
loc_loss:2.366087 conf_loss:2.263796, pos_num:50
loc_loss:2.315195 conf_loss:2.261710, pos_num:192
Epoch [10/50], Iter [735/1677] Loss: 4.5769, average_loss: 4.5744

# adjust
- Fix 1
  In the reimplementation, the 2x densification offset is 1/4 of the base scale, i.e. 1/8 of the 2x-scale anchor; it had been written as 1/4 and is now corrected.
- Fix 2
  Every box label is matched to the default box with the highest IoU, regardless of the IoU threshold. This produced inf loc loss, because some targets had zero width or height — i.e. random_crop in the dataset code was buggy. After requiring box labels to be at least 10 pixels wide and high, the problem no longer appears (see the sketch at the end of these notes).
- Fix 3
  Use Adam.

I don't understand why the loss suddenly exploded:
```s
loc_loss:115.657501 conf_loss:39.798553, pos_num:2528

Epoch [300/300], Iter [400/403] Loss: 3.4930, average_loss: 3.5764
loc_loss:1.732548 conf_loss:1.807370, pos_num:1120
loc_loss:1.832072 conf_loss:2.002608, pos_num:1711
loc_loss:1.265184 conf_loss:1.525407, pos_num:624
```
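The inf loc loss from Fix 2 can also be caught before training by scanning the annotation file — a minimal sketch (the `label/box_label.txt` path is the one trainvisdom.py uses):
```python
# flag annotation boxes narrower or shorter than 10 px (the limit from Fix 2)
with open('label/box_label.txt') as f:
    for line in f:
        parts = line.strip().split()
        for i in range(int(parts[1])):
            w, h = float(parts[4 + 5 * i]), float(parts[5 + 5 * i])
            if w < 10 or h < 10:
                print('degenerate box in', parts[0], w, h)
```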
--------------------------------------------------------------------------------

/trainvisdom.py:
--------------------------------------------------------------------------------
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import torch
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision import models
from torch.autograd import Variable

from networks import FaceBox
from multibox_loss import MultiBoxLoss
from dataset import ListDataset

import visdom
import numpy as np

use_gpu = torch.cuda.is_available()
file_root = '/home/lxg/codedata/'

learning_rate = 0.001
num_epochs = 300
batch_size = 64

net = FaceBox()
if use_gpu:
    net.cuda()

print('load model...')
# net.load_state_dict(torch.load('weight/faceboxes.pt'))

criterion = MultiBoxLoss()

# optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9, weight_decay=0.0003)
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate, weight_decay=1e-4)

train_dataset = ListDataset(root=file_root,list_file='label/box_label.txt',train=True,transform = [transforms.ToTensor()] )
train_loader = DataLoader(train_dataset,batch_size=batch_size,shuffle=True,num_workers=5)
print('the dataset has %d images' % (len(train_dataset)))
print('the batch_size is %d' % (batch_size))

num_iter = 0
vis = visdom.Visdom()
win = vis.line(Y=np.array([0]), X=np.array([0]))

net.train()
for epoch in range(num_epochs):
    # step the learning rate down by 10x at epochs 190 and 250
    if epoch == 190 or epoch == 250:
        learning_rate *= 0.1
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate

    print('\n\nStarting epoch %d / %d' % (epoch + 1, num_epochs))
    print('Learning Rate for this epoch: {}'.format(learning_rate))

    total_loss = 0.

    for i,(images,loc_targets,conf_targets) in enumerate(train_loader):
        images = Variable(images)
        loc_targets = Variable(loc_targets)
        conf_targets = Variable(conf_targets)
        if use_gpu:
            images,loc_targets,conf_targets = images.cuda(),loc_targets.cuda(),conf_targets.cuda()

        loc_preds, conf_preds = net(images)
        loss = criterion(loc_preds,loc_targets,conf_preds,conf_targets)
        total_loss += loss.data[0]

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i+1) % 5 == 0:
            print ('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f, average_loss: %.4f'
                %(epoch+1, num_epochs, i+1, len(train_loader), loss.data[0], total_loss / (i+1)))
            num_iter = num_iter + 1
            vis.line(Y=np.array([total_loss / (i+1)]), X=np.array([num_iter]),
                    win=win,
                    update='append')

    if not os.path.exists('weight/'):
        os.mkdir('weight')
    print('saving model ...')
    torch.save(net.state_dict(),'weight/faceboxes.pt')

--------------------------------------------------------------------------------
/multibox_loss.py:
--------------------------------------------------------------------------------
#encoding:utf-8
import math

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F

from torch.autograd import Variable

class MultiBoxLoss(nn.Module):
    num_classes = 2
    def __init__(self):
        super(MultiBoxLoss,self).__init__()

    def cross_entropy_loss(self, x, y):
        x = x.detach()
        y = y.detach()
        xmax = x.data.max()
        log_sum_exp = torch.log(torch.sum(torch.exp(x-xmax), 1, keepdim=True)) + xmax
        return log_sum_exp - x.gather(1, y.view(-1,1))

    def hard_negative_mining(self,conf_loss,pos):
        '''
        conf_loss [N*21824,]
        pos [N,21824]
        return negative indices
        '''
        batch_size,num_boxes = pos.size()
        conf_loss[pos.view(-1,1)] = 0 # zero out positives; the rest is negative conf_loss
        conf_loss = conf_loss.view(batch_size,-1)

        # rank trick: sort the losses, then sort the indices to get each anchor's rank
        _,idx = conf_loss.sort(1,descending=True)
        _,rank = idx.sort(1)

        num_pos = pos.long().sum(1,keepdim=True)
        num_neg = torch.clamp(3*num_pos, max=num_boxes-1) # keep a 3:1 negative:positive ratio

        neg = rank < num_neg.expand_as(rank)
        return neg

    def forward(self,loc_preds,loc_targets,conf_preds,conf_targets):
        '''
        loc_preds[batch,21824,4]
        loc_targets[batch,21824,4]
        conf_preds[batch,21824,2]
        conf_targets[batch,21824]
        '''
        batch_size,num_boxes, _ = loc_preds.size()
        pos = conf_targets>0 # entries > 0 are anchors matched to a face box
        num_pos = pos.long().sum(1, keepdim=True)
        num_matched_boxes = pos.data.long().sum()
        if num_matched_boxes == 0:
            return Variable(torch.Tensor([0]),requires_grad=True)

        pos_mask1 = pos.unsqueeze(2).expand_as(loc_preds)
        pos_loc_preds = loc_preds[pos_mask1].view(-1,4)
        pos_loc_targets = loc_targets[pos_mask1].view(-1,4)

        loc_loss = F.smooth_l1_loss(pos_loc_preds,pos_loc_targets,size_average=False)
        # if loc_loss.data[0] > 10000: # extreme preds make the loss huge, so this is expected
        #     print('preds', pos_loc_preds)
        #     print('targets', pos_loc_targets)

        conf_loss = self.cross_entropy_loss(conf_preds.view(-1,self.num_classes),
                                            conf_targets.view(-1,1))
        neg = self.hard_negative_mining(conf_loss, pos)
        pos_mask = pos.unsqueeze(2).expand_as(conf_preds)
        neg_mask = neg.unsqueeze(2).expand_as(conf_preds)
        mask = (pos_mask+neg_mask).gt(0)

        pos_and_neg = (pos+neg).gt(0)
        preds = conf_preds[mask].view(-1,self.num_classes)
        targets = conf_targets[pos_and_neg]
        conf_loss = F.cross_entropy(preds,targets,size_average=False)

        N = num_pos.data.sum()
        loc_loss /= N
        conf_loss /= N
        print('loc_loss:%f conf_loss:%f, pos_num:%d' % (loc_loss.data[0], conf_loss.data[0], N))
        return loc_loss+conf_loss
--------------------------------------------------------------------------------
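The double `sort` in `hard_negative_mining` is the usual rank trick: the first sort orders anchors by loss, the second recovers each anchor's rank in that order, and anchors ranked below `3*num_pos` are kept as hard negatives. A toy run (a sketch, independent of the training code):
```python
import torch

conf_loss = torch.Tensor([[0.1, 0.9, 0.4, 0.7, 0.2]])  # per-anchor losses, batch of 1
num_pos = torch.LongTensor([[1]])                      # one positive anchor

_, idx = conf_loss.sort(1, descending=True)  # idx: anchors ordered by descending loss
_, rank = idx.sort(1)                        # rank: each anchor's position in that order
neg = rank < (3 * num_pos).expand_as(rank)   # keep the 3*num_pos hardest negatives
print(rank)  # [[4, 0, 2, 1, 3]]
print(neg)   # [[0, 1, 1, 1, 0]] -> anchors 1, 2, 3 carry the largest losses
```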
/networks.py:
--------------------------------------------------------------------------------
#encoding:utf-8
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function
from torch.autograd import Variable

from multibox_layer import MultiBoxLayer


def conv_bn_relu(in_channels,out_channels,kernel_size,stride=1,padding=0):
    return nn.Sequential(
        nn.Conv2d(in_channels,out_channels,kernel_size=kernel_size,padding=padding,stride=stride),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(True)
    )

class Inception(nn.Module):
    def __init__(self):
        super(Inception,self).__init__()
        self.conv1 = conv_bn_relu(128,32,1)
        self.conv2 = conv_bn_relu(128,32,1)
        self.conv3 = conv_bn_relu(128,24,1)
        self.conv4 = conv_bn_relu(24,32,3,padding=1)
        self.conv5 = conv_bn_relu(128,24,1)
        self.conv6 = conv_bn_relu(24,32,3,padding=1)
        self.conv7 = conv_bn_relu(32,32,3,padding=1)

    def forward(self,x):
        x1 = self.conv1(x)

        x2 = F.max_pool2d(x,kernel_size=3,stride=1,padding=1)
        x2 = self.conv2(x2)

        x3 = self.conv3(x)
        x3 = self.conv4(x3)

        x4 = self.conv5(x)
        x4 = self.conv6(x4)
        x4 = self.conv7(x4)

        output = torch.cat([x1,x2,x3,x4],1) # four branches of 32 channels each -> 128
        return output


class FaceBox(nn.Module):
    input_size = 1024

    def __init__(self):
        super(FaceBox, self).__init__()

        # model
        self.conv1 = nn.Conv2d(3,24,kernel_size=7,stride=4,padding=3)
        self.bn1 = nn.BatchNorm2d(24)
        self.conv2 = nn.Conv2d(48,64,kernel_size=5,stride=2,padding=2)
        self.bn2 = nn.BatchNorm2d(64)

        self.inception1 = Inception()
        self.inception2 = Inception()
        self.inception3 = Inception()

        self.conv3_1 = conv_bn_relu(128,128,1)
        self.conv3_2 = conv_bn_relu(128,256,3,2,1)
        self.conv4_1 = conv_bn_relu(256,128,1)
        self.conv4_2 = conv_bn_relu(128,256,3,2,1)

        self.multilbox = MultiBoxLayer()

    def forward(self,x):
        hs = []

        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(torch.cat([x, -x], 1)) # CReLU

        x = F.max_pool2d(x,kernel_size=3,stride=2,padding=1)
        x = self.conv2(x)
        x = self.bn2(x)
        x = F.relu(torch.cat([x, -x], 1)) # CReLU

        x = F.max_pool2d(x,kernel_size=3,stride=2,padding=1)
        x = self.inception1(x)
        x = self.inception2(x)
        x = self.inception3(x)

        hs.append(x)        # 32x32 feature map, 128 channels
        x = self.conv3_1(x)
        x = self.conv3_2(x)
        hs.append(x)        # 16x16 feature map, 256 channels
        x = self.conv4_1(x)
        x = self.conv4_2(x)
        hs.append(x)        # 8x8 feature map, 256 channels
        loc_preds, conf_preds = self.multilbox(hs)

        return loc_preds, conf_preds

if __name__ == '__main__':
    model = FaceBox()
    data = Variable(torch.randn(1,3,1024,1024))
    loc, conf = model(data)
    print('loc', loc.size())
    print('conf', conf.size())
--------------------------------------------------------------------------------
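The `F.relu(torch.cat([x, -x], 1))` lines implement CReLU from the paper: one convolution's output is reused with both signs, doubling the channels (24 in, 48 out into conv2) without a second convolution. A minimal sketch:
```python
import torch
import torch.nn.functional as F
from torch.autograd import Variable

x = Variable(torch.randn(1, 24, 256, 256))  # conv1 output: 24 channels at stride 4
y = F.relu(torch.cat([x, -x], 1))           # CReLU: concatenate x and -x, then ReLU
print(y.size())                             # (1, 48, 256, 256) -- matches conv2's in_channels
```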
/predict.py:
--------------------------------------------------------------------------------
from networks import FaceBox
from encoderl import DataEncoder

import torch
from torch.autograd import Variable
import torch.nn.functional as F
import cv2
from tqdm import tqdm
print('opencv version', cv2.__version__)

use_gpu = True

def detect(im):
    im = cv2.resize(im, (1024,1024))
    im_tensor = torch.from_numpy(im.transpose((2,0,1)))
    im_tensor = im_tensor.float().div(255)
    loc, conf = net(Variable(torch.unsqueeze(im_tensor, 0), volatile=True))
    boxes, labels, probs = data_encoder.decode(loc.data.squeeze(0),
                                               F.softmax(conf.squeeze(0)).data)
    return boxes, probs

def detect_gpu(im):
    im = cv2.resize(im, (1024,1024))
    im_tensor = torch.from_numpy(im.transpose((2,0,1)))
    im_tensor = im_tensor.float().div(255)
    loc, conf = net(Variable(torch.unsqueeze(im_tensor, 0), volatile=True).cuda())
    loc, conf = loc.cpu(), conf.cpu()
    boxes, labels, probs = data_encoder.decode(loc.data.squeeze(0),
                                               F.softmax(conf.squeeze(0)).data)
    return boxes, probs

def testVideo(file):
    cap = cv2.VideoCapture(file)
    if not cap.isOpened():
        print("cannot open video")

    _, im = cap.read()
    h,w,_ = im.shape

    while True:
        _,im = cap.read()
        boxes,_ = detect(im)

        print(boxes)
        for box in boxes:
            # boxes are normalized to [0,1]; scale back to pixel coordinates
            x1 = int(box[0]*w)
            x2 = int(box[2]*w)
            y1 = int(box[1]*h)
            y2 = int(box[3]*h)
            print(x1, y1, x2, y2, w, h)
            cv2.rectangle(im,(x1,y1),(x2,y2),(0,255,0),2)

        cv2.imshow("video", im)
        cv2.waitKey(2)

def testIm(file):
    im = cv2.imread(file)
    if im is None:
        print("cannot open image:", file)
        return
    h,w,_ = im.shape
    boxes, probs = detect(im)
    print(boxes)
    for i, (box) in enumerate(boxes):
        print('i', i, 'box', box)
        x1 = int(box[0]*w)
        x2 = int(box[2]*w)
        y1 = int(box[1]*h)
        y2 = int(box[3]*h)
        print(x1, y1, x2, y2, w, h)
        cv2.rectangle(im,(x1,y1+4),(x2,y2),(0,0,255),2)
        cv2.putText(im, str(round(probs[i],2)), (x1,y1), font, 0.4, (0,0,255))
    cv2.imwrite('photo.jpg', im)
    return im

def testImList(path, file_name):
    with open(path+file_name) as f:
        file_list = f.readlines()

    for item in file_list:
        testIm(path+item.strip()+'.jpg')

def saveFddbData(path, file_name):
    '''
    Args:
        file_name: fddb image list
    '''
    with open(path+file_name) as f:
        file_list = f.readlines()
    f_write = open('predict.txt', 'w')

    image_num = 0
    for item in tqdm(file_list):
        item = item.strip()
        if not ('/' in item):
            continue
        image_num += 1
        im = cv2.imread(path+item+'.jpg')
        if im is None:
            print('cannot open image', item)
            return
        h,w,_ = im.shape
        if use_gpu:
            boxes, probs = detect_gpu(im)
        else:
            boxes, probs = detect(im)
        f_write.write(item+'\n')
        f_write.write(str(boxes.size(0))+'\n')
        for i, (box) in enumerate(boxes):
            x1 = box[0]*w
            x2 = box[2]*w
            y1 = box[1]*h
            y2 = box[3]*h
            f_write.write(str(x1)+'\t'+str(y1)+'\t'+str(x2-x1)+'\t'+str(y2-y1)+'\t'+str(probs[i])+'\t'+'1\n')
    f_write.close()

def getFddbList(path, file_name):
    with open(path+file_name) as f:
        file_list = f.readlines()
    f_write = open(path+'fddblist.txt', 'w')
    for item in file_list:
        if '/' in item:
            f_write.write(item)
    f_write.close()
    print('get fddb list done')

if __name__ == '__main__':
    net = FaceBox()
    net.load_state_dict(torch.load('weight/faceboxes.pt', map_location=lambda storage, loc:storage))

    if use_gpu:
        net.cuda()
    net.eval()
    data_encoder = DataEncoder()

    font = cv2.FONT_HERSHEY_SCRIPT_SIMPLEX

    # given a video path, predict and show
    path = "/home/lxg/codedata/faceVideo/1208.mp4"
    # testVideo(path)

    # given an image path, predict and show
    root_path = "/home/lxg/codedata/widerFace/WIDER_train/images/0--Parade/"
    picture = '0_Parade_marchingband_1_495.jpg'
    # testIm(root_path + picture)

    # given an image path, predict and show
    fddb_path = "/home/lxg/codedata/fddb/2002/07/19/big/"
    picture = 'img_463.jpg'
    im = testIm(fddb_path + picture)
    # cv2.imwrite('picture/'+picture, im)

    # given an image file list, predict and show
    path = '/home/lxg/codedata/fddb/'
    file_name = 'FDDB-folds/FDDB-fold-01.txt'
    # testImList(path, file_name)

    # get fddb predictions and write them to predict.txt
    path = '/home/lxg/codedata/fddb/'
    file_name = 'fddb.txt'
    # saveFddbData(path, file_name)
    # getFddbList(path, file_name)
--------------------------------------------------------------------------------

/dataset.py:
--------------------------------------------------------------------------------
#encoding:utf-8
'''
Annotation txt format: image_name.jpg num x y w h 1 x y w h 1
(a line like this describes an image containing two faces)
'''
import os
import sys
import os.path

import random
import numpy as np

import torch
import torch.utils.data as data
import torchvision.transforms as transforms

import cv2

from encoderl import DataEncoder

class ListDataset(data.Dataset):
    image_size=1024

    def __init__(self,root,list_file,train,transform):
        print('data init')
        self.root=root
        self.train = train
        self.transform=transform
        self.fnames = []
        self.boxes = []
        self.labels = []
        self.small_threshold = 10./self.image_size # faces smaller than this threshold are ignored
        self.data_encoder = DataEncoder()

        with open(list_file) as f:
            lines = f.readlines()

        for line in lines:
            splited = line.strip().split()
            self.fnames.append(splited[0])
            num_faces = int(splited[1])
            box=[]
            label=[]
            for i in range(num_faces):
                x = float(splited[2+5*i])
                y = float(splited[3+5*i])
                w = float(splited[4+5*i])
                h = float(splited[5+5*i])
                c = int(splited[6+5*i])
                box.append([x,y,x+w,y+h])
                label.append(c)
            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.LongTensor(label))
        self.num_samples = len(self.boxes)

    def __getitem__(self,idx):
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root+fname))
        assert img is not None

        boxes = self.boxes[idx].clone()
        labels = self.labels[idx].clone()

        if self.train:
            img, boxes, labels = self.random_crop(img, boxes, labels)
            img = self.random_bright(img)
            img, boxes = self.random_flip(img, boxes)
            boxwh = boxes[:,2:] - boxes[:,:2]

        h,w,_ = img.shape
        img = cv2.resize(img,(self.image_size,self.image_size))

        boxes /= torch.Tensor([w,h,w,h]).expand_as(boxes) # normalize boxes to [0,1]
        for t in self.transform:
            img = t(img)

        loc_target,conf_target = self.data_encoder.encode(boxes,labels)

        return img,loc_target,conf_target

    def random_getim(self):
        idx = random.randrange(0,self.num_samples)
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root+fname))
        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]

        return img, boxes, labels

    def __len__(self):
        return self.num_samples

    def random_flip(self, im, boxes):
        if random.random() < 0.5:
            im_lr = np.fliplr(im).copy()
            h,w,_ = im.shape
            xmin = w - boxes[:,2]
            xmax = w - boxes[:,0]
            boxes[:,0] = xmin
            boxes[:,2] = xmax
            return im_lr, boxes
        return im, boxes

    def random_crop(self, im, boxes, labels):
        imh, imw, _ = im.shape
        short_size = min(imw, imh)
        while True:
            mode = random.choice([None, 0.3, 0.5, 0.7, 0.9])
            if mode is None:
                boxes_uniform = boxes / torch.Tensor([imw,imh,imw,imh]).expand_as(boxes)
                boxwh = boxes_uniform[:,2:] - boxes_uniform[:,:2]
                mask = (boxwh[:,0] > self.small_threshold) & (boxwh[:,1] > self.small_threshold)
                if not mask.any():
                    print('default image has no box bigger than small_threshold')
                    im, boxes, labels = self.random_getim()
                    imh, imw, _ = im.shape
                    short_size = min(imw,imh)
                    continue
                selected_boxes = boxes.index_select(0, mask.nonzero().squeeze(1))
                selected_labels = labels.index_select(0, mask.nonzero().squeeze(1))
                return im, selected_boxes, selected_labels

            for _ in range(10):
                w = random.randrange(int(0.3*short_size), short_size)
                h = w

                x = random.randrange(imw - w)
                y = random.randrange(imh - h)
                roi = torch.Tensor([[x, y, x+w, y+h]])

                center = (boxes[:,:2] + boxes[:,2:]) / 2
                roi2 = roi.expand(len(center), 4)
                # keep only boxes whose center falls inside the crop
                mask = (center > roi2[:,:2]) & (center < roi2[:,2:])
                mask = mask[:,0] & mask[:,1]
                if not mask.any():
                    continue

                selected_boxes = boxes.index_select(0, mask.nonzero().squeeze(1))
                img = im[y:y+h,x:x+w,:]
                selected_boxes[:,0].add_(-x).clamp_(min=0, max=w)
                selected_boxes[:,1].add_(-y).clamp_(min=0, max=h)
                selected_boxes[:,2].add_(-x).clamp_(min=0, max=w)
                selected_boxes[:,3].add_(-y).clamp_(min=0, max=h)

                boxes_uniform = selected_boxes / torch.Tensor([w,h,w,h]).expand_as(selected_boxes)
                boxwh = boxes_uniform[:,2:] - boxes_uniform[:,:2]
                mask = (boxwh[:,0] > self.small_threshold) & (boxwh[:,1] > self.small_threshold)
                if not mask.any():
                    print('cropped image has no box bigger than small_threshold')
                    im, boxes, labels = self.random_getim()
                    imh, imw, _ = im.shape
                    short_size = min(imw,imh)
                    continue
                selected_boxes_selected = selected_boxes.index_select(0, mask.nonzero().squeeze(1))
                selected_labels = labels.index_select(0, mask.nonzero().squeeze(1))
                return img, selected_boxes_selected, selected_labels

    def random_bright(self, im, delta=16):
        alpha = random.random()
        if alpha > 0.3:
            im = im * alpha + random.randrange(-delta,delta)
            im = im.clip(min=0,max=255).astype(np.uint8)
        return im

    def testGet(self, idx):
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root,fname))
        cv2.imwrite('test_encoder_source.jpg', img)
        boxes = self.boxes[idx].clone()
        labels = self.labels[idx].clone()

        for box in boxes:
            cv2.rectangle(img, (int(box[0]),int(box[1])), (int(box[2]),int(box[3])), (0,0,255))
        cv2.imwrite(fname, img)

        if self.train:
            img, boxes, labels = self.random_crop(img, boxes, labels)
            img = self.random_bright(img)
            img, boxes = self.random_flip(img, boxes)

        h,w,_ = img.shape
        boxes /= torch.Tensor([w,h,w,h]).expand_as(boxes)

        img = cv2.resize(img,(self.image_size,self.image_size))
        for t in self.transform:
            img = t(img)

        print(idx, fname, boxes)

        return img, boxes, labels

if __name__ == '__main__':
    file_root = '/home/lxg/codedata/aflw/'
    train_dataset = ListDataset(root=file_root,list_file='box_label.txt',train=True,transform = [transforms.ToTensor()] )
    print('the dataset has %d image' % (len(train_dataset)))
    for i in range(len(train_dataset)):
        print(i)
        item = random.randrange(0, len(train_dataset))
        img, boxes, labels = train_dataset.testGet(item)
        img = img.numpy().transpose(1,2,0).copy()*255
        train_dataset.data_encoder.test_encode(boxes, img, labels)

        boxes = boxes.numpy().tolist()
        w,h,_ = img.shape
        for box in boxes:
            x1 = int(box[0]*w)
            y1 = int(box[1]*h)
            x2 = int(box[2]*w)
            y2 = int(box[3]*h)
            cv2.rectangle(img, (x1,y1), (x2,y2), (0,0,255))
            boxw = x2-x1
            boxh = y2-y1
            print(boxw,boxh, box)
            if boxw == 0 or boxh == 0:
                raise ValueError('zero width box')

        cv2.imwrite('test'+str(i)+'.jpg', img)
        if i == 0:
            break
--------------------------------------------------------------------------------

/encoderl.py:
--------------------------------------------------------------------------------
#encoding:utf-8

import torch
import math
import itertools
import cv2
import numpy as np

class DataEncoder:
    def __init__(self):
        '''
        compute default boxes
        '''
        scale = 1024.
        steps = [s / scale for s in (32, 64, 128)]
        sizes = [s / scale for s in (32, 256, 512)] # with 64 instead of 32, more positive anchors match the labels
        aspect_ratios = ((1,2,4), (1,), (1,))
        feature_map_sizes = (32, 16, 8)

        density = [[-3,-1,1,3],[-1,1],[0]] # density for output layer1
        # density = [[0],[0],[0]] # density for output layer1

        num_layers = len(feature_map_sizes)
        boxes = []
        for i in range(num_layers):
            fmsize = feature_map_sizes[i]
            for h,w in itertools.product(range(fmsize), repeat=2):
                cx = (w + 0.5)*steps[i]
                cy = (h + 0.5)*steps[i]

                s = sizes[i]
                for j,ar in enumerate(aspect_ratios[i]):
                    if i == 0:
                        # anchor densification on the first output layer
                        for dx,dy in itertools.product(density[j], repeat=2):
                            boxes.append((cx+dx/8.*s*ar, cy+dy/8.*s*ar, s*ar, s*ar))
                    else:
                        boxes.append((cx, cy, s*ar, s*ar))

        self.default_boxes = torch.Tensor(boxes)

    def test_iou(self):
        box1 = torch.Tensor([0,0,10,10])
        box1 = box1[None,:]
        box2 = torch.Tensor([[5,0,15,10],[5,0,15,10]])
        print('iou', self.iou(box1, box2))

    def iou(self, box1, box2):
        '''Compute the intersection over union of two sets of boxes, each box is [x1,y1,x2,y2].

        Args:
            box1: (tensor) bounding boxes, sized [N,4].
            box2: (tensor) bounding boxes, sized [M,4].

        Return:
            (tensor) iou, sized [N,M].
        '''
        N = box1.size(0)
        M = box2.size(0)

        lt = torch.max( # left top
            box1[:,:2].unsqueeze(1).expand(N,M,2), # [N,2] -> [N,1,2] -> [N,M,2]
            box2[:,:2].unsqueeze(0).expand(N,M,2), # [M,2] -> [1,M,2] -> [N,M,2]
        )

        rb = torch.min( # right bottom
            box1[:,2:].unsqueeze(1).expand(N,M,2), # [N,2] -> [N,1,2] -> [N,M,2]
            box2[:,2:].unsqueeze(0).expand(N,M,2), # [M,2] -> [1,M,2] -> [N,M,2]
        )

        wh = rb - lt # [N,M,2]
        wh[wh<0] = 0 # clip at 0
        inter = wh[:,:,0] * wh[:,:,1] # [N,M]

        area1 = (box1[:,2]-box1[:,0]) * (box1[:,3]-box1[:,1]) # [N,]
        area2 = (box2[:,2]-box2[:,0]) * (box2[:,3]-box2[:,1]) # [M,]
        area1 = area1.unsqueeze(1).expand_as(inter) # [N,] -> [N,1] -> [N,M]
        area2 = area2.unsqueeze(0).expand_as(inter) # [M,] -> [1,M] -> [N,M]

        iou = inter / (area1 + area2 - inter)
        return iou

    def test_encode(self, boxes, img, label):
        loc, conf = self.encode(boxes, label)
        print('conf', type(conf), conf.size(), conf.long().sum())
        print('loc', loc)
        w,h,_ = img.shape
        for box in boxes:
            cv2.rectangle(img, (int(box[0]*w),int(box[1]*w)), (int(box[2]*w), int(box[3]*w)), (0,255,0))

        print(type(conf))
        for i in range(len(self.default_boxes)):
            if conf[i] != 0:
                print(i)

        im = img.copy()
        # layer 1: 32*32*21 densified anchors
        for i in range(32*32*21):
            box_item = self.default_boxes[i]*w
            centerx, centery = int(box_item[0]), int(box_item[1])
            if conf[i] != 0:
                cv2.circle(im, (centerx, centery), 4, (0,255,0))
            else:
                cv2.circle(im, (centerx, centery), 1, (0,0,255))
        box = self.default_boxes[0]
        cv2.rectangle(im, (0,0), (int(box[2]*w), int(box[3]*w)), (0,255,0))
        box = self.default_boxes[16]
        cv2.rectangle(im, (0,0), (int(box[2]*w), int(box[3]*w)), (0,255,0))
        box = self.default_boxes[20]
        cv2.rectangle(im, (0,0), (int(box[2]*w), int(box[3]*w)), (0,255,0))
        cv2.imwrite('test_encoder_0.jpg', im)

        im = img.copy()
        # layer 2: 16*16 anchors
        for i in range(32*32*21, 32*32*21+16*16):
            box_item = self.default_boxes[i]*w
            centerx, centery = int(box_item[0]), int(box_item[1])
            if conf[i] != 0:
                cv2.circle(im, (centerx, centery), 4, (0,255,0))
            else:
                cv2.circle(im, (centerx, centery), 2, (0,0,255))
        box = self.default_boxes[32*32*21]
        cv2.rectangle(im, (0,0), (int(box[2]*w), int(box[3]*w)), (0,255,0))
        cv2.imwrite('test_encoder_1.jpg', im)

        im = img.copy()
        # layer 3: 8*8 anchors
        for i in range(32*32*21+16*16, len(self.default_boxes)):
            box_item = self.default_boxes[i]*w
            centerx, centery = int(box_item[0]), int(box_item[1])
            if conf[i] != 0:
                cv2.circle(im, (centerx, centery), 4, (0,255,0))
            else:
                cv2.circle(im, (centerx, centery), 2, (0,0,255))
        box = self.default_boxes[32*32*21+16*16]
        cv2.rectangle(im, (0,0), (int(box[2]*w), int(box[3]*w)), (0,255,0))
        cv2.imwrite('test_encoder_2.jpg', im)

    def encode(self,boxes,classes,threshold=0.35):
        '''
        boxes: (tensor) ground-truth boxes [num_obj,4], as (x1,y1,x2,y2)
        classes: (tensor) class labels [num_obj,]
        default boxes are stored as (cx,cy,w,h)
        return: loc [21824,4], conf [21824,]
        '''
        boxes_org = boxes

        default_boxes = self.default_boxes # [21824,4]
        num_default_boxes = default_boxes.size(0)
        num_obj = boxes.size(0) # number of faces

        iou = self.iou(
            boxes,
            torch.cat([default_boxes[:,:2] - default_boxes[:,2:]/2,
                       default_boxes[:,:2] + default_boxes[:,2:]/2], 1))
        # for every ground-truth box, record the default box with the highest IoU,
        # no matter how small that IoU is
        max_iou, max_iou_index = iou.max(1)
        # assign each default box to the ground-truth box it overlaps most
        iou, max_index = iou.max(0)

        max_index.squeeze_(0) # torch.LongTensor [21824]
        iou.squeeze_(0)

        max_index[max_iou_index] = torch.LongTensor(range(num_obj))

        boxes = boxes[max_index] # [21824,4] ground-truth box assigned to each anchor
        variances = [0.1, 0.2]
        cxcy = (boxes[:,:2] + boxes[:,2:])/2 - default_boxes[:,:2] # [21824,2]
        cxcy /= variances[0] * default_boxes[:,2:]
        wh = (boxes[:,2:] - boxes[:,:2]) / default_boxes[:,2:] # [21824,2] why do zero widths appear??
        wh = torch.log(wh) / variances[1] # Variable

        inf_flag = wh.abs() > 10000
        if inf_flag.long().sum() != 0:
            print('inf_flag has true', wh, boxes)
            print('org_boxes', boxes_org)
            print('max_iou', max_iou, 'max_iou_index', max_iou_index)
            raise ValueError('inf error')

        loc = torch.cat([cxcy, wh], 1) # [21824,4]
        conf = classes[max_index] # all ones in practice, [21824,]
        conf[iou < threshold] = 0 # anchors with low IoU become background
        conf[max_iou_index] = 1 # problematic: this can produce inf loc loss and disturb training;
                                # removing it makes the loss decrease more stably. The cause is that
                                # some widerFace labels have zero width but are not filtered out,
                                # because max(1) must pick a default box for every object --
                                # the dataset labels need fixing.
        # ('targets', Variable containing:
        # 318.7500  -1.2500     -inf     -inf
        # org_boxes  0.1338  0.3801  0.1338  0.3801

        return loc,conf

    def nms(self,bboxes,scores,threshold=0.5):
        '''
        bboxes (tensor) [N,4]
        scores (tensor) [N,]
        '''
        x1 = bboxes[:,0]
        y1 = bboxes[:,1]
        x2 = bboxes[:,2]
        y2 = bboxes[:,3]
        areas = (x2-x1) * (y2-y1)

        _,order = scores.sort(0,descending=True)
        keep = []
        while order.numel() > 0:
            i = order[0]
            keep.append(i)

            if order.numel() == 1:
                break

            xx1 = x1[order[1:]].clamp(min=x1[i])
            yy1 = y1[order[1:]].clamp(min=y1[i])
            xx2 = x2[order[1:]].clamp(max=x2[i])
            yy2 = y2[order[1:]].clamp(max=y2[i])

            w = (xx2-xx1).clamp(min=0)
            h = (yy2-yy1).clamp(min=0)
            inter = w*h

            ovr = inter / (areas[i] + areas[order[1:]] - inter)
            ids = (ovr<=threshold).nonzero().squeeze()
            if ids.numel() == 0:
                break
            order = order[ids+1]
        return torch.LongTensor(keep)

    def decode(self,loc,conf):
        '''
        Convert predicted loc/conf back to face boxes in the image.
        loc [21824,4]
        conf [21824,2]
        '''
        variances = [0.1, 0.2]
        cxcy = loc[:,:2] * variances[0] * self.default_boxes[:,2:] + self.default_boxes[:,:2]
        wh = torch.exp(loc[:,2:] * variances[1]) * self.default_boxes[:,2:]
        boxes = torch.cat([cxcy-wh/2,cxcy+wh/2],1) # [21824,4]

        conf[:,0] = 0.4 # confidence floor: only anchors whose face score beats 0.4 survive the max

        max_conf, labels = conf.max(1) # [21824,1]
        if labels.long().sum() == 0:
            # no anchor beat the floor; fall back to the highest-scoring anchors
            sconf, slabel = conf.max(0)
            max_conf[slabel[0:5]] = sconf[0:5]
            labels[slabel[0:5]] = 1

        ids = labels.nonzero().squeeze(1)

        keep = self.nms(boxes[ids],max_conf[ids])

        return boxes[ids][keep], labels[ids][keep], max_conf[ids][keep]

if __name__ == '__main__':
    dataencoder = DataEncoder()
    # dataencoder.test_iou()
    # dataencoder.test_encode(boxes, img, label) # needs boxes, an image and labels, e.g. from ListDataset.testGet
    # print(dataencoder.default_boxes)
    # boxes = torch.Tensor([[-8,-8,24,24],[400,400,500,500]])/1024
    # dataencoder.encode(boxes,torch.Tensor([1,1]))
--------------------------------------------------------------------------------
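A round-trip sanity check of the variance encoding in `encode`, inverting it by hand the same way `decode` does (a sketch; it skips NMS and the confidence floor):
```python
import torch
from encoderl import DataEncoder

encoder = DataEncoder()
boxes = torch.Tensor([[0.4, 0.4, 0.6, 0.6]])   # one face in normalized (x1,y1,x2,y2)
labels = torch.LongTensor([1])

loc, conf = encoder.encode(boxes, labels)      # per-anchor offsets, [21824,4]
i = conf.nonzero()[0][0]                       # pick any positive anchor

# invert the encoding for that anchor, exactly as decode does
variances = [0.1, 0.2]
d = encoder.default_boxes[i]                   # (cx, cy, w, h)
cxcy = loc[i, :2] * variances[0] * d[2:] + d[:2]
wh = torch.exp(loc[i, 2:] * variances[1]) * d[2:]
print(torch.cat([cxcy - wh / 2, cxcy + wh / 2]))  # ~ [0.4, 0.4, 0.6, 0.6]
```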