├── dataset
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── fddb_crawler.pyc
│   └── fddb_crawler.py
├── data
│   ├── img_1.jpg
│   └── results
│       └── img_1_result.jpg
├── LICENSE
├── example
│   └── demo_sliding_windows.py
├── README.md
├── data.py
├── train_calibration_net.py
├── train_detection_net.py
├── model.py
└── detection.py
--------------------------------------------------------------------------------
/dataset/__init__.py:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/data/img_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liumusicforever/CNN_Face_Detection/HEAD/data/img_1.jpg
--------------------------------------------------------------------------------
/dataset/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liumusicforever/CNN_Face_Detection/HEAD/dataset/__init__.pyc
--------------------------------------------------------------------------------
/dataset/fddb_crawler.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liumusicforever/CNN_Face_Detection/HEAD/dataset/fddb_crawler.pyc
--------------------------------------------------------------------------------
/data/results/img_1_result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liumusicforever/CNN_Face_Detection/HEAD/data/results/img_1_result.jpg
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 DennisLiu

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/example/demo_sliding_windows.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import cv2

def sliding_window(image, stepSize, windowSize):
    # slide a window across the image
    for y in range(0, image.shape[0], stepSize):
        for x in range(0, image.shape[1], stepSize):
            # yield the current window
            yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]])


img_path = '/home/share/data/FDDB/2002/07/19/big/img_130.jpg'
img = cv2.imread(img_path)


pyramid_t = 3
win_size = (48,48)
win_stride = 10


# generate a Gaussian pyramid for img
imgPyramids = [img.copy()]
for i in range(1, pyramid_t):
    imgPyramids.append(cv2.pyrDown(imgPyramids[i - 1]))
for i in range(pyramid_t):
    image = imgPyramids[i]
    for (x, y, window) in sliding_window(image, stepSize=win_stride, windowSize=win_size):
        # if the window does not meet our desired window size, ignore it
        if window.shape[0] != win_size[0] or window.shape[1] != win_size[1]:
            continue

        clone = image.copy()
        cv2.rectangle(clone, (x, y), (x + win_size[0], y + win_size[1]), (255, 0, 0), 2)
        face = image[y : y+win_size[1] , x : x+win_size[0]]
        plt.imshow(clone)
        plt.show(block = False)
        plt.pause(0.1)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# CNN_Face_Detection
Repository for "A Convolutional Neural Network Cascade for Face Detection", implemented with a Python interface.

## About
This repo implements the [paper](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Li_A_Convolutional_Neural_2015_CVPR_paper.pdf) in Python/TensorFlow, providing an interface to construct the cascade structure, including **detection networks**, **calibration networks**, **image pyramids**, and **non-maximum suppression**.

## Requirement
* TensorFlow : [TensorFlow installation guide](https://www.tensorflow.org/install/)
* OpenCV : [OpenCV installation guide](https://pypi.python.org/pypi/opencv-python)

## Useful Operations
* Import Detector
```python
# make sure detection.py is in the program folder
from detection import Detector
```
* Restore the pretrained model : [download_link](https://drive.google.com/file/d/170IlbvlBxrrtML_j3rmLFMDNIgX44Rgw/view?usp=sharing)

```python
# give the paths of both models to Detector, and it will load them into
# memory (or GPU memory)
det_mod_path = 'models/det_net_.ckpt'
cal_mod_path = 'models/cal_net_.ckpt'
detector = Detector(det_mod_path,cal_mod_path)
```

* Processing image pyramids
```python
# bboxes holds all bounding boxes from the sliding windows; each entry
# includes the position and the face probability (default is -0.1)
# bboxes = [<xmin>, <ymin>, <xmax>, <ymax>, <probability>]
bboxes = detector.img_pyramids(image)
```
* Non Maximum Suppression
```python
# iou_thresh is the IoU overlap threshold for non-maximum suppression.
# In the returned bboxes, the function sets probability = 0.0 for boxes
# that have been filtered out.
bboxes = detector.non_max_sup(bboxes,iou_thresh = 0.5)
```
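For intuition, here is a tiny standalone sketch (not part of the library) of the suppression rule, mirroring the `overlap` helper in detection.py: when two boxes overlap with IoU above `iou_thresh`, the lower-probability box gets its probability set to 0.0.

```python
# illustration only : IoU of two heavily-overlapping boxes
def iou(b1, b2):
    ix1, iy1 = max(b1[0], b2[0]), max(b1[1], b2[1])
    ix2, iy2 = min(b1[2], b2[2]), min(b1[3], b2[3])
    if ix2 < ix1 or iy2 < iy1:
        return 0.0
    inter = (ix2 - ix1) * (iy2 - iy1)
    a1 = (b1[2] - b1[0]) * (b1[3] - b1[1])
    a2 = (b2[2] - b2[0]) * (b2[3] - b2[1])
    return inter / float(a1 + a2 - inter)

box_a = [0.10, 0.10, 0.50, 0.50, 0.95]   # higher probability, kept
box_b = [0.12, 0.12, 0.52, 0.52, 0.80]   # IoU ~ 0.82 > 0.5, probability set to 0.0
print(iou(box_a[:4], box_b[:4]))
```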

* Predict the bounding boxes on the detection/calibration nets
```python
# predict runs on every bounding box whose probability is not zero; it
# updates each box's probability from the prediction and returns the
# final bboxes.
# flags of net : 'net12' , 'net24' , 'net48' , 'net12_cal' , 'net24_cal' , 'net48_cal' .
# threshold : the threshold of prediction.
bboxes = detector.predict(img,bboxes,net = 'net12',threshold = 0.9)
```

### Results
![image](https://github.com/liumusicforever/CNN_Face_Detection/blob/master/data/results/img_1_result.jpg)


## Implementation Issue
### Are 12-net and 24-net too small?
While training the models, **I found 12-net and 24-net very hard to converge at their original input sizes**. Perhaps those networks are too small to learn the patterns, so I finally changed the input size of net12 and net24 to 48*48. I am still puzzled about this!
### Is the calibration network necessary?
The accuracy of the calibration nets is only about 0.8. **As a result, the calibration step may adjust a bounding box incorrectly, and the box may then be removed in the next stage**, so I sometimes get better results without the calibration nets.


## License

MIT License

## Reference

Haoxiang Li, Zhe Lin, Xiaohui Shen, Jonathan Brandt, Gang Hua; A Convolutional Neural Network Cascade for Face Detection; The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2015, pp. 5325-5334
--------------------------------------------------------------------------------
/data.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf

from tensorflow.contrib.data import Dataset, Iterator


class DataSet:
    def __init__(self, data_path_list , train_rate = 0.9):
        self.data_path_list = data_path_list
        self.train_rate = train_rate
    def get_dataset(self,batch,size = (48,48,3)):
        self.size = size
        dataset = self.data_path_list

        from random import shuffle
        shuffle(dataset)


        train_set = dataset[0:int(len(dataset)*self.train_rate)]
        val_set = dataset[int(len(dataset)*self.train_rate):]

        # pad the last batch
        if len(train_set) % batch != 0 :
            for i in range(batch - (len(train_set) % batch)):
                train_set.append(train_set[0])
        if len(val_set) % batch != 0 :
            for i in range(batch - (len(val_set) % batch)):
                val_set.append(val_set[0])


        train_imgs = tf.constant( [data[0] for data in train_set])
        train_labels = tf.constant([data[1] for data in train_set])

        val_imgs = tf.constant([data[0] for data in val_set])
        val_labels = tf.constant([data[1] for data in val_set])

        # create TensorFlow Dataset objects
        tr_data = Dataset.from_tensor_slices((train_imgs, train_labels))
        val_data = Dataset.from_tensor_slices((val_imgs, val_labels))

        tr_data = tr_data.map(self.data_loader)
        val_data = val_data.map(self.data_loader)

        return tr_data,val_data

    def get_iterator(self,batch = 3,size = (12,12,3)):
        tr_data , val_data = self.get_dataset(batch,size)

        tr_data = tr_data.batch(batch)
        val_data = val_data.batch(batch)

        # create TensorFlow Iterator object
        iterator = Iterator.from_structure(tr_data.output_types,
                                           tr_data.output_shapes)

        # create two initialization ops to switch between the datasets
        training_init_op = iterator.make_initializer(tr_data)
        validation_init_op = iterator.make_initializer(val_data)

        next_element = iterator.get_next()
        return iterator , training_init_op , validation_init_op , next_element

    def data_loader(self , img_path, label):
        # label format : [<class>, <pattern>]


        # read the img from file
        img_file = tf.read_file(img_path)
        img_decoded = tf.image.decode_jpeg(img_file, channels=self.size[2])
        resized_image = tf.image.resize_images(img_decoded, [self.size[0], self.size[1]])


        # convert the labels to one-hot encodings
        classes_num = 2
        clss = tf.one_hot(label[0], classes_num)

        pattern_classes = 45
        pattern = tf.one_hot(label[1], pattern_classes)

        return resized_image, clss , pattern


def test_dataset():

    dataset = DataSet([['data/img_1.jpg',[0,1]],
                       ['data/img_1.jpg',[1,5]],
                       ['data/img_1.jpg',[0,10]],
                       ])
    _ , train_op , val_op , next_ele = dataset.get_iterator(batch = 1)
    sess = tf.InteractiveSession()
    sess.run(train_op)
    while True:
        try:
            inputs , targets, patterns = sess.run(next_ele)
            # print('inputs', inputs)

            print('targets', targets)
            print('patterns', patterns)
        except tf.errors.OutOfRangeError:
            print("End of training dataset.")
            break



if __name__ == "__main__":
    test_dataset()
--------------------------------------------------------------------------------
/train_calibration_net.py:
--------------------------------------------------------------------------------
'''
Implementation of "A Convolutional Neural Network Cascade for Face Detection"
Paper : https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Li_A_Convolutional_Neural_2015_CVPR_paper.pdf
Author : Dennis Liu
Modify : 2017/11/10

Description : An example of training the calibration nets.

'''
import cv2
import numpy as np
import tensorflow as tf

import model


from data import DataSet
from dataset.fddb_crawler import parse_data_info


def train_cal_net():
    # get only the positive training samples
    data_info = parse_data_info(only_positive = True)


    # training configuration
    batch = 500
    size = (48,48,3)
    start_epoch = 0
    end_epoch = 1000
    train_validation_rate = 0.9 # training set / all samples

    # load a pretrained model ; set to None if you don't have one
    pretrained = 'models/48_cal_net_18.ckpt'

    # load the data iterator
    dataset = DataSet(data_info,train_rate = train_validation_rate)
    _ , train_op , val_op , next_ele = dataset.get_iterator(batch,size)


    # load networks
    net_12_c = model.calib_12Net(lr = 0.001,size = (12,12,3))
    net_24_c = model.calib_24Net(lr = 0.001,size = (24,24,3))
    net_48_c = model.calib_48Net(lr = 0.001,size = (48,48,3))

    sess = tf.InteractiveSession()
    saver = tf.train.Saver()

    if pretrained:
        saver.restore(sess , pretrained)

    else:
        sess.run(tf.global_variables_initializer())



    for epoch in range(start_epoch,end_epoch):
        loss = 0
        iteration = 0
        sess.run(train_op)
        # get each element of the training dataset until the end is reached
        while True:
            try:
                # the default size returned by the data iterator is 48
                inputs,clss ,pattern = sess.run(next_ele)
                # <img> , <0/1 class> , <calibration pattern>


                clss = clss.reshape(batch,2)
                pattern = pattern.reshape(batch,45)


                # resize images to fit each net
                inputs_12 = np.array([cv2.resize(img,(net_12_c.size[0],net_12_c.size[1])) for img in inputs])
                inputs_24 = np.array([cv2.resize(img,(net_24_c.size[0],net_24_c.size[1])) for img in inputs])
                inputs_48 = np.array([cv2.resize(img,(net_48_c.size[0],net_48_c.size[1])) for img in inputs])

                # feed size (48,48) into 12_cal_net and 24_cal_net as well,
                # because the original sizes are too small to converge
                train_nets = [net_12_c,net_24_c,net_48_c]
                net_feed_dict = {net_12_c.inputs:inputs_12 , net_12_c.targets:pattern,\
                                 net_24_c.inputs:inputs_24 , net_24_c.targets:pattern,\
                                 net_48_c.inputs:inputs_48 , net_48_c.targets:pattern,}

                # train the nets
                sess.run([net.train_step for net in train_nets],\
                         feed_dict = net_feed_dict)
                # loss computation
                losses = sess.run([net.loss for net in train_nets],\
                                  feed_dict = net_feed_dict)

                if iteration % 100 == 0:
                    net_12_c_eva = net_12_c.evaluate(inputs_12,pattern)
                    net_12_c_acc = sum(net_12_c_eva)/len(net_12_c_eva)
                    net_24_c_eva = net_24_c.evaluate(inputs_24,pattern)
                    net_24_c_acc = sum(net_24_c_eva)/len(net_24_c_eva)
                    net_48_c_eva = net_48_c.evaluate(inputs_48,pattern)
                    net_48_c_acc = sum(net_48_c_eva)/len(net_48_c_eva)
                    print ('Training Epoch {} --- Iter {} --- Training Accuracy: {}%,{}%,{}% --- Training Loss: {}'\
                           .format(epoch , iteration , net_12_c_acc , net_24_c_acc , net_48_c_acc , losses))

                iteration += 1
            except tf.errors.OutOfRangeError:
                print("End of training dataset.")
                break

        # get each element of the validation dataset until the end is reached
        sess.run(val_op)
        net_12_c_acc = []
        net_24_c_acc = []
        net_48_c_acc = []
        while True:
            try:
                # the size returned by the data iterator is 48
                inputs,clss ,pattern = sess.run(next_ele)
                clss = clss.reshape(batch,2)
                pattern = pattern.reshape(batch,45)

                # resize images to fit each net
                inputs_12 = np.array([cv2.resize(img,(net_12_c.size[0],net_12_c.size[1])) for img in inputs])
                inputs_24 = np.array([cv2.resize(img,(net_24_c.size[0],net_24_c.size[1])) for img in inputs])
                inputs_48 = np.array([cv2.resize(img,(net_48_c.size[0],net_48_c.size[1])) for img in inputs])


                net_12_c_eva = net_12_c.evaluate(inputs_12,pattern)
                net_24_c_eva = net_24_c.evaluate(inputs_24,pattern)
                net_48_c_eva = net_48_c.evaluate(inputs_48,pattern)
                for i in range(len(net_12_c_eva)):
                    net_12_c_acc.append(net_12_c_eva[i])
                    net_24_c_acc.append(net_24_c_eva[i])
                    net_48_c_acc.append(net_48_c_eva[i])
            except tf.errors.OutOfRangeError:
                print("End of validation dataset.")
                break

        print ('Validation Epoch {} Validation Accuracy: {}%,{}%,{}%'\
               .format(epoch , sum(net_12_c_acc)/len(net_12_c_acc),\
                       sum(net_24_c_acc)/len(net_24_c_acc),\
                       sum(net_48_c_acc)/len(net_48_c_acc)))

        saver = tf.train.Saver()
        save_path = saver.save(sess, "models/48_cal_net_{}.ckpt".format(epoch))
        print ("Model saved in file: ",save_path)

if __name__ == "__main__":
    train_cal_net()
--------------------------------------------------------------------------------
/train_detection_net.py:
--------------------------------------------------------------------------------
'''
Implementation of "A Convolutional Neural Network Cascade for Face Detection"
Paper : https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Li_A_Convolutional_Neural_2015_CVPR_paper.pdf
Author : Dennis Liu
Modify : 2017/11/10

Description : An example of training the detection nets.

'''
import cv2
import numpy as np
import tensorflow as tf

import model


from data import DataSet
from dataset.fddb_crawler import parse_data_info


def train_det_net():
    # get all training samples
    data_info = parse_data_info(only_positive = False)
    # data_info = [[<img_path>, [<class>, <pattern>]], ...]


    # training configuration
    batch = 500
    size = (48,48,3)
    start_epoch = 0
    end_epoch = 1000
    train_validation_rate = 0.9 # training set / all samples

    # load a pretrained model ; set to None if you don't have one
    pretrained = 'models/48_net_6.ckpt'

    # load the data iterator
    dataset = DataSet(data_info,train_rate = train_validation_rate)
    _ , train_op , val_op , next_ele = dataset.get_iterator(batch,size)


    # load networks
    # the learning rate has a great impact on training these models
    net_12 = model.detect_12Net(lr = 0.001,size = (12,12,3))
    net_24 = model.detect_24Net(lr = 0.001,size = (24,24,3))
    net_48 = model.detect_48Net(lr = 0.001,size = (48,48,3))

    sess = tf.InteractiveSession()
    saver = tf.train.Saver()

    if pretrained:
        saver.restore(sess , pretrained)
    else:
        sess.run(tf.global_variables_initializer())



    for epoch in range(start_epoch,end_epoch):
        loss = 0
        iteration = 0
        sess.run(train_op)
        # get each element of the training dataset until the end is reached
        while True:
            try:
                # the default size returned by the data iterator is 48
                inputs,clss ,pattern = sess.run(next_ele)
                # <img> , <0/1 class> , <calibration pattern>


                clss = clss.reshape(batch,2)
                pattern = pattern.reshape(batch,45)


                # resize images to fit each net
                inputs_12 = np.array([cv2.resize(img,(net_12.size[0],net_12.size[1])) for img in inputs])
                inputs_24 = np.array([cv2.resize(img,(net_24.size[0],net_24.size[1])) for img in inputs])
                inputs_48 = np.array([cv2.resize(img,(net_48.size[0],net_48.size[1])) for img in inputs])

                # forward 12net
                net_12_fc = net_12.get_fc(inputs_12)

                # forward 24net
                net_24_fc = net_24.get_fc(inputs_24,net_12_fc)

                train_nets = [net_12,net_24,net_48]
                net_feed_dict = {net_12.inputs:inputs_12 , net_12.targets:clss,\
                                 net_24.inputs:inputs_24 , net_24.targets:clss,net_24.from_12:net_12_fc,\
                                 net_48.inputs:inputs_48 , net_48.targets:clss,net_48.from_24:net_24_fc}

                # train the nets
                sess.run([net.train_step for net in train_nets],\
                         feed_dict = net_feed_dict)
                # loss computation
                losses = sess.run([net.loss for net in train_nets],\
                                  feed_dict = net_feed_dict)

                if iteration % 100 == 0:
                    net_12_eva = net_12.evaluate(inputs_12,clss)
                    net_12_acc = sum(net_12_eva)/len(net_12_eva)
                    net_24_eva = net_24.evaluate(inputs_24,clss,net_12_fc)
                    net_24_acc = sum(net_24_eva)/len(net_24_eva)
                    net_48_eva = net_48.evaluate(inputs_48,clss,net_24_fc)
                    net_48_acc = sum(net_48_eva)/len(net_48_eva)
                    print ('Training Epoch {} --- Iter {} --- Training Accuracy: {}%,{}%,{}% --- Training Loss: {}'\
                           .format(epoch , iteration , net_12_acc , net_24_acc , net_48_acc , losses))


                iteration += 1
            except tf.errors.OutOfRangeError:
                # print("End of training dataset.")
                break

        # get each element of the validation dataset until the end is reached
        sess.run(val_op)
        net_12_acc = []
        net_24_acc = []
        net_48_acc = []
        while True:
            try:
                # the size returned by the data iterator is 48
                inputs,clss ,pattern = sess.run(next_ele)
                clss = clss.reshape(batch,2)

                # resize images to fit each net
                inputs_12 = np.array([cv2.resize(img,(net_12.size[0],net_12.size[1])) for img in inputs])
                inputs_24 = np.array([cv2.resize(img,(net_24.size[0],net_24.size[1])) for img in inputs])
                inputs_48 = np.array([cv2.resize(img,(net_48.size[0],net_48.size[1])) for img in inputs])

                # forward 12net
                net_12_fc = net_12.get_fc(inputs_12)

                # forward 24net
                net_24_fc = net_24.get_fc(inputs_24,net_12_fc)

                net_12_eva = net_12.evaluate(inputs_12,clss)
                net_24_eva = net_24.evaluate(inputs_24,clss,net_12_fc)
                net_48_eva = net_48.evaluate(inputs_48,clss,net_24_fc)
                for i in range(len(net_12_eva)):
                    net_12_acc.append(net_12_eva[i])
                    net_24_acc.append(net_24_eva[i])
                    net_48_acc.append(net_48_eva[i])
            except tf.errors.OutOfRangeError:
                # print("End of validation dataset.")
                break

        print ('Validation Epoch {} Validation Accuracy: {}%,{}%,{}%'\
               .format(epoch , sum(net_12_acc)/len(net_12_acc),\
                       sum(net_24_acc)/len(net_24_acc),\
                       sum(net_48_acc)/len(net_48_acc)))

        saver = tf.train.Saver()
        save_path = saver.save(sess, "models/48_net_{}.ckpt".format(epoch))
        print ("Model saved in file: ", save_path)

if __name__ == "__main__":
    train_det_net()
--------------------------------------------------------------------------------
/dataset/fddb_crawler.py:
--------------------------------------------------------------------------------
'''
This program parses the FDDB dataset and generates training samples for the 12-, 24-, and 48-net.

author dennisliu
modify 2017/11/10

please modify out_path and call gen_pos_sample or gen_neg_sample as needed.
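Generated samples are written as <uuid>_<class>_<pattern>.jpg : <class> is 1 for a
face crop and 0 for background ; <pattern> is the calibration class (0-44 for
positive samples, 99 for negatives). parse_data_info() recovers the labels by
splitting these filenames on '_'.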
'''
import os
import uuid
import cv2
import random
import matplotlib.pyplot as plt
import matplotlib.patches as patches


fddb_path = '/home/share/data/FDDB/'
label_files = [fddb_path+'FDDB-folds/' + txt for txt in \
               ['FDDB-fold-01-ellipseList.txt',
                'FDDB-fold-02-ellipseList.txt',
                'FDDB-fold-03-ellipseList.txt',
                'FDDB-fold-04-ellipseList.txt',
                'FDDB-fold-05-ellipseList.txt',
                'FDDB-fold-06-ellipseList.txt',
                'FDDB-fold-07-ellipseList.txt',
                'FDDB-fold-08-ellipseList.txt',
                'FDDB-fold-09-ellipseList.txt',
                'FDDB-fold-10-ellipseList.txt']]

def parse_data_info(only_positive = False,limit_num = None,pos_neg_ratio = 0.5):
    data_info = []
    pos_num = None
    neg_num = None
    pos_folders = '/home/share/data/FDDB/positive_sample'
    neg_folders = '/home/share/data/FDDB/negative_sample'

    if limit_num:
        pos_num = int(limit_num * pos_neg_ratio)
        neg_num = int(limit_num * (1-pos_neg_ratio))
        poses = os.listdir(pos_folders)[:pos_num]
        negs = os.listdir(neg_folders)[:neg_num]
    else:
        poses = os.listdir(pos_folders)
        negs = os.listdir(neg_folders)

    for img in poses:
        img_path = os.path.join(pos_folders,img)
        labels = img.replace('.jpg','').split('_')
        clss = int(labels[1])
        pattern = int(labels[2])
        data_info.append([img_path,[clss,pattern]])
    if not only_positive:
        for img in negs:
            img_path = os.path.join(neg_folders,img)
            labels = img.replace('.jpg','').split('_')
            clss = int(labels[1])
            pattern = int(labels[2])
            data_info.append([img_path,[clss,pattern]])

    return data_info

def fddb_loader(fddb_path):
    images = []

    for txt in label_files:
        with open(txt) as f:
            content = f.readlines()
        content = [x.strip() for x in content]
        idx = 0
        faces = 0
        # convert txt to list
        while idx < len(content):
            if faces == 0:
                filename = fddb_path + content[idx] + '.jpg'
                faces = int(content[idx+1])
                idx += 2
            else:
                bboxes = []
                for i in range(faces):
                    bboxes.append(content[idx+i].split())
                idx += faces
                if os.path.exists(filename) :
                    images.append([filename,faces,bboxes])
                faces = 0
    return images

def bbox_convert(images):
    '''
    description :
        convert elliptical regions to rectangular regions
    input :
        images : [[<filename>, <faces>, <bboxes>], ...]
    return :
        result : [[<filename>, <bboxes>], ...]
    bbox format :
        bboxes : [<xmin>, <ymin>, <xmax>, <ymax>]
    '''

    result = []
    for i,img in enumerate(images):
        image = cv2.imread(img[0])

        # skip when the image is not available
        if image is None: continue

        H,W = image.shape[:2]
        bboxes = []
        for bbox in img[2]:
            h = float(bbox[0])
            x = float(bbox[3])
            w = float(bbox[1])
            y = float(bbox[4])
            xmin = (x-w)/W
            ymin = (y-h)/H
            xmax = (x+w)/W
            ymax = (y+h)/H
            bboxes.append([xmin,ymin,xmax,ymax])
        result.append([img[0],bboxes])
    return result


def show(image,bboxes = None):
    fig,ax = plt.subplots(1)

    img = cv2.imread(image)
    H,W = img.shape[:2]
    if bboxes:
        for bbox in bboxes:
            xmin,ymin,xmax,ymax = bbox[:]
            rect = patches.Rectangle((xmin*W,ymin*H),(xmax-xmin)*W,(ymax-ymin)*H,linewidth=1,fill=False)
            ax.add_patch(rect)
    ax.imshow(img)
    plt.show()

def gen_pos_sample(images , out_path):
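    # 45 calibration classes = 5 scales x 3 x-offsets x 3 y-offsets ;
    # the class index encoded below is clss = si*9 + xi*3 + yi
    # (a worked encode/decode sketch follows this file)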
    cali_scale = [0.83, 0.91, 1.0, 1.10, 1.21]
    cali_off_x = [-0.17, 0., 0.17]
    cali_off_y = [-0.17, 0., 0.17]

    for image in images:
        im_path = image[0]
        bboxes = image[1]
        img = cv2.imread(im_path)
        H,W = img.shape[:2]
        for bbox in bboxes:
            facename = str(uuid.uuid4())
            for si,s in enumerate(cali_scale):
                for xi,x_off in enumerate(cali_off_x):
                    for yi,y_off in enumerate(cali_off_y):
                        xmin , ymin , xmax , ymax = bbox[:]
                        new_xmin = xmin - x_off*(xmax-xmin)/s
                        new_ymin = ymin - y_off*(ymax-ymin)/s
                        new_xmax = new_xmin+(xmax-xmin)/s
                        new_ymax = new_ymin+(ymax-ymin)/s
                        # crop
                        face = img[int(new_ymin*H):int(new_ymax*H),int(new_xmin*W):int(new_xmax*W)]

                        if all(i > 10 for i in face.shape[:2]) :
                            # annot = '{},{},{}'.format(si,xi,yi)
                            clss = xi * len(cali_off_y) + si * len(cali_off_y) * len(cali_off_x) + yi
                            imgname = facename + '_1_' + str(clss) + '.jpg'
                            cv2.imwrite(out_path+'/'+imgname,face)



def gen_neg_sample(images , out_path):
    def check_in_bbox(poses , bboxes):
        '''
        input :
            poses : [[<x>, <y>], ...]
            bboxes : [<bbox>, <bbox>, ...]
        return :
            in_range :
                True : one of the positions lies inside a face bbox
        '''
        in_range = False
        for bbox in bboxes:
            for pos in poses :
                if pos[0] > bbox[0] and pos[0] < bbox[2] and pos[1] > bbox[1] and pos[1] < bbox[3] :
                    in_range = True
                else:
                    pass
        return in_range
    # number of background sampling attempts per image
    sample_times = 100
    cali_scale = [0.5, 0.75, 1.0, 1.25, 1.50]

    for image in images:
        im_path = image[0]
        bboxes = image[1]
        img = cv2.imread(im_path)
        H,W = img.shape[:2]



        for i in range(sample_times):
            # random position
            pos_xmin = random.uniform(0, 1)
            pos_ymin = random.uniform(0, 1)

            # set the region from the position and the mean width/height of the faces
            mean_x = sum([bbox[2]-bbox[0] for bbox in bboxes])/len(bboxes)
            mean_y = sum([bbox[3]-bbox[1] for bbox in bboxes])/len(bboxes)
            for s in cali_scale:
                facename = str(uuid.uuid4())
                pos_xmax = pos_xmin + mean_x/s
                pos_ymax = pos_ymin + mean_y/s

                if pos_xmax > 1 or pos_ymax > 1:
                    continue

                poses = [[pos_xmin,pos_ymin],[pos_xmax,pos_ymax],[pos_xmin,pos_ymax],[pos_xmax,pos_ymin],[(pos_xmin+pos_xmax)/2,(pos_ymin+pos_ymax)/2]]
                # keep only regions that do not fall inside any face bbox
                if not check_in_bbox(poses,bboxes):
                    # generate a negative sample
                    face = img[int(pos_ymin*H):int(pos_ymax*H),int(pos_xmin*W):int(pos_xmax*W)]
                    imgname = facename + '_0_99.jpg'
                    cv2.imwrite(out_path+'/'+imgname,face)
                    # plt.imshow(face)
                    # plt.show()
                else:
                    continue





def main():
    out_path = fddb_path + 'positive_sample/'
    if not os.path.exists(out_path):
        os.makedirs(out_path)

    # load the FDDB annotation files
    images = fddb_loader(fddb_path)
    print ("total processing image : {}".format(len(images)))
    # convert elliptical regions to rectangular regions
    images = bbox_convert(images)

    # positive sample generator
    gen_pos_sample(images , out_path)


    #image = images[0][0]
    #bboxes = images[0][1]
    # show images
    # show(image,bboxes)

if __name__ == "__main__":
    main()
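A short self-contained sketch (not part of the repo) of how the 45 calibration classes round-trip between the encoding in `gen_pos_sample` above and the decoding a detector needs; `Detector.net_forward` in detection.py performs the decode with its own inverted scale/offset lists to undo the perturbation:

```python
# encode/decode of the 45-class calibration pattern (sketch ; lists taken
# from gen_pos_sample)
cali_scale = [0.83, 0.91, 1.0, 1.10, 1.21]
cali_off_x = [-0.17, 0., 0.17]
cali_off_y = [-0.17, 0., 0.17]

def encode(si, xi, yi):
    # same formula as gen_pos_sample
    return si * len(cali_off_x) * len(cali_off_y) + xi * len(cali_off_y) + yi

def decode(clss):
    per_scale = len(cali_off_x) * len(cali_off_y)   # 9 patterns per scale
    si = clss // per_scale
    xi = (clss % per_scale) // len(cali_off_y)
    yi = clss % len(cali_off_y)
    return cali_scale[si], cali_off_x[xi], cali_off_y[yi]

assert decode(encode(3, 2, 1)) == (1.10, 0.17, 0.)   # scale 1.10 , +x offset , centered y
```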
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
'''
Implementation of "A Convolutional Neural Network Cascade for Face Detection"
Paper : https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Li_A_Convolutional_Neural_2015_CVPR_paper.pdf
Author : Dennis Liu
Modify : 2017/11/04

Description : The tensorflow structure of the models in the paper : 12net, 24net, 48net (both detection and calibration)

'''


import tensorflow as tf
import numpy as np



def weight_variable(shape,name=None,lr_type = 'conv'):
    # weight initialization is very important during training ;
    # tf.random_normal converges more slowly than truncated normal
    if lr_type == 'conv':
        initial = tf.truncated_normal(shape, dtype="float32", stddev = 0.01)
        # initial = tf.random_normal(shape=shape, mean=0, stddev=0.001)
    else:
        x = np.sqrt(6. / (np.prod(np.array(shape[:-1])) + shape[-1]))
        initial = tf.random_uniform(shape, minval=-x,maxval=x)

    return tf.Variable(initial,name=name)

def bias_variable(shape,name=None):
    initial = tf.constant(value=0.1, shape=shape)
    return tf.Variable(initial,name=name)

def conv2d(x, W, stride, pad = 'SAME'):
    return tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding=pad)

def max_pool(x, kernelSz, stride, pad = 'SAME'):
    return tf.nn.max_pool(x, ksize=[1, kernelSz, kernelSz, 1], strides=[1, stride, stride, 1], padding=pad)

#12-net
class detect_12Net:
    def __init__(self,size = (48,48,3),lr = 0.001 , is_train = True):
        self.size = size

        # data,label
        self.inputs = tf.placeholder("float",[None,size[0],size[1],size[2]])
        self.targets = tf.placeholder("float", [None,2])

        with tf.variable_scope("12det_"):

            #conv layer 1
            self.w_conv1 = weight_variable([3,3,size[2],16],"w_conv1")
            self.b_conv1 = bias_variable([16],"b_conv1")
            self.conv1 = tf.nn.relu(conv2d(self.inputs, self.w_conv1, 1) + self.b_conv1)


            #pooling layer 1
            self.pool1 = max_pool(self.conv1, 3, 2)


            #fully connected layer 1
            self.w_fc1 = weight_variable([int(size[0]/2 * size[1]/2 * 16), 16],'w_fc1',lr_type = 'fc')
            self.b_fc1 = bias_variable([16],'b_fc1')
            self.pool1_flat = tf.reshape(self.pool1, [-1, int(size[0]/2 * size[1]/2 *16)])
            self.fc1 = tf.nn.relu(tf.matmul(self.pool1_flat, self.w_fc1) + self.b_fc1)



            #fully connected layer 2
            self.w_fc2 = weight_variable([16, 2],'w_fc2',lr_type = 'fc')
            self.b_fc2 = bias_variable([2],'b_fc2')
            self.fc2 = tf.matmul(self.fc1,self.w_fc2) + self.b_fc2
        if is_train:
            self.loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=self.fc2,labels =self.targets))
            self.train_step = tf.train.AdamOptimizer(lr).minimize(self.loss)

    def get_fc(self,inputs_12):
        return self.fc1.eval(feed_dict = {self.inputs:inputs_12})

    def evaluate(self,inputs_12,targets):
        predict = tf.argmax( self.fc2,1)
        label = tf.argmax(targets,1)
        eva = tf.cast(tf.equal(predict,label),"float").eval(feed_dict = {self.inputs:inputs_12, self.targets:targets})
        return eva


#24-net
class detect_24Net:
    def __init__(self,size = (48,48,3) ,lr = 0.001, is_train = True):
        self.size = size

        # data,label
        self.inputs = tf.placeholder("float",[None,size[0],size[1],size[2]])
        self.targets = tf.placeholder("float", [None,2])
        # the fc1 output from 12net
        self.from_12 = tf.placeholder("float",[None,16])

        with tf.variable_scope("24det_"):
            #conv layer 1
            self.w_conv1 = weight_variable([3,3,size[2],64],"w_conv1")
            self.b_conv1 = bias_variable([64],"b_conv1")
            self.conv1 = tf.nn.relu(conv2d(self.inputs, self.w_conv1, 1) + self.b_conv1)


            #pooling layer 1
            self.pool1 = max_pool(self.conv1, 3, 2)


            #fully connected layer 1
            self.w_fc1 = weight_variable([int(size[0]/2 * size[1]/2 * 64), 128],lr_type = 'fc')
            self.b_fc1 = bias_variable([128])
            self.pool1_flat = tf.reshape(self.pool1, [-1, int(size[0]/2 * size[1]/2 *64)])
            self.fc1 = tf.nn.relu(tf.matmul(self.pool1_flat, self.w_fc1) + self.b_fc1)


            #concat
            self.concat1 = tf.concat([self.fc1,self.from_12],1)


            #fully connected layer 2
            self.w_fc2 = weight_variable([128+16, 2],lr_type = 'fc')
            self.b_fc2 = bias_variable([2])
            self.fc2 = tf.matmul(self.concat1,self.w_fc2) + self.b_fc2

        if is_train:
            self.loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=self.fc2,labels =self.targets))
            self.train_step = tf.train.AdamOptimizer(lr).minimize(self.loss)

    def get_fc(self,inputs_24,net12_fc):
        return self.concat1.eval(feed_dict = {self.inputs:inputs_24,self.from_12:net12_fc})
    def evaluate(self,inputs_24,targets,net_12_fc):
        predict = tf.argmax( self.fc2,1)
        label = tf.argmax(targets,1)
        eva = tf.cast(tf.equal(predict,label),"float").eval(feed_dict = {self.inputs:inputs_24, self.targets:targets,self.from_12:net_12_fc})
        return eva


#48-net
class detect_48Net:
    def __init__(self,size = (48,48,3) ,lr = 0.001, is_train = True):
        self.size = size

        # data,label
        self.inputs = tf.placeholder("float",[None,size[0],size[1],size[2]])
        self.targets = tf.placeholder("float", [None,2])
        # the concat1 output from 24net
        self.from_24 = tf.placeholder("float",[None,16+128])

        with tf.variable_scope("48det_"):
            #conv layer 1
            self.w_conv1 = weight_variable([5,5,size[2],64],"w_conv1")
            self.b_conv1 = bias_variable([64],"b_conv1")
            self.conv1 = tf.nn.relu(conv2d(self.inputs, self.w_conv1, 1) + self.b_conv1)


            #pooling layer 1
            self.pool1 = max_pool(self.conv1, 3, 2)


            #conv layer 2
            self.w_conv2 = weight_variable([5,5,64,64],"w_conv2")
            self.b_conv2 = bias_variable([64],"b_conv2")
            self.conv2 = tf.nn.relu(conv2d(self.pool1, self.w_conv2, 1) + self.b_conv2)


            #pooling layer 2
            self.pool2 = max_pool(self.conv2, 3, 2)



            #fully connected layer 1
            self.w_fc1 = weight_variable([int(size[0]/4 * size[1]/4 * 64), 256],lr_type = 'fc')
            self.b_fc1 = bias_variable([256])
            self.pool2_flat = tf.reshape(self.pool2, [-1, int(size[0]/4 * size[1]/4 *64)])
            self.fc1 = tf.nn.relu(tf.matmul(self.pool2_flat, self.w_fc1) + self.b_fc1)


            #concat

            self.concat1 = tf.concat([self.fc1,self.from_24],1)


            #fully connected layer 2
            self.w_fc2 = weight_variable([256+128+16, 2],lr_type = 'fc')
            self.b_fc2 = bias_variable([2])
            self.fc2 = tf.matmul(self.concat1,self.w_fc2) + self.b_fc2
        if is_train:
            self.loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=self.fc2,labels =self.targets))
            self.train_step = tf.train.AdamOptimizer(lr).minimize(self.loss)
    def evaluate(self,inputs_48,targets,net_24_fc):
        predict = tf.argmax( self.fc2,1)
        label = tf.argmax(targets,1)
        eva = tf.cast(tf.equal(predict,label),"float").eval(feed_dict = {self.inputs:inputs_48, self.targets:targets,self.from_24:net_24_fc})
        return eva



class calib_12Net:
    def __init__(self,size = (48,48,3) ,lr = 0.001, is_train = True):
        self.size = size

        # data,label
        self.inputs = tf.placeholder("float",[None,size[0],size[1],size[2]])
        self.targets = tf.placeholder("float", [None,45])

        #12-net
        with tf.variable_scope("12calib_"):
            #conv layer 1
            self.w_conv1 = weight_variable([3,3,size[2],16],"w1")
            self.b_conv1 = bias_variable([16],"b1")
            self.conv1 = tf.nn.relu(conv2d(self.inputs, self.w_conv1, 1) + self.b_conv1)

            #pooling layer 1
            self.pool1 = max_pool(self.conv1, 3, 2)

            #fc layer 1
            self.w_fc1 = weight_variable([int(size[0]//2 * size[1]//2 * 16), 128],"w2")
            self.b_fc1 = bias_variable([128],"b2")
            self.pool1_reshaped = tf.reshape(self.pool1, [-1, int(size[0]//2 * size[1]//2 * 16)])
            self.fc1 = tf.nn.relu(tf.matmul(self.pool1_reshaped, self.w_fc1) + self.b_fc1)

            #fc layer 2
            self.w_fc2 = weight_variable([128, 45],"w3")
            self.b_fc2 = bias_variable([45],"b3")
            self.fc2 = tf.matmul(self.fc1, self.w_fc2) + self.b_fc2
        if is_train:
            self.loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=self.fc2,labels =self.targets))
            self.train_step = tf.train.AdamOptimizer(lr).minimize(self.loss)
    def evaluate(self,inputs_12,targets):
        predict = tf.argmax( self.fc2,1)
        label = tf.argmax(targets,1)
        eva = tf.cast(tf.equal(predict,label),"float").eval(feed_dict = {self.inputs:inputs_12, self.targets:targets})
        return eva

class calib_24Net:
    def __init__(self,size = (48,48,3) ,lr = 0.001, is_train = True):
        self.size = size

        # data,label
        self.inputs = tf.placeholder("float",[None,size[0],size[1],size[2]])
        self.targets = tf.placeholder("float", [None,45])


        #24-net
        with tf.variable_scope("24calib_"):
            #conv layer 1
            self.w_conv1 = weight_variable([5,5,size[2],32],"w1")
            self.b_conv1 = bias_variable([32],"b1")
            self.conv1 = tf.nn.relu(conv2d(self.inputs, self.w_conv1, 1) + self.b_conv1)

            #pooling layer 1
            self.pool1 = max_pool(self.conv1, 3, 2)

            #fc layer 1
            self.w_fc1 = weight_variable([int(size[0]//2 * size[1]//2 * 32), 64],"w2")
            self.b_fc1 = bias_variable([64],"b2")
            self.pool1_reshaped = tf.reshape(self.pool1, [-1, int(size[0]//2 * size[1]//2 * 32)])
            self.fc1 = tf.nn.relu(tf.matmul(self.pool1_reshaped, self.w_fc1) + self.b_fc1)

            #fc layer 2
            self.w_fc2 = weight_variable([64, 45],"w4")
            self.b_fc2 = bias_variable([45],"b4")
            self.fc2 = tf.matmul(self.fc1, self.w_fc2) + self.b_fc2

        if is_train:
            self.loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=self.fc2,labels =self.targets))
            self.train_step = tf.train.AdamOptimizer(lr).minimize(self.loss)
    def evaluate(self,inputs_24,targets):

        predict = tf.argmax(self.fc2,1)
        label = tf.argmax(targets,1)
        eva = tf.cast(tf.equal(predict,label),"float").eval(feed_dict = {self.inputs:inputs_24, self.targets:targets})

        return eva

class calib_48Net:

    def __init__(self,size = (48,48,3) ,lr = 0.001, is_train = True):
        self.size = size

        # data,label
        self.inputs = tf.placeholder("float",[None,size[0],size[1],size[2]])
        self.targets = tf.placeholder("float", [None,45])

        #48-net
        with tf.variable_scope("48calib_"):
            #conv layer 1
            self.w_conv1 = weight_variable([5,5,size[2],64],"w1")
            self.b_conv1 = bias_variable([64],"b1")
            self.conv1 = tf.nn.relu(conv2d(self.inputs, self.w_conv1, 1) + self.b_conv1)

            #pooling layer 1
            self.pool1 = max_pool(self.conv1, 3, 2)

            #conv layer 2
            self.w_conv2 = weight_variable([5,5,64,64],"w2")
            self.b_conv2 = bias_variable([64],"b2")
            self.conv2 = tf.nn.relu(conv2d(self.pool1, self.w_conv2, 1) + self.b_conv2)

            #fc layer 1
            self.w_fc1 = weight_variable([int(size[0]//2 * size[1]//2 * 64), 256],"w3")
            self.b_fc1 = bias_variable([256],"b3")
            self.conv2_reshaped = tf.reshape(self.conv2, [-1, int(size[0]//2 * size[1]//2 * 64)])
            self.fc1 = tf.nn.relu(tf.matmul(self.conv2_reshaped, self.w_fc1) + self.b_fc1)

            #fc layer 2
            self.w_fc2 = weight_variable([256, 45],"w4")
            self.b_fc2 = bias_variable([45],"b4")
            self.fc2 = tf.matmul(self.fc1, self.w_fc2) + self.b_fc2
        if is_train:
            self.loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=self.fc2,labels =self.targets))
            self.train_step = tf.train.AdamOptimizer(lr).minimize(self.loss)

    def evaluate(self,inputs_48,targets):
        predict = tf.argmax(self.fc2,1)
        label = tf.argmax(targets,1)
        eva = tf.cast(tf.equal(predict,label),"float").eval(feed_dict = {self.inputs:inputs_48, self.targets:targets})
        return eva
--------------------------------------------------------------------------------
/detection.py:
--------------------------------------------------------------------------------
'''
Implementation of "A Convolutional Neural Network Cascade for Face Detection"
Paper : https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Li_A_Convolutional_Neural_2015_CVPR_paper.pdf
Author : Dennis Liu
Modify : 2017/11/10

Description : Three important classes used to detect images :
              Classifier : builds the detection nets 12,24,48 and provides a predict function for each net
              Aligner : builds the calibration nets 12,24,48 and provides a predict function for each net
              Detector : includes non-maximum suppression , image pyramids , and the interface to Classifier & Aligner

'''


import cv2
import numpy as np
import tensorflow as tf

import model


class Classifier:
    def __init__(self,model_path,sizes = [12,24,48]):
        self.sizes = sizes
        # load networks
        self.net_12 = model.detect_12Net(is_train = False,size = (sizes[0],sizes[0],3))
        self.net_24 = model.detect_24Net(is_train = False,size = (sizes[1],sizes[1],3))
        self.net_48 = model.detect_48Net(is_train = False)
        # create session
        self.sess = tf.Session()
        self.restore(model_path)
    def restore(self,model_path):
        all_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='12det_')+\
                  tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='24det_')+\
                  tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='48det_')
        # Add ops to save and restore all the variables.
        saver = tf.train.Saver(all_var)
        # Restore model from disk.
        saver.restore(self.sess, model_path)
    def net_12_predict(self,data,threshold = 0.5):
        # resize data to fit the input size of the network
        input_12 = np.array([cv2.resize(img,(self.sizes[0],self.sizes[0]))for img in data])

        # the class of the prediction
        max_idx = tf.to_float(tf.argmax( self.net_12.fc2,1))
        # the confidence of the predicted class
        max_value = tf.reduce_max(tf.nn.softmax(self.net_12.fc2), axis=1)
        # combine the results
        predict = tf.stack([max_idx,max_value],1)

        # forward
        result = self.sess.run(predict,feed_dict = {self.net_12.inputs : input_12})
        return result

    def net_24_predict(self,data,threshold = 0.5):
        # resize data to fit the input size of the network
        input_12 = np.array([cv2.resize(img,(self.sizes[0],self.sizes[0]))for img in data])
        input_24 = np.array([cv2.resize(img,(self.sizes[1],self.sizes[1]))for img in data])

        # get the previous net's output
        net_12_fc = self.sess.run(self.net_12.fc1,feed_dict = {self.net_12.inputs :input_12})

        max_idx = tf.to_float(tf.argmax( self.net_24.fc2,1))
        max_value = tf.reduce_max(tf.nn.softmax(self.net_24.fc2), axis=1)
        predict = tf.stack([max_idx,max_value],1)

        result = self.sess.run(predict,feed_dict = {self.net_24.inputs : input_24, self.net_24.from_12 : net_12_fc})
        return result
    def net_48_predict(self,data,threshold = 0.5):
        # resize data to fit the input size of the network
        input_12 = np.array([cv2.resize(img,(self.sizes[0],self.sizes[0]))for img in data])
        input_24 = np.array([cv2.resize(img,(self.sizes[1],self.sizes[1]))for img in data])
        input_48 = np.array([cv2.resize(img,(self.sizes[2],self.sizes[2]))for img in data])

        # get the previous nets' outputs
        net_12_fc = self.sess.run(self.net_12.fc1,feed_dict = {self.net_12.inputs :input_12})
        net_24_fc = self.sess.run(self.net_24.concat1,feed_dict = {self.net_24.inputs : input_24, self.net_24.from_12 : net_12_fc})

        # the class of the prediction
        max_idx = tf.to_float(tf.argmax( self.net_48.fc2,1))
        # the confidence of the predicted class
        max_value = tf.reduce_max(tf.nn.softmax(self.net_48.fc2), axis=1)
        # combine the results
        predict = tf.stack([max_idx,max_value],1)

        result = self.sess.run(predict,feed_dict = {self.net_48.inputs : input_48, self.net_48.from_24 : net_24_fc})
        return result

class Aligner:
    def __init__(self,model_path,sizes = [12,24,48]):
        self.sizes = sizes
        # load networks
        self.net_12 = model.calib_12Net(is_train = False,size = (sizes[0],sizes[0],3))
        self.net_24 = model.calib_24Net(is_train = False,size = (sizes[1],sizes[1],3))
        self.net_48 = model.calib_48Net(is_train = False,size = (sizes[2],sizes[2],3))
        # create session
        self.sess = tf.Session()
        self.restore(model_path)
    def restore(self,model_path):
        all_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='12calib_')+\
                  tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='24calib_')+\
                  tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='48calib_')
        # Add ops to save and restore all the variables.
        saver = tf.train.Saver(all_var)
        # Restore model from disk.
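        # NOTE: because this Saver is built only from the variables in the
        # '12calib_' / '24calib_' / '48calib_' scopes, the calibration checkpoint
        # can be restored into the same graph as the detection nets without
        # variable-name clashes.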
        saver.restore(self.sess, model_path)

    def net_12_predict(self,data):
        # resize data to fit the input size of the network
        input_12 = np.array([cv2.resize(img,(self.sizes[0],self.sizes[0]))for img in data])

        predict = tf.argmax( self.net_12.fc2,1)
        result = self.sess.run(predict,feed_dict = {self.net_12.inputs : input_12})
        return result
    def net_24_predict(self,data):
        # resize data to fit the input size of the network
        input_24 = np.array([cv2.resize(img,(self.sizes[1],self.sizes[1]))for img in data])


        predict = tf.argmax( self.net_24.fc2,1)
        result = self.sess.run(predict,feed_dict = {self.net_24.inputs : input_24})
        return result
    def net_48_predict(self,data):
        # resize data to fit the input size of the network
        input_48 = np.array([cv2.resize(img,(self.sizes[2],self.sizes[2]))for img in data])

        predict = tf.argmax( self.net_48.fc2,1)
        result = self.sess.run(predict,feed_dict = {self.net_48.inputs : input_48})
        return result

class Detector:
    def __init__(self,det_path,cal_path,pyramid_t = 3,win_size = (48,48),win_stride = 10):
        # configuration of the detection windows
        self.pyramid_t = pyramid_t
        self.win_size = win_size
        self.win_stride = win_stride
        # config of the networks
        self.batch = 1000

        # load the models
        self.classifier = Classifier(det_path)
        self.aligner = Aligner(cal_path)

        self.result = []
    def detect(self,img):
        '''
        step 1. run the image pyramid and detect over sliding windows
        step 2. construct the cascade structure
        '''
        # a minimal cascade assembled from the building blocks below ;
        # thresholds follow the README examples
        bboxes = self.img_pyramids(img)
        # net 12 classifier + calibration
        bboxes = self.predict(img,bboxes,net = 'net12',threshold = 0.9)
        bboxes = self.predict(img,bboxes,net = 'net12_cal')
        bboxes = self.non_max_sup(bboxes,iou_thresh = 0.5)
        # net 24 classifier + calibration
        bboxes = self.predict(img,bboxes,net = 'net24',threshold = 0.9)
        bboxes = self.predict(img,bboxes,net = 'net24_cal')
        bboxes = self.non_max_sup(bboxes,iou_thresh = 0.5)
        # net 48 classifier + calibration
        bboxes = self.predict(img,bboxes,net = 'net48',threshold = 0.9)
        bboxes = self.non_max_sup(bboxes,iou_thresh = 0.5)
        bboxes = self.predict(img,bboxes,net = 'net48_cal')

        return bboxes
    def predict(self,img,bboxes,net = None,threshold = 0.9):
        batch = self.batch
        win_buff = []
        idx_buff = []
        h,w = img.shape[:2]

        # map the bboxes to image windows and batch them up
        for idx,bbox in enumerate(bboxes):
            xmin,ymin,xmax,ymax,prop = bbox[:]
            if prop == 0.0 : continue
            win = img[int(ymin*h):int(ymax*h),int(xmin*w):int(xmax*w)]
            if win is None or win.shape[0] < 1 or win.shape[1] < 1 : continue
            win = cv2.cvtColor(cv2.resize(win,(48,48)),cv2.COLOR_BGR2RGB)
            win_buff.append(win)
            idx_buff.append(idx)

            if len(win_buff)>=batch:
                bboxes = self.net_forward(win_buff,idx_buff,bboxes,net,threshold)
                win_buff = []
                idx_buff = []
        if len(win_buff) > 0:
            bboxes = self.net_forward(win_buff,idx_buff,bboxes,net,threshold)
        return bboxes
    def net_forward(self,win_buff,idx_buff,bboxes,net , threshold):
        # forward the detection nets
        if not 'cal' in net :
            if net == 'net12':
                res = self.classifier.net_12_predict(win_buff,threshold)
            elif net == 'net24':
                res = self.classifier.net_24_predict(win_buff,threshold)
            elif net == 'net48':
                res = self.classifier.net_48_predict(win_buff,threshold)
            else:
                return None
            for i,idx in enumerate(idx_buff):
                is_face,prop = res[i]
                if is_face == 1.0:
                    bboxes[idx][4] = prop
                else:
                    bboxes[idx][4] = 0.0
            return bboxes
        else:
            # forward the calibration nets
            if net == 'net12_cal':
                res = self.aligner.net_12_predict(win_buff)
            elif net == 'net24_cal':
                res = self.aligner.net_24_predict(win_buff)
            elif net == 'net48_cal':
                res = self.aligner.net_48_predict(win_buff)
            else:
                return None
            cali_scale = [1.20, 1.09, 1.0, 0.9, 0.82]
            cali_off_x = [0.17, 0., -0.17]
            cali_off_y = [0.17, 0., -0.17]

            for i,idx in enumerate(idx_buff):
                clss = res[i]
                h,w = win_buff[i].shape[:2]
                xmin,ymin,xmax,ymax = bboxes[idx][:4]
                xmin,ymin,xmax,ymax = xmin*w,ymin*h,xmax*w,ymax*h
                # decode the class index back into (scale, x-offset, y-offset),
                # mirroring the encoding clss = si*9 + xi*3 + yi in
                # dataset/fddb_crawler.py
                per_scale = len(cali_off_x)*len(cali_off_y)
                s = cali_scale[int(clss // per_scale)]
                x_off = cali_off_x[int((clss % per_scale) // len(cali_off_y))]
                y_off = cali_off_y[int(clss % len(cali_off_y))]
                new_xmin = xmin - x_off*(xmax-xmin)/s
                new_ymin = ymin - y_off*(ymax-ymin)/s
                new_xmax = new_xmin+(xmax-xmin)/s
                new_ymax = new_ymin+(ymax-ymin)/s
                bboxes[idx][:4] = new_xmin/w,new_ymin/h,new_xmax/w,new_ymax/h
            return bboxes

    def non_max_sup(self,bboxes,iou_thresh = 0.5):
        def overlap(box1,box2):

            # determine the coordinates of the intersection rectangle
            in_xmin = max([box1[0],box2[0]])
            in_ymin = max([box1[1],box2[1]])
            in_xmax = min([box1[2],box2[2]])
            in_ymax = min([box1[3],box2[3]])

            if in_xmax < in_xmin or in_ymax < in_ymin:
                return 0.0 , 0.0 , 0.0

            # compute the intersection area
            intersection_area = (in_xmax - in_xmin) * (in_ymax - in_ymin)


            # compute the area of both bboxes
            box1_area = (box1[2]-box1[0])*(box1[3]-box1[1])
            box2_area = (box2[2]-box2[0])*(box2[3]-box2[1])

            iou = intersection_area / float(box1_area + box2_area - intersection_area)

            box1_iou = intersection_area / float(box1_area)
            box2_iou = intersection_area / float(box2_area)

            return iou,box1_iou,box2_iou

        # bboxes = [<xmin>, <ymin>, <xmax>, <ymax>, <prop>]
        for i,bbox1 in enumerate(bboxes) :
            if bbox1[4] < 0.0001 : continue
            for j,bbox2 in enumerate(bboxes):
                if bbox2[4] < 0.0001 : continue
                if i==j : continue
                iou,box1_iou,box2_iou = overlap(bbox1,bbox2)
                bbox1_prop = bbox1[4]
                bbox2_prop = bbox2[4]
                # # inner box threshold
                # if box1_iou > 0.9:
                #     bbox1[4] = 0.0
                # elif box2_iou > 0.9:
                #     bbox2[4] = 0.0
                if iou >= iou_thresh:
                    if bbox1_prop <= bbox2_prop:
                        bbox1[4] = 0.0
                    elif bbox1_prop > bbox2_prop:
                        bbox2[4] = 0.0
        return bboxes

    def img_pyramids(self,img):
        # init the return list
        # bbox = [<xmin>, <ymin>, <xmax>, <ymax>, <prop>]
        bboxes = []
        # slide a window across the image
        def sliding_window(image, stepSize, windowSize):
            for y in range(0, image.shape[0], stepSize):
                for x in range(0, image.shape[1], stepSize):
                    # yield the current window
                    yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]])

        # set the sliding window config
        win_stride = self.win_stride
        win_size = self.win_size

        # generate a Gaussian pyramid for img
        imgPyramids = [img.copy()]
        for i in range(1, self.pyramid_t):
            imgPyramids.append(cv2.pyrDown(imgPyramids[i - 1]))
        # slide over every image in the pyramid
        for i in range(self.pyramid_t):
            p_img = imgPyramids[i]
            p_h,p_w = p_img.shape[:2]
            for (x, y, window) in sliding_window(p_img, stepSize=win_stride, windowSize=win_size):
                # if the window does not meet our desired window size, ignore it
                if window.shape[0] != win_size[0] or window.shape[1] != win_size[1]:
                    continue

                x,y = float(x),float(y)
                bbox = [x/p_w,y/p_h,(x+win_size[0])/p_w,(y+win_size[1])/p_h,-0.1]
                bboxes.append(bbox)
        return bboxes




def test_detect():
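    # the checkpoint and image paths below are examples from the author's
    # environment ; point them at your own models and test images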
    det_mod_path = 'models/48_net_223.ckpt'
    cal_mod_path = 'models/48_cal_net_100.ckpt'
    detector = Detector(det_mod_path,cal_mod_path)

    img_path = '/home/share/data/FDDB/2002/07/25/big/img_362.jpg'
    img_path = '/home/share/data/FDDB/2002/07/25/big/img_1026.jpg'
    img = cv2.imread(img_path)
    h , w = img.shape[:2]

    bboxes = detector.detect(img)
    import matplotlib.pyplot as plt

    for b in bboxes:
        xmin,ymin,xmax,ymax,prop = b[:]
        if prop > 0.5:
            cv2.rectangle(img, (int(xmin*w), int(ymin*h)), (int(xmax*w), int(ymax*h)), (255, 0, 0), 2)
    plt.imshow(img)
    plt.show()

def test_predict():
    def read_img(img_path,size = (48,48,3)):
        img_file = tf.read_file(img_path)
        img_decoded = tf.image.decode_jpeg(img_file, channels=size[2])
        resized_image = tf.image.resize_images(img_decoded, [size[0], size[1]])
        return resized_image

    cal_mod_path = 'models/48_cal_net_100.ckpt'
    detector = Aligner(cal_mod_path)

    # list of negative sample paths
    neg_samples = []
    # list of positive sample paths
    pos_samples = []


    img_paths = neg_samples + pos_samples


    batch = tf.stack([read_img(p,(48,48,3)) for p in img_paths],0)
    data1 = detector.sess.run(batch)

    data = np.array([cv2.cvtColor(cv2.resize(cv2.imread(p),(48,48), interpolation = cv2.INTER_AREA ),cv2.COLOR_BGR2RGB) for p in img_paths]).astype(np.float32)

    print (detector.net_12_predict(data))
    print (detector.net_12_predict(data))

if __name__ == "__main__":
    # test_predict()
    test_detect()
--------------------------------------------------------------------------------
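Putting the pieces together: a minimal end-to-end sketch assembling the calls documented in the README (the checkpoint paths are placeholders; this mirrors what `Detector.detect` wires up internally):

```python
import cv2
from detection import Detector

# placeholder paths ; substitute your own checkpoints and test image
detector = Detector('models/det_net_.ckpt','models/cal_net_.ckpt')
img = cv2.imread('data/img_1.jpg')
h , w = img.shape[:2]

bboxes = detector.img_pyramids(img)   # sliding windows , initial prob = -0.1
for net in ['net12','net24','net48']:
    bboxes = detector.predict(img,bboxes,net = net,threshold = 0.9)   # detection stage
    bboxes = detector.predict(img,bboxes,net = net + '_cal')          # calibration stage
    bboxes = detector.non_max_sup(bboxes,iou_thresh = 0.5)            # drop overlaps

for xmin,ymin,xmax,ymax,prop in bboxes:
    if prop > 0.5:
        cv2.rectangle(img,(int(xmin*w),int(ymin*h)),(int(xmax*w),int(ymax*h)),(255,0,0),2)
cv2.imwrite('data/results/img_1_result.jpg',img)
```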