├── dataset
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── fddb_crawler.pyc
│   └── fddb_crawler.py
├── data
│   ├── img_1.jpg
│   └── results
│       └── img_1_result.jpg
├── LICENSE
├── example
│   └── demo_sliding_windows.py
├── README.md
├── data.py
├── train_calibration_net.py
├── train_detection_net.py
├── model.py
└── detection.py
--------------------------------------------------------------------------------
/dataset/__init__.py:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/data/img_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liumusicforever/CNN_Face_Detection/HEAD/data/img_1.jpg
--------------------------------------------------------------------------------
/dataset/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liumusicforever/CNN_Face_Detection/HEAD/dataset/__init__.pyc
--------------------------------------------------------------------------------
/dataset/fddb_crawler.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liumusicforever/CNN_Face_Detection/HEAD/dataset/fddb_crawler.pyc
--------------------------------------------------------------------------------
/data/results/img_1_result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liumusicforever/CNN_Face_Detection/HEAD/data/results/img_1_result.jpg
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 DennisLiu

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/example/demo_sliding_windows.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import cv2

def sliding_window(image, stepSize, windowSize):
    # slide a window across the image
    for y in range(0, image.shape[0], stepSize):
        for x in range(0, image.shape[1], stepSize):
            # yield the current window
            yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]])


img_path = '/home/share/data/FDDB/2002/07/19/big/img_130.jpg'
img = cv2.imread(img_path)


pyramid_t = 3
win_size = (48,48)
win_stride = 10


# generate a Gaussian pyramid for img
imgPyramids = [img.copy()]
for i in range(1, pyramid_t):
    imgPyramids.append(cv2.pyrDown(imgPyramids[i - 1]))
for i in range(pyramid_t):
    image = imgPyramids[i]
    for (x, y, window) in sliding_window(image, stepSize=win_stride, windowSize=win_size):
        # if the window does not meet our desired window size, ignore it
        if window.shape[0] != win_size[0] or window.shape[1] != win_size[1]:
            continue

        clone = image.copy()
        cv2.rectangle(clone, (x, y), (x + win_size[0], y + win_size[1]), (255, 0, 0), 2)
        face = image[y : y+win_size[1] , x : x+win_size[0]]
        plt.imshow(clone)
        plt.show(block = False)
        plt.pause(0.1)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# CNN_Face_Detection
Repository for "A Convolutional Neural Network Cascade for Face Detection", implemented with a Python interface.

## About
This repo implements the [paper](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Li_A_Convolutional_Neural_2015_CVPR_paper.pdf) in Python/TensorFlow, providing an interface to construct the cascade structure, including **detection networks**, **calibration networks**, **image pyramids**, and **non-maximum suppression**.

## Requirement
* TensorFlow : [TensorFlow installation guide](https://www.tensorflow.org/install/)
* OpenCV : [OpenCV installation guide](https://pypi.python.org/pypi/opencv-python)

## Useful Operations
* Import Detector
```python
# make sure detection.py is in the program folder
from detection import Detector
```
* Restore the pretrained model : [download_link](https://drive.google.com/file/d/170IlbvlBxrrtML_j3rmLFMDNIgX44Rgw/view?usp=sharing)

```python
# give the paths of both models to Detector, and it will load them into
# memory (or GPU memory)
det_mod_path = 'models/det_net_.ckpt'
cal_mod_path = 'models/cal_net_.ckpt'
detector = Detector(det_mod_path,cal_mod_path)
```

* Processing image pyramids
```python
# bboxes holds all bounding boxes from the sliding windows; each entry
# includes the position and the face probability (default is -0.1)
# bboxes = [<xmin>, <ymin>, <xmax>, <ymax>, <probability>]
bboxes = detector.img_pyramids(image)
```
* Non Maximum Suppression
```python
# iou_thresh is the IoU overlap threshold for non-maximum suppression.
# In the returned bboxes, the function sets probability = 0.0 for boxes
# that have been filtered out.
bboxes = detector.non_max_sup(bboxes,iou_thresh = 0.5)
```
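For intuition, here is a tiny standalone sketch (not part of the library) of the suppression rule, mirroring the `overlap` helper in detection.py: when two boxes overlap with IoU above `iou_thresh`, the lower-probability box gets its probability set to 0.0.

```python
# illustration only : IoU of two heavily-overlapping boxes
def iou(b1, b2):
    ix1, iy1 = max(b1[0], b2[0]), max(b1[1], b2[1])
    ix2, iy2 = min(b1[2], b2[2]), min(b1[3], b2[3])
    if ix2 < ix1 or iy2 < iy1:
        return 0.0
    inter = (ix2 - ix1) * (iy2 - iy1)
    a1 = (b1[2] - b1[0]) * (b1[3] - b1[1])
    a2 = (b2[2] - b2[0]) * (b2[3] - b2[1])
    return inter / float(a1 + a2 - inter)

box_a = [0.10, 0.10, 0.50, 0.50, 0.95]   # higher probability, kept
box_b = [0.12, 0.12, 0.52, 0.52, 0.80]   # IoU ~ 0.82 > 0.5, probability set to 0.0
print(iou(box_a[:4], box_b[:4]))
```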

* Predict the bounding boxes on the detection/calibration nets
```python
# predict runs on every bounding box whose probability is not zero; it
# updates each box's probability from the prediction and returns the
# final bboxes.
# flags of net : 'net12' , 'net24' , 'net48' , 'net12_cal' , 'net24_cal' , 'net48_cal' .
# threshold : the threshold of prediction.
bboxes = detector.predict(img,bboxes,net = 'net12',threshold = 0.9)
```

### Results
![image](https://github.com/liumusicforever/CNN_Face_Detection/blob/master/data/results/img_1_result.jpg)


## Implementation Issue
### Are 12-net and 24-net too small?
While training the models, **I found 12-net and 24-net very hard to converge at their original input sizes**. Perhaps those networks are too small to learn the patterns, so I finally changed the input size of net12 and net24 to 48*48. I am still puzzled about this!
### Is the calibration network necessary?
The accuracy of the calibration nets is only about 0.8. **As a result, the calibration step may adjust a bounding box incorrectly, and the box may then be removed in the next stage**, so I sometimes get better results without the calibration nets.


## License

MIT License

## Reference

Haoxiang Li, Zhe Lin, Xiaohui Shen, Jonathan Brandt, Gang Hua; A Convolutional Neural Network Cascade for Face Detection; The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2015, pp. 5325-5334
--------------------------------------------------------------------------------
/data.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf

from tensorflow.contrib.data import Dataset, Iterator


class DataSet:
    def __init__(self, data_path_list , train_rate = 0.9):
        self.data_path_list = data_path_list
        self.train_rate = train_rate
    def get_dataset(self,batch,size = (48,48,3)):
        self.size = size
        dataset = self.data_path_list

        from random import shuffle
        shuffle(dataset)


        train_set = dataset[0:int(len(dataset)*self.train_rate)]
        val_set = dataset[int(len(dataset)*self.train_rate):]

        # pad the last batch
        if len(train_set) % batch != 0 :
            for i in range(batch - (len(train_set) % batch)):
                train_set.append(train_set[0])
        if len(val_set) % batch != 0 :
            for i in range(batch - (len(val_set) % batch)):
                val_set.append(val_set[0])


        train_imgs = tf.constant( [data[0] for data in train_set])
        train_labels = tf.constant([data[1] for data in train_set])

        val_imgs = tf.constant([data[0] for data in val_set])
        val_labels = tf.constant([data[1] for data in val_set])

        # create TensorFlow Dataset objects
        tr_data = Dataset.from_tensor_slices((train_imgs, train_labels))
        val_data = Dataset.from_tensor_slices((val_imgs, val_labels))

        tr_data = tr_data.map(self.data_loader)
        val_data = val_data.map(self.data_loader)

        return tr_data,val_data

    def get_iterator(self,batch = 3,size = (12,12,3)):
        tr_data , val_data = self.get_dataset(batch,size)

        tr_data = tr_data.batch(batch)
        val_data = val_data.batch(batch)

        # create TensorFlow Iterator object
        iterator = Iterator.from_structure(tr_data.output_types,
                                           tr_data.output_shapes)

        # create two initialization ops to switch between the datasets
        training_init_op = iterator.make_initializer(tr_data)
        validation_init_op = iterator.make_initializer(val_data)

        next_element = iterator.get_next()
        return iterator , training_init_op , validation_init_op , next_element

    def data_loader(self , img_path, label):
        # label format : [<class>, <pattern>]


        # read the img from file
        img_file = tf.read_file(img_path)
        img_decoded = tf.image.decode_jpeg(img_file, channels=self.size[2])
        resized_image = tf.image.resize_images(img_decoded, [self.size[0], self.size[1]])


        # convert the labels to one-hot encodings
        classes_num = 2
        clss = tf.one_hot(label[0], classes_num)

        pattern_classes = 45
        pattern = tf.one_hot(label[1], pattern_classes)

        return resized_image, clss , pattern


def test_dataset():

    dataset = DataSet([['data/img_1.jpg',[0,1]],
                       ['data/img_1.jpg',[1,5]],
                       ['data/img_1.jpg',[0,10]],
                       ])
    _ , train_op , val_op , next_ele = dataset.get_iterator(batch = 1)
    sess = tf.InteractiveSession()
    sess.run(train_op)
    while True:
        try:
            inputs , targets, patterns = sess.run(next_ele)
            # print('inputs', inputs)

            print('targets', targets)
            print('patterns', patterns)
        except tf.errors.OutOfRangeError:
            print("End of training dataset.")
            break



if __name__ == "__main__":
    test_dataset()
--------------------------------------------------------------------------------
/train_calibration_net.py:
--------------------------------------------------------------------------------
'''
Implementation of "A Convolutional Neural Network Cascade for Face Detection"
Paper : https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Li_A_Convolutional_Neural_2015_CVPR_paper.pdf
Author : Dennis Liu
Modify : 2017/11/10

Description : An example of training the calibration nets.

'''
import cv2
import numpy as np
import tensorflow as tf

import model


from data import DataSet
from dataset.fddb_crawler import parse_data_info


def train_cal_net():
    # get only the positive training samples
    data_info = parse_data_info(only_positive = True)


    # training configuration
    batch = 500
    size = (48,48,3)
    start_epoch = 0
    end_epoch = 1000
    train_validation_rate = 0.9 # training set / all samples

    # load a pretrained model ; set to None if you don't have one
    pretrained = 'models/48_cal_net_18.ckpt'

    # load the data iterator
    dataset = DataSet(data_info,train_rate = train_validation_rate)
    _ , train_op , val_op , next_ele = dataset.get_iterator(batch,size)


    # load networks
    net_12_c = model.calib_12Net(lr = 0.001,size = (12,12,3))
    net_24_c = model.calib_24Net(lr = 0.001,size = (24,24,3))
    net_48_c = model.calib_48Net(lr = 0.001,size = (48,48,3))

    sess = tf.InteractiveSession()
    saver = tf.train.Saver()

    if pretrained:
        saver.restore(sess , pretrained)

    else:
        sess.run(tf.global_variables_initializer())



    for epoch in range(start_epoch,end_epoch):
        loss = 0
        iteration = 0
        sess.run(train_op)
        # get each element of the training dataset until the end is reached
        while True:
            try:
                # the default size returned by the data iterator is 48
                inputs,clss ,pattern = sess.run(next_ele)
                # <img> , <0/1 class> , <calibration pattern>


                clss = clss.reshape(batch,2)
                pattern = pattern.reshape(batch,45)


                # resize images to fit each net
                inputs_12 = np.array([cv2.resize(img,(net_12_c.size[0],net_12_c.size[1])) for img in inputs])
                inputs_24 = np.array([cv2.resize(img,(net_24_c.size[0],net_24_c.size[1])) for img in inputs])
                inputs_48 = np.array([cv2.resize(img,(net_48_c.size[0],net_48_c.size[1])) for img in inputs])

                # feed size (48,48) into 12_cal_net and 24_cal_net as well,
                # because the original sizes are too small to converge
                train_nets = [net_12_c,net_24_c,net_48_c]
                net_feed_dict = {net_12_c.inputs:inputs_12 , net_12_c.targets:pattern,\
                                 net_24_c.inputs:inputs_24 , net_24_c.targets:pattern,\
                                 net_48_c.inputs:inputs_48 , net_48_c.targets:pattern,}

                # train the nets
                sess.run([net.train_step for net in train_nets],\
                         feed_dict = net_feed_dict)
                # loss computation
                losses = sess.run([net.loss for net in train_nets],\
                                  feed_dict = net_feed_dict)

                if iteration % 100 == 0:
                    net_12_c_eva = net_12_c.evaluate(inputs_12,pattern)
                    net_12_c_acc = sum(net_12_c_eva)/len(net_12_c_eva)
                    net_24_c_eva = net_24_c.evaluate(inputs_24,pattern)
                    net_24_c_acc = sum(net_24_c_eva)/len(net_24_c_eva)
                    net_48_c_eva = net_48_c.evaluate(inputs_48,pattern)
                    net_48_c_acc = sum(net_48_c_eva)/len(net_48_c_eva)
                    print ('Training Epoch {} --- Iter {} --- Training Accuracy: {}%,{}%,{}% --- Training Loss: {}'\
                           .format(epoch , iteration , net_12_c_acc , net_24_c_acc , net_48_c_acc , losses))

                iteration += 1
            except tf.errors.OutOfRangeError:
                print("End of training dataset.")
                break

        # get each element of the validation dataset until the end is reached
        sess.run(val_op)
        net_12_c_acc = []
        net_24_c_acc = []
        net_48_c_acc = []
        while True:
            try:
                # the size returned by the data iterator is 48
                inputs,clss ,pattern = sess.run(next_ele)
                clss = clss.reshape(batch,2)
                pattern = pattern.reshape(batch,45)

                # resize images to fit each net
                inputs_12 = np.array([cv2.resize(img,(net_12_c.size[0],net_12_c.size[1])) for img in inputs])
                inputs_24 = np.array([cv2.resize(img,(net_24_c.size[0],net_24_c.size[1])) for img in inputs])
                inputs_48 = np.array([cv2.resize(img,(net_48_c.size[0],net_48_c.size[1])) for img in inputs])


                net_12_c_eva = net_12_c.evaluate(inputs_12,pattern)
                net_24_c_eva = net_24_c.evaluate(inputs_24,pattern)
                net_48_c_eva = net_48_c.evaluate(inputs_48,pattern)
                for i in range(len(net_12_c_eva)):
                    net_12_c_acc.append(net_12_c_eva[i])
                    net_24_c_acc.append(net_24_c_eva[i])
                    net_48_c_acc.append(net_48_c_eva[i])
            except tf.errors.OutOfRangeError:
                print("End of validation dataset.")
                break

        print ('Validation Epoch {} Validation Accuracy: {}%,{}%,{}%'\
               .format(epoch , sum(net_12_c_acc)/len(net_12_c_acc),\
                       sum(net_24_c_acc)/len(net_24_c_acc),\
                       sum(net_48_c_acc)/len(net_48_c_acc)))

        saver = tf.train.Saver()
        save_path = saver.save(sess, "models/48_cal_net_{}.ckpt".format(epoch))
        print ("Model saved in file: ",save_path)

if __name__ == "__main__":
    train_cal_net()
--------------------------------------------------------------------------------
/train_detection_net.py:
--------------------------------------------------------------------------------
'''
Implementation of "A Convolutional Neural Network Cascade for Face Detection"
Paper : https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Li_A_Convolutional_Neural_2015_CVPR_paper.pdf
Author : Dennis Liu
Modify : 2017/11/10

Description : An example of training the detection nets.

'''
import cv2
import numpy as np
import tensorflow as tf

import model


from data import DataSet
from dataset.fddb_crawler import parse_data_info


def train_det_net():
    # get all training samples
    data_info = parse_data_info(only_positive = False)
    # data_info = [[<img_path>, [<class>, <pattern>]], ...]


    # training configuration
    batch = 500
    size = (48,48,3)
    start_epoch = 0
    end_epoch = 1000
    train_validation_rate = 0.9 # training set / all samples

    # load a pretrained model ; set to None if you don't have one
    pretrained = 'models/48_net_6.ckpt'

    # load the data iterator
    dataset = DataSet(data_info,train_rate = train_validation_rate)
    _ , train_op , val_op , next_ele = dataset.get_iterator(batch,size)


    # load networks
    # the learning rate has a great impact on training these models
    net_12 = model.detect_12Net(lr = 0.001,size = (12,12,3))
    net_24 = model.detect_24Net(lr = 0.001,size = (24,24,3))
    net_48 = model.detect_48Net(lr = 0.001,size = (48,48,3))

    sess = tf.InteractiveSession()
    saver = tf.train.Saver()

    if pretrained:
        saver.restore(sess , pretrained)
    else:
        sess.run(tf.global_variables_initializer())



    for epoch in range(start_epoch,end_epoch):
        loss = 0
        iteration = 0
        sess.run(train_op)
        # get each element of the training dataset until the end is reached
        while True:
            try:
                # the default size returned by the data iterator is 48
                inputs,clss ,pattern = sess.run(next_ele)
                # <img> , <0/1 class> , <calibration pattern>


                clss = clss.reshape(batch,2)
                pattern = pattern.reshape(batch,45)


                # resize images to fit each net
                inputs_12 = np.array([cv2.resize(img,(net_12.size[0],net_12.size[1])) for img in inputs])
                inputs_24 = np.array([cv2.resize(img,(net_24.size[0],net_24.size[1])) for img in inputs])
                inputs_48 = np.array([cv2.resize(img,(net_48.size[0],net_48.size[1])) for img in inputs])

                # forward 12net
                net_12_fc = net_12.get_fc(inputs_12)

                # forward 24net
                net_24_fc = net_24.get_fc(inputs_24,net_12_fc)

                train_nets = [net_12,net_24,net_48]
                net_feed_dict = {net_12.inputs:inputs_12 , net_12.targets:clss,\
                                 net_24.inputs:inputs_24 , net_24.targets:clss,net_24.from_12:net_12_fc,\
                                 net_48.inputs:inputs_48 , net_48.targets:clss,net_48.from_24:net_24_fc}

                # train the nets
                sess.run([net.train_step for net in train_nets],\
                         feed_dict = net_feed_dict)
                # loss computation
                losses = sess.run([net.loss for net in train_nets],\
                                  feed_dict = net_feed_dict)

                if iteration % 100 == 0:
                    net_12_eva = net_12.evaluate(inputs_12,clss)
                    net_12_acc = sum(net_12_eva)/len(net_12_eva)
                    net_24_eva = net_24.evaluate(inputs_24,clss,net_12_fc)
                    net_24_acc = sum(net_24_eva)/len(net_24_eva)
                    net_48_eva = net_48.evaluate(inputs_48,clss,net_24_fc)
                    net_48_acc = sum(net_48_eva)/len(net_48_eva)
                    print ('Training Epoch {} --- Iter {} --- Training Accuracy: {}%,{}%,{}% --- Training Loss: {}'\
                           .format(epoch , iteration , net_12_acc , net_24_acc , net_48_acc , losses))


                iteration += 1
            except tf.errors.OutOfRangeError:
                # print("End of training dataset.")
                break

        # get each element of the validation dataset until the end is reached
        sess.run(val_op)
        net_12_acc = []
        net_24_acc = []
        net_48_acc = []
        while True:
            try:
                # the size returned by the data iterator is 48
                inputs,clss ,pattern = sess.run(next_ele)
                clss = clss.reshape(batch,2)

                # resize images to fit each net
                inputs_12 = np.array([cv2.resize(img,(net_12.size[0],net_12.size[1])) for img in inputs])
                inputs_24 = np.array([cv2.resize(img,(net_24.size[0],net_24.size[1])) for img in inputs])
                inputs_48 = np.array([cv2.resize(img,(net_48.size[0],net_48.size[1])) for img in inputs])

                # forward 12net
                net_12_fc = net_12.get_fc(inputs_12)

                # forward 24net
                net_24_fc = net_24.get_fc(inputs_24,net_12_fc)

                net_12_eva = net_12.evaluate(inputs_12,clss)
                net_24_eva = net_24.evaluate(inputs_24,clss,net_12_fc)
                net_48_eva = net_48.evaluate(inputs_48,clss,net_24_fc)
                for i in range(len(net_12_eva)):
                    net_12_acc.append(net_12_eva[i])
                    net_24_acc.append(net_24_eva[i])
                    net_48_acc.append(net_48_eva[i])
            except tf.errors.OutOfRangeError:
                # print("End of validation dataset.")
                break

        print ('Validation Epoch {} Validation Accuracy: {}%,{}%,{}%'\
               .format(epoch , sum(net_12_acc)/len(net_12_acc),\
                       sum(net_24_acc)/len(net_24_acc),\
                       sum(net_48_acc)/len(net_48_acc)))

        saver = tf.train.Saver()
        save_path = saver.save(sess, "models/48_net_{}.ckpt".format(epoch))
        print ("Model saved in file: ", save_path)

if __name__ == "__main__":
    train_det_net()
--------------------------------------------------------------------------------
/dataset/fddb_crawler.py:
--------------------------------------------------------------------------------
'''
This program parses the FDDB dataset and generates training samples for the 12-, 24-, and 48-net.

author dennisliu
modify 2017/11/10

please modify out_path and call gen_pos_sample or gen_neg_sample as needed.
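Generated samples are written as <uuid>_<class>_<pattern>.jpg : <class> is 1 for a
face crop and 0 for background ; <pattern> is the calibration class (0-44 for
positive samples, 99 for negatives). parse_data_info() recovers the labels by
splitting these filenames on '_'.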
'''
import os
import uuid
import cv2
import random
import matplotlib.pyplot as plt
import matplotlib.patches as patches


fddb_path = '/home/share/data/FDDB/'
label_files = [fddb_path+'FDDB-folds/' + txt for txt in \
               ['FDDB-fold-01-ellipseList.txt',
                'FDDB-fold-02-ellipseList.txt',
                'FDDB-fold-03-ellipseList.txt',
                'FDDB-fold-04-ellipseList.txt',
                'FDDB-fold-05-ellipseList.txt',
                'FDDB-fold-06-ellipseList.txt',
                'FDDB-fold-07-ellipseList.txt',
                'FDDB-fold-08-ellipseList.txt',
                'FDDB-fold-09-ellipseList.txt',
                'FDDB-fold-10-ellipseList.txt']]

def parse_data_info(only_positive = False,limit_num = None,pos_neg_ratio = 0.5):
    data_info = []
    pos_num = None
    neg_num = None
    pos_folders = '/home/share/data/FDDB/positive_sample'
    neg_folders = '/home/share/data/FDDB/negative_sample'

    if limit_num:
        pos_num = int(limit_num * pos_neg_ratio)
        neg_num = int(limit_num * (1-pos_neg_ratio))
        poses = os.listdir(pos_folders)[:pos_num]
        negs = os.listdir(neg_folders)[:neg_num]
    else:
        poses = os.listdir(pos_folders)
        negs = os.listdir(neg_folders)

    for img in poses:
        img_path = os.path.join(pos_folders,img)
        labels = img.replace('.jpg','').split('_')
        clss = int(labels[1])
        pattern = int(labels[2])
        data_info.append([img_path,[clss,pattern]])
    if not only_positive:
        for img in negs:
            img_path = os.path.join(neg_folders,img)
            labels = img.replace('.jpg','').split('_')
            clss = int(labels[1])
            pattern = int(labels[2])
            data_info.append([img_path,[clss,pattern]])

    return data_info

def fddb_loader(fddb_path):
    images = []

    for txt in label_files:
        with open(txt) as f:
            content = f.readlines()
        content = [x.strip() for x in content]
        idx = 0
        faces = 0
        # convert txt to list
        while idx < len(content):
            if faces == 0:
                filename = fddb_path + content[idx] + '.jpg'
                faces = int(content[idx+1])
                idx += 2
            else:
                bboxes = []
                for i in range(faces):
                    bboxes.append(content[idx+i].split())
                idx += faces
                if os.path.exists(filename) :
                    images.append([filename,faces,bboxes])
                faces = 0
    return images

def bbox_convert(images):
    '''
    description :
        convert elliptical regions to rectangular regions
    input :
        images : [[<filename>, <faces>, <bboxes>], ...]
    return :
        result : [[<filename>, <bboxes>], ...]
    bbox format :
        bboxes : [<xmin>, <ymin>, <xmax>, <ymax>]
    '''

    result = []
    for i,img in enumerate(images):
        image = cv2.imread(img[0])

        # skip when the image is not available
        if image is None: continue

        H,W = image.shape[:2]
        bboxes = []
        for bbox in img[2]:
            h = float(bbox[0])
            x = float(bbox[3])
            w = float(bbox[1])
            y = float(bbox[4])
            xmin = (x-w)/W
            ymin = (y-h)/H
            xmax = (x+w)/W
            ymax = (y+h)/H
            bboxes.append([xmin,ymin,xmax,ymax])
        result.append([img[0],bboxes])
    return result


def show(image,bboxes = None):
    fig,ax = plt.subplots(1)

    img = cv2.imread(image)
    H,W = img.shape[:2]
    if bboxes:
        for bbox in bboxes:
            xmin,ymin,xmax,ymax = bbox[:]
            rect = patches.Rectangle((xmin*W,ymin*H),(xmax-xmin)*W,(ymax-ymin)*H,linewidth=1,fill=False)
            ax.add_patch(rect)
    ax.imshow(img)
    plt.show()

def gen_pos_sample(images , out_path):
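    # 45 calibration classes = 5 scales x 3 x-offsets x 3 y-offsets ;
    # the class index encoded below is clss = si*9 + xi*3 + yi
    # (a worked encode/decode sketch follows this file)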
    cali_scale = [0.83, 0.91, 1.0, 1.10, 1.21]
    cali_off_x = [-0.17, 0., 0.17]
    cali_off_y = [-0.17, 0., 0.17]

    for image in images:
        im_path = image[0]
        bboxes = image[1]
        img = cv2.imread(im_path)
        H,W = img.shape[:2]
        for bbox in bboxes:
            facename = str(uuid.uuid4())
            for si,s in enumerate(cali_scale):
                for xi,x_off in enumerate(cali_off_x):
                    for yi,y_off in enumerate(cali_off_y):
                        xmin , ymin , xmax , ymax = bbox[:]
                        new_xmin = xmin - x_off*(xmax-xmin)/s
                        new_ymin = ymin - y_off*(ymax-ymin)/s
                        new_xmax = new_xmin+(xmax-xmin)/s
                        new_ymax = new_ymin+(ymax-ymin)/s
                        # crop
                        face = img[int(new_ymin*H):int(new_ymax*H),int(new_xmin*W):int(new_xmax*W)]

                        if all(i > 10 for i in face.shape[:2]) :
                            # annot = '{},{},{}'.format(si,xi,yi)
                            clss = xi * len(cali_off_y) + si * len(cali_off_y) * len(cali_off_x) + yi
                            imgname = facename + '_1_' + str(clss) + '.jpg'
                            cv2.imwrite(out_path+'/'+imgname,face)



def gen_neg_sample(images , out_path):
    def check_in_bbox(poses , bboxes):
        '''
        input :
            poses : [[<x>, <y>], ...]
            bboxes : [<bbox>, <bbox>, ...]
        return :
            in_range :
                True : one of the positions lies inside a face bbox
        '''
        in_range = False
        for bbox in bboxes:
            for pos in poses :
                if pos[0] > bbox[0] and pos[0] < bbox[2] and pos[1] > bbox[1] and pos[1] < bbox[3] :
                    in_range = True
                else:
                    pass
        return in_range
    # number of background sampling attempts per image
    sample_times = 100
    cali_scale = [0.5, 0.75, 1.0, 1.25, 1.50]

    for image in images:
        im_path = image[0]
        bboxes = image[1]
        img = cv2.imread(im_path)
        H,W = img.shape[:2]



        for i in range(sample_times):
            # random position
            pos_xmin = random.uniform(0, 1)
            pos_ymin = random.uniform(0, 1)

            # set the region from the position and the mean width/height of the faces
            mean_x = sum([bbox[2]-bbox[0] for bbox in bboxes])/len(bboxes)
            mean_y = sum([bbox[3]-bbox[1] for bbox in bboxes])/len(bboxes)
            for s in cali_scale:
                facename = str(uuid.uuid4())
                pos_xmax = pos_xmin + mean_x/s
                pos_ymax = pos_ymin + mean_y/s

                if pos_xmax > 1 or pos_ymax > 1:
                    continue

                poses = [[pos_xmin,pos_ymin],[pos_xmax,pos_ymax],[pos_xmin,pos_ymax],[pos_xmax,pos_ymin],[(pos_xmin+pos_xmax)/2,(pos_ymin+pos_ymax)/2]]
                # keep only regions that do not fall inside any face bbox
                if not check_in_bbox(poses,bboxes):
                    # generate a negative sample
                    face = img[int(pos_ymin*H):int(pos_ymax*H),int(pos_xmin*W):int(pos_xmax*W)]
                    imgname = facename + '_0_99.jpg'
                    cv2.imwrite(out_path+'/'+imgname,face)
                    # plt.imshow(face)
                    # plt.show()
                else:
                    continue





def main():
    out_path = fddb_path + 'positive_sample/'
    if not os.path.exists(out_path):
        os.makedirs(out_path)

    # load the FDDB annotation files
    images = fddb_loader(fddb_path)
    print ("total processing image : {}".format(len(images)))
    # convert elliptical regions to rectangular regions
    images = bbox_convert(images)

    # positive sample generator
    gen_pos_sample(images , out_path)


    #image = images[0][0]
    #bboxes = images[0][1]
    # show images
    # show(image,bboxes)

if __name__ == "__main__":
    main()
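A short self-contained sketch (not part of the repo) of how the 45 calibration classes round-trip between the encoding in `gen_pos_sample` above and the decoding a detector needs; `Detector.net_forward` in detection.py performs the decode with its own inverted scale/offset lists to undo the perturbation:

```python
# encode/decode of the 45-class calibration pattern (sketch ; lists taken
# from gen_pos_sample)
cali_scale = [0.83, 0.91, 1.0, 1.10, 1.21]
cali_off_x = [-0.17, 0., 0.17]
cali_off_y = [-0.17, 0., 0.17]

def encode(si, xi, yi):
    # same formula as gen_pos_sample
    return si * len(cali_off_x) * len(cali_off_y) + xi * len(cali_off_y) + yi

def decode(clss):
    per_scale = len(cali_off_x) * len(cali_off_y)   # 9 patterns per scale
    si = clss // per_scale
    xi = (clss % per_scale) // len(cali_off_y)
    yi = clss % len(cali_off_y)
    return cali_scale[si], cali_off_x[xi], cali_off_y[yi]

assert decode(encode(3, 2, 1)) == (1.10, 0.17, 0.)   # scale 1.10 , +x offset , centered y
```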
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
'''
Implementation of "A Convolutional Neural Network Cascade for Face Detection"
Paper : https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Li_A_Convolutional_Neural_2015_CVPR_paper.pdf
Author : Dennis Liu
Modify : 2017/11/04

Description : The tensorflow structure of the models in the paper : 12net, 24net, 48net (both detection and calibration)

'''


import tensorflow as tf
import numpy as np



def weight_variable(shape,name=None,lr_type = 'conv'):
    # weight initialization is very important during training ;
    # tf.random_normal converges more slowly than truncated normal
    if lr_type == 'conv':
        initial = tf.truncated_normal(shape, dtype="float32", stddev = 0.01)
        # initial = tf.random_normal(shape=shape, mean=0, stddev=0.001)
    else:
        x = np.sqrt(6. / (np.prod(np.array(shape[:-1])) + shape[-1]))
        initial = tf.random_uniform(shape, minval=-x,maxval=x)

    return tf.Variable(initial,name=name)

def bias_variable(shape,name=None):
    initial = tf.constant(value=0.1, shape=shape)
    return tf.Variable(initial,name=name)

def conv2d(x, W, stride, pad = 'SAME'):
    return tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding=pad)

def max_pool(x, kernelSz, stride, pad = 'SAME'):
    return tf.nn.max_pool(x, ksize=[1, kernelSz, kernelSz, 1], strides=[1, stride, stride, 1], padding=pad)

#12-net
class detect_12Net:
    def __init__(self,size = (48,48,3),lr = 0.001 , is_train = True):
        self.size = size

        # data,label
        self.inputs = tf.placeholder("float",[None,size[0],size[1],size[2]])
        self.targets = tf.placeholder("float", [None,2])

        with tf.variable_scope("12det_"):

            #conv layer 1
            self.w_conv1 = weight_variable([3,3,size[2],16],"w_conv1")
            self.b_conv1 = bias_variable([16],"b_conv1")
            self.conv1 = tf.nn.relu(conv2d(self.inputs, self.w_conv1, 1) + self.b_conv1)


            #pooling layer 1
            self.pool1 = max_pool(self.conv1, 3, 2)


            #fully connected layer 1
            self.w_fc1 = weight_variable([int(size[0]/2 * size[1]/2 * 16), 16],'w_fc1',lr_type = 'fc')
            self.b_fc1 = bias_variable([16],'b_fc1')
            self.pool1_flat = tf.reshape(self.pool1, [-1, int(size[0]/2 * size[1]/2 *16)])
            self.fc1 = tf.nn.relu(tf.matmul(self.pool1_flat, self.w_fc1) + self.b_fc1)



            #fully connected layer 2
            self.w_fc2 = weight_variable([16, 2],'w_fc2',lr_type = 'fc')
            self.b_fc2 = bias_variable([2],'b_fc2')
            self.fc2 = tf.matmul(self.fc1,self.w_fc2) + self.b_fc2
        if is_train:
            self.loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=self.fc2,labels =self.targets))
            self.train_step = tf.train.AdamOptimizer(lr).minimize(self.loss)

    def get_fc(self,inputs_12):
        return self.fc1.eval(feed_dict = {self.inputs:inputs_12})

    def evaluate(self,inputs_12,targets):
        predict = tf.argmax( self.fc2,1)
        label = tf.argmax(targets,1)
        eva = tf.cast(tf.equal(predict,label),"float").eval(feed_dict = {self.inputs:inputs_12, self.targets:targets})
        return eva


#24-net
class detect_24Net:
    def __init__(self,size = (48,48,3) ,lr = 0.001, is_train = True):
        self.size = size

        # data,label
        self.inputs = tf.placeholder("float",[None,size[0],size[1],size[2]])
        self.targets = tf.placeholder("float", [None,2])
        # the fc1 output from 12net
        self.from_12 = tf.placeholder("float",[None,16])

        with tf.variable_scope("24det_"):
            #conv layer 1
            self.w_conv1 = weight_variable([3,3,size[2],64],"w_conv1")
            self.b_conv1 = bias_variable([64],"b_conv1")
            self.conv1 = tf.nn.relu(conv2d(self.inputs, self.w_conv1, 1) + self.b_conv1)


            #pooling layer 1
            self.pool1 = max_pool(self.conv1, 3, 2)


            #fully connected layer 1
            self.w_fc1 = weight_variable([int(size[0]/2 * size[1]/2 * 64), 128],lr_type = 'fc')
            self.b_fc1 = bias_variable([128])
            self.pool1_flat = tf.reshape(self.pool1, [-1, int(size[0]/2 * size[1]/2 *64)])
            self.fc1 = tf.nn.relu(tf.matmul(self.pool1_flat, self.w_fc1) + self.b_fc1)


            #concat
            self.concat1 = tf.concat([self.fc1,self.from_12],1)


            #fully connected layer 2
            self.w_fc2 = weight_variable([128+16, 2],lr_type = 'fc')
            self.b_fc2 = bias_variable([2])
            self.fc2 = tf.matmul(self.concat1,self.w_fc2) + self.b_fc2

        if is_train:
            self.loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=self.fc2,labels =self.targets))
            self.train_step = tf.train.AdamOptimizer(lr).minimize(self.loss)

    def get_fc(self,inputs_24,net12_fc):
        return self.concat1.eval(feed_dict = {self.inputs:inputs_24,self.from_12:net12_fc})
    def evaluate(self,inputs_24,targets,net_12_fc):
        predict = tf.argmax( self.fc2,1)
        label = tf.argmax(targets,1)
        eva = tf.cast(tf.equal(predict,label),"float").eval(feed_dict = {self.inputs:inputs_24, self.targets:targets,self.from_12:net_12_fc})
        return eva


#48-net
class detect_48Net:
    def __init__(self,size = (48,48,3) ,lr = 0.001, is_train = True):
        self.size = size

        # data,label
        self.inputs = tf.placeholder("float",[None,size[0],size[1],size[2]])
        self.targets = tf.placeholder("float", [None,2])
        # the concat1 output from 24net
        self.from_24 = tf.placeholder("float",[None,16+128])

        with tf.variable_scope("48det_"):
            #conv layer 1
            self.w_conv1 = weight_variable([5,5,size[2],64],"w_conv1")
            self.b_conv1 = bias_variable([64],"b_conv1")
            self.conv1 = tf.nn.relu(conv2d(self.inputs, self.w_conv1, 1) + self.b_conv1)


            #pooling layer 1
            self.pool1 = max_pool(self.conv1, 3, 2)


            #conv layer 2
            self.w_conv2 = weight_variable([5,5,64,64],"w_conv2")
            self.b_conv2 = bias_variable([64],"b_conv2")
            self.conv2 = tf.nn.relu(conv2d(self.pool1, self.w_conv2, 1) + self.b_conv2)


            #pooling layer 2
            self.pool2 = max_pool(self.conv2, 3, 2)



            #fully connected layer 1
            self.w_fc1 = weight_variable([int(size[0]/4 * size[1]/4 * 64), 256],lr_type = 'fc')
            self.b_fc1 = bias_variable([256])
            self.pool2_flat = tf.reshape(self.pool2, [-1, int(size[0]/4 * size[1]/4 *64)])
            self.fc1 = tf.nn.relu(tf.matmul(self.pool2_flat, self.w_fc1) + self.b_fc1)


            #concat

            self.concat1 = tf.concat([self.fc1,self.from_24],1)


            #fully connected layer 2
            self.w_fc2 = weight_variable([256+128+16, 2],lr_type = 'fc')
            self.b_fc2 = bias_variable([2])
            self.fc2 = tf.matmul(self.concat1,self.w_fc2) + self.b_fc2
        if is_train:
            self.loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=self.fc2,labels =self.targets))
            self.train_step = tf.train.AdamOptimizer(lr).minimize(self.loss)
    def evaluate(self,inputs_48,targets,net_24_fc):
        predict = tf.argmax( self.fc2,1)
        label = tf.argmax(targets,1)
        eva = tf.cast(tf.equal(predict,label),"float").eval(feed_dict = {self.inputs:inputs_48, self.targets:targets,self.from_24:net_24_fc})
        return eva



class calib_12Net:
    def __init__(self,size = (48,48,3) ,lr = 0.001, is_train = True):
        self.size = size

        # data,label
        self.inputs = tf.placeholder("float",[None,size[0],size[1],size[2]])
        self.targets = tf.placeholder("float", [None,45])

        #12-net
        with tf.variable_scope("12calib_"):
            #conv layer 1
            self.w_conv1 = weight_variable([3,3,size[2],16],"w1")
            self.b_conv1 = bias_variable([16],"b1")
            self.conv1 = tf.nn.relu(conv2d(self.inputs, self.w_conv1, 1) + self.b_conv1)

            #pooling layer 1
            self.pool1 = max_pool(self.conv1, 3, 2)

            #fc layer 1
            self.w_fc1 = weight_variable([int(size[0]//2 * size[1]//2 * 16), 128],"w2")
            self.b_fc1 = bias_variable([128],"b2")
            self.pool1_reshaped = tf.reshape(self.pool1, [-1, int(size[0]//2 * size[1]//2 * 16)])
            self.fc1 = tf.nn.relu(tf.matmul(self.pool1_reshaped, self.w_fc1) + self.b_fc1)

            #fc layer 2
            self.w_fc2 = weight_variable([128, 45],"w3")
            self.b_fc2 = bias_variable([45],"b3")
            self.fc2 = tf.matmul(self.fc1, self.w_fc2) + self.b_fc2
        if is_train:
            self.loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=self.fc2,labels =self.targets))
            self.train_step = tf.train.AdamOptimizer(lr).minimize(self.loss)
    def evaluate(self,inputs_12,targets):
        predict = tf.argmax( self.fc2,1)
        label = tf.argmax(targets,1)
        eva = tf.cast(tf.equal(predict,label),"float").eval(feed_dict = {self.inputs:inputs_12, self.targets:targets})
        return eva

class calib_24Net:
    def __init__(self,size = (48,48,3) ,lr = 0.001, is_train = True):
        self.size = size

        # data,label
        self.inputs = tf.placeholder("float",[None,size[0],size[1],size[2]])
        self.targets = tf.placeholder("float", [None,45])


        #24-net
        with tf.variable_scope("24calib_"):
            #conv layer 1
            self.w_conv1 = weight_variable([5,5,size[2],32],"w1")
            self.b_conv1 = bias_variable([32],"b1")
            self.conv1 = tf.nn.relu(conv2d(self.inputs, self.w_conv1, 1) + self.b_conv1)

            #pooling layer 1
            self.pool1 = max_pool(self.conv1, 3, 2)

            #fc layer 1
            self.w_fc1 = weight_variable([int(size[0]//2 * size[1]//2 * 32), 64],"w2")
            self.b_fc1 = bias_variable([64],"b2")
            self.pool1_reshaped = tf.reshape(self.pool1, [-1, int(size[0]//2 * size[1]//2 * 32)])
            self.fc1 = tf.nn.relu(tf.matmul(self.pool1_reshaped, self.w_fc1) + self.b_fc1)

            #fc layer 2
            self.w_fc2 = weight_variable([64, 45],"w4")
            self.b_fc2 = bias_variable([45],"b4")
            self.fc2 = tf.matmul(self.fc1, self.w_fc2) + self.b_fc2

        if is_train:
            self.loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=self.fc2,labels =self.targets))
            self.train_step = tf.train.AdamOptimizer(lr).minimize(self.loss)
    def evaluate(self,inputs_24,targets):

        predict = tf.argmax(self.fc2,1)
        label = tf.argmax(targets,1)
        eva = tf.cast(tf.equal(predict,label),"float").eval(feed_dict = {self.inputs:inputs_24, self.targets:targets})

        return eva

class calib_48Net:

    def __init__(self,size = (48,48,3) ,lr = 0.001, is_train = True):
        self.size = size

        # data,label
        self.inputs = tf.placeholder("float",[None,size[0],size[1],size[2]])
        self.targets = tf.placeholder("float", [None,45])

        #48-net
        with tf.variable_scope("48calib_"):
            #conv layer 1
            self.w_conv1 = weight_variable([5,5,size[2],64],"w1")
            self.b_conv1 = bias_variable([64],"b1")
            self.conv1 = tf.nn.relu(conv2d(self.inputs, self.w_conv1, 1) + self.b_conv1)

            #pooling layer 1
            self.pool1 = max_pool(self.conv1, 3, 2)

            #conv layer 2
            self.w_conv2 = weight_variable([5,5,64,64],"w2")
            self.b_conv2 = bias_variable([64],"b2")
            self.conv2 = tf.nn.relu(conv2d(self.pool1, self.w_conv2, 1) + self.b_conv2)

            #fc layer 1
            self.w_fc1 = weight_variable([int(size[0]//2 * size[1]//2 * 64), 256],"w3")
            self.b_fc1 = bias_variable([256],"b3")
            self.conv2_reshaped = tf.reshape(self.conv2, [-1, int(size[0]//2 * size[1]//2 * 64)])
            self.fc1 = tf.nn.relu(tf.matmul(self.conv2_reshaped, self.w_fc1) + self.b_fc1)

            #fc layer 2
            self.w_fc2 = weight_variable([256, 45],"w4")
            self.b_fc2 = bias_variable([45],"b4")
            self.fc2 = tf.matmul(self.fc1, self.w_fc2) + self.b_fc2
        if is_train:
            self.loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=self.fc2,labels =self.targets))
            self.train_step = tf.train.AdamOptimizer(lr).minimize(self.loss)

    def evaluate(self,inputs_48,targets):
        predict = tf.argmax(self.fc2,1)
        label = tf.argmax(targets,1)
        eva = tf.cast(tf.equal(predict,label),"float").eval(feed_dict = {self.inputs:inputs_48, self.targets:targets})
        return eva
--------------------------------------------------------------------------------
/detection.py:
--------------------------------------------------------------------------------
'''
Implementation of "A Convolutional Neural Network Cascade for Face Detection"
Paper : https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Li_A_Convolutional_Neural_2015_CVPR_paper.pdf
Author : Dennis Liu
Modify : 2017/11/10

Description : Three important classes used to detect images :
              Classifier : builds the detection nets 12,24,48 and provides a predict function for each net
              Aligner : builds the calibration nets 12,24,48 and provides a predict function for each net
              Detector : includes non-maximum suppression , image pyramids , and the interface to Classifier & Aligner

'''


import cv2
import numpy as np
import tensorflow as tf

import model


class Classifier:
    def __init__(self,model_path,sizes = [12,24,48]):
        self.sizes = sizes
        # load networks
        self.net_12 = model.detect_12Net(is_train = False,size = (sizes[0],sizes[0],3))
        self.net_24 = model.detect_24Net(is_train = False,size = (sizes[1],sizes[1],3))
        self.net_48 = model.detect_48Net(is_train = False)
        # create session
        self.sess = tf.Session()
        self.restore(model_path)
    def restore(self,model_path):
        all_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='12det_')+\
                  tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='24det_')+\
                  tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='48det_')
        # Add ops to save and restore all the variables.
        saver = tf.train.Saver(all_var)
        # Restore model from disk.
        saver.restore(self.sess, model_path)
    def net_12_predict(self,data,threshold = 0.5):
        # resize data to fit the input size of the network
        input_12 = np.array([cv2.resize(img,(self.sizes[0],self.sizes[0]))for img in data])

        # the class of the prediction
        max_idx = tf.to_float(tf.argmax( self.net_12.fc2,1))
        # the confidence of the predicted class
        max_value = tf.reduce_max(tf.nn.softmax(self.net_12.fc2), axis=1)
        # combine the results
        predict = tf.stack([max_idx,max_value],1)

        # forward
        result = self.sess.run(predict,feed_dict = {self.net_12.inputs : input_12})
        return result

    def net_24_predict(self,data,threshold = 0.5):
        # resize data to fit the input size of the network
        input_12 = np.array([cv2.resize(img,(self.sizes[0],self.sizes[0]))for img in data])
        input_24 = np.array([cv2.resize(img,(self.sizes[1],self.sizes[1]))for img in data])

        # get the previous net's output
        net_12_fc = self.sess.run(self.net_12.fc1,feed_dict = {self.net_12.inputs :input_12})

        max_idx = tf.to_float(tf.argmax( self.net_24.fc2,1))
        max_value = tf.reduce_max(tf.nn.softmax(self.net_24.fc2), axis=1)
        predict = tf.stack([max_idx,max_value],1)

        result = self.sess.run(predict,feed_dict = {self.net_24.inputs : input_24, self.net_24.from_12 : net_12_fc})
        return result
    def net_48_predict(self,data,threshold = 0.5):
        # resize data to fit the input size of the network
        input_12 = np.array([cv2.resize(img,(self.sizes[0],self.sizes[0]))for img in data])
        input_24 = np.array([cv2.resize(img,(self.sizes[1],self.sizes[1]))for img in data])
        input_48 = np.array([cv2.resize(img,(self.sizes[2],self.sizes[2]))for img in data])

        # get the previous nets' outputs
        net_12_fc = self.sess.run(self.net_12.fc1,feed_dict = {self.net_12.inputs :input_12})
        net_24_fc = self.sess.run(self.net_24.concat1,feed_dict = {self.net_24.inputs : input_24, self.net_24.from_12 : net_12_fc})

        # the class of the prediction
        max_idx = tf.to_float(tf.argmax( self.net_48.fc2,1))
        # the confidence of the predicted class
        max_value = tf.reduce_max(tf.nn.softmax(self.net_48.fc2), axis=1)
        # combine the results
        predict = tf.stack([max_idx,max_value],1)

        result = self.sess.run(predict,feed_dict = {self.net_48.inputs : input_48, self.net_48.from_24 : net_24_fc})
        return result

class Aligner:
    def __init__(self,model_path,sizes = [12,24,48]):
        self.sizes = sizes
        # load networks
        self.net_12 = model.calib_12Net(is_train = False,size = (sizes[0],sizes[0],3))
        self.net_24 = model.calib_24Net(is_train = False,size = (sizes[1],sizes[1],3))
        self.net_48 = model.calib_48Net(is_train = False,size = (sizes[2],sizes[2],3))
        # create session
        self.sess = tf.Session()
        self.restore(model_path)
    def restore(self,model_path):
        all_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='12calib_')+\
                  tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='24calib_')+\
                  tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='48calib_')
        # Add ops to save and restore all the variables.
        saver = tf.train.Saver(all_var)
        # Restore model from disk.
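        # NOTE: because this Saver is built only from the variables in the
        # '12calib_' / '24calib_' / '48calib_' scopes, the calibration checkpoint
        # can be restored into the same graph as the detection nets without
        # variable-name clashes.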
        saver.restore(self.sess, model_path)

    def net_12_predict(self,data):
        # resize data to fit the input size of the network
        input_12 = np.array([cv2.resize(img,(self.sizes[0],self.sizes[0]))for img in data])

        predict = tf.argmax( self.net_12.fc2,1)
        result = self.sess.run(predict,feed_dict = {self.net_12.inputs : input_12})
        return result
    def net_24_predict(self,data):
        # resize data to fit the input size of the network
        input_24 = np.array([cv2.resize(img,(self.sizes[1],self.sizes[1]))for img in data])


        predict = tf.argmax( self.net_24.fc2,1)
        result = self.sess.run(predict,feed_dict = {self.net_24.inputs : input_24})
        return result
    def net_48_predict(self,data):
        # resize data to fit the input size of the network
        input_48 = np.array([cv2.resize(img,(self.sizes[2],self.sizes[2]))for img in data])

        predict = tf.argmax( self.net_48.fc2,1)
        result = self.sess.run(predict,feed_dict = {self.net_48.inputs : input_48})
        return result

class Detector:
    def __init__(self,det_path,cal_path,pyramid_t = 3,win_size = (48,48),win_stride = 10):
        # configuration of the detection windows
        self.pyramid_t = pyramid_t
        self.win_size = win_size
        self.win_stride = win_stride
        # config of the networks
        self.batch = 1000

        # load the models
        self.classifier = Classifier(det_path)
        self.aligner = Aligner(cal_path)

        self.result = []
    def detect(self,img):
        '''
        step 1. run the image pyramid and detect over sliding windows
        step 2. construct the cascade structure
        '''
        # a minimal cascade assembled from the building blocks below ;
        # thresholds follow the README examples
        bboxes = self.img_pyramids(img)
        # net 12 classifier + calibration
        bboxes = self.predict(img,bboxes,net = 'net12',threshold = 0.9)
        bboxes = self.predict(img,bboxes,net = 'net12_cal')
        bboxes = self.non_max_sup(bboxes,iou_thresh = 0.5)
        # net 24 classifier + calibration
        bboxes = self.predict(img,bboxes,net = 'net24',threshold = 0.9)
        bboxes = self.predict(img,bboxes,net = 'net24_cal')
        bboxes = self.non_max_sup(bboxes,iou_thresh = 0.5)
        # net 48 classifier + calibration
        bboxes = self.predict(img,bboxes,net = 'net48',threshold = 0.9)
        bboxes = self.non_max_sup(bboxes,iou_thresh = 0.5)
        bboxes = self.predict(img,bboxes,net = 'net48_cal')

        return bboxes
    def predict(self,img,bboxes,net = None,threshold = 0.9):
        batch = self.batch
        win_buff = []
        idx_buff = []
        h,w = img.shape[:2]

        # map the bboxes to image windows and batch them up
        for idx,bbox in enumerate(bboxes):
            xmin,ymin,xmax,ymax,prop = bbox[:]
            if prop == 0.0 : continue
            win = img[int(ymin*h):int(ymax*h),int(xmin*w):int(xmax*w)]
            if win is None or win.shape[0] < 1 or win.shape[1] < 1 : continue
            win = cv2.cvtColor(cv2.resize(win,(48,48)),cv2.COLOR_BGR2RGB)
            win_buff.append(win)
            idx_buff.append(idx)

            if len(win_buff)>=batch:
                bboxes = self.net_forward(win_buff,idx_buff,bboxes,net,threshold)
                win_buff = []
                idx_buff = []
        if len(win_buff) > 0:
            bboxes = self.net_forward(win_buff,idx_buff,bboxes,net,threshold)
        return bboxes
    def net_forward(self,win_buff,idx_buff,bboxes,net , threshold):
        # forward the detection nets
        if not 'cal' in net :
            if net == 'net12':
                res = self.classifier.net_12_predict(win_buff,threshold)
            elif net == 'net24':
                res = self.classifier.net_24_predict(win_buff,threshold)
            elif net == 'net48':
                res = self.classifier.net_48_predict(win_buff,threshold)
            else:
                return None
            for i,idx in enumerate(idx_buff):
                is_face,prop = res[i]
                if is_face == 1.0:
                    bboxes[idx][4] = prop
                else:
                    bboxes[idx][4] = 0.0
            return bboxes
        else:
            # forward the calibration nets
            if net == 'net12_cal':
                res = self.aligner.net_12_predict(win_buff)
            elif net == 'net24_cal':
                res = self.aligner.net_24_predict(win_buff)
            elif net == 'net48_cal':
                res = self.aligner.net_48_predict(win_buff)
            else:
                return None
            cali_scale = [1.20, 1.09, 1.0, 0.9, 0.82]
            cali_off_x = [0.17, 0., -0.17]
            cali_off_y = [0.17, 0., -0.17]

            for i,idx in enumerate(idx_buff):
                clss = res[i]
                h,w = win_buff[i].shape[:2]
                xmin,ymin,xmax,ymax = bboxes[idx][:4]
                xmin,ymin,xmax,ymax = xmin*w,ymin*h,xmax*w,ymax*h
                # decode the class index back into (scale, x-offset, y-offset),
                # mirroring the encoding clss = si*9 + xi*3 + yi in
                # dataset/fddb_crawler.py
                per_scale = len(cali_off_x)*len(cali_off_y)
                s = cali_scale[int(clss // per_scale)]
                x_off = cali_off_x[int((clss % per_scale) // len(cali_off_y))]
                y_off = cali_off_y[int(clss % len(cali_off_y))]
                new_xmin = xmin - x_off*(xmax-xmin)/s
                new_ymin = ymin - y_off*(ymax-ymin)/s
                new_xmax = new_xmin+(xmax-xmin)/s
                new_ymax = new_ymin+(ymax-ymin)/s
                bboxes[idx][:4] = new_xmin/w,new_ymin/h,new_xmax/w,new_ymax/h
            return bboxes

    def non_max_sup(self,bboxes,iou_thresh = 0.5):
        def overlap(box1,box2):

            # determine the coordinates of the intersection rectangle
            in_xmin = max([box1[0],box2[0]])
            in_ymin = max([box1[1],box2[1]])
            in_xmax = min([box1[2],box2[2]])
            in_ymax = min([box1[3],box2[3]])

            if in_xmax < in_xmin or in_ymax < in_ymin:
                return 0.0 , 0.0 , 0.0

            # compute the intersection area
            intersection_area = (in_xmax - in_xmin) * (in_ymax - in_ymin)


            # compute the area of both bboxes
            box1_area = (box1[2]-box1[0])*(box1[3]-box1[1])
            box2_area = (box2[2]-box2[0])*(box2[3]-box2[1])

            iou = intersection_area / float(box1_area + box2_area - intersection_area)

            box1_iou = intersection_area / float(box1_area)
            box2_iou = intersection_area / float(box2_area)

            return iou,box1_iou,box2_iou

        # bboxes = [<xmin>, <ymin>, <xmax>, <ymax>, <prop>]
        for i,bbox1 in enumerate(bboxes) :
            if bbox1[4] < 0.0001 : continue
            for j,bbox2 in enumerate(bboxes):
                if bbox2[4] < 0.0001 : continue
                if i==j : continue
                iou,box1_iou,box2_iou = overlap(bbox1,bbox2)
                bbox1_prop = bbox1[4]
                bbox2_prop = bbox2[4]
                # # inner box threshold
                # if box1_iou > 0.9:
                #     bbox1[4] = 0.0
                # elif box2_iou > 0.9:
                #     bbox2[4] = 0.0
                if iou >= iou_thresh:
                    if bbox1_prop <= bbox2_prop:
                        bbox1[4] = 0.0
                    elif bbox1_prop > bbox2_prop:
                        bbox2[4] = 0.0
        return bboxes

    def img_pyramids(self,img):
        # init the return list
        # bbox = [<xmin>, <ymin>, <xmax>, <ymax>, <prop>]
        bboxes = []
        # slide a window across the image
        def sliding_window(image, stepSize, windowSize):
            for y in range(0, image.shape[0], stepSize):
                for x in range(0, image.shape[1], stepSize):
                    # yield the current window
                    yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]])

        # set the sliding window config
        win_stride = self.win_stride
        win_size = self.win_size

        # generate a Gaussian pyramid for img
        imgPyramids = [img.copy()]
        for i in range(1, self.pyramid_t):
            imgPyramids.append(cv2.pyrDown(imgPyramids[i - 1]))
        # slide over every image in the pyramid
        for i in range(self.pyramid_t):
            p_img = imgPyramids[i]
            p_h,p_w = p_img.shape[:2]
            for (x, y, window) in sliding_window(p_img, stepSize=win_stride, windowSize=win_size):
                # if the window does not meet our desired window size, ignore it
                if window.shape[0] != win_size[0] or window.shape[1] != win_size[1]:
                    continue

                x,y = float(x),float(y)
                bbox = [x/p_w,y/p_h,(x+win_size[0])/p_w,(y+win_size[1])/p_h,-0.1]
                bboxes.append(bbox)
        return bboxes




def test_detect():
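    # the checkpoint and image paths below are examples from the author's
    # environment ; point them at your own models and test images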
    det_mod_path = 'models/48_net_223.ckpt'
    cal_mod_path = 'models/48_cal_net_100.ckpt'
    detector = Detector(det_mod_path,cal_mod_path)

    img_path = '/home/share/data/FDDB/2002/07/25/big/img_362.jpg'
    img_path = '/home/share/data/FDDB/2002/07/25/big/img_1026.jpg'
    img = cv2.imread(img_path)
    h , w = img.shape[:2]

    bboxes = detector.detect(img)
    import matplotlib.pyplot as plt

    for b in bboxes:
        xmin,ymin,xmax,ymax,prop = b[:]
        if prop > 0.5:
            cv2.rectangle(img, (int(xmin*w), int(ymin*h)), (int(xmax*w), int(ymax*h)), (255, 0, 0), 2)
    plt.imshow(img)
    plt.show()

def test_predict():
    def read_img(img_path,size = (48,48,3)):
        img_file = tf.read_file(img_path)
        img_decoded = tf.image.decode_jpeg(img_file, channels=size[2])
        resized_image = tf.image.resize_images(img_decoded, [size[0], size[1]])
        return resized_image

    cal_mod_path = 'models/48_cal_net_100.ckpt'
    detector = Aligner(cal_mod_path)

    # list of negative sample paths
    neg_samples = []
    # list of positive sample paths
    pos_samples = []


    img_paths = neg_samples + pos_samples


    batch = tf.stack([read_img(p,(48,48,3)) for p in img_paths],0)
    data1 = detector.sess.run(batch)

    data = np.array([cv2.cvtColor(cv2.resize(cv2.imread(p),(48,48), interpolation = cv2.INTER_AREA ),cv2.COLOR_BGR2RGB) for p in img_paths]).astype(np.float32)

    print (detector.net_12_predict(data))
    print (detector.net_12_predict(data))

if __name__ == "__main__":
    # test_predict()
    test_detect()
--------------------------------------------------------------------------------
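Putting the pieces together: a minimal end-to-end sketch assembling the calls documented in the README (the checkpoint paths are placeholders; this mirrors what `Detector.detect` wires up internally):

```python
import cv2
from detection import Detector

# placeholder paths ; substitute your own checkpoints and test image
detector = Detector('models/det_net_.ckpt','models/cal_net_.ckpt')
img = cv2.imread('data/img_1.jpg')
h , w = img.shape[:2]

bboxes = detector.img_pyramids(img)   # sliding windows , initial prob = -0.1
for net in ['net12','net24','net48']:
    bboxes = detector.predict(img,bboxes,net = net,threshold = 0.9)   # detection stage
    bboxes = detector.predict(img,bboxes,net = net + '_cal')          # calibration stage
    bboxes = detector.non_max_sup(bboxes,iou_thresh = 0.5)            # drop overlaps

for xmin,ymin,xmax,ymax,prop in bboxes:
    if prop > 0.5:
        cv2.rectangle(img,(int(xmin*w),int(ymin*h)),(int(xmax*w),int(ymax*h)),(255,0,0),2)
cv2.imwrite('data/results/img_1_result.jpg',img)
```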