├── FaceFinder.py
├── README.md
├── demo.jpg
├── demo.py
├── demo_result.png
├── face_ds.py
├── face_model
└── tfac.py

/FaceFinder.py:
--------------------------------------------------------------------------------
"""
Module for face detection.
Trains a small convolutional neural network for binary classification of an image as a face/non-face.
Uses a simple sliding window approach with variable-sized windows to localize faces.
See demo.py for usage with the pre-trained model "face_model".

Author: Prithvijit Chakrabarty (prithvichakra@gmail.com)
"""

import cv2
import tensorflow as tf
from tensorflow import nn
import tfac
import face_ds
import numpy as np

#Localization parameters
DET_SIZE = (300,300)  #Run all localization at a standard size
BLUR_DIM = (50,50)    #Dimension for blurring the face location mask
CONF_THRESH = 0.99    #Confidence threshold to mark a window as a face

X_STEP = 10      #Horizontal stride for the sliding window
Y_STEP = 10      #Vertical stride for the sliding window
WIN_MIN = 40     #Minimum sliding window size
WIN_MAX = 100    #Maximum sliding window size (exclusive)
WIN_STRIDE = 10  #Step by which the sliding window size grows

#Build the net in the session
def build_net(sess):
    in_len = 32
    in_dep = 1

    x_hold = tf.placeholder(tf.float32,shape=[None,in_dep*in_len*in_len])
    y_hold = tf.placeholder(tf.float32,shape=[None,2])
    keep_prob = tf.placeholder(tf.float32)

    xt = tf.reshape(x_hold,[-1,in_len,in_len,in_dep])

    #Layer 1 - 5x5 convolution, stride 2: 32x32 -> 14x14, 4 feature maps
    w1 = tfac.weight([5,5,in_dep,4])
    b1 = tfac.bias([4])
    c1 = nn.relu(nn.conv2d(xt,w1,strides=[1,2,2,1],padding='VALID')+b1)
    o1 = c1

    #Layer 2 - 3x3 convolution, stride 2: 14x14 -> 6x6, 16 feature maps
    w2 = tfac.weight([3,3,4,16])
    b2 = tfac.bias([16])
    c2 = nn.relu(nn.conv2d(o1,w2,strides=[1,2,2,1],padding='VALID')+b2)
    o2 = c2

    #Layer 3 - 3x3 convolution, stride 1: 6x6 -> 4x4, 32 feature maps
    w3 = tfac.weight([3,3,16,32])
    b3 = tfac.bias([32])
    c3 = nn.relu(nn.conv2d(o2,w3,strides=[1,1,1,1],padding='VALID')+b3)
    o3 = c3

    #Layer 3 outputs 32 feature maps of size 4x4
    dim = 32 * 4*4

    #Fully connected layer - 600 units
    of = tf.reshape(o3,[-1,dim])
    w4 = tfac.weight([dim,600])
    b4 = tfac.bias([600])
    o4 = nn.relu(tf.matmul(of,w4)+b4)

    o4 = nn.dropout(o4, keep_prob)

    #Output softmax layer - 2 units
    w5 = tfac.weight([600,2])
    b5 = tfac.bias([2])
    y = nn.softmax(tf.matmul(o4,w5)+b5)

    sess.run(tf.initialize_all_variables())

    return y,x_hold,y_hold,keep_prob

#Method to run the training
def train_net(model_path):
    train,val,test = face_ds.load_find_ds()
    sess = tfac.start_sess()
    y,x_hold,y_hold,keep_prob = build_net(sess)
    acc = tfac.train(sess,
                     y,
                     x_hold,
                     y_hold,
                     keep_prob,
                     train[0],train[1],
                     test[0],test[1],
                     lrate=1e-4,
                     epsilon=1e-16,
                     n_epoch=8,
                     batch_size=100,
                     print_epoch=1,
                     save_path=model_path)
    print "Accuracy:",acc
    sess.close()

#Basic sliding window detector to find faces
#Returns an image showing only the faces along with the sliding window mask (before blurring)
def localize(img,model_path):
    sess = tfac.start_sess()
    y,x_hold,y_hold,keep_prob = build_net(sess)
    saver = tf.train.Saver()
    saver.restore(sess,model_path)

    #Run all detection at a fixed size
    img = cv2.resize(img,DET_SIZE)
    mask = np.zeros(img.shape)
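    #Voting scheme used by the loops below: every window whose crop the net
    #classifies as a face with P(face) >= CONF_THRESH increments the mask
    #over that region, so pixels covered by many confident windows
    #accumulate the most votes.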
    #Run sliding windows of different sizes
    for bx in range(WIN_MIN,WIN_MAX,WIN_STRIDE):
        by = bx
        #i indexes rows (y axis), j indexes columns (x axis)
        for i in xrange(0, img.shape[0]-by, Y_STEP):
            for j in xrange(0, img.shape[1]-bx, X_STEP):
                sub_img = cv2.resize(img[i:i+by,j:j+bx],face_ds.IN_SIZE)
                X = sub_img.reshape((1,tfac.dim_prod(face_ds.IN_SIZE)))
                out = y.eval(session=sess,feed_dict={x_hold:X,keep_prob:1})[0]
                if out[0] >= CONF_THRESH:
                    mask[i:i+by,j:j+bx] = mask[i:i+by,j:j+bx]+1

    sess.close()
    mask = np.uint8(255*mask/max(np.max(mask),1)) #Guard against an all-zero mask
    faces = img*(cv2.threshold(cv2.blur(mask,BLUR_DIM),0,255,cv2.THRESH_OTSU)[1]/255)
    return (faces,mask)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# FaceDetect

This is a module for face detection with convolutional neural networks (CNNs). It uses a small CNN as a binary classifier to distinguish between faces and non-faces. A simple sliding window (with multiple windows of varying size) is used to localize the faces in the image.

**Requirements**

1. TensorFlow
2. OpenCV for Python

**Network topology**

The network consists of 3 convolution layers followed by a fully connected layer and a softmax output:

    Input: 32x32 black and white image
    1. Layer 1: 5x5 convolutions, 4 feature maps
    2. Layer 2: 3x3 convolutions, 16 feature maps
    3. Layer 3: 3x3 convolutions, 32 feature maps
       (Layer 3 outputs 32 4x4 feature maps)
    4. Layer 4: Fully connected layer, 600 units
    5. Layer 5: Softmax layer, 2 units

**Training parameters**

The network was trained with TensorFlow's AdamOptimizer:

    lrate: 1e-4
    epsilon: 1e-16
    mini-batch size: 100
    number of epochs: 8

The validation accuracy was 98.762% and the final accuracy on the test set was 98.554%.

**Dataset**

Positive samples (images of faces) for the classification were taken from 2 sources:

1. Cropped Labeled Faces in the Wild (http://conradsanderson.id.au/lfwcrop/)
2. MIT CBCL face recognition database (http://cbcl.mit.edu/software-datasets/heisele/facerecognition-database.html)

The horizontal mirror images of these images were included in the dataset.

Negative samples (non-faces) were taken from 4 sources:

1. Fifteen scene categories (http://www-cvr.ai.uiuc.edu/ponce_grp/data/)
2. Texture database (http://www-cvr.ai.uiuc.edu/ponce_grp/data/)
3. Caltech cars (Rear) background dataset (http://www.robots.ox.ac.uk/~vgg/data3.html)
4. Caltech houses dataset (http://www.robots.ox.ac.uk/~vgg/data3.html)

Random snapshots were generated from these images by taking sub-images of random lengths at random positions, and were mixed into the dataset along with the complete images.

The final dataset consists of 32,000 images from each class (positive and negative). Training, validation and test sets were generated from this with a 0.6 split for training, 0.2 for validation and 0.2 for test. Each of these sets has 50% positive and 50% negative samples.

**Localization**

The module uses a simple sliding window localizer. The input image is resized to (300,300). Square windows of side lengths 40, 50, ..., 90 are slid along the image. Each sub-image seen through the window is resized to (32,32) and fed to the network. If the sub-image is classified as a face with a minimum confidence of 0.99, the window is marked in the mask. After running all the different sized windows on the image, the final mask is blurred with a 50x50 mean filter and binarized with Otsu's threshold. This final binarized mask is used to extract only the faces from the image. The localizer returns two images: an image with only the faces and the raw mask (before blurring and binarization).
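For a rough sense of the cost of this scheme, the sketch below (not part of the module; it just mirrors the loop bounds in FaceFinder.py) counts how many windows, and hence forward passes through the network, a single image generates with the default parameters:

```python
# Count sliding-window positions for DET_SIZE=(300,300),
# window sides 40..90 and a step of 10 along each axis.
total = 0
for side in range(40, 100, 10):
    steps = len(range(0, 300 - side, 10))  # positions along one axis
    total += steps * steps
print total  # 3331 windows (forward passes) per image
```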
**Demo**

The repo includes a pre-trained model: face_model. This can be used directly for localization. Sample usage of this model with FaceFinder.py can be seen in demo.py. Running the demo should display the result of running the localizer on demo.jpg.
Demos with other images can be seen here: https://youtu.be/N4GIGVnyNBo

Output of demo.py:
![Alt text](demo_result.png?raw=true)
--------------------------------------------------------------------------------
/demo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PCJohn/FaceDetect/d4b624b95244b32e1241a4f3532fd8e386381519/demo.jpg
--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
"""
Demo for face detection. Runs the sliding window detector on demo.jpg.
Displays the input image, the localized faces and the sliding window mask.

Author: Prithvijit Chakrabarty (prithvichakra@gmail.com)
"""

import FaceFinder
import cv2

model_path = 'face_model'
img = cv2.imread("demo.jpg",0)
faces,mask = FaceFinder.localize(img,model_path)
cv2.imshow("faces",faces)
cv2.imshow("sliding window mask",mask)
cv2.imshow("input image",img)
cv2.waitKey(0)
cv2.destroyAllWindows()
--------------------------------------------------------------------------------
/demo_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PCJohn/FaceDetect/d4b624b95244b32e1241a4f3532fd8e386381519/demo_result.png
--------------------------------------------------------------------------------
/face_ds.py:
--------------------------------------------------------------------------------
from __future__ import division
import os
import numpy as np
import cv2
import random

FACE_PATH = "/home/prithvi/dsets/Faces/positive/"
NON_FACE_PATH = "/home/prithvi/dsets/Faces/negative/"
IN_SIZE = (32,32)  #Input dimensions of image for the network
SNAP_COUNT = 5     #Number of random snapshots per non-face image
MIN_LEN = 10       #Minimum length for the random snapshots of non-faces
GOOD = [1,0]       #Vector output for faces
BAD = [0,1]        #Vector output for non-faces

FACE_COUNT = 36000 #Number of images of each class (positive and negative) in the dataset
TRAIN_SPLIT = int(0.6*FACE_COUNT)
VAL_SPLIT = int(0.2*FACE_COUNT) + TRAIN_SPLIT
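#With FACE_COUNT = 36000 (the constant above) these splits give, per class:
#  train: 0.6*36000 = 21600 images (indices 0..21600)
#  val:   0.2*36000 =  7200 images (indices 21600..28800)
#  test:  remaining    7200 images (indices 28800..36000)
#Each set is then doubled by adding the same number of samples of the other class.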

#Method to generate multiple snapshots from an image
def rand_snap(img):
    r = []
    x = img.shape[0]
    y = img.shape[1]
    #Generate SNAP_COUNT snapshots of different sizes
    for i in range(SNAP_COUNT):
        snap_size = max([MIN_LEN,int(random.random()*200)])
        snap_size = min([snap_size,x,y]) #Keep the snapshot inside the image
        fx = int(random.random()*(x-snap_size))
        fy = int(random.random()*(y-snap_size))
        snap = img[fx:fx+snap_size,fy:fy+snap_size]
        r.append(cv2.resize(snap,IN_SIZE))
    return r

#Load the dataset for face/non-face classification
def load_find_ds():
    ds = []
    #Load faces (positive samples)
    for n in os.listdir(FACE_PATH):
        name = FACE_PATH+n
        for img_path in os.listdir(name):
            t_img = cv2.resize(cv2.imread(name+"/"+img_path,0),IN_SIZE)
            ds.append((t_img,GOOD))
            ds.append((cv2.flip(t_img,1),GOOD)) #Use the horizontal mirror image
    random.shuffle(ds)
    ds = ds[:FACE_COUNT]
    #Load non-faces (negative samples) from dataset
    nface_ds = []
    for n in os.listdir(NON_FACE_PATH):
        name = NON_FACE_PATH+n
        for img_path in os.listdir(name):
            t_img = cv2.imread(name+"/"+img_path,0)
            nface_ds.extend([(r,BAD) for r in rand_snap(t_img)])
            nface_ds.append((cv2.resize(t_img,IN_SIZE),BAD))
    random.shuffle(nface_ds)
    nface_ds = nface_ds[:FACE_COUNT]

    #Make the train, val and test sets: ensure 50% of each class in every set
    train = ds[:TRAIN_SPLIT]
    train.extend(nface_ds[:TRAIN_SPLIT])
    random.shuffle(train)
    val = ds[TRAIN_SPLIT:VAL_SPLIT]
    val.extend(nface_ds[TRAIN_SPLIT:VAL_SPLIT])
    random.shuffle(val)
    test = ds[VAL_SPLIT:] #Start after VAL_SPLIT so the test set does not overlap the validation set
    test.extend(nface_ds[VAL_SPLIT:])
    random.shuffle(test)

    trainX,trainY = map(np.array,zip(*train))
    valX,valY = map(np.array,zip(*val))
    testX,testY = map(np.array,zip(*test))

    return ((trainX,trainY),(valX,valY),(testX,testY))
--------------------------------------------------------------------------------
/face_model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PCJohn/FaceDetect/d4b624b95244b32e1241a4f3532fd8e386381519/face_model
--------------------------------------------------------------------------------
/tfac.py:
--------------------------------------------------------------------------------
"""
Accessory methods for using TensorFlow -- mostly taken from the TensorFlow tutorials!

Author: Prithvijit Chakrabarty (prithvichakra@gmail.com)
"""

import tensorflow as tf
import numpy as np
import random

#Make weight and bias variables -- from the TensorFlow tutorial
def weight(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

#Product of the entries of a dimension tuple, giving the total flattened length
def dim_prod(dim_arr):
    return np.prod([d for d in dim_arr if d is not None])

#Start a TensorFlow session
def start_sess():
    config = tf.ConfigProto()
    config.gpu_options.allocator_type = 'BFC'
    sess = tf.Session(config=config)
    return sess

#Train the model
def train(sess, y, x_hold, y_hold, keep_prob, X, Y, valX, valY, lrate=0.5, epsilon=1e-8, n_epoch=100, batch_size=10, print_epoch=100, save_path=None):
    cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_hold*tf.log(y+1e-10), reduction_indices=[1]))
    train_step = tf.train.AdamOptimizer(learning_rate=lrate,epsilon=epsilon).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y,1),tf.argmax(y_hold,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    #Flatten the input images for the placeholder
    flat_len = dim_prod(x_hold.get_shape().as_list())
    X = X.reshape((X.shape[0],flat_len))

    print 'Starting training session...'
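    #Note on the loop below: batch accuracy is measured (with keep_prob=1)
    #*before* the gradient step on that batch, so the printed per-epoch
    #average slightly lags the post-update accuracy and epoch 0 starts near
    #chance. Dropout (keep_prob=0.5) is applied only during the update.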

    sess.run(tf.initialize_all_variables())
    batches = batchify(X,Y,batch_size)
    print 'Number of batches:',len(batches)
    val_accuracy = None #Stays None if no validation set is given
    for i in range(n_epoch):
        avg_acc = 0
        random.shuffle(batches)
        for batchX,batchY in batches:
            avg_acc = avg_acc + accuracy.eval(session=sess, feed_dict={x_hold:batchX, y_hold:batchY, keep_prob:1})
            train_step.run(session=sess,feed_dict={x_hold:batchX, y_hold:batchY, keep_prob:0.5})
        print 'Epoch '+str(i)+': '+str(avg_acc/len(batches))
        if (valX is not None) and (valY is not None):
            #Validation
            valX = valX.reshape((valX.shape[0],flat_len))
            val_accuracy = accuracy.eval(session=sess,feed_dict={x_hold:valX, y_hold:valY, keep_prob:1})
            print 'Val acc:',val_accuracy

    if save_path is not None:
        saver = tf.train.Saver(tf.all_variables())
        saver.save(sess,save_path)
        writer = tf.train.SummaryWriter(save_path+'_graph',sess.graph)
        writer.flush()
        writer.close()
        print 'Model saved'
    return val_accuracy

#Test a saved model: y, x_hold, y_hold and keep_prob come from the network builder, as in train()
def test(sess, y, x_hold, y_hold, keep_prob, X, Y, model_path):
    correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_hold,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    saver = tf.train.Saver()
    sess.run(tf.initialize_all_variables())
    saver.restore(sess,model_path)
    X = X.reshape((X.shape[0],X.shape[1]*X.shape[2]))
    test_accuracy = accuracy.eval(session=sess,feed_dict={x_hold:X,y_hold:Y,keep_prob:1})
    return test_accuracy

#Split the data into mini-batches
def batchify(X, Y, batch_size):
    batches = [(X[i:i+batch_size],Y[i:i+batch_size]) for i in xrange(0,X.shape[0],batch_size)]
    random.shuffle(batches)
    return batches
--------------------------------------------------------------------------------