├── README.md ├── data_prep.py ├── main.py ├── main_prediction.py ├── model.py ├── model_prediction.py ├── new_files ├── data_prep.py ├── irp.py ├── lnms.py ├── main.py ├── model.py ├── selective_search.py ├── split_tf_record.py └── vis.py ├── results_analysis.py ├── selective_search.py ├── split_tf_record.py ├── test_results.npy ├── truth.npy ├── version_0.0.txt └── with SPN ├── logs └── events.out.tfevents.1494397553.shashanks-mbp.dynamic.ucsd.edu ├── main.py ├── model.py ├── model.pyc ├── spatial_transformer.py └── spatial_transformer.pyc /README.md: -------------------------------------------------------------------------------- 1 | # HyperFace 2 | 3 | A TensorFlow implementation of the following paper: 4 | 5 | HyperFace: A Deep Multi-task Learning Framework for Face Detection, Landmark Localization, Pose Estimation, and Gender Recognition (https://arxiv.org/abs/1603.01249) 6 | 7 | -------------------------------------------------------------------------------- /data_prep.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | #from skimage import io 4 | import sqlite3 5 | #import cv2 6 | import matplotlib.pyplot as plt 7 | import os 8 | import random 9 | from tqdm import tqdm 10 | 11 | # select_string = "faceimages.filepath, faces.face_id, facepose.roll, facepose.pitch, facepose.yaw, facerect.x, facerect.y, facerect.w, facerect.h" 12 | # from_string = "faceimages, faces, facepose, facerect" 13 | # where_string = "faces.face_id = facepose.face_id and faces.file_id = faceimages.file_id and faces.face_id = facerect.face_id" 14 | # query_string = "SELECT " + select_string + " FROM " + from_string + " WHERE " + where_string 15 | 16 | # conn = sqlite3.connect('/home/shashank/Documents/CSE-252C/AFLW/aflw/data/aflw.sqlite') 17 | # c = conn.cursor() 18 | 19 | img_path = '/home/shashank/Documents/CSE-252C/AFLW/' 20 | 21 | # tfrecords_train_filename = 'aflw_train.tfrecords' 22 | # tfrecords_test_filename = 'aflw_test.tfrecords' 23 | tfrecords_filename = 'aflw_train.tfrecords' 24 | # writer_train = tf.python_io.TFRecordWriter(tfrecords_train_filename) 25 | # writer_test = tf.python_io.TFRecordWriter(tfrecords_test_filename) 26 | 27 | def _bytes_feature(value): 28 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 29 | 30 | def _float_feature(value): 31 | return tf.train.Feature(float_list=tf.train.FloatList(value=[value])) 32 | 33 | def _int64_feature(value): 34 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 35 | 36 | def test_names(): 37 | l=[] 38 | names = os.listdir(img_path+'0') 39 | random.shuffle(names) 40 | l.append(['0/'+name for name in names[:300]]) 41 | 42 | names = os.listdir(img_path+'2') 43 | random.shuffle(names) 44 | l.append(['2/'+name for name in names[:300]]) 45 | 46 | names = os.listdir(img_path+'3') 47 | random.shuffle(names) 48 | l.append(['3/'+name for name in names[:400]]) 49 | 50 | return l[0]+l[1]+l[2] 51 | 52 | def make_tfrecord(test_images): 53 | 54 | it_test =0 55 | it_train = 0 56 | 57 | for row in c.execute(query_string): 58 | ''' 59 | row[0] = image path str 60 | row[1] = face id int 61 | row[2] = roll float 62 | row[3] = pitch float 63 | row[4] = yaw float 64 | row[5] = x int 65 | row[6] = y int 66 | row[7] = w int 67 | row[8] = h int 68 | ''' 69 | 70 | try: 71 | img_raw = np.asarray(io.imread(img_path+row[0])) 72 | w = img_raw.shape[1] 73 | h = img_raw.shape[0] 74 | 75 | img_raw = img_raw.tostring() 76 | example = 
tf.train.Example(features=tf.train.Features(feature={ 77 | 'image_raw':_bytes_feature(img_raw), 78 | 'width': _int64_feature(w), 79 | 'height': _int64_feature(h), 80 | 'face_id': _int64_feature(row[1]), 81 | 'roll': _float_feature(row[2]), 82 | 'pitch': _float_feature(row[3]), 83 | 'yaw': _float_feature(row[4]), 84 | 'loc_x': _int64_feature(row[5]), 85 | 'loc_y': _int64_feature(row[6]), 86 | 'loc_w': _int64_feature(row[7]), 87 | 'loc_h': _int64_feature(row[8]) 88 | })) 89 | 90 | if row[0] in test_images: 91 | writer_test.write(example.SerializeToString()) 92 | it_test += 1 93 | else: 94 | writer_train.write(example.SerializeToString()) 95 | it_train += 1 96 | 97 | except: 98 | print row[0] 99 | 100 | if it_train > 50: 101 | break 102 | print it_test,it_train 103 | c.close() 104 | writer_train.close() 105 | writer_test.close() 106 | 107 | def extract_tfrecord(session): 108 | record_iterator = tf.python_io.tf_record_iterator(path=tfrecords_filename) 109 | save_data = None 110 | save_euler = [] 111 | for string_record in record_iterator: 112 | example = tf.train.Example() 113 | example.ParseFromString(string_record) 114 | 115 | img_string = example.features.feature['image_raw'].bytes_list.value[0] 116 | img_width = int(example.features.feature['width'].int64_list.value[0]) 117 | img_height = int(example.features.feature['height'].int64_list.value[0]) 118 | img_1d = np.fromstring(img_string, dtype=np.uint8).reshape(img_height,img_width,3) 119 | loc_x = int(example.features.feature['loc_x'].int64_list.value[0]) 120 | loc_y = int(example.features.feature['loc_y'].int64_list.value[0]) 121 | loc_w = int(example.features.feature['loc_w'].int64_list.value[0]) 122 | loc_h = int(example.features.feature['loc_h'].int64_list.value[0]) 123 | roll = float(example.features.feature['roll'].float_list.value[0]) 124 | yaw = float(example.features.feature['yaw'].float_list.value[0]) 125 | pitch = float(example.features.feature['pitch'].float_list.value[0]) 126 | 127 | boxes = np.asarray([[loc_y/float(img_height),loc_x/float(img_width),(loc_y+loc_h)/float(img_height),(loc_x+loc_w)/float(img_width)]]) 128 | resized_and_cropped_image = tf.image.crop_and_resize(img_1d[np.newaxis,:,:,:].astype(np.float32), boxes.astype(np.float32), [0]*1, crop_size=[227,227]) 129 | if save_data is not None: 130 | save_data = np.concatenate([save_data,resized_and_cropped_image.eval(session=session)],axis=0) 131 | else: 132 | save_data = resized_and_cropped_image.eval(session=session) 133 | save_euler.append([roll,yaw,pitch]) 134 | 135 | np.save('truth_data.npy',save_data) 136 | np.save('annotations.npy',np.asarray(save_euler)) 137 | 138 | # cv2.rectangle(img_1d,(loc_x,loc_y),(loc_x+loc_w,loc_y+loc_h),(0,255,0),3) 139 | # cv2.imshow('result',img_1d) 140 | # cv2.waitKey(0) 141 | 142 | 143 | if __name__ == '__main__': 144 | #test_images = test_names() 145 | #make_tfrecord(test_images) 146 | session = tf.Session() 147 | extract_tfrecord(session) 148 | 149 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os 3 | from model import * 4 | 5 | 6 | 7 | if not os.path.exists('../logs'): 8 | os.makedirs('../logs') 9 | 10 | if not os.path.exists('../checkpoint'): 11 | os.makedirs('../checkpoint') 12 | 13 | if not os.path.exists('../best_checkpoint'): 14 | os.makedirs('../best_checkpoint') 15 | 16 | map(os.unlink, (os.path.join( '../logs',f) for f in os.listdir('../logs')) ) 17 | 18 | 
net = HyperFace(True, tf_record_file_path='../../aflw_train_new.tfrecords',model_save_path='../checkpoint/',best_model_save_path='../best_checkpoint/', 19 | restore_model_path='../full_best_checkpoint/') 20 | 21 | with tf.Session() as sess: 22 | print 'Building Graph...' 23 | net.build_network(sess) 24 | print 'Graph Built!' 25 | # net.print_variables() 26 | # net.load_weights(weights_path) 27 | net.train() 28 | 29 | -------------------------------------------------------------------------------- /main_prediction.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os 3 | from model_prediction import * 4 | 5 | 6 | 7 | if not os.path.exists('../logs'): 8 | os.makedirs('../logs') 9 | 10 | if not os.path.exists('../checkpoint'): 11 | os.makedirs('../checkpoint') 12 | 13 | if not os.path.exists('../best_checkpoint'): 14 | os.makedirs('../best_checkpoint') 15 | 16 | map(os.unlink, (os.path.join( '../logs',f) for f in os.listdir('../logs')) ) 17 | 18 | net = HyperFace(True, tf_record_file_path='./aflw_train_small_check.tfrecords',model_save_path='../checkpoint/',best_model_save_path='../best_checkpoint/', 19 | restore_model_path='../full_best_checkpoint/') 20 | 21 | with tf.Session() as sess: 22 | print 'Building Graph...' 23 | net.build_network(sess) 24 | print 'Graph Built!' 25 | # net.print_variables() 26 | # net.load_weights('/Users/shashank/TensorFlow/SPN/weights/') 27 | net.predict() 28 | # net.train() 29 | 30 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | import numpy as np 4 | from tqdm import tqdm 5 | from pdb import set_trace as brk 6 | import sys 7 | 8 | class HyperFace(object): 9 | 10 | def __init__(self,load_model,tf_record_file_path=None,model_save_path=None,best_model_save_path=None,restore_model_path=None): 11 | 12 | self.batch_size = 32 13 | self.img_height = 227 14 | self.img_width = 227 15 | self.channel = 3 16 | 17 | self.num_epochs =10 18 | 19 | # Hyperparameters 1,5,0.5,5,2 20 | self.weight_detect = 1 21 | self.weight_landmarks = 5 22 | self.weight_visibility = 0.5 23 | self.weight_pose = 5 24 | self.weight_gender = 2 25 | 26 | #tf_Record Paramters 27 | self.tf_record_file_path = tf_record_file_path 28 | self.filename_queue = tf.train.string_input_producer([self.tf_record_file_path], num_epochs=self.num_epochs) 29 | self.images, self.labels, self.land, self.vis, self.po, self.gen= self.load_from_tfRecord(self.filename_queue) 30 | 31 | self.model_save_path = model_save_path 32 | self.best_model_save_path = best_model_save_path 33 | self.restore_model_path = restore_model_path 34 | 35 | self.save_after_steps = 200 36 | self.print_after_steps = 50 37 | self.load_model = load_model 38 | 39 | 40 | def build_network(self, sess): 41 | 42 | self.sess = sess 43 | 44 | self.X = tf.placeholder(tf.float32, [self.batch_size, self.img_height, self.img_width, self.channel], name='images') 45 | self.detection = tf.placeholder(tf.int32, [self.batch_size], name='detection') 46 | self.landmarks = tf.placeholder(tf.float32, [self.batch_size, 42], name='landmarks') 47 | self.visibility = tf.placeholder(tf.float32, [self.batch_size,21], name='visibility') 48 | self.pose = tf.placeholder(tf.float32, [self.batch_size,3], name='pose') 49 | self.gender = tf.placeholder(tf.int32, [self.batch_size], name='gender') 50 | 51 | 
net_output = self.network(self.X) # (out_detection, out_landmarks, out_visibility, out_pose, out_gender) 52 | self.test_model = net_output 53 | self.loss_detection = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=net_output[0], labels=tf.one_hot(self.detection, 2))) 54 | 55 | detection_mask = tf.cast(tf.expand_dims(self.detection, axis=1),tf.float32) 56 | 57 | visibility_mask = tf.reshape(tf.tile(tf.expand_dims(self.visibility, axis=2), [1,1,2]), [self.batch_size, -1]) 58 | self.loss_landmarks = tf.reduce_mean(tf.square(detection_mask*visibility_mask*(net_output[1] - self.landmarks))) 59 | 60 | self.loss_visibility = tf.reduce_mean(tf.square(detection_mask*(net_output[2] - self.visibility))) 61 | self.loss_pose = tf.reduce_mean(tf.square(detection_mask*(net_output[3] - self.pose))) 62 | self.loss_gender = tf.reduce_mean(detection_mask*tf.nn.sigmoid_cross_entropy_with_logits(logits=net_output[4], labels=tf.one_hot(self.gender,2))) 63 | 64 | 65 | self.loss = self.weight_detect*self.loss_detection + self.weight_landmarks*self.loss_landmarks \ 66 | + self.weight_visibility*self.loss_visibility + self.weight_pose*self.loss_pose \ 67 | + self.weight_gender*self.loss_gender 68 | 69 | self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.cast(tf.argmax(net_output[0],1),tf.int32),self.detection),tf.float32)) 70 | 71 | #self.loss = self.loss_detection 72 | #self.optimizer = tf.train.AdamOptimizer(1e-7).minimize(self.loss) 73 | self.optimizer = tf.train.MomentumOptimizer(1e-3,0.9,use_nesterov=True).minimize(self.loss) 74 | self.saver = tf.train.Saver(max_to_keep=4, keep_checkpoint_every_n_hours=4) 75 | self.best_saver = tf.train.Saver(max_to_keep=10, keep_checkpoint_every_n_hours=4) 76 | 77 | 78 | def train(self): 79 | 80 | 81 | if self.load_model: 82 | print "Restoring Model" 83 | ckpt = tf.train.get_checkpoint_state(self.restore_model_path) 84 | if ckpt and ckpt.model_checkpoint_path: 85 | self.saver.restore(self.sess,ckpt.model_checkpoint_path) 86 | self.sess.run(tf.local_variables_initializer()) 87 | else: 88 | print "Initializing Model" 89 | self.sess.run(tf.group(tf.global_variables_initializer(),tf.local_variables_initializer())) 90 | 91 | #self.load_det_weights(self.restore_model_path+'weights.npy') 92 | 93 | 94 | coord = tf.train.Coordinator() 95 | threads = tf.train.start_queue_runners(sess=self.sess,coord=coord) 96 | 97 | writer = tf.summary.FileWriter('../logs', self.sess.graph) 98 | loss_summ = tf.summary.scalar('loss', self.loss) 99 | img_summ = tf.summary.image('images', self.images, max_outputs=5) 100 | label_summ = tf.summary.histogram('labels', self.detection) 101 | detect_summ = tf.summary.scalar('det_loss', self.loss_detection) 102 | landmarks_summ = tf.summary.scalar('landmarks_loss', self.loss_landmarks) 103 | vis_summ = tf.summary.scalar('visibility_loss', self.loss_visibility) 104 | pose_summ = tf.summary.scalar('pose_loss', self.loss_pose) 105 | gender_summ = tf.summary.scalar('gender_loss', self.loss_gender) 106 | 107 | summ_op = tf.summary.merge_all() 108 | 109 | counter = 0 110 | best_loss = sys.maxint 111 | try: 112 | while not coord.should_stop(): 113 | batch_imgs, batch_labels, batch_landmarks, batch_visibility, batch_pose, batch_gender = self.sess.run([self.images,self.labels,self.land, self.vis, self.po, self.gen]) 114 | batch_imgs = (batch_imgs - 127.5) / 128.0 115 | input_feed={self.X: batch_imgs, self.detection: batch_labels, self.landmarks: batch_landmarks, self.visibility: batch_visibility, self.pose: batch_pose, self.gender: np.squeeze(batch_gender)} 
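# [annotation, not in the original source] Shapes being fed above, as implied by the placeholders in build_network():
#   batch_imgs       -> self.X          [batch_size, 227, 227, 3], scaled to roughly [-1, 1] by (x - 127.5)/128.0
#   batch_labels     -> self.detection  [batch_size]      1 = face window, 0 = background window
#   batch_landmarks  -> self.landmarks  [batch_size, 42]  21 (x, y) pairs, normalized to the proposal window (per new_files/data_prep.py)
#   batch_visibility -> self.visibility [batch_size, 21]  1 where the corresponding landmark falls inside the window
#   batch_pose       -> self.pose       [batch_size, 3]   Euler angles (roll, pitch, yaw as stored by data_prep)
#   batch_gender     -> self.gender     [batch_size]      np.squeeze drops the trailing singleton of the [batch_size, 1] batch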
116 | #input_feed={self.X: batch_imgs, self.detection: batch_labels} 117 | 118 | _,model_op,loss,l_d,l_l,l_v,l_p,l_g, summ, accuracy = self.sess.run([self.optimizer,self.test_model,self.loss,self.loss_detection, 119 | self.loss_landmarks,self.loss_visibility,self.loss_pose,self.loss_gender, summ_op, self.accuracy], input_feed) 120 | 121 | writer.add_summary(summ, counter) 122 | 123 | if counter % self.save_after_steps == 0: 124 | self.saver.save(self.sess,self.model_save_path+'hyperface_model',global_step=int(counter),write_meta_graph=False) 125 | 126 | 127 | if loss <= best_loss: 128 | best_loss = loss 129 | self.best_saver.save(self.sess,self.best_model_save_path+'hyperface_best_model',global_step=int(counter),write_meta_graph=False) 130 | #self.save_weights(self.best_model_save_path) 131 | 132 | if counter % self.print_after_steps == 0: 133 | print "Iteration:{},Total Loss:{},Detection loss:{},Landmark loss:{},Visbility Loss :{},Pose Loss:{},Gender Loss:{},Accuracy:{}".format(counter,loss,l_d,l_l,l_v,l_p,l_g,accuracy) 134 | 135 | counter += 1 136 | 137 | except tf.errors.OutOfRangeError: 138 | print('Done training -- epoch limit reached') 139 | finally: 140 | coord.request_stop() 141 | 142 | coord.join(threads) 143 | 144 | 145 | 146 | 147 | 148 | def network_det(self,inputs,reuse=False): 149 | 150 | if reuse: 151 | tf.get_variable_scope().reuse_variables() 152 | 153 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 154 | activation_fn = tf.nn.relu, 155 | weights_initializer = tf.truncated_normal_initializer(0.0, 0.01)): 156 | 157 | conv1 = slim.conv2d(inputs, 96, [11,11], 4, padding= 'VALID', scope='conv1') 158 | max1 = slim.max_pool2d(conv1, [3,3], 2, padding= 'VALID', scope='max1') 159 | 160 | conv2 = slim.conv2d(max1, 256, [5,5], 1, scope='conv2') 161 | max2 = slim.max_pool2d(conv2, [3,3], 2, padding= 'VALID', scope='max2') 162 | conv3 = slim.conv2d(max2, 384, [3,3], 1, scope='conv3') 163 | 164 | conv4 = slim.conv2d(conv3, 384, [3,3], 1, scope='conv4') 165 | conv5 = slim.conv2d(conv4, 256, [3,3], 1, scope='conv5') 166 | pool5 = slim.max_pool2d(conv5, [3,3], 2, padding= 'VALID', scope='pool5') 167 | 168 | shape = int(np.prod(pool5.get_shape()[1:])) 169 | fc6 = slim.fully_connected(tf.reshape(pool5, [-1, shape]), 4096, scope='fc6') 170 | 171 | fc_detection = slim.fully_connected(fc6, 512, scope='fc_det1') 172 | out_detection = slim.fully_connected(fc_detection, 2, scope='fc_det2', activation_fn = None) 173 | 174 | return out_detection 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | def network(self,inputs,reuse=False): 183 | 184 | if reuse: 185 | tf.get_variable_scope().reuse_variables() 186 | 187 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 188 | activation_fn = tf.nn.relu, 189 | weights_initializer = tf.truncated_normal_initializer(0.0, 0.01) ): 190 | 191 | conv1 = slim.conv2d(inputs, 96, [11,11], 4, padding= 'VALID', scope='conv1') 192 | max1 = slim.max_pool2d(conv1, [3,3], 2, padding= 'VALID', scope='max1') 193 | 194 | conv1a = slim.conv2d(max1, 256, [4,4], 4, padding= 'VALID', scope='conv1a') 195 | 196 | conv2 = slim.conv2d(max1, 256, [5,5], 1, scope='conv2') 197 | max2 = slim.max_pool2d(conv2, [3,3], 2, padding= 'VALID', scope='max2') 198 | conv3 = slim.conv2d(max2, 384, [3,3], 1, scope='conv3') 199 | 200 | conv3a = slim.conv2d(conv3, 256, [2,2], 2, padding= 'VALID', scope='conv3a') 201 | 202 | conv4 = slim.conv2d(conv3, 384, [3,3], 1, scope='conv4') 203 | conv5 = slim.conv2d(conv4, 256, [3,3], 1, scope='conv5') 204 | pool5 = slim.max_pool2d(conv5, [3,3], 
2, padding= 'VALID', scope='pool5') 205 | 206 | concat_feat = tf.concat([conv1a, conv3a, pool5],3) 207 | conv_all = slim.conv2d(concat_feat, 192, [1,1], 1, padding= 'VALID', scope='conv_all') 208 | 209 | shape = int(np.prod(conv_all.get_shape()[1:])) 210 | fc_full = slim.fully_connected(tf.reshape(conv_all, [-1, shape]), 3072, scope='fc_full') 211 | 212 | fc_detection = slim.fully_connected(fc_full, 512, scope='fc_detection1') 213 | fc_landmarks = slim.fully_connected(fc_full, 512, scope='fc_landmarks1') 214 | fc_visibility = slim.fully_connected(fc_full, 512, scope='fc_visibility1') 215 | fc_pose = slim.fully_connected(fc_full, 512, scope='fc_pose1') 216 | fc_gender = slim.fully_connected(fc_full, 512, scope='fc_gender1') 217 | 218 | out_detection = slim.fully_connected(fc_detection, 2, scope='fc_detection2', activation_fn = None) 219 | out_landmarks = slim.fully_connected(fc_landmarks, 42, scope='fc_landmarks2', activation_fn = None ) 220 | out_visibility = slim.fully_connected(fc_visibility, 21, scope='fc_visibility2', activation_fn = None) 221 | out_pose = slim.fully_connected(fc_pose, 3, scope='fc_pose2', activation_fn = None) 222 | out_gender = slim.fully_connected(fc_gender, 2, scope='fc_gender2', activation_fn = None) 223 | 224 | return [out_detection, out_landmarks, out_visibility, out_pose, out_gender] 225 | 226 | 227 | 228 | def predict(self, imgs_path): 229 | print 'Running inference...' 230 | np.set_printoptions(suppress=True) 231 | imgs = (np.load(imgs_path) - 127.5)/128.0 232 | shape = imgs.shape 233 | self.X = tf.placeholder(tf.float32, [shape[0], self.img_height, self.img_width, self.channel], name='images') 234 | pred = self.network(self.X, reuse = True) 235 | 236 | net_preds = self.sess.run(pred, feed_dict={self.X: imgs}) 237 | 238 | print 'gender: \n', net_preds[-1] 239 | import matplotlib.pyplot as plt 240 | plt.imshow(imgs[-1]);plt.show() 241 | 242 | 243 | 244 | def load_from_tfRecord(self,filename_queue): 245 | 246 | reader = tf.TFRecordReader() 247 | _, serialized_example = reader.read(filename_queue) 248 | 249 | features = tf.parse_single_example( 250 | serialized_example, 251 | features={ 252 | 'image_raw':tf.FixedLenFeature([], tf.string), 253 | 'width': tf.FixedLenFeature([], tf.int64), 254 | 'height': tf.FixedLenFeature([], tf.int64), 255 | 'pos_locs':tf.FixedLenFeature([], tf.string), 256 | 'neg_locs':tf.FixedLenFeature([], tf.string), 257 | 'n_pos_locs':tf.FixedLenFeature([], tf.int64), 258 | 'n_neg_locs':tf.FixedLenFeature([], tf.int64), 259 | 'gender':tf.FixedLenFeature([], tf.int64), 260 | 'pose': tf.FixedLenFeature([], tf.string), 261 | 'landmarks':tf.FixedLenFeature([], tf.string), 262 | 'visibility':tf.FixedLenFeature([], tf.string), 263 | 264 | }) 265 | 266 | landmarks = tf.decode_raw(features['landmarks'], tf.float32) 267 | pose = tf.decode_raw(features['pose'], tf.float32) 268 | visibility = tf.decode_raw(features['visibility'], tf.int32) 269 | gender = tf.cast(features['gender'], tf.int32) 270 | 271 | landmarks_shape = tf.stack([1,21*2]) 272 | pose_shape = tf.stack([1,3]) 273 | visibility_shape = tf.stack([1,21]) 274 | gender_shape = tf.stack([1,1]) 275 | 276 | landmarks = tf.reshape(landmarks,landmarks_shape) 277 | visibility = tf.reshape(visibility,visibility_shape) 278 | pose = tf.reshape(pose,pose_shape) 279 | gender = tf.reshape(gender,gender_shape) 280 | 281 | image = tf.decode_raw(features['image_raw'], tf.uint8) 282 | pos_locs = tf.decode_raw(features['pos_locs'], tf.float32) 283 | neg_locs = tf.decode_raw(features['neg_locs'], tf.float32) 
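# [annotation, not in the original source] pos_locs / neg_locs are the positive (face) and negative
# (background) candidate windows written by new_files/data_prep.py (positive.npy / negative.npy,
# presumably generated via selective_search.py). They are stored as flat float32 buffers and reshaped
# below to [n_pos_locs, 4] / [n_neg_locs, 4], one box per row in [y1, x1, y2, x2] order, normalized by
# image height and width, which is the box format tf.image.crop_and_resize expects.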
284 | 285 | orig_height = tf.cast(features['height'], tf.int32) 286 | orig_width = tf.cast(features['width'], tf.int32) 287 | n_pos_locs = tf.cast(features['n_pos_locs'], tf.int32) 288 | n_neg_locs = tf.cast(features['n_neg_locs'], tf.int32) 289 | 290 | image_shape = tf.stack([1,orig_height,orig_width,3]) 291 | image = tf.cast(tf.reshape(image,image_shape),tf.float32) 292 | 293 | pos_locs_shape = tf.stack([n_pos_locs,4]) 294 | pos_locs = tf.reshape(pos_locs,pos_locs_shape) 295 | 296 | neg_locs_shape = tf.stack([n_neg_locs,4]) 297 | neg_locs = tf.reshape(neg_locs,neg_locs_shape) 298 | 299 | positive_cropped = tf.image.crop_and_resize(image,pos_locs,tf.zeros([n_pos_locs],dtype=tf.int32),[227,227]) 300 | negative_cropped = tf.image.crop_and_resize(image,neg_locs,tf.zeros([n_neg_locs],dtype=tf.int32),[227,227]) 301 | 302 | all_images = tf.concat([positive_cropped,negative_cropped],axis=0) 303 | 304 | positive_labels = tf.ones([n_pos_locs]) 305 | negative_labels = tf.zeros([n_neg_locs]) 306 | 307 | 308 | positive_landmarks = tf.tile(landmarks,[n_pos_locs,1]) 309 | negative_landmarks = tf.tile(landmarks,[n_neg_locs,1]) 310 | 311 | positive_visibility = tf.tile(visibility,[n_pos_locs,1]) 312 | negative_visibility = tf.tile(visibility,[n_neg_locs,1]) 313 | 314 | positive_pose = tf.tile(pose,[n_pos_locs,1]) 315 | negative_pose = tf.tile(pose,[n_neg_locs,1]) 316 | 317 | positive_gender = tf.tile(gender,[n_pos_locs,1]) 318 | negative_gender = tf.tile(gender,[n_neg_locs,1]) 319 | 320 | all_landmarks = tf.concat([positive_landmarks,negative_landmarks],axis=0) 321 | all_visibility = tf.concat([positive_visibility,negative_visibility],axis=0) 322 | all_pose = tf.concat([positive_pose,negative_pose],axis=0) 323 | 324 | all_labels = tf.concat([positive_labels,negative_labels],axis=0) 325 | all_gender = tf.concat([positive_gender,negative_gender],axis=0) 326 | 327 | tf.random_shuffle(all_images,seed=7) 328 | tf.random_shuffle(all_labels,seed=7) 329 | tf.random_shuffle(all_landmarks,seed=7) 330 | tf.random_shuffle(all_visibility,seed=7) 331 | tf.random_shuffle(all_pose,seed=7) 332 | tf.random_shuffle(all_gender,seed=7) 333 | 334 | images,labels,landmarks_,visibility_,pose_,gender_ = tf.train.shuffle_batch([all_images,all_labels,all_landmarks,all_visibility,all_pose,all_gender] 335 | ,enqueue_many=True,batch_size=self.batch_size,num_threads=1,capacity=1000,min_after_dequeue=500) 336 | 337 | return images,labels,landmarks_,visibility_,pose_,gender_ 338 | 339 | 340 | def load_weights(self, path): 341 | variables = slim.get_model_variables() 342 | print 'Loading weights...' 343 | for var in tqdm(variables): 344 | if ('conv' in var.name) and ('weights' in var.name): 345 | self.sess.run(var.assign(np.load(path+var.name.split('/')[0]+'/W.npy').transpose((2,3,1,0)))) 346 | elif ('fc' in var.name) and ('weights' in var.name): 347 | self.sess.run(var.assign(np.load(path+var.name.split('/')[0]+'/W.npy').T)) 348 | elif 'biases' in var.name: 349 | self.sess.run(var.assign(np.load(path+var.name.split('/')[0]+'/b.npy'))) 350 | print 'Weights loaded!!' 
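# [annotation, not in the original source] load_weights() above assumes a Caffe-style dump on disk with
# one sub-directory per variable scope, each holding W.npy and b.npy. As implied by the transposes, a
# compatible layout would look like this (directory names illustrative only):
#
#   <path>/conv1/W.npy    shape (96, 3, 11, 11)   # (out, in, kh, kw); transpose((2,3,1,0)) gives the (11, 11, 3, 96) slim expects
#   <path>/conv1/b.npy    shape (96,)
#   <path>/fc_full/W.npy  shape (3072, 6912)      # (out, in); .T gives (6912, 3072); 6912 = 6*6*192 from conv_all at a 227x227 input
#   <path>/fc_full/b.npy  shape (3072,)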
351 | 352 | def print_variables(self): 353 | variables = slim.get_model_variables() 354 | print 'Model Variables:' 355 | for var in variables: 356 | print var.name, ' ', var.get_shape() 357 | 358 | 359 | def save_weights(self, path): 360 | variables = slim.get_model_variables() 361 | weights = {} 362 | for var in variables: 363 | weights[var.name] = self.sess.run(var) 364 | 365 | np.save(path+ '/weights', weights) 366 | 367 | def load_det_weights(self, path): 368 | variables = slim.get_model_variables() 369 | weights = np.load(path) 370 | for var in variables: 371 | if var.name in weights.item(): 372 | print var.name 373 | self.sess.run(var.assign(weights.item()[var.name])) 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | -------------------------------------------------------------------------------- /model_prediction.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | import numpy as np 4 | from tqdm import tqdm 5 | from pdb import set_trace as brk 6 | import sys 7 | 8 | class HyperFace(object): 9 | 10 | def __init__(self,load_model,tf_record_file_path=None,model_save_path=None,best_model_save_path=None,restore_model_path=None): 11 | 12 | self.batch_size = 32 13 | self.img_height = 227 14 | self.img_width = 227 15 | self.channel = 3 16 | 17 | self.num_epochs =1 18 | 19 | # Hyperparameters 1,5,0.5,5,2 20 | self.weight_detect = 1 21 | self.weight_landmarks = 5 22 | self.weight_visibility = 0.5 23 | self.weight_pose = 5 24 | self.weight_gender = 2 25 | 26 | #tf_Record Paramters 27 | self.tf_record_file_path = tf_record_file_path 28 | self.filename_queue = tf.train.string_input_producer([self.tf_record_file_path], num_epochs=self.num_epochs) 29 | self.images, self.labels, self.land, self.vis, self.po, self.gen= self.load_from_tfRecord(self.filename_queue) 30 | 31 | self.model_save_path = model_save_path 32 | self.best_model_save_path = best_model_save_path 33 | self.restore_model_path = restore_model_path 34 | 35 | self.save_after_steps = 200 36 | self.print_after_steps = 50 37 | self.load_model = load_model 38 | 39 | 40 | def build_network(self, sess): 41 | 42 | self.sess = sess 43 | 44 | self.X = tf.placeholder(tf.float32, [self.batch_size, self.img_height, self.img_width, self.channel], name='images') 45 | self.detection = tf.placeholder(tf.int32, [self.batch_size], name='detection') 46 | self.landmarks = tf.placeholder(tf.float32, [self.batch_size, 42], name='landmarks') 47 | self.visibility = tf.placeholder(tf.float32, [self.batch_size,21], name='visibility') 48 | self.pose = tf.placeholder(tf.float32, [self.batch_size,3], name='pose') 49 | self.gender = tf.placeholder(tf.int32, [self.batch_size], name='gender') 50 | 51 | self.net_output = self.network(self.X) # (out_detection, out_landmarks, out_visibility, out_pose, out_gender) 52 | self.loss_detection = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.net_output[0], labels=tf.one_hot(self.detection, 2))) 53 | 54 | detection_mask = tf.cast(tf.expand_dims(self.detection, axis=1),tf.float32) 55 | 56 | visibility_mask = tf.reshape(tf.tile(tf.expand_dims(self.visibility, axis=2), [1,1,2]), [self.batch_size, -1]) 57 | self.loss_landmarks = tf.reduce_mean(tf.square(detection_mask*visibility_mask*(self.net_output[1] - self.landmarks))) 58 | 59 | self.loss_visibility = tf.reduce_mean(tf.square(detection_mask*(self.net_output[2] - self.visibility))) 60 | self.loss_pose = 
tf.reduce_mean(tf.square(detection_mask*(self.net_output[3] - self.pose))) 61 | self.loss_gender = tf.reduce_mean(detection_mask*tf.nn.sigmoid_cross_entropy_with_logits(logits=self.net_output[4], labels=tf.one_hot(self.gender,2))) 62 | 63 | 64 | self.loss = self.weight_detect*self.loss_detection + self.weight_landmarks*self.loss_landmarks \ 65 | + self.weight_visibility*self.loss_visibility + self.weight_pose*self.loss_pose \ 66 | + self.weight_gender*self.loss_gender 67 | 68 | self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.cast(tf.argmax(self.net_output[0],1),tf.int32),self.detection),tf.float32)) 69 | 70 | #self.loss = self.loss_detection 71 | #self.optimizer = tf.train.AdamOptimizer(1e-7).minimize(self.loss) 72 | self.optimizer = tf.train.MomentumOptimizer(1e-3,0.9,use_nesterov=True).minimize(self.loss) 73 | self.saver = tf.train.Saver(max_to_keep=4, keep_checkpoint_every_n_hours=4) 74 | self.best_saver = tf.train.Saver(max_to_keep=10, keep_checkpoint_every_n_hours=4) 75 | 76 | 77 | def train(self): 78 | 79 | 80 | if self.load_model: 81 | print "Restoring Model" 82 | ckpt = tf.train.get_checkpoint_state(self.restore_model_path) 83 | if ckpt and ckpt.model_checkpoint_path: 84 | self.saver.restore(self.sess,ckpt.model_checkpoint_path) 85 | self.sess.run(tf.local_variables_initializer()) 86 | else: 87 | print "Initializing Model" 88 | self.sess.run(tf.group(tf.global_variables_initializer(),tf.local_variables_initializer())) 89 | 90 | #self.load_det_weights(self.restore_model_path+'weights.npy') 91 | 92 | 93 | coord = tf.train.Coordinator() 94 | threads = tf.train.start_queue_runners(sess=self.sess,coord=coord) 95 | 96 | writer = tf.summary.FileWriter('../logs', self.sess.graph) 97 | loss_summ = tf.summary.scalar('loss', self.loss) 98 | img_summ = tf.summary.image('images', self.images, max_outputs=5) 99 | label_summ = tf.summary.histogram('labels', self.detection) 100 | detect_summ = tf.summary.scalar('det_loss', self.loss_detection) 101 | landmarks_summ = tf.summary.scalar('landmarks_loss', self.loss_landmarks) 102 | vis_summ = tf.summary.scalar('visibility_loss', self.loss_visibility) 103 | pose_summ = tf.summary.scalar('pose_loss', self.loss_pose) 104 | gender_summ = tf.summary.scalar('gender_loss', self.loss_gender) 105 | 106 | summ_op = tf.summary.merge_all() 107 | 108 | counter = 0 109 | best_loss = sys.maxint 110 | try: 111 | while not coord.should_stop(): 112 | batch_imgs, batch_labels, batch_landmarks, batch_visibility, batch_pose, batch_gender = self.sess.run([self.images,self.labels,self.land, self.vis, self.po, self.gen]) 113 | batch_imgs = (batch_imgs - 127.5) / 128.0 114 | input_feed={self.X: batch_imgs, self.detection: batch_labels, self.landmarks: batch_landmarks, self.visibility: batch_visibility, self.pose: batch_pose, self.gender: np.squeeze(batch_gender)} 115 | #input_feed={self.X: batch_imgs, self.detection: batch_labels} 116 | 117 | _,model_op,loss,l_d,l_l,l_v,l_p,l_g, summ, accuracy = self.sess.run([self.optimizer,self.test_model,self.loss,self.loss_detection, 118 | self.loss_landmarks,self.loss_visibility,self.loss_pose,self.loss_gender, summ_op, self.accuracy], input_feed) 119 | 120 | writer.add_summary(summ, counter) 121 | 122 | if counter % self.save_after_steps == 0: 123 | self.saver.save(self.sess,self.model_save_path+'hyperface_model',global_step=int(counter),write_meta_graph=False) 124 | 125 | 126 | if loss <= best_loss: 127 | best_loss = loss 128 | 
self.best_saver.save(self.sess,self.best_model_save_path+'hyperface_best_model',global_step=int(counter),write_meta_graph=False) 129 | #self.save_weights(self.best_model_save_path) 130 | 131 | if counter % self.print_after_steps == 0: 132 | print "Iteration:{},Total Loss:{},Detection loss:{},Landmark loss:{},Visbility Loss :{},Pose Loss:{},Gender Loss:{},Accuracy:{}".format(counter,loss,l_d,l_l,l_v,l_p,l_g,accuracy) 133 | 134 | counter += 1 135 | 136 | except tf.errors.OutOfRangeError: 137 | print('Done training -- epoch limit reached') 138 | finally: 139 | coord.request_stop() 140 | 141 | coord.join(threads) 142 | 143 | 144 | 145 | 146 | 147 | def network_det(self,inputs,reuse=False): 148 | 149 | if reuse: 150 | tf.get_variable_scope().reuse_variables() 151 | 152 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 153 | activation_fn = tf.nn.relu, 154 | weights_initializer = tf.truncated_normal_initializer(0.0, 0.01)): 155 | 156 | conv1 = slim.conv2d(inputs, 96, [11,11], 4, padding= 'VALID', scope='conv1') 157 | max1 = slim.max_pool2d(conv1, [3,3], 2, padding= 'VALID', scope='max1') 158 | 159 | conv2 = slim.conv2d(max1, 256, [5,5], 1, scope='conv2') 160 | max2 = slim.max_pool2d(conv2, [3,3], 2, padding= 'VALID', scope='max2') 161 | conv3 = slim.conv2d(max2, 384, [3,3], 1, scope='conv3') 162 | 163 | conv4 = slim.conv2d(conv3, 384, [3,3], 1, scope='conv4') 164 | conv5 = slim.conv2d(conv4, 256, [3,3], 1, scope='conv5') 165 | pool5 = slim.max_pool2d(conv5, [3,3], 2, padding= 'VALID', scope='pool5') 166 | 167 | shape = int(np.prod(pool5.get_shape()[1:])) 168 | fc6 = slim.fully_connected(tf.reshape(pool5, [-1, shape]), 4096, scope='fc6') 169 | 170 | fc_detection = slim.fully_connected(fc6, 512, scope='fc_det1') 171 | out_detection = slim.fully_connected(fc_detection, 2, scope='fc_det2', activation_fn = None) 172 | 173 | return out_detection 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | def network(self,inputs,reuse=False): 182 | 183 | if reuse: 184 | tf.get_variable_scope().reuse_variables() 185 | 186 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 187 | activation_fn = tf.nn.relu, 188 | weights_initializer = tf.truncated_normal_initializer(0.0, 0.01) ): 189 | 190 | conv1 = slim.conv2d(inputs, 96, [11,11], 4, padding= 'VALID', scope='conv1') 191 | max1 = slim.max_pool2d(conv1, [3,3], 2, padding= 'VALID', scope='max1') 192 | 193 | conv1a = slim.conv2d(max1, 256, [4,4], 4, padding= 'VALID', scope='conv1a') 194 | 195 | conv2 = slim.conv2d(max1, 256, [5,5], 1, scope='conv2') 196 | max2 = slim.max_pool2d(conv2, [3,3], 2, padding= 'VALID', scope='max2') 197 | conv3 = slim.conv2d(max2, 384, [3,3], 1, scope='conv3') 198 | 199 | conv3a = slim.conv2d(conv3, 256, [2,2], 2, padding= 'VALID', scope='conv3a') 200 | 201 | conv4 = slim.conv2d(conv3, 384, [3,3], 1, scope='conv4') 202 | conv5 = slim.conv2d(conv4, 256, [3,3], 1, scope='conv5') 203 | pool5 = slim.max_pool2d(conv5, [3,3], 2, padding= 'VALID', scope='pool5') 204 | 205 | concat_feat = tf.concat([conv1a, conv3a, pool5],3) 206 | conv_all = slim.conv2d(concat_feat, 192, [1,1], 1, padding= 'VALID', scope='conv_all') 207 | 208 | shape = int(np.prod(conv_all.get_shape()[1:])) 209 | fc_full = slim.fully_connected(tf.reshape(tf.transpose(conv_all, [0,3,1,2]), [-1, shape]), 3072, scope='fc_full') 210 | 211 | fc_detection = slim.fully_connected(fc_full, 512, scope='fc_detection1') 212 | fc_landmarks = slim.fully_connected(fc_full, 512, scope='fc_landmarks1') 213 | fc_visibility = slim.fully_connected(fc_full, 512, scope='fc_visibility1') 
214 | fc_pose = slim.fully_connected(fc_full, 512, scope='fc_pose1') 215 | fc_gender = slim.fully_connected(fc_full, 512, scope='fc_gender1') 216 | 217 | out_detection = slim.fully_connected(fc_detection, 2, scope='fc_detection2', activation_fn = None) 218 | out_landmarks = slim.fully_connected(fc_landmarks, 42, scope='fc_landmarks2', activation_fn = None ) 219 | out_visibility = slim.fully_connected(fc_visibility, 21, scope='fc_visibility2', activation_fn = None) 220 | out_pose = slim.fully_connected(fc_pose, 3, scope='fc_pose2', activation_fn = None) 221 | out_gender = slim.fully_connected(fc_gender, 2, scope='fc_gender2', activation_fn = None) 222 | 223 | return [out_detection, out_landmarks, out_visibility, out_pose, out_gender] 224 | 225 | 226 | 227 | def predict(self): 228 | print 'Running inference...' 229 | self.sess.run(tf.group(tf.global_variables_initializer(),tf.local_variables_initializer())) 230 | self.load_weights('/Users/shashank/TensorFlow/SPN/weights/') 231 | coord = tf.train.Coordinator() 232 | threads = tf.train.start_queue_runners(sess=self.sess,coord=coord) 233 | 234 | result = [] 235 | truth = [] 236 | count =0 237 | try: 238 | while not coord.should_stop(): 239 | print count 240 | batch_imgs, batch_labels, batch_landmarks, batch_visibility, batch_pose, batch_gender = self.sess.run([self.images,self.labels,self.land, self.vis, self.po, self.gen]) 241 | batch_imgs = (batch_imgs - 127.5) / 128.0 242 | 243 | net_preds = self.sess.run(self.net_output, feed_dict={self.X: batch_imgs}) 244 | result.append(np.concatenate(net_preds, axis=1)) 245 | truth.append(np.concatenate([batch_labels[:, np.newaxis], batch_landmarks, batch_visibility, batch_pose, batch_gender], axis=1)) 246 | count += 1 247 | 248 | except tf.errors.OutOfRangeError: 249 | print('Done training -- epoch limit reached') 250 | finally: 251 | coord.request_stop() 252 | 253 | coord.join(threads) 254 | np.save('test_results', np.concatenate(result, axis = 0)) 255 | np.save('truth', np.concatenate(truth, axis = 0)) 256 | 257 | def load_from_tfRecord(self,filename_queue): 258 | 259 | reader = tf.TFRecordReader() 260 | _, serialized_example = reader.read(filename_queue) 261 | 262 | features = tf.parse_single_example( 263 | serialized_example, 264 | features={ 265 | 'image_raw':tf.FixedLenFeature([], tf.string), 266 | 'width': tf.FixedLenFeature([], tf.int64), 267 | 'height': tf.FixedLenFeature([], tf.int64), 268 | 'pos_locs':tf.FixedLenFeature([], tf.string), 269 | 'neg_locs':tf.FixedLenFeature([], tf.string), 270 | 'n_pos_locs':tf.FixedLenFeature([], tf.int64), 271 | 'n_neg_locs':tf.FixedLenFeature([], tf.int64), 272 | 'gender':tf.FixedLenFeature([], tf.int64), 273 | 'pose': tf.FixedLenFeature([], tf.string), 274 | 'landmarks':tf.FixedLenFeature([], tf.string), 275 | 'visibility':tf.FixedLenFeature([], tf.string), 276 | 277 | }) 278 | 279 | landmarks = tf.decode_raw(features['landmarks'], tf.float32) 280 | pose = tf.decode_raw(features['pose'], tf.float32) 281 | visibility = tf.decode_raw(features['visibility'], tf.int32) 282 | gender = tf.cast(features['gender'], tf.int32) 283 | 284 | landmarks_shape = tf.stack([1,21*2]) 285 | pose_shape = tf.stack([1,3]) 286 | visibility_shape = tf.stack([1,21]) 287 | gender_shape = tf.stack([1,1]) 288 | 289 | landmarks = tf.reshape(landmarks,landmarks_shape) 290 | visibility = tf.reshape(visibility,visibility_shape) 291 | pose = tf.reshape(pose,pose_shape) 292 | gender = tf.reshape(gender,gender_shape) 293 | 294 | image = tf.decode_raw(features['image_raw'], tf.uint8) 295 | 
pos_locs = tf.decode_raw(features['pos_locs'], tf.float32) 296 | neg_locs = tf.decode_raw(features['neg_locs'], tf.float32) 297 | 298 | orig_height = tf.cast(features['height'], tf.int32) 299 | orig_width = tf.cast(features['width'], tf.int32) 300 | n_pos_locs = tf.cast(features['n_pos_locs'], tf.int32) 301 | n_neg_locs = tf.cast(features['n_neg_locs'], tf.int32) 302 | 303 | image_shape = tf.stack([1,orig_height,orig_width,3]) 304 | image = tf.cast(tf.reshape(image,image_shape),tf.float32) 305 | 306 | pos_locs_shape = tf.stack([n_pos_locs,4]) 307 | pos_locs = tf.reshape(pos_locs,pos_locs_shape) 308 | 309 | neg_locs_shape = tf.stack([n_neg_locs,4]) 310 | neg_locs = tf.reshape(neg_locs,neg_locs_shape) 311 | 312 | positive_cropped = tf.image.crop_and_resize(image,pos_locs,tf.zeros([n_pos_locs],dtype=tf.int32),[227,227]) 313 | negative_cropped = tf.image.crop_and_resize(image,neg_locs,tf.zeros([n_neg_locs],dtype=tf.int32),[227,227]) 314 | 315 | all_images = tf.concat([positive_cropped,negative_cropped],axis=0) 316 | 317 | positive_labels = tf.ones([n_pos_locs]) 318 | negative_labels = tf.zeros([n_neg_locs]) 319 | 320 | 321 | positive_landmarks = tf.tile(landmarks,[n_pos_locs,1]) 322 | negative_landmarks = tf.tile(landmarks,[n_neg_locs,1]) 323 | 324 | positive_visibility = tf.tile(visibility,[n_pos_locs,1]) 325 | negative_visibility = tf.tile(visibility,[n_neg_locs,1]) 326 | 327 | positive_pose = tf.tile(pose,[n_pos_locs,1]) 328 | negative_pose = tf.tile(pose,[n_neg_locs,1]) 329 | 330 | positive_gender = tf.tile(gender,[n_pos_locs,1]) 331 | negative_gender = tf.tile(gender,[n_neg_locs,1]) 332 | 333 | all_landmarks = tf.concat([positive_landmarks,negative_landmarks],axis=0) 334 | all_visibility = tf.concat([positive_visibility,negative_visibility],axis=0) 335 | all_pose = tf.concat([positive_pose,negative_pose],axis=0) 336 | 337 | all_labels = tf.concat([positive_labels,negative_labels],axis=0) 338 | all_gender = tf.concat([positive_gender,negative_gender],axis=0) 339 | 340 | tf.random_shuffle(all_images,seed=7) 341 | tf.random_shuffle(all_labels,seed=7) 342 | tf.random_shuffle(all_landmarks,seed=7) 343 | tf.random_shuffle(all_visibility,seed=7) 344 | tf.random_shuffle(all_pose,seed=7) 345 | tf.random_shuffle(all_gender,seed=7) 346 | 347 | images,labels,landmarks_,visibility_,pose_,gender_ = tf.train.shuffle_batch([all_images,all_labels,all_landmarks,all_visibility,all_pose,all_gender] 348 | ,enqueue_many=True,batch_size=self.batch_size,num_threads=1,capacity=1000,min_after_dequeue=500) 349 | 350 | return images,labels,landmarks_,visibility_,pose_,gender_ 351 | 352 | 353 | def load_weights(self, path): 354 | variables = slim.get_model_variables() 355 | print 'Loading weights...' 356 | for var in tqdm(variables): 357 | if ('conv' in var.name) and ('weights' in var.name): 358 | self.sess.run(var.assign(np.load(path+var.name.split('/')[0]+'/W.npy').transpose((2,3,1,0)))) 359 | elif ('fc' in var.name) and ('weights' in var.name): 360 | self.sess.run(var.assign(np.load(path+var.name.split('/')[0]+'/W.npy').T)) 361 | elif 'biases' in var.name: 362 | self.sess.run(var.assign(np.load(path+var.name.split('/')[0]+'/b.npy'))) 363 | print 'Weights loaded!!' 
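# [annotation, not in the original source] Two details specific to this prediction copy of the model:
#
# 1) network() here flattens conv_all as tf.reshape(tf.transpose(conv_all, [0,3,1,2]), ...), i.e. in
#    channel-major (NCHW) order, presumably so the flattened features line up with the (out, in)-ordered
#    fc_full weights loaded by load_weights() above; model.py reshapes conv_all directly in NHWC order.
#
# 2) predict() saves two arrays whose columns follow its concatenation order:
#      test_results.npy : [det logits (2) | landmarks (42) | visibility (21) | pose (3) | gender logits (2)] -> 70 cols
#      truth.npy        : [label (1)      | landmarks (42) | visibility (21) | pose (3) | gender (1)]        -> 68 cols
#    A minimal sketch for reading them back (hypothetical helper, not part of this repo):
#
#      import numpy as np
#      preds, truth = np.load('test_results.npy'), np.load('truth.npy')
#      det_logits, lm_pred = preds[:, 0:2], preds[:, 2:44]
#      vis_pred, pose_pred, gender_logits = preds[:, 44:65], preds[:, 65:68], preds[:, 68:70]
#      labels, lm_true = truth[:, 0], truth[:, 1:43]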
364 | 365 | def print_variables(self): 366 | variables = slim.get_model_variables() 367 | print 'Model Variables:' 368 | for var in variables: 369 | print var.name, ' ', var.get_shape() 370 | 371 | 372 | def save_weights(self, path): 373 | variables = slim.get_model_variables() 374 | weights = {} 375 | for var in variables: 376 | weights[var.name] = self.sess.run(var) 377 | 378 | np.save(path+ '/weights', weights) 379 | 380 | def load_det_weights(self, path): 381 | variables = slim.get_model_variables() 382 | weights = np.load(path) 383 | for var in variables: 384 | if var.name in weights.item(): 385 | print var.name 386 | self.sess.run(var.assign(weights.item()[var.name])) 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | -------------------------------------------------------------------------------- /new_files/data_prep.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from skimage import io 4 | from skimage import color 5 | import sqlite3 6 | import cv2 7 | import matplotlib.pyplot as plt 8 | import os 9 | import random 10 | from tqdm import tqdm 11 | from pdb import set_trace as brk 12 | import sys 13 | # The following are the database properties available (last updated version 2012-11-28): 14 | # 15 | # databases: db_id, path, description 16 | # faceellipse: face_id, x, y, ra, rb, theta, annot_type_id, upsidedown 17 | # faceimages: image_id, db_id, file_id, filepath, bw, widht, height 18 | # facemetadata: face_id, sex, occluded, glasses, bw, annot_type_id 19 | # facepose: face_id, roll, pitch, yaw, annot_type_id 20 | # facerect: face_id, x, y, w, h, annot_type_id 21 | # faces: face_id, file_id, db_id 22 | # featurecoords: face_id, feature_id, x, y 23 | # featurecoordtype: feature_id, descr, code, x, y, z 24 | # AFLW 21 points landmark 25 | # 0|LeftBrowLeftCorner 26 | # 1|LeftBrowCenter 27 | # 2|LeftBrowRightCorner 28 | # 3|RightBrowLeftCorner 29 | # 4|RightBrowCenter 30 | # 5|RightBrowRightCorner 31 | # 6|LeftEyeLeftCorner 32 | # 7|LeftEyeCenter 33 | # 8|LeftEyeRightCorner 34 | # 9|RightEyeLeftCorner 35 | # 10|RightEyeCenter 36 | # 11|RightEyeRightCorner 37 | # 12|LeftEar 38 | # 13|NoseLeft 39 | # 14|NoseCenter 40 | # 15|NoseRight 41 | # 16|RightEar 42 | # 17|MouthLeftCorner 43 | # 18|MouthCenter 44 | # 19|MouthRightCorner 45 | # 20|ChinCenter 46 | 47 | select_string = "faceimages.filepath, faces.face_id, facepose.roll, facepose.pitch, facepose.yaw, facerect.x, facerect.y, facerect.w, facerect.h,faceimages.image_id,facemetadata.sex" 48 | from_string = "faceimages, faces, facepose, facerect,facemetadata" 49 | where_string = "faces.face_id = facepose.face_id and faces.file_id = faceimages.file_id and faces.face_id = facerect.face_id and faces.face_id = facemetadata.face_id" 50 | query_string = "SELECT " + select_string + " FROM " + from_string + " WHERE " + where_string 51 | 52 | 53 | 54 | 55 | conn = sqlite3.connect('/home/shashank/Documents/CSE-252C/AFLW/aflw/data/aflw.sqlite') 56 | c = conn.cursor() 57 | 58 | img_path = '/home/shashank/Documents/CSE-252C/AFLW/' 59 | loc_file_path = '/home/shashank/Documents/CSE-252C/hyperface/code/locations_test/' 60 | tfrecords_train_filename = 'test_check.tfrecords' 61 | tfrecords_test_filename = 'aflw_test_new.tfrecords' 62 | 63 | writer_train = tf.python_io.TFRecordWriter(tfrecords_train_filename) 64 | writer_test = tf.python_io.TFRecordWriter(tfrecords_test_filename) 65 | 66 | def _bytes_feature(value): 67 | return 
tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 68 | 69 | def _float_feature(value): 70 | return tf.train.Feature(float_list=tf.train.FloatList(value=[value])) 71 | 72 | def _int64_feature(value): 73 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 74 | 75 | def test_names(): 76 | l=[] 77 | names = os.listdir(img_path+'0') 78 | random.shuffle(names) 79 | l.append(['0/'+name for name in names[:300]]) 80 | 81 | names = os.listdir(img_path+'2') 82 | random.shuffle(names) 83 | l.append(['2/'+name for name in names[:300]]) 84 | 85 | names = os.listdir(img_path+'3') 86 | random.shuffle(names) 87 | l.append(['3/'+name for name in names[:400]]) 88 | 89 | return l[0]+l[1]+l[2] 90 | 91 | def make_tfrecord(test_images): 92 | 93 | it_test =0 94 | it_train = 0 95 | gender_dict={'m':1,'f':0} 96 | 97 | for row in (c.execute(query_string)): 98 | ''' 99 | row[0] = image path str 100 | row[1] = face id int 101 | row[2] = roll float 102 | row[3] = pitch float 103 | row[4] = yaw float 104 | row[5] = x int 105 | row[6] = y int 106 | row[7] = w int 107 | row[8] = h int 108 | ''' 109 | 110 | 111 | center_x = float(row[5]) + float(row[7])/2 112 | center_y = float(row[6]) + float(row[8])/2 113 | 114 | 115 | if not os.path.exists(loc_file_path+str(row[1])): 116 | continue 117 | 118 | select_str = "coords.feature_id, coords.x, coords.y" 119 | from_str = "featurecoords coords" 120 | where_str = "coords.face_id = {}".format(row[1]) 121 | query_str = "SELECT " + select_str + " FROM " + from_str + " WHERE " + where_str 122 | landmark = np.zeros((21,2)).astype(np.float32) 123 | visibility = np.zeros((21,1)).astype(np.int32) 124 | 125 | c2 = conn.cursor() 126 | 127 | for xx in c2.execute(query_str): 128 | landmark[xx[0]-1][0] = xx[1]#(xx[1] - center_x)/float(row[7]) 129 | landmark[xx[0]-1][1] = xx[2]#(xx[2] - center_y)/float(row[8]) 130 | visibility[xx[0]-1] = 1 131 | landmark = landmark.reshape(-1,42) 132 | 133 | c2.close() 134 | 135 | try: 136 | 137 | img_raw = (np.asarray(cv2.imread(img_path+row[0])).astype(np.float32))/255.0 138 | cv2.imwrite('save_im.jpg',img_raw*255) 139 | landmark_pos = None 140 | 141 | if len(img_raw.shape) !=3: 142 | continue#img_raw = color.gray2rgb(img_raw) 143 | if len(img_raw.shape) !=3 or img_raw.shape[2] != 3: 144 | continue 145 | print row[1] 146 | 147 | w = img_raw.shape[1] 148 | h = img_raw.shape[0] 149 | if os.path.isfile(loc_file_path+str(row[1])+'/positive.npy'): 150 | pos_locs = np.load(loc_file_path+str(row[1])+'/positive.npy')[:,:4] 151 | cof_locs = np.tile(np.load(loc_file_path+str(row[1])+'/positive.npy')[:,4:6],(1,21)) 152 | dim_locs = np.tile(np.load(loc_file_path+str(row[1])+'/positive.npy')[:,6:8],(1,21)) 153 | n_pos_locs = pos_locs.shape[0] 154 | 155 | landmark_pos = (landmark - cof_locs)/dim_locs 156 | visibility_pos = np.ones((landmark_pos.shape[0],21)) 157 | visibility_pos[(np.where(landmark_pos > 0.5)[0],np.where(landmark_pos > 0.5)[1]/2)] = 0 158 | visibility_pos[(np.where(landmark_pos < -0.5)[0],np.where(landmark_pos < -0.5)[1]/2)] = 0 159 | 160 | # visibility_pos[np.where(landmark_pos)] 161 | pos_locs = pos_locs.astype(np.float32).tostring() 162 | 163 | # if pos_locs.shape[0] > 0: 164 | # pos_locs = np.concatenate([pos_locs,np.asarray([row[6]/float(h),row[5]/float(w), 165 | # (row[6]+row[8])/float(h),(row[5]+row[7])/float(w)]).reshape(1,4)],axis=0) 166 | 167 | # n_pos_locs = pos_locs.shape[0] 168 | 169 | # pos_locs = pos_locs.astype(np.float32).tostring() 170 | # else: 171 | # pos_locs = 
np.asarray([[row[6]/float(h),row[5]/float(w),(row[6]+row[8])/float(h),(row[5]+row[7])/float(w)]]).reshape(1,4) 172 | # n_pos_locs = pos_locs.shape[0] 173 | # pos_locs = pos_locs.astype(np.float32).tostring() 174 | 175 | # else: 176 | # pos_locs = np.asarray([[row[6]/float(h),row[5]/float(w),(row[6]+row[8])/float(h),(row[5]+row[7])/float(w)]]).reshape(1,4) 177 | # n_pos_locs = pos_locs.shape[0] 178 | # pos_locs = pos_locs.astype(np.float32).tostring() 179 | 180 | 181 | 182 | if os.path.isfile(loc_file_path+str(row[1])+'/negative.npy'): 183 | neg_locs = np.load(loc_file_path+str(row[1])+'/negative.npy')[:,:4] 184 | n_neg_locs = neg_locs.shape[0] 185 | cof_locs = np.tile(np.load(loc_file_path+str(row[1])+'/negative.npy')[:,4:6],(1,21)) 186 | dim_locs = np.tile(np.load(loc_file_path+str(row[1])+'/negative.npy')[:,6:8],(1,21)) 187 | 188 | landmark_neg = (landmark - cof_locs)/dim_locs 189 | visibility_neg = np.zeros((landmark_neg.shape[0],21)) 190 | 191 | # visibility_pos[np.where(landmark_pos)] 192 | neg_locs = neg_locs.astype(np.float32).tostring() 193 | 194 | all_landmarks = np.concatenate([landmark_pos,landmark_neg],axis=0) 195 | all_visibilities = np.concatenate([visibility_pos,visibility_neg],axis=0) 196 | all_landmarks = all_landmarks.astype(np.float32).tostring() 197 | all_visibilities = all_visibilities.astype(np.int32).tostring() 198 | 199 | img_raw = img_raw.tostring() 200 | 201 | print "{},{}".format(n_pos_locs,n_neg_locs) 202 | 203 | pose_array = np.asarray([row[2],row[3],row[4]]).astype(np.float32) 204 | 205 | 206 | pose_array = pose_array.tostring() 207 | # landmark = landmark.tostring() 208 | # visibility=visibility.tostring() 209 | 210 | 211 | example = tf.train.Example(features=tf.train.Features(feature={ 212 | 'image_raw':_bytes_feature(img_raw), 213 | 'width': _int64_feature(w), 214 | 'height': _int64_feature(h), 215 | 'face_id': _int64_feature(row[1]), 216 | 'pose': _bytes_feature(pose_array), 217 | 'loc_x': _int64_feature(row[5]), 218 | 'loc_y': _int64_feature(row[6]), 219 | 'loc_w': _int64_feature(row[7]), 220 | 'loc_h': _int64_feature(row[8]), 221 | 'gender':_int64_feature(gender_dict[row[10]]), 222 | 'landmarks':_bytes_feature(all_landmarks), 223 | 'visibility':_bytes_feature(all_visibilities), 224 | 'pos_locs':_bytes_feature(pos_locs), 225 | 'neg_locs':_bytes_feature(neg_locs), 226 | 'n_pos_locs':_int64_feature(n_pos_locs), 227 | 'n_neg_locs':_int64_feature(n_neg_locs) 228 | })) 229 | 230 | writer_train.write(example.SerializeToString()) 231 | it_train += 1 232 | break 233 | # if it_train >= 1: 234 | # break 235 | # if row[0] in test_images: 236 | # writer_test.write(example.SerializeToString()) 237 | # it_test += 1 238 | # else: 239 | # writer_train.write(example.SerializeToString()) 240 | # it_train += 1 241 | 242 | except Exception as e: 243 | exc_type, exc_obj, exc_tb = sys.exc_info() 244 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 245 | print(exc_type, fname, exc_tb.tb_lineno) 246 | 247 | 248 | print it_test,it_train 249 | c.close() 250 | writer_train.close() 251 | writer_test.close() 252 | 253 | def extract_tfrecord(): 254 | record_iterator = tf.python_io.tf_record_iterator(path=tfrecords_train_filename) 255 | count =0 256 | for string_record in tqdm(record_iterator): 257 | 258 | count += 1 259 | example = tf.train.Example() 260 | example.ParseFromString(string_record) 261 | 262 | img_string = example.features.feature['image_raw'].bytes_list.value[0] 263 | landmark_string = example.features.feature['landmarks'].bytes_list.value[0] 264 | 
landmarks = np.fromstring(landmark_string, dtype=np.float32).reshape(21,2) 265 | img_width = int(example.features.feature['width'].int64_list.value[0]) 266 | img_height = int(example.features.feature['height'].int64_list.value[0]) 267 | 268 | img_2 = np.fromstring(img_string, dtype=np.uint8).reshape(-1,1) 269 | 270 | img_1d = np.fromstring(img_string, dtype=np.uint8).reshape(img_height,img_width,3) 271 | print img_1d.shape 272 | loc_x = int(example.features.feature['loc_x'].int64_list.value[0]) 273 | loc_y = int(example.features.feature['loc_y'].int64_list.value[0]) 274 | loc_w = int(example.features.feature['loc_w'].int64_list.value[0]) 275 | loc_h = int(example.features.feature['loc_h'].int64_list.value[0]) 276 | sex = int(example.features.feature['gender'].int64_list.value[0]) 277 | 278 | 279 | # center_x = img_width/2.0 280 | # center_y = img_height/2.0 281 | 282 | # centers = np.tile(np.array([center_x,center_y]).reshape(1,2),(21,1)) 283 | # normalized = landmarks - centers 284 | # w_h = np.tile(np.array([img_width,img_height]).reshape(1,2),(21,1)) 285 | 286 | # normalized = normalized/w_h 287 | 288 | # for i in range(normalized.shape[0]): 289 | # if i == 5 or i == 9 or i==15 or i==16: 290 | # continue 291 | # point_x = normalized[i][0]*img_width + img_width/2.0 292 | # point_y = normalized[i][1]*img_height + img_height/2.0 293 | 294 | # cv2.circle(img_1d,(int(point_x),int(point_y)), 1, (0,0,255), 2) 295 | 296 | # cv2.rectangle(img_1d,(loc_x,loc_y),(loc_x+loc_w,loc_y+loc_h),(0,255,0),3) 297 | # cv2.imshow('result',img_1d) 298 | # cv2.waitKey(0) 299 | 300 | 301 | 302 | if __name__ == '__main__': 303 | test_images = test_names() 304 | print len(test_images) 305 | make_tfrecord(test_images) 306 | #extract_tfrecord() 307 | 308 | -------------------------------------------------------------------------------- /new_files/irp.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import tensorflow as tf 4 | # import pdb 5 | # pdb.set_trace() 6 | aflw_template_landmark_coords=np.array([[-0.479962468147, 0.471864163876],[-0.30303606391, 0.508996844292],[-0.106451146305, 0.498075485229],[0.106451146305, 0.498075485229],[0.30303606391, 0.508996844292],[0.479962468147, 0.471864163876],[-0.447198301554, 0.321149080992],[-0.318325966597, 0.325517624617],[-0.163242310286, 0.308043420315],[0.163242310286, 0.308043420315],[0.318325966597, 0.325517624617],[0.447198301554, 0.321149080992],[-0.674257874489, -0.151652157307],[-0.170000001788, -0.075740583241],[0.0, 0.0],[0.170000001788, -0.075740583241],[0.674257874489, -0.151652157307],[-0.272456139326, -0.347239643335],[0.0, -0.336318254471],[0.272456139326, -0.347239643335],[0.0, -0.737950384617]], dtype=np.float32) 7 | # tfrecords_train_filename = '/home/shashank/Documents/CSE-252C/hyperface/code/aflw_train.tfrecords' 8 | 9 | def region_proposal(landmark_pts,visible_landmark_index,image_size,pad=0.1): 10 | 11 | x_template,y_template,w_template,h_template = cv2.boundingRect(aflw_template_landmark_coords) 12 | 13 | x_selective,y_selective,w_selective,h_selective = cv2.boundingRect(landmark_pts.astype(np.float32)) 14 | 15 | x_selective = x_selective - (pad*w_selective)/2.0 16 | y_selective = y_selective - (pad*h_selective)/2.0 17 | w_selective = w_selective *(1+ pad) 18 | h_selective = h_selective *(1+ pad) 19 | 20 | visible_template_landmarks = aflw_template_landmark_coords[visible_landmark_index,:] 21 | 22 | #Now we have got the corresponding points or features in the two images. 
Using 2D Homography, find the projection matrix. 23 | #For the homography we need at least 4 features,hence 24 | if len(visible_landmark_index) < 4: 25 | return (0,0,0,0) 26 | 27 | H,__ = cv2.findHomography(visible_template_landmarks,landmark_pts,cv2.RANSAC) 28 | 29 | if H is None: 30 | return (0,0,0,0) 31 | source_pts = np.asarray([ [x_template,y_template,1.0],[x_template,y_template+h_template,1.0],[x_template+w_template,y_template,1.0],[x_template+w_template,y_template+h_template,1.0] ]).astype(np.float32) 32 | 33 | dst_points = np.dot(H,source_pts.T) 34 | dst_points = dst_points/dst_points[2,:] 35 | dst_points = dst_points[:2,:] 36 | 37 | 38 | min_x_proposed = np.min(dst_points[0,:]) 39 | min_y_proposed = np.min(dst_points[1,:]) 40 | 41 | max_x_proposed = np.max(dst_points[0,:]) 42 | max_y_proposed = np.max(dst_points[1,:]) 43 | 44 | w_proposed = max_x_proposed - min_x_proposed 45 | h_proposed = max_y_proposed - min_y_proposed 46 | 47 | final_x1 = min(min_x_proposed,x_selective) 48 | final_y1 = min(min_y_proposed,y_selective) 49 | 50 | final_x2 = max(max_x_proposed,x_selective+w_selective) 51 | final_y2 = max(max_y_proposed,y_selective+h_selective) 52 | 53 | final_x1 = max(final_x1,0) 54 | final_y1 = max(final_y1,0) 55 | 56 | final_x2 = min(image_size[1],final_x2) 57 | final_y2 = min(image_size[0],final_y2) 58 | 59 | return (final_y1,final_x1,final_y2,final_x2) 60 | 61 | # def extract_tfrecord(): 62 | # record_iterator = tf.python_io.tf_record_iterator(path=tfrecords_train_filename) 63 | 64 | # for string_record in record_iterator: 65 | # example = tf.train.Example() 66 | # example.ParseFromString(string_record) 67 | 68 | # img_string = example.features.feature['image_raw'].bytes_list.value[0] 69 | # landmark_string = example.features.feature['landmarks'].bytes_list.value[0] 70 | # landmarks = np.fromstring(landmark_string, dtype=np.float32).reshape(21,2) 71 | # img_width = int(example.features.feature['width'].int64_list.value[0]) 72 | # img_height = int(example.features.feature['height'].int64_list.value[0]) 73 | # img_1d = np.fromstring(img_string, dtype=np.uint8).reshape(img_height,img_width,3) 74 | # loc_x = int(example.features.feature['loc_x'].int64_list.value[0]) 75 | # loc_y = int(example.features.feature['loc_y'].int64_list.value[0]) 76 | # loc_w = int(example.features.feature['loc_w'].int64_list.value[0]) 77 | # loc_h = int(example.features.feature['loc_h'].int64_list.value[0]) 78 | # sex = int(example.features.feature['sex'].int64_list.value[0]) 79 | 80 | # center_x = loc_x + (loc_w/2.0) 81 | # center_y = loc_y + (loc_h/2.0) 82 | 83 | # centers = np.tile(np.array([center_x,center_y]).reshape(1,2),(21,1)) 84 | # normalized = landmarks - centers 85 | # w_h = np.tile(np.array([loc_w,loc_h]).reshape(1,2),(21,1)) 86 | 87 | # normalized = normalized/w_h 88 | # landmarks_for_irp =[] 89 | # visibility_for_irp=[] 90 | 91 | # for i in range(normalized.shape[0]): 92 | # if (landmarks[i][0] == 0.0) and (landmarks[i][0] == 0.0) : 93 | # visibility_for_irp.append([0]) 94 | # continue 95 | # else: 96 | # visibility_for_irp.append([1]) 97 | # point_x = normalized[i][0]*loc_w + center_x 98 | # point_y = normalized[i][1]*loc_h + center_y 99 | # landmarks_for_irp.append([point_x,point_y]) 100 | 101 | # #cv2.circle(img_1d,(int(point_x),int(point_y)), 1, (0,0,255), 2) 102 | # landmarks_for_irp = np.asarray(landmarks_for_irp) 103 | # visibility_for_irp = np.asarray(visibility_for_irp) 104 | # l1,l2,l3,l4 = region_proposal(landmarks_for_irp,visibility_for_irp,(img_width,img_height)) 105 | # 
cv2.rectangle(img_1d,(int(l1),int(l2)),(int(l3),int(l4)),(0,255,0),3) 106 | # cv2.imshow('result',img_1d) 107 | # cv2.waitKey(0) 108 | 109 | # if __name__ == '__main__': 110 | # extract_tfrecord() 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /new_files/lnms.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | # import pdb 5 | # pdb.set_trace() 6 | def fast_nms(ip_boxes, ov_threshold): 7 | 8 | if len(ip_boxes) == 0: 9 | return None 10 | 11 | #Save the Area Computation 12 | area = (ip_boxes[:,0] - ip_boxes[:,2])*(ip_boxes[:,1] - ip_boxes[:,3]) 13 | area = area.reshape(-1,1) 14 | 15 | #sorted_y_index = np.argsort(ip_boxes[:,3]) 16 | sorted_y_index = np.argsort(area[:,0]) 17 | keep = {} 18 | 19 | while len(sorted_y_index) > 0: 20 | index = sorted_y_index[-1] 21 | 22 | to_find = sorted_y_index[:-1] 23 | x1 = np.maximum(ip_boxes[to_find,0],ip_boxes[index,0]) 24 | x2 = np.maximum(np.minimum(ip_boxes[to_find,2],ip_boxes[index,2]),x1) 25 | y1 = np.maximum(ip_boxes[to_find,1],ip_boxes[index,1]) 26 | y2 = np.maximum(np.minimum(ip_boxes[to_find,3],ip_boxes[index,3]),y1) 27 | w = x2 - x1 28 | h = y2 - y1 29 | intersection_area = (w*h).reshape(-1,1) 30 | total_area = (ip_boxes[to_find,2] - ip_boxes[to_find,0]).reshape(-1,1)*(ip_boxes[to_find,3] - ip_boxes[to_find,1]).reshape(-1,1) + (ip_boxes[index,2] - ip_boxes[index,0]).reshape(-1,1)*(ip_boxes[index,3] - ip_boxes[index,1]).reshape(-1,1) - intersection_area 31 | #overlap = intersection_area/(area[to_find,:]+1e-5) 32 | overlap = intersection_area/total_area 33 | keep[index]=list(to_find[np.where(overlap >ov_threshold)[0]]) 34 | keep[index].append(index) 35 | 36 | sorted_y_index= np.delete(sorted_y_index,np.concatenate([[len(sorted_y_index)-1],np.where(overlap > ov_threshold)[0]])) 37 | 38 | return keep 39 | 40 | # if __name__ == '__main__': 41 | # a = np.load('/home/shashank/Documents/CSE-252C/chainer_ref/hyperface/ip1.npy') 42 | # x1 = a[:,0].reshape(-1,1) 43 | # y1 = a[:,1].reshape(-1,1) 44 | # x2 = a[:,2].reshape(-1,1) 45 | # y2 = a[:,3].reshape(-1,1) 46 | # x2 = x2.reshape(-1,1) + x1 47 | # y2 = y2.reshape(-1,1) + y1 48 | # a = np.concatenate([x1,y1,x2,y2],axis=1) 49 | # fast_nms(a,0.2) 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /new_files/main.py: -------------------------------------------------------------------------------- 1 | import vis 2 | import cv2 3 | import tensorflow as tf 4 | import os 5 | import argparse 6 | from skimage import io 7 | from model import * 8 | # import pdb 9 | # pdb.set_trace() 10 | 11 | if not os.path.exists('./logs'): 12 | os.makedirs('./logs') 13 | 14 | map(os.unlink, (os.path.join( './logs',f) for f in os.listdir('./logs')) ) 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('-f','--forward_only',dest='forward_only',help='Test/Train Mode Flag',default=0,type=int) 19 | parser.add_argument('-b','--batchsize',dest='batch_size',help='Batch Size to calculate the number of iterations per epoch',default=32,type=int) 20 | parser.add_argument('-e','--n_epochs',dest='num_epochs',help='Number of Epochs for Training',default=10,type=int) 21 | parser.add_argument('-p','--model_path',dest='model_path',help='Enter the path for the model to use for testing',default=None,type=str) 22 | parser.add_argument('-t','--tf_record_path',dest='tf_record_file_path',help='Enter the path for the Tf 
Record File to use for training',default=None,type=str) 23 | parser.add_argument('-i','--test_image_path',dest='test_image_path',help='Enter the test image path',default=None,type=str) 24 | args = parser.parse_args() 25 | return args 26 | 27 | with tf.Session() as sess: 28 | print "Parsing Argument" 29 | args = parse_args() 30 | print 'Building Graph...' 31 | net = HyperFace(sess,batch_size=args.batch_size,num_epochs=args.num_epochs,forward_only=args.forward_only) 32 | print 'Graph Built!' 33 | sess.run(tf.global_variables_initializer()) 34 | if args.forward_only == 1: 35 | print "Loading Model" 36 | net.load_model(args.model_path) 37 | print "Start Testing" 38 | #img_raw = np.asarray() 39 | img_raw = np.asarray(cv2.imread(args.test_image_path)) 40 | print img_raw.shape 41 | output_set = net.test_hyperface(img_raw) 42 | vis.vis_results(img_raw,output_set) 43 | else: 44 | filename_queue = tf.train.string_input_producer([args.tf_record_file_path], num_epochs=args.num_epochs) 45 | #net.train() 46 | print "Start Training" 47 | 48 | # net.train() 49 | 50 | -------------------------------------------------------------------------------- /new_files/model.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import irp 3 | import lnms 4 | import selective_search 5 | import tensorflow as tf 6 | import tensorflow.contrib.slim as slim 7 | import numpy as np 8 | from ipdb import set_trace as brk 9 | 10 | class HyperFace(object): 11 | 12 | def __init__(self, sess,batch_size=None,num_epochs=None,forward_only=None): 13 | 14 | self.sess = sess 15 | self.forward_only = forward_only 16 | 17 | if self.forward_only == 1: 18 | self.batch_size = None 19 | else: 20 | self.batch_size = batch_size 21 | 22 | self.img_height = 227 23 | self.img_width = 227 24 | self.channel = 3 25 | 26 | self.num_epochs = num_epochs 27 | 28 | # Hyperparameters 29 | self.weight_detect = 1 30 | self.weight_landmarks = 5 31 | self.weight_visibility = 0.5 32 | self.weight_pose = 5 33 | self.weight_gender = 2 34 | self.build_network() 35 | 36 | 37 | def build_network(self): 38 | 39 | self.X = tf.placeholder(tf.float32, [self.batch_size, self.img_height, self.img_width, self.channel], name='images') 40 | self.detection = tf.placeholder(tf.float32, [self.batch_size,2], name='detection') 41 | self.landmarks = tf.placeholder(tf.float32, [self.batch_size, 42], name='landmarks') 42 | self.visibility = tf.placeholder(tf.float32, [self.batch_size,21], name='visibility') 43 | self.pose = tf.placeholder(tf.float32, [self.batch_size,3], name='pose') 44 | self.gender = tf.placeholder(tf.float32, [self.batch_size,2], name='gender') 45 | 46 | self.net_output = self.network(self.X) # (out_detection, out_landmarks, out_visibility, out_pose, out_gender) 47 | if self.forward_only == 0: 48 | loss_detection = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(self.net_output[0], self.detection)) 49 | 50 | visibility_mask = tf.reshape(tf.tile(tf.expand_dims(self.visibility, axis=2), [1,1,2]), [self.batch_size, -1]) 51 | loss_landmarks = tf.reduce_mean(tf.square(visibility_mask*(self.net_output[1] - self.landmarks))) 52 | 53 | loss_visibility = tf.reduce_mean(tf.square(self.net_output[2] - self.visibility)) 54 | loss_pose = tf.reduce_mean(tf.square(self.net_output[3] - self.pose)) 55 | loss_gender = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(self.net_output[4], self.gender)) 56 | 57 | self.loss = self.weight_detect*loss_detection + self.weight_landmarks*loss_landmarks \ 58 | + 
self.weight_visibility*loss_visibility + self.weight_pose*loss_pose \ 59 | + self.weight_gender*loss_gender 60 | 61 | 62 | def train(self): 63 | 64 | optimizer = tf.train.AdamOptimizer().minimize(self.loss) 65 | writer = tf.summary.FileWriter('./logs', self.sess.graph) 66 | loss_summ = tf.summary.scalar('loss', self.loss) 67 | 68 | def network(self,inputs): 69 | 70 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 71 | activation_fn = tf.nn.relu, 72 | weights_initializer = tf.truncated_normal_initializer(0.0, 0.01) ): 73 | 74 | conv1 = slim.conv2d(inputs, 96, [11,11], 4, padding= 'VALID', scope='conv1') 75 | max1 = slim.max_pool2d(conv1, [3,3], 2, padding= 'VALID', scope='max1') 76 | 77 | conv1a = slim.conv2d(max1, 256, [4,4], 4, padding= 'VALID', scope='conv1a') 78 | 79 | conv2 = slim.conv2d(max1, 256, [5,5], 1, scope='conv2') 80 | max2 = slim.max_pool2d(conv2, [3,3], 2, padding= 'VALID', scope='max2') 81 | conv3 = slim.conv2d(max2, 384, [3,3], 1, scope='conv3') 82 | 83 | conv3a = slim.conv2d(conv3, 256, [2,2], 2, padding= 'VALID', scope='conv3a') 84 | 85 | conv4 = slim.conv2d(conv3, 384, [3,3], 1, scope='conv4') 86 | conv5 = slim.conv2d(conv4, 256, [3,3], 1, scope='conv5') 87 | pool5 = slim.max_pool2d(conv5, [3,3], 2, padding= 'VALID', scope='pool5') 88 | 89 | concat_feat = tf.concat( [conv1a, conv3a, pool5],3) 90 | conv_all = slim.conv2d(concat_feat, 192, [1,1], 1, padding= 'VALID', scope='conv_all') 91 | 92 | shape = int(np.prod(conv_all.get_shape()[1:])) 93 | fc_full = slim.fully_connected(tf.reshape(tf.transpose(conv_all, [0,3,1,2]), [-1, shape]), 3072, scope='fc_full') 94 | #fc_full = slim.fully_connected(tf.reshape(conv_all, [-1, shape]), 3072, scope='fc_full') 95 | fc_detection = slim.fully_connected(fc_full, 512, scope='fc_detection') 96 | fc_landmarks = slim.fully_connected(fc_full, 512, scope='fc_landmarks') 97 | fc_visibility = slim.fully_connected(fc_full, 512, scope='fc_visibility') 98 | fc_pose = slim.fully_connected(fc_full, 512, scope='fc_pose') 99 | fc_gender = slim.fully_connected(fc_full, 512, scope='fc_gender') 100 | 101 | out_detection = slim.fully_connected(fc_detection, 2, scope='out_detection',activation_fn = None) 102 | out_landmarks = slim.fully_connected(fc_landmarks, 42, scope='out_landmarks',activation_fn = None) 103 | out_visibility = slim.fully_connected(fc_visibility, 21, scope='out_visibility',activation_fn = None) 104 | out_pose = slim.fully_connected(fc_pose, 3, scope='out_pose',activation_fn = None) 105 | out_gender = slim.fully_connected(fc_gender, 2, scope='out_gender',activation_fn = None) 106 | 107 | 108 | return [out_detection, out_landmarks, out_visibility, out_pose, tf.nn.softmax(out_gender),conv_all] 109 | 110 | def load_from_tfRecord(self,filename_queue): 111 | 112 | reader = tf.TFRecordReader() 113 | _, serialized_example = reader.read(filename_queue) 114 | 115 | features = tf.parse_single_example( 116 | serialized_example, 117 | features={ 118 | 'image_raw':tf.FixedLenFeature([], tf.string), 119 | 'width': tf.FixedLenFeature([], tf.int64), 120 | 'height': tf.FixedLenFeature([], tf.int64), 121 | 'batch_size':tf.FixedLenFeature([], tf.int64) 122 | }) 123 | 124 | image = tf.decode_raw(features['image_raw'], tf.float32) 125 | orig_height = tf.cast(features['height'], tf.int32) 126 | orig_width = tf.cast(features['width'], tf.int32) 127 | batch_size = tf.cast(features['batch_size'], tf.int32) 128 | 129 | image_shape = tf.pack([batch_size,227,227,3]) 130 | image_tf = tf.reshape(image,image_shape) 131 | 132 | images = 
tf.train.shuffle_batch([image_tf],batch_size=self.batch_size,enqueue_many=True,num_threads=1,capacity=50,min_after_dequeue=10) 133 | 134 | return images 135 | 136 | def load_model(self,model_path): 137 | for var in tf.all_variables(): 138 | if var.name.find('weights') != -1: 139 | if var.name.find('conv') != -1: 140 | self.sess.run(var.assign(np.load(model_path+'/'+var.name.split('/')[0]+'/W.npy').transpose(2,3,1,0))) 141 | else: 142 | self.sess.run(var.assign(np.load(model_path+'/'+var.name.split('/')[0]+'/W.npy').T)) 143 | if var.name.find('biases') != -1: 144 | self.sess.run(var.assign(np.load(model_path+'/'+var.name.split('/')[0]+'/b.npy'))) 145 | 146 | print "Done Loading" 147 | 148 | def test_hyperface(self,ip_img,nms_threshold=0.2,irp_count=2): 149 | # 1) Take the input as image 150 | # 2) Run DLIB's selective search on that 151 | # 3) Pass the regions to the trained model 152 | # 4) For all the regions having detection score greater than a threshold. 153 | # 4.1) Perform Iterative Region Proposal on it. 154 | # 5) Use the new localized boxes to perform landmark based LMS 155 | # 6) Again run the network on the localized boxes from the IRP 156 | # 7) Find precision boxes as the min and max of the fids 157 | # 8) Run NMS 158 | # 9) Keep the top k boxes and use the median of each to give the final output 159 | # 10) Apply Face Rect Calculator on the final fids 160 | 161 | ip_img_size = ip_img.shape[0:-1] 162 | total_boxes = None 163 | ip_img = ip_img.astype(np.float32)/255.0 164 | 165 | for i in range(1+irp_count): 166 | if i ==0: 167 | boxes_op,iou_dump,coords_dump = selective_search.perform_selective_search(ip_img.astype(np.float32),ground_truth=None) 168 | 169 | cropped_imgs = tf.image.crop_and_resize(ip_img[np.newaxis,:].astype(np.float32),boxes_op, [0]*boxes_op.shape[0], crop_size=[227,227]).eval(session=self.sess) 170 | # cropped_imgs = np.load('db_imgs.npy') 171 | # cropped_labels = np.load('db_labels.npy') 172 | # cropped_landmarks = np.load('db_landmarks.npy') 173 | brk() 174 | normalized_imgs = cropped_imgs - 0.5 175 | 176 | # a = np.load('/home/shashank/Documents/CSE-252C/chainer_ref/hyperface/gt_ip.npy') 177 | # a = a.transpose(0,2,3,1) 178 | 179 | input_feed={self.X:normalized_imgs} 180 | net_op = self.sess.run([self.net_output],feed_dict=input_feed) 181 | 182 | all_landmarks = np.asarray(net_op[0][1]).reshape(-1,42) 183 | all_landmarks_x = all_landmarks[:,::2].reshape(-1,21) 184 | all_landmarks_y = all_landmarks[:,1::2].reshape(-1,21) 185 | loc_w = (boxes_op[:,3] - boxes_op[:,1])*ip_img_size[1] 186 | loc_h = (boxes_op[:,2] - boxes_op[:,0])*ip_img_size[0] 187 | c_x = boxes_op[:,1]*ip_img_size[1] + loc_w/2.0 188 | c_y = boxes_op[:,0]*ip_img_size[0] + loc_h/2.0 189 | all_landmarks_x = all_landmarks_x*loc_w.reshape(-1,1) + c_x.reshape(-1,1) 190 | all_landmarks_y = all_landmarks_y*loc_h.reshape(-1,1) + c_y.reshape(-1,1) 191 | all_landmarks_x = all_landmarks_x[:,np.newaxis,:] 192 | all_landmarks_y = all_landmarks_y[:,np.newaxis,:] 193 | 194 | all_landmarks = np.concatenate([all_landmarks_x,all_landmarks_y],axis=1) 195 | 196 | detections = np.exp(np.asarray(net_op[0][0]).reshape(-1,2)) 197 | detections = (detections/(np.sum(detections,axis=1).reshape(-1,1)))[:,1].reshape(-1,1) 198 | 199 | interests = np.where(detections>0.25)[0] 200 | 201 | visibility = np.asarray(net_op[0][2]).reshape(-1,21)[interests,:] 202 | visibility_mask = np.zeros_like(visibility) 203 | visibility_mask[np.where(visibility>0.5)] = 1 204 | 205 | landmarks = all_landmarks[interests,:,:].reshape(-1,2,21) 206 
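# --- Iterative Region Proposal (IRP) ---
# For every region whose detection score cleared the 0.25 threshold above, the
# landmarks predicted as visible are fit against the AFLW template landmarks
# with a homography (irp.region_proposal), and the re-projected template box
# becomes a tighter face proposal for the next pass of the network. Proposals
# that collapse to zero width or height are dropped. A rough stand-alone
# sketch of the call, assuming `pts` is a (num_visible, 2) array of landmark
# pixel coordinates and `vis_idx` holds the indices of the visible landmarks:
#
#     y1, x1, y2, x2 = irp.region_proposal(pts, vis_idx, ip_img_size)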
| 207 | detected_boxes =[] 208 | for i in range(len(interests)): 209 | mask = np.where(visibility_mask[i,:]==1)[0] 210 | y1,x1,y2,x2 = irp.region_proposal(landmarks[i,:,mask],mask,ip_img_size) 211 | if (y1 == y2) or (x1 == x2): 212 | continue 213 | detected_boxes.append([y1/float(ip_img_size[0]),x1/float(ip_img_size[1]),y2/float(ip_img_size[0]),x2/float(ip_img_size[1])]) 214 | boxes_op = np.asarray(detected_boxes).astype(np.float32) 215 | 216 | 217 | #DO the final model run 218 | 219 | # cropped_imgs = tf.image.crop_and_resize(ip_img[np.newaxis,:].astype(np.float32),boxes_op, [0]*boxes_op.shape[0], crop_size=[227,227]).eval(session=self.sess) 220 | # normalized_imgs = (cropped_imgs - 127.5)/128.0 221 | # input_feed={self.X:normalized_imgs} 222 | # net_op = self.sess.run([self.net_output],feed_dict=input_feed) 223 | 224 | interests = np.where(detections>0.5)[0] 225 | landmarks = all_landmarks[interests,:,:].reshape(-1,2,21) 226 | visibility = np.asarray(net_op[0][2]).reshape(-1,21)[interests,:] 227 | poses = np.asarray(net_op[0][3]).reshape(-1,3)[interests,:] 228 | genders = np.asarray(net_op[0][4])[:,1].reshape(-1,1)[interests,:] 229 | 230 | visibility_mask = np.zeros_like(visibility) 231 | visibility_mask[np.where(visibility>0.5)] = 1 232 | 233 | # min_x = np.min(landmarks[:,0,visibility_mask],axis=1).reshape(-1,1) 234 | # min_y = np.min(landmarks[:,1,visibility_mask],axis=1).reshape(-1,1) 235 | # max_x = np.max(landmarks[:,0,visibility_mask],axis=1).reshape(-1,1) 236 | # max_y = np.max(landmarks[:,1,visibility_mask],axis=1).reshape(-1,1) 237 | precise_boxes = [] 238 | for i in range(landmarks.shape[0]): 239 | min_x = np.min(landmarks[i,0,np.where(visibility_mask[i,:]==1)[0]]) 240 | min_y = np.min(landmarks[i,1,np.where(visibility_mask[i,:]==1)[0]]) 241 | max_x = np.max(landmarks[i,0,np.where(visibility_mask[i,:]==1)[0]]) 242 | max_y = np.max(landmarks[i,1,np.where(visibility_mask[i,:]==1)[0]]) 243 | precise_boxes.append([min_x,min_y,max_x,max_y]) 244 | precise_boxes = np.asarray(precise_boxes) 245 | 246 | #precise_boxes = np.concatenate([min_x,min_y,max_x,max_y],axis=1) 247 | nms_op_dict = lnms.fast_nms(precise_boxes,nms_threshold) 248 | final_res = {'landmarks':[],'gender':[],'location':[],'pose':[]} 249 | for key in nms_op_dict: 250 | value = nms_op_dict[key] 251 | final_res['gender'].append(np.median(genders[value,:],axis=0)) 252 | final_res['pose'].append(np.median(poses[value,:],axis=0)) 253 | temp = np.median(landmarks[value,:,:],axis=0).T[np.where(np.median(visibility[value,:],axis=0)>0.5)[0],:] 254 | brk() 255 | final_res['landmarks'].append(temp) 256 | y1,x1,y2,x2 = irp.region_proposal(temp,np.where(np.median(visibility[value,:],axis=0)>0.5)[0],ip_img_size) 257 | final_res['location'].append([x1,y1,x2,y2]) 258 | 259 | print "Done" 260 | return final_res 261 | 262 | 263 | 264 | 265 | def print_variables(self): 266 | variables = slim.get_model_variables() 267 | print 'Model Variables:' 268 | for var in variables: 269 | print var.name, ' ', var.get_shape() 270 | 271 | 272 | 273 | 274 | -------------------------------------------------------------------------------- /new_files/selective_search.py: -------------------------------------------------------------------------------- 1 | #!/home/shashank/anaconda2/bin 2 | import dlib 3 | from skimage import io 4 | import cv2 5 | import numpy as np 6 | import tensorflow as tf 7 | import csv 8 | # from multiprocessing import Pool 9 | # from multiprocessing import Manager 10 | # from multiprocessing import Queue 11 | # from 
multiprocessing.dummy import Pool as ThreadPool 12 | import os 13 | import math 14 | import time 15 | from tqdm import tqdm 16 | from pdb import set_trace as brk 17 | # import pdb 18 | # pdb.set_trace() 19 | 20 | DEBUG_FLAG = False 21 | VIS_FLAG = False 22 | MAKE_TF_RECORD = False 23 | tfrecords_full_filename = 'aflw_test.tfrecords' 24 | 25 | record_iterator = tf.python_io.tf_record_iterator(path=tfrecords_full_filename) 26 | tfrecords_training_pos_filename = 'aflw_training_pos.tfrecords' 27 | tfrecords_training_neg_filename = 'aflw_training_neg.tfrecords' 28 | writer_pos = tf.python_io.TFRecordWriter(tfrecords_training_pos_filename) 29 | writer_neg = tf.python_io.TFRecordWriter(tfrecords_training_neg_filename) 30 | 31 | N_TRAIN = 20000 32 | N_TEST = 1000 33 | 34 | if DEBUG_FLAG: 35 | debug_fp = open('debug.csv','wb') 36 | debug_fp_csv = csv.writer(debug_fp) 37 | 38 | def _bytes_feature(value): 39 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 40 | 41 | def _float_feature(value): 42 | return tf.train.Feature(float_list=tf.train.FloatList(value=[value])) 43 | 44 | def _int64_feature(value): 45 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 46 | 47 | 48 | 49 | 50 | def calc_2D_IOU(bb1,bb2): 51 | top_left_x1 = bb1[0] 52 | top_left_y1 = bb1[1] 53 | bottom_right_x1 = bb1[2] 54 | bottom_right_y1 = bb1[3] 55 | 56 | top_left_x2 = bb2[0] 57 | top_left_y2 = bb2[1] 58 | bottom_right_x2 = bb2[2] 59 | bottom_right_y2 = bb2[3] 60 | 61 | intersect_top_left_x = max(bb1[0],bb2[0]) 62 | intersect_top_left_y = max(bb1[1],bb2[1]) 63 | intersect_bottom_right_x = max(min(bb1[2],bb2[2]),intersect_top_left_x) 64 | intersect_bottom_right_y = max(min(bb1[3],bb2[3]),intersect_top_left_y) 65 | 66 | intersect_area = (intersect_bottom_right_x-intersect_top_left_x)*(intersect_bottom_right_y-intersect_top_left_y) 67 | total_area = (bottom_right_x1-top_left_x1)*(bottom_right_y1-top_left_y1) + (bottom_right_x2-top_left_x2)*(bottom_right_y2-top_left_y2) - intersect_area 68 | iou = float(intersect_area)/float(total_area+0.0) 69 | return iou 70 | 71 | def perform_scale_down(image,max_size_allowed): 72 | 73 | orig_h = image.shape[0] 74 | orig_w = image.shape[1] 75 | 76 | new_h = orig_h 77 | new_w = orig_w 78 | 79 | if new_h > max_size_allowed[1]: 80 | new_w = float(new_w*max_size_allowed[1]) / float(new_h) 81 | new_h = max_size_allowed[1] 82 | 83 | if new_w > max_size_allowed[0]: 84 | new_h = float(new_h*max_size_allowed[0]) / float(new_w) 85 | new_w = max_size_allowed[0] 86 | 87 | if new_h != orig_h or new_w != orig_w: 88 | return cv2.resize(image, (int(new_w), int(new_h))),float(orig_h)/float(new_h) 89 | else: 90 | return image,1.0 91 | 92 | def perform_selective_search(img,ground_truth,gt2): 93 | 94 | rects=[] 95 | 96 | max_size = (500,500) 97 | h = float(img.shape[0]) 98 | w = float(img.shape[1]) 99 | img,scale = perform_scale_down(img,max_size) 100 | dlib.find_candidate_object_locations(img, rects, kvals=(50, 200, 2), min_size=1200) 101 | filter_positive_rects=[] 102 | filter_negative_rects_hard=[] 103 | filter_negative_rects_easy=[] 104 | max_negatives = 50 105 | hard_negative_ratio = 0.6 106 | iou_list = [] 107 | filter_negative_rects=[] 108 | for rect in rects: 109 | descaled_top_x = (rect.left()*scale) 110 | descaled_top_y = (rect.top()*scale) 111 | descaled_bottom_x = (rect.right()*scale) 112 | descaled_bottom_y = (rect.bottom()*scale) 113 | descaled_width = descaled_bottom_x - descaled_top_x#int(rect.width()*scale) 114 | descaled_height = descaled_bottom_y - 
descaled_top_y #int(rect.height()*scale) 115 | descaled_center_x = descaled_top_x + (descaled_width/2.0) 116 | descaled_center_y = descaled_top_y + (descaled_height/2.0) 117 | 118 | #iou,a1,a2 = rect_overlap_rate(gt2,(descaled_top_x,descaled_top_y,descaled_width,descaled_height)) 119 | iou = calc_2D_IOU(ground_truth,(descaled_top_x,descaled_top_y,descaled_bottom_x,descaled_bottom_y)) 120 | 121 | iou_list.append(iou) 122 | if DEBUG_FLAG: 123 | debug_fp_csv.writerow([iou,ground_truth[0],ground_truth[1],ground_truth[2],ground_truth[3],rect.left(),rect.top(),rect.right(),rect.bottom()]) 124 | if iou > 0.50: 125 | if VIS_FLAG: 126 | filter_positive_rects.append([int(descaled_top_x),int(descaled_top_y),int(descaled_bottom_x),int(descaled_bottom_y)]) 127 | else: 128 | filter_positive_rects.append([descaled_top_y/h,descaled_top_x/w,descaled_bottom_y/h,descaled_bottom_x/w, 129 | descaled_center_x,descaled_center_y,descaled_width,descaled_height]) 130 | elif iou <= 0.0: 131 | if VIS_FLAG: 132 | filter_negative_rects.append([int(descaled_top_x),int(descaled_top_y),int(descaled_bottom_x),int(descaled_bottom_y)]) 133 | else: 134 | filter_negative_rects.append([descaled_top_y/h,descaled_top_x/w,descaled_bottom_y/h,descaled_bottom_x/w, 135 | descaled_center_x,descaled_center_y,descaled_width,descaled_height]) 136 | # elif 0.25 <= iou < 0.35: 137 | # if VIS_FLAG: 138 | # filter_negative_rects_hard.append([int(descaled_top_x),int(descaled_top_y),int(descaled_bottom_x),int(descaled_bottom_y)]) 139 | # else: 140 | # filter_negative_rects_hard.append([descaled_top_y/h,descaled_top_x/w,descaled_bottom_y/h,descaled_bottom_x/w]) 141 | # elif iou < 0.25: 142 | # if VIS_FLAG: 143 | # filter_negative_rects_easy.append([int(descaled_top_x),int(descaled_top_y),int(descaled_bottom_x),int(descaled_bottom_y)]) 144 | # else: 145 | # filter_negative_rects_easy.append([descaled_top_y/h,descaled_top_x/w,descaled_bottom_y/h,descaled_bottom_x/w]) 146 | 147 | # if len(filter_negative_rects_easy) + len(filter_negative_rects_hard) < max_negatives: 148 | # filter_negative_rects = np.concatenate([np.asarray(filter_negative_rects_easy),np.asarray(filter_negative_rects_hard)],axis=0) 149 | # filter_negative_rects = filter_negative_rects.tolist() 150 | # else: 151 | # if len(filter_negative_rects_hard) < int(hard_negative_ratio*max_negatives): 152 | # index = np.random.choice(np.arange(len(filter_negative_rects_easy)),max_negatives -len(filter_negative_rects_hard) 153 | # ,replace=False) 154 | # filter_negative_rects_easy = np.asarray(filter_negative_rects_easy)[index,:].tolist() 155 | # elif len(filter_negative_rects_easy) < int((1-hard_negative_ratio)*max_negatives): 156 | # index = np.random.choice(np.arange(len(filter_negative_rects_hard)),max_negatives -len(filter_negative_rects_easy), 157 | # replace=False) 158 | # filter_negative_rects_hard = np.asarray(filter_negative_rects_hard)[index,:].tolist() 159 | # else: 160 | # index = np.random.choice(np.arange(len(filter_negative_rects_hard)),int(hard_negative_ratio*max_negatives),replace=False) 161 | # filter_negative_rects_hard = np.asarray(filter_negative_rects_hard)[index,:].tolist() 162 | # index = np.random.choice(np.arange(len(filter_negative_rects_easy)),int((1-hard_negative_ratio)*max_negatives),replace=False) 163 | # filter_negative_rects_easy = np.asarray(filter_negative_rects_easy)[index,:].tolist() 164 | # filter_negative_rects = np.concatenate([np.asarray(filter_negative_rects_easy),np.asarray(filter_negative_rects_hard)],axis=0) 165 | # filter_negative_rects = 
filter_negative_rects.tolist() 166 | 167 | # Jittering the ground truth 168 | 169 | gt_top_x1 = ground_truth[0] 170 | gt_top_y1 = ground_truth[1] 171 | gt_bottom_x2 = ground_truth[2] 172 | gt_bottom_y2 = ground_truth[3] 173 | 174 | gt_w = gt_bottom_x2 - gt_top_x1 175 | gt_h = gt_bottom_y2 - gt_top_y1 176 | 177 | w_list = np.arange(-0.5*gt_w,0.5*gt_w,0.1*gt_w).tolist() 178 | h_list = np.arange(-0.5*gt_h,0.5*gt_h,0.1*gt_h).tolist() 179 | 180 | for w_shift in w_list: 181 | for h_shift in h_list: 182 | new_x1 = gt_top_x1 + w_shift 183 | new_y1 = gt_top_y1 + h_shift 184 | new_x2 = gt_bottom_x2 + w_shift 185 | new_y2 = gt_bottom_y2 + h_shift 186 | 187 | if new_x1 < 0.0: 188 | new_x1 = 0.0 189 | elif new_x1 > w : 190 | new_x1 = w 191 | 192 | if new_y1 < 0.0: 193 | new_y1 = 0.0 194 | elif new_y1 > h : 195 | new_y1 = h 196 | 197 | if new_x2 < 0.0: 198 | new_x2 = 0.0 199 | elif new_x2 > w : 200 | new_x2 = w 201 | 202 | if new_y2 < 0.0: 203 | new_y2 = 0.0 204 | elif new_y2 > h : 205 | new_y2 = h 206 | 207 | iou = calc_2D_IOU(ground_truth,(new_x1,new_y1,new_x2,new_y2)) 208 | if iou > 0.50: 209 | if VIS_FLAG: 210 | filter_positive_rects.append([int(new_x1),int(new_y1),int(new_x2),int(new_y2)]) 211 | else: 212 | descaled_width = new_x2 - new_x1#int(rect.width()*scale) 213 | descaled_height = new_y2 - new_y1#int(rect.height()*scale) 214 | descaled_center_x = new_x1 + (descaled_width/2.0) 215 | descaled_center_y = new_y1 + (descaled_height/2.0) 216 | filter_positive_rects.append([(new_y1)/h,(new_x1)/w,(new_y2)/h,(new_x2)/w, 217 | descaled_center_x,descaled_center_y,descaled_width,descaled_height]) 218 | 219 | if VIS_FLAG: 220 | return filter_positive_rects,filter_negative_rects 221 | else: 222 | return np.asarray(filter_positive_rects).astype(np.float32),np.asarray(filter_negative_rects).astype(np.float32) 223 | 224 | def visualise(img,rects,gt): 225 | 226 | 227 | for rect in rects: 228 | #new_img = img 229 | r,g,b = np.random.randint(0,255,3) 230 | 231 | cv2.rectangle(img,(rect[0],rect[1]),(rect[2],rect[3]),(b,g,r),2) 232 | cv2.imshow('result',img) 233 | cv2.namedWindow('result', cv2.WINDOW_NORMAL) 234 | cv2.resizeWindow('result', 320,240) 235 | 236 | 237 | cv2.rectangle(img,(gt[0],gt[1]),(gt[2],gt[3]),(0,255,0),1) 238 | cv2.imshow('result',img/255.0) 239 | cv2.waitKey(0) 240 | 241 | 242 | 243 | def extract_tfrecord(it): 244 | try: 245 | example = tf.train.Example() 246 | example.ParseFromString(it) 247 | session = tf.Session() 248 | img_string = example.features.feature['image_raw'].bytes_list.value[0] 249 | img_width = int(example.features.feature['width'].int64_list.value[0]) 250 | img_height = int(example.features.feature['height'].int64_list.value[0]) 251 | 252 | img_2d = np.fromstring(img_string, dtype=np.uint8).reshape(img_height,img_width,3) 253 | 254 | loc_x = int(example.features.feature['loc_x'].int64_list.value[0]) 255 | loc_y = int(example.features.feature['loc_y'].int64_list.value[0]) 256 | loc_w = int(example.features.feature['loc_w'].int64_list.value[0]) 257 | loc_h = int(example.features.feature['loc_h'].int64_list.value[0]) 258 | face_id = int(example.features.feature['face_id'].int64_list.value[0]) 259 | 260 | landmark_string = example.features.feature['landmarks'].bytes_list.value[0] 261 | landmarks = np.fromstring(landmark_string, dtype=np.float32).reshape(21,2) 262 | sex = int(example.features.feature['sex'].int64_list.value[0]) 263 | roll = float(example.features.feature['roll'].float_list.value[0]) 264 | pitch = float(example.features.feature['pitch'].float_list.value[0]) 265 | 
yaw = float(example.features.feature['yaw'].float_list.value[0]) 266 | 267 | hard_postives,hard_negatives = perform_selective_search(img_2d,(loc_x,loc_y,loc_x+loc_w,loc_y+loc_h),(loc_x,loc_y,loc_w,loc_h)) 268 | print "****************************" 269 | if os.path.exists('locations_test/'+str(face_id)): 270 | print face_id 271 | np.save('locations_test/'+str(face_id)+'/positive.npy',hard_postives) 272 | np.save('locations_test/'+str(face_id)+'/negative.npy',hard_negatives) 273 | else: 274 | os.mkdir('locations_test/'+str(face_id)) 275 | np.save('locations_test/'+str(face_id)+'/positive.npy',hard_postives) 276 | np.save('locations_test/'+str(face_id)+'/negative.npy',hard_negatives) 277 | 278 | if VIS_FLAG: 279 | 280 | visualise(img_2d,hard_postives,(loc_x,loc_y,loc_x+loc_w,loc_y+loc_h)) 281 | 282 | if MAKE_TF_RECORD: 283 | 284 | if hard_postives.shape[0] > 0: 285 | 286 | resized_and_cropped_image_pos = tf.image.crop_and_resize(img_2d[np.newaxis,:].astype(np.float32),hard_postives, [0]*hard_postives.shape[0], crop_size=[227,227]).eval(session=session) 287 | np.save('positive.npy',resized_and_cropped_image_pos) 288 | # np.random.shuffle(resized_and_cropped_image_pos) 289 | # resized_and_cropped_image_pos = resized_and_cropped_image_pos[:40,:,:,:] 290 | 291 | # example_pos = tf.train.Example(features=tf.train.Features(feature={ 292 | # 'image_raw':_bytes_feature(resized_and_cropped_image_pos.astype(np.uint8).tostring()), 293 | # 'width': _int64_feature(img_2d.shape[1]), 294 | # 'height': _int64_feature(img_2d.shape[0]), 295 | # 'batch_size': _int64_feature(resized_and_cropped_image_pos.shape[0]), 296 | # 'roll': _float_feature(roll), 297 | # 'pitch':_float_feature(pitch), 298 | # 'yaw':_float_feature(yaw), 299 | # 'landmarks':_bytes_feature(landmarks.tostring()), 300 | # 'gender':_int64_feature(sex), 301 | # 'locations':_bytes_feature(hard_postives.tostring()) 302 | # })) 303 | # writer_pos.write(example_pos.SerializeToString()) 304 | #np.save('pos_files/'+str(face_id)+'.npy',resized_and_cropped_image_pos) 305 | 306 | if hard_negatives.shape[0] > 0: 307 | 308 | resized_and_cropped_image_neg = tf.image.crop_and_resize(img_2d[np.newaxis,:].astype(np.float32),hard_negatives, [0]*hard_negatives.shape[0], crop_size=[227,227]).eval(session=session) 309 | np.save('negative.npy',resized_and_cropped_image_neg) 310 | 311 | # np.random.shuffle(resized_and_cropped_image_neg) 312 | # resized_and_cropped_image_neg = resized_and_cropped_image_neg[:40,:,:,:] 313 | 314 | # example_neg = tf.train.Example(features=tf.train.Features(feature={ 315 | # 'image_raw':_bytes_feature(resized_and_cropped_image_neg.astype(np.uint8).tostring()), 316 | # 'width': _int64_feature(img_2d.shape[1]), 317 | # 'height': _int64_feature(img_2d.shape[0]), 318 | # 'batch_size': _int64_feature(resized_and_cropped_image_neg.shape[0]), 319 | # 'roll': _float_feature(roll), 320 | # 'pitch':_float_feature(pitch), 321 | # 'yaw':_float_feature(yaw), 322 | # 'landmarks':_bytes_feature(landmarks.tostring()), 323 | # 'gender':_int64_feature(sex), 324 | # 'locations':_bytes_feature(hard_negatives.tostring()) 325 | # })) 326 | # writer_neg.write(example_neg.SerializeToString()) 327 | return 1 328 | except Exception as e: 329 | print e 330 | return 0 331 | #np.save('neg_files/'+str(face_id)+'.npy',resized_and_cropped_image_neg) 332 | 333 | def listener(q): 334 | tfrecords_training_pos_filename = 'aflw_training_pos.tfrecords' 335 | tfrecords_training_neg_filename = 'aflw_training_neg.tfrecords' 336 | 337 | writer_pos = 
tf.python_io.TFRecordWriter(tfrecords_training_pos_filename) 338 | writer_neg = tf.python_io.TFRecordWriter(tfrecords_training_neg_filename) 339 | #f = open('check.txt','wb') 340 | while(1): 341 | m = q.get() 342 | if m == 'kill': 343 | break 344 | writer_pos.write(m.SerializeToString()) 345 | #f.close() 346 | writer_neg.close() 347 | writer_pos.close() 348 | 349 | if __name__ == '__main__': 350 | 351 | #pool = Pool(processes=4) 352 | 353 | # manager = Manager() 354 | # q = manager.Queue() 355 | # watcher = pool.apply_async(listener, (q,)) 356 | 357 | #start_time = time.clock() 358 | # jobs =[] 359 | # for i in range(10): 360 | # job = pool.apply_async(extract_tfrecord,(record_iterator.next(),q)) 361 | # jobs.append(job) 362 | # for job in jobs: 363 | # job.wait() 364 | # job.get() 365 | # q.put('kill') 366 | # pool.close() 367 | # results = [pool.apply_async(extract_tfrecord,args=(record_iterator.next())) for i in range(100)] 368 | # pool.close() 369 | # pool.join() 370 | # results = [p.get() for p in results] 371 | #jobs.append(job) 372 | # for job in jobs: 373 | # job.wait() 374 | # job.get() 375 | 376 | # result = pool.map_async(extract_tfrecord,[record_iterator.next() for i in range(100)]) 377 | # result.get() 378 | #print "Done in {}".format(time.clock() - start_time) 379 | 380 | 381 | while (1): 382 | try: 383 | extract_tfrecord(record_iterator.next()) 384 | 385 | except Exception as e: 386 | print e 387 | break 388 | writer_pos.close() 389 | writer_neg.close() 390 | 391 | 392 | 393 | 394 | 395 | 396 | -------------------------------------------------------------------------------- /new_files/split_tf_record.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import dlib 4 | import cv2 5 | 6 | tf_record_file = 'aflw_train.tfrecords' 7 | 8 | def calc_2D_IOU(bb1,bb2): 9 | top_left_x1 = bb1[0] 10 | top_left_y1 = bb1[1] 11 | bottom_right_x1 = bb1[2] 12 | bottom_right_y1 = bb1[3] 13 | 14 | top_left_x2 = bb2[0] 15 | top_left_y2 = bb2[1] 16 | bottom_right_x2 = bb2[2] 17 | bottom_right_y2 = bb2[3] 18 | 19 | intersect_top_left_x = max(bb1[0],bb2[0]) 20 | intersect_top_left_y = max(bb1[1],bb2[1]) 21 | intersect_bottom_right_x = max(min(bb1[2],bb2[2]),intersect_top_left_x) 22 | intersect_bottom_right_y = max(min(bb1[3],bb2[3]),intersect_top_left_y) 23 | 24 | intersect_area = (intersect_bottom_right_x-intersect_top_left_x+1)*(intersect_bottom_right_y-intersect_top_left_y+1) 25 | total_area = (bottom_right_x1-top_left_x1+1)*(bottom_right_y1-top_left_y1+1) + (bottom_right_x2-top_left_x2+1)*(bottom_right_y2-top_left_y2+1) - intersect_area 26 | iou = float(intersect_area)/float(total_area+0.0) 27 | return iou 28 | 29 | def perform_scale_down(image,max_size_allowed): 30 | 31 | orig_h = image.shape[0] 32 | orig_w = image.shape[1] 33 | 34 | new_h = orig_h 35 | new_w = orig_w 36 | 37 | if new_h > max_size_allowed(1): 38 | new_w = (new_w*max_size_allowed(1)) / (new_h+0.0) 39 | new_h = max_size_allowed(1) 40 | 41 | if new_w > max_size_allowed(0): 42 | new_h = (new_h*max_size_allowed(0)) / (new_w+0.0) 43 | new_w = max_size_allowed(0) 44 | 45 | if new_h != orig_h or new_w != orig_w: 46 | return cv2.resize(image, (int(new_w), int(new_h))) 47 | else: 48 | return image 49 | 50 | 51 | def perform_selective_search(img,w,h,ground_truth): 52 | rects=[] 53 | max_size=(500,500) 54 | img = perform_scale_down(img,max_size) 55 | 56 | dlib.find_candidate_object_locations(img, rects, kvals=(50, 200, 2), min_size=2200) 57 | 
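# dlib.find_candidate_object_locations is dlib's selective-search routine: it
# segments the image at several granularities (controlled by kvals) and keeps
# candidate regions of at least min_size pixels. Each candidate rectangle is
# labelled below by its overlap with the ground-truth face box: IoU > 0.5 is
# treated as a positive sample, IoU < 0.35 as a negative, and everything in
# between is discarded as ambiguous.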
filter_positive_rects=[] 58 | filter_negative_rects=[] 59 | 60 | for rect in rects: 61 | iou = calc_2D_IOU(ground_truth,(rect.left(),rect.top(),rect.right(),rect.bottom())) 62 | 63 | if DEBUG_FLAG: 64 | debug_fp_csv.writerow([iou,ground_truth[0],ground_truth[1],ground_truth[2],ground_truth[3],rect.left(),rect.top(),rect.right(),rect.bottom()]) 65 | if iou > 0.5: 66 | filter_positive_rects.append([rect.top()/h,rect.left()/w,rect.bottom()/h,rect.right()/w]) 67 | elif iou < 0.35: 68 | filter_negative_rects.append([rect.top()/h,rect.left()/w,rect.bottom()/h,rect.right()/w]) 69 | 70 | return np.asarray(filter_positive_rects),np.asarray(filter_negative_rects) 71 | 72 | def split_(filename_queue): 73 | reader = tf.TFRecordReader() 74 | _, serialized_example = reader.read(filename_queue) 75 | 76 | features = tf.parse_single_example( 77 | serialized_example, 78 | features={ 79 | 'image_raw':tf.FixedLenFeature([], tf.string), 80 | 'width': tf.FixedLenFeature([], tf.int64), 81 | 'height': tf.FixedLenFeature([], tf.int64), 82 | 'batch_size':tf.FixedLenFeature([], tf.int64) 83 | # 'roll':tf.FixedLenFeature([], tf.float32), 84 | # 'pitch':tf.FixedLenFeature([], tf.float32), 85 | # 'yaw':tf.FixedLenFeature([], tf.float32), 86 | # 'gender':tf.FixedLenFeature([], tf.int64), 87 | # 'roll':tf.FixedLenFeature([], tf.float32), 88 | # 'roll':tf.FixedLenFeature([], tf.float32), 89 | # 'landmarks':tf.FixedLenFeature([], tf.string), 90 | # 'locations':tf.FixedLenFeature([], tf.string) 91 | }) 92 | 93 | image = tf.decode_raw(features['image_raw'], tf.uint8) 94 | # locations = tf.decode_raw(features['locations'], tf.float32) 95 | # landmarks = tf.decode_raw(features['landmarks'], tf.float32) 96 | 97 | batch_size = tf.cast(features['batch_size'], tf.int32) 98 | orig_height = tf.cast(features['height'], tf.int32) 99 | orig_width = tf.cast(features['width'], tf.int32) 100 | 101 | image_shape = tf.pack([batch_size,227,227,3]) 102 | 103 | image_tf = tf.reshape(image,image_shape) 104 | 105 | #resized_image = tf.image.resize_image_with_crop_or_pad(image_tf,target_height=500,target_width=500) 106 | 107 | # image_shape = tf.pack([height, width, 3]) 108 | # image = tf.reshape(image, image_shape) 109 | # boxes,box_ind = perform_selective_search(,tf.cast(width,tf.float32),tf.cast(height,tf.float32),(loc_x,loc_y,loc_x+loc_w,loc_y+loc_h)) 110 | 111 | # resized_and_cropped_image = tf.image.crop_and_resize(image, boxes, box_ind, crop_size=[227,227]) 112 | 113 | images = tf.train.shuffle_batch([image_tf],enqueue_many=True,batch_size=32,num_threads=1,capacity=50000,min_after_dequeue=10000) 114 | 115 | return images 116 | def split_spn(filename_queue): 117 | reader = tf.TFRecordReader() 118 | _, serialized_example = reader.read(filename_queue) 119 | 120 | features = tf.parse_single_example( 121 | serialized_example, 122 | features={ 123 | 'image_raw':tf.FixedLenFeature([], tf.string), 124 | 'width': tf.FixedLenFeature([], tf.int64), 125 | 'height': tf.FixedLenFeature([], tf.int64), 126 | 'loc_x': tf.FixedLenFeature([], tf.int64), 127 | 'loc_y': tf.FixedLenFeature([], tf.int64), 128 | 'loc_w': tf.FixedLenFeature([], tf.int64), 129 | 'loc_h': tf.FixedLenFeature([], tf.int64) 130 | }) 131 | 132 | image = tf.decode_raw(features['image_raw'], tf.uint8) 133 | 134 | height = tf.cast(features['height'], tf.int32) 135 | width = tf.cast(features['width'], tf.int32) 136 | loc_x = tf.cast(features['loc_x'], tf.float32) 137 | loc_y = tf.cast(features['loc_y'], tf.float32) 138 | loc_w = tf.cast(features['loc_w'], tf.float32) 139 | loc_h = 
tf.cast(features['loc_h'], tf.float32) 140 | 141 | image_shape = tf.pack([height, width, 3]) 142 | image_1 = tf.reshape(image, image_shape) 143 | image_shape = tf.pack([1,height, width, 3]) 144 | image_2 = tf.cast(tf.reshape(image, image_shape),tf.float32) 145 | height = tf.cast(features['height'], tf.float32) 146 | width = tf.cast(features['width'], tf.float32) 147 | crop_index = tf.pack([[tf.divide(loc_y,height),tf.divide(loc_x,width),tf.divide(loc_y+loc_h,height),tf.divide(loc_w+loc_x,width)]]) 148 | #boxes,box_ind = perform_selective_search(,tf.cast(width,tf.float32),tf.cast(height,tf.float32),(loc_x,loc_y,loc_x+loc_w,loc_y+loc_h)) 149 | 150 | resized_image = tf.image.resize_image_with_crop_or_pad(image=image_1,target_height=500,target_width=500) 151 | resized_and_cropped_image = tf.image.crop_and_resize(image_2,crop_index,[0]*1,crop_size=[227,227]) 152 | orig_images,cropped_images = tf.train.shuffle_batch([resized_image,resized_and_cropped_image],batch_size=10,num_threads=1,capacity=50,min_after_dequeue=10) 153 | 154 | return orig_images,cropped_images 155 | filename_queue = tf.train.string_input_producer([tf_record_file], num_epochs=1) 156 | 157 | ip1,ip2 = split_spn(filename_queue) 158 | 159 | init_op = tf.group(tf.global_variables_initializer(),tf.local_variables_initializer()) 160 | 161 | print "Model Done" 162 | with tf.Session() as sess: 163 | sess.run(init_op) 164 | coord = tf.train.Coordinator() 165 | threads = tf.train.start_queue_runners(coord=coord) 166 | op = sess.run([ip1,ip2]) 167 | 168 | output = np.asarray(op[0]) 169 | #print output.shape 170 | for i in range(output.shape[0]): 171 | cv2.imshow('result',output[i,:,:,:]/255.0) 172 | cv2.waitKey(0) 173 | break 174 | output = np.asarray(op[1]) 175 | #print output.shape 176 | for i in range(output.shape[0]): 177 | cv2.imshow('result',output[i,0,:,:,:]/255.0) 178 | cv2.waitKey(0) 179 | break 180 | coord.request_stop() 181 | coord.join(threads) -------------------------------------------------------------------------------- /new_files/vis.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from pdb import set_trace as brk 3 | def vis_results(img,res_dict): 4 | 5 | for i in range(len(res_dict['location'])): 6 | cv2.rectangle(img,(int(res_dict['location'][i][0]),int(res_dict['location'][i][1])),(int(res_dict['location'][i][2]), 7 | int(res_dict['location'][i][3])),(0,255,0),2) 8 | for j in range(res_dict['landmarks'][i].shape[0]): 9 | print (int(res_dict['landmarks'][i][j,0]),int(res_dict['landmarks'][i][j,1])) 10 | cv2.circle(img,(int(res_dict['landmarks'][i][j,0]),int(res_dict['landmarks'][i][j,1])), 1, (0,0,255), 2) 11 | #Write M for male, F for Female 12 | center_x = int(int(res_dict['location'][i][0]) + (int(res_dict['location'][i][2]) - int(res_dict['location'][i][0]))) 13 | center_y = int(int(res_dict['location'][i][1]) + (int(res_dict['location'][i][3]) - int(res_dict['location'][i][1]))) 14 | 15 | if res_dict['gender'][i][0] < 0.5: 16 | #cv2.putText(img,'M',(center_x,center_y), cv2.FONT_HERSHEY_SIMPLEX, 1,(153,0,76),2,cv2.LINE_AA) 17 | cv2.putText(img,'M',(center_x,center_y), cv2.FONT_HERSHEY_SIMPLEX, 1,(127,0,255),2,cv2.LINE_AA) 18 | elif res_dict['gender'][i][0] >= 0.5: 19 | cv2.putText(img,'F',(center_x,center_y), cv2.FONT_HERSHEY_SIMPLEX, 1,(127,0,255),2,cv2.LINE_AA) 20 | 21 | cv2.imshow('result',img/255.0) 22 | cv2.waitKey(0) -------------------------------------------------------------------------------- /results_analysis.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | np.set_printoptions(linewidth=200) 3 | from sklearn.metrics import roc_curve, precision_recall_curve 4 | import matplotlib.pyplot as plt 5 | 6 | 7 | def softmax(x): 8 | return np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True) 9 | 10 | 11 | pred = np.load('test_results.npy') 12 | truth = np.load('truth.npy') 13 | 14 | print pred.shape, truth.shape 15 | 16 | 17 | prob = softmax(pred[:,:2]) 18 | # print prob.sum(axis = 1) 19 | print pred 20 | 21 | 22 | print 'Detection accuracy: ', np.sum(np.argmax(prob, axis = 1) == truth[:,0].astype(np.bool))/float(prob.shape[0]) 23 | 24 | print 'Gender accuracy: ', np.sum(np.argmax(softmax(pred[:,68:70]), axis = 1) == truth[:,-1].astype(np.bool))/float(prob.shape[0]) 25 | 26 | # detection 27 | fpr, tpr, thresholds = roc_curve(truth[:,0], prob[:,1]) 28 | precision, recall, th = precision_recall_curve(truth[:,0], prob[:,1]) 29 | 30 | # plt.plot(recall, precision) 31 | # plt.xlabel('Recall') 32 | # plt.ylabel('Precision') 33 | # # plt.savefig('detection_pr_re.eps', format='eps', dpi=1000) 34 | # plt.show() 35 | 36 | 37 | # plt.plot(fpr, tpr) 38 | # plt.xlabel('False positive rate') 39 | # plt.ylabel('True positive rate') 40 | # # plt.savefig('detection_roc.eps', format='eps', dpi=1000) 41 | # plt.show() 42 | 43 | -------------------------------------------------------------------------------- /selective_search.py: -------------------------------------------------------------------------------- 1 | #!/home/shashank/anaconda2/bin 2 | import dlib 3 | from skimage import io 4 | import cv2 5 | import numpy as np 6 | import tensorflow as tf 7 | import csv 8 | from multiprocessing import Pool 9 | from multiprocessing.dummy import Pool as ThreadPool 10 | 11 | pool = ThreadPool(4) 12 | DEBUG_FLAG = False 13 | tfrecords_full_filename = 'aflw.tfrecords' 14 | tfrecords_training_filename = 'aflw_training.tfrecords' 15 | writer = tf.python_io.TFRecordWriter(tfrecords_training_filename) 16 | 17 | if DEBUG_FLAG: 18 | debug_fp = open('debug.csv','wb') 19 | debug_fp_csv = csv.writer(debug_fp) 20 | 21 | def calc_2D_IOU(bb1,bb2): 22 | top_left_x1 = bb1[0] 23 | top_left_y1 = bb1[1] 24 | bottom_right_x1 = bb1[2] 25 | bottom_right_y1 = bb1[3] 26 | 27 | top_left_x2 = bb2[0] 28 | top_left_y2 = bb2[1] 29 | bottom_right_x2 = bb2[2] 30 | bottom_right_y2 = bb2[3] 31 | 32 | intersect_top_left_x = max(bb1[0],bb2[0]) 33 | intersect_top_left_y = max(bb1[1],bb2[1]) 34 | intersect_bottom_right_x = min(bb1[2],bb2[2]) 35 | intersect_bottom_right_y = min(bb1[3],bb2[3]) 36 | 37 | intersect_area = (intersect_bottom_right_x-intersect_top_left_x+1)*(intersect_bottom_right_y-intersect_top_left_y+1) 38 | total_area = (bottom_right_x1-top_left_x1+1)*(bottom_right_y1-top_left_y1+1) + (bottom_right_x2-top_left_x2+1)*(bottom_right_y2-top_left_y2+1) - intersect_area 39 | iou = float(intersect_area)/float(total_area+0.0) 40 | return iou 41 | 42 | 43 | def perform_selective_search(img,ground_truth): 44 | rects=[] 45 | dlib.find_candidate_object_locations(img, rects, min_size=500) 46 | filter_positive_rects=[] 47 | filter_negative_rects=[] 48 | 49 | for rect in rects: 50 | iou = calc_2D_IOU(ground_truth,(rect.left(),rect.top(),rect.right(),rect.bottom())) 51 | 52 | if DEBUG_FLAG: 53 | debug_fp_csv.writerow([iou,ground_truth[0],ground_truth[1],ground_truth[2],ground_truth[3],rect.left(),rect.top(),rect.right(),rect.bottom()]) 54 | if iou > 0.5: 55 | 
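# Positive proposals are stored in the normalized [top/h, left/w, bottom/h,
# right/w] order expected by tf.image.crop_and_resize; h and w are assumed
# here to hold the image height and width in pixels.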
filter_positive_rects.append([rect.top()/h,rect.left()/w,rect.bottom()/h,rect.right()/w]) 56 | elif iou < 0.35: 57 | filter_negative_rects.append([rect.top()/h,rect.left()/w,rect.bottom()/h,rect.right()/w]) 58 | 59 | return filter_positive_rects,filter_negative_rects 60 | 61 | def visualise(img,rects): 62 | cv2.namedWindow('result', cv2.WINDOW_NORMAL) 63 | cv2.resizeWindow('result', 600,600) 64 | for rect in rects: 65 | cv2.rectangle(img,(rect.left(),rect.top()),(rect.right(),rect.bottom()),(0,255,0),2) 66 | 67 | cv2.imshow('result',img) 68 | cv2.waitKey(0) 69 | 70 | 71 | def extract_tfrecord(): 72 | record_iterator = tf.python_io.tf_record_iterator(path=tfrecords_full_filename) 73 | 74 | for string_record in record_iterator: 75 | example = tf.train.Example() 76 | example.ParseFromString(string_record) 77 | 78 | img_string = example.features.feature['image_raw'].bytes_list.value[0] 79 | img_width = int(example.features.feature['width'].int64_list.value[0]) 80 | img_height = int(example.features.feature['height'].int64_list.value[0]) 81 | img_2d = np.fromstring(img_string, dtype=np.uint8).reshape(img_height,img_width,3) 82 | loc_x = int(example.features.feature['loc_x'].int64_list.value[0]) 83 | loc_y = int(example.features.feature['loc_y'].int64_list.value[0]) 84 | loc_w = int(example.features.feature['loc_w'].int64_list.value[0]) 85 | loc_h = int(example.features.feature['loc_h'].int64_list.value[0]) 86 | hard_postives,hard_negatives = perform_selective_search(img_2d,(loc_x,loc_y,loc_x+loc_w,loc_y+loc_h)) 87 | 88 | resized_and_cropped_image = tf.image.crop_and_resize(img_2d[np.newaxis,:], boxes, [0]*hard_postives.shape[0], crop_size=[227,227]) 89 | break 90 | #visualise(img_2d,hard_postives) 91 | break 92 | 93 | 94 | if __name__ == '__main__': 95 | extract_tfrecord() 96 | 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /split_tf_record.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import dlib 4 | from pdb import set_trace as brk 5 | 6 | tf_record_file = 'aflw_train.tfrecords' 7 | 8 | def calc_2D_IOU(bb1,bb2): 9 | top_left_x1 = bb1[0] 10 | top_left_y1 = bb1[1] 11 | bottom_right_x1 = bb1[2] 12 | bottom_right_y1 = bb1[3] 13 | 14 | top_left_x2 = bb2[0] 15 | top_left_y2 = bb2[1] 16 | bottom_right_x2 = bb2[2] 17 | bottom_right_y2 = bb2[3] 18 | 19 | intersect_top_left_x = max(bb1[0],bb2[0]) 20 | intersect_top_left_y = max(bb1[1],bb2[1]) 21 | intersect_bottom_right_x = min(bb1[2],bb2[2]) 22 | intersect_bottom_right_y = min(bb1[3],bb2[3]) 23 | 24 | intersect_area = (intersect_bottom_right_x-intersect_top_left_x+1)*(intersect_bottom_right_y-intersect_top_left_y+1) 25 | total_area = (bottom_right_x1-top_left_x1+1)*(bottom_right_y1-top_left_y1+1) + (bottom_right_x2-top_left_x2+1)*(bottom_right_y2-top_left_y2+1) - intersect_area 26 | iou = float(intersect_area)/float(total_area+0.0) 27 | return iou 28 | 29 | 30 | def perform_selective_search(img,w,h,ground_truth): 31 | print "Came:" 32 | rects=[] 33 | dlib.find_candidate_object_locations(img, rects, min_size=500) 34 | filter_positive_rects=[] 35 | filter_negative_rects=[] 36 | 37 | for rect in rects: 38 | iou = calc_2D_IOU(ground_truth,(rect.left(),rect.top(),rect.right(),rect.bottom())) 39 | 40 | if DEBUG_FLAG: 41 | debug_fp_csv.writerow([iou,ground_truth[0],ground_truth[1],ground_truth[2],ground_truth[3],rect.left(),rect.top(),rect.right(),rect.bottom()]) 42 | if iou > 0.5: 43 | 
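# Note that rect.top(), rect.left(), etc. are integers, so w and h must be
# passed in as floats; under Python 2 integer division an integer h or w
# would truncate these normalized coordinates to zero.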
filter_positive_rects.append([rect.top()/h,rect.left()/w,rect.bottom()/h,rect.right()/w]) 44 | elif iou < 0.35: 45 | filter_negative_rects.append([rect.top()/h,rect.left()/w,rect.bottom()/h,rect.right()/w]) 46 | 47 | return np.asarray(filter_positive_rects),np.asarray(filter_negative_rects) 48 | 49 | def split_(filename_queue, sess): 50 | brk() 51 | reader = tf.TFRecordReader() 52 | _, serialized_example = reader.read(filename_queue) 53 | 54 | features = tf.parse_single_example( 55 | serialized_example, 56 | features={ 57 | 'image_raw':tf.FixedLenFeature([], tf.string), 58 | 'width': tf.FixedLenFeature([], tf.int64), 59 | 'height': tf.FixedLenFeature([], tf.int64), 60 | 'loc_x': tf.FixedLenFeature([], tf.int64), 61 | 'loc_y': tf.FixedLenFeature([], tf.int64), 62 | 'loc_w': tf.FixedLenFeature([], tf.int64), 63 | 'loc_h': tf.FixedLenFeature([], tf.int64) 64 | }) 65 | 66 | image = tf.decode_raw(features['image_raw'], tf.uint8) 67 | 68 | height = tf.cast(features['height'], tf.int32) 69 | width = tf.cast(features['width'], tf.int32) 70 | loc_x = tf.cast(features['loc_x'], tf.int32) 71 | loc_y = tf.cast(features['loc_y'], tf.int32) 72 | loc_w = tf.cast(features['loc_w'], tf.int32) 73 | loc_h = tf.cast(features['loc_h'], tf.int32) 74 | 75 | image_shape = tf.pack([height, width, 3]) 76 | image = tf.reshape(image, image_shape) 77 | height,width,loc_x,loc_y,loc_h,loc_w = sess.run([height,width,loc_x,loc_y,loc_h,loc_w]) 78 | # boxes,box_ind = perform_selective_search(,tf.cast(width,tf.float32),tf.cast(height,tf.float32),(loc_x,loc_y,loc_x+loc_w,loc_y+loc_h)) 79 | boxes = np.asarray([[loc_y/float(height),loc_x/float(width),(loc_y+loc_h)/float(height),(loc_x+loc_w)/float(width)]]) 80 | resized_and_cropped_image = tf.image.crop_and_resize(image.astype(np.float32), boxes.astype(np.float32), [0]*1, crop_size=[227,227]) 81 | 82 | 83 | images = tf.train.shuffle_batch([resized_and_cropped_image],batch_size=10,num_threads=1,capacity=50,min_after_dequeue=10) 84 | 85 | return images 86 | 87 | filename_queue = tf.train.string_input_producer([tf_record_file], num_epochs=1) 88 | 89 | 90 | 91 | init_op = tf.group(tf.global_variables_initializer(),tf.local_variables_initializer()) 92 | 93 | print "model done" 94 | 95 | with tf.Session() as sess: 96 | 97 | sess.run(init_op) 98 | images = split_(filename_queue, sess) 99 | 100 | coord = tf.train.Coordinator() 101 | threads = tf.train.start_queue_runners(coord=coord) 102 | op_images = sess.run([images]) 103 | print np.asarray(op_images).shape 104 | 105 | coord.request_stop() 106 | coord.join(threads) -------------------------------------------------------------------------------- /test_results.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashanktyagi/HyperFace-TensorFlow-implementation/6ebc0a96fa952fbf2eb8c4eda56f92fa313fa3b9/test_results.npy -------------------------------------------------------------------------------- /truth.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashanktyagi/HyperFace-TensorFlow-implementation/6ebc0a96fa952fbf2eb8c4eda56f92fa313fa3b9/truth.npy -------------------------------------------------------------------------------- /version_0.0.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashanktyagi/HyperFace-TensorFlow-implementation/6ebc0a96fa952fbf2eb8c4eda56f92fa313fa3b9/version_0.0.txt 
-------------------------------------------------------------------------------- /with SPN/logs/events.out.tfevents.1494397553.shashanks-mbp.dynamic.ucsd.edu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashanktyagi/HyperFace-TensorFlow-implementation/6ebc0a96fa952fbf2eb8c4eda56f92fa313fa3b9/with SPN/logs/events.out.tfevents.1494397553.shashanks-mbp.dynamic.ucsd.edu -------------------------------------------------------------------------------- /with SPN/main.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os 3 | from model import * 4 | 5 | weights_path = '/Users/shashank/Tensorflow/SPN/weights/' 6 | imgs_path = '/Users/shashank/Tensorflow/CSE252C-Hyperface/git/truth_data.npy' 7 | tf_record_file_path = '../aflw_train.tfrecords' 8 | if not os.path.exists('./logs'): 9 | os.makedirs('./logs') 10 | 11 | map(os.unlink, (os.path.join( './logs',f) for f in os.listdir('./logs')) ) 12 | 13 | 14 | 15 | with tf.Session() as sess: 16 | print 'Building Graph...' 17 | model = Network(sess,tf_record_file_path) 18 | print 'Done!\nInitializing variables...' 19 | sess.run(tf.global_variables_initializer()) 20 | print 'Done!' 21 | model.train() -------------------------------------------------------------------------------- /with SPN/model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | import numpy as np 4 | from spatial_transformer import transformer 5 | from tqdm import tqdm 6 | from pdb import set_trace as brk 7 | import time 8 | 9 | class Network(object): 10 | 11 | def __init__(self, sess,tf_record_file_path=None): 12 | 13 | self.sess = sess 14 | self.batch_size = 2 15 | self.img_height = 500 16 | self.img_width = 500 17 | self.out_height = 227 18 | self.out_width = 227 19 | self.channel = 3 20 | 21 | self.num_epochs = 10 22 | 23 | # Hyperparameters 24 | self.weight_detect = 1 25 | self.weight_landmarks = 5 26 | self.weight_visibility = 0.5 27 | self.weight_pose = 5 28 | self.weight_gender = 2 29 | 30 | #tf_Record Paramters 31 | self.filename_queue = tf.train.string_input_producer([tf_record_file_path], num_epochs=self.num_epochs) 32 | self.build_network() 33 | 34 | 35 | def build_network(self): 36 | 37 | self.X = tf.placeholder(tf.float32, [self.batch_size, self.img_height, self.img_width, self.channel], name='images') 38 | self.detection = tf.placeholder(tf.float32, [self.batch_size,2], name='detection') 39 | self.landmarks = tf.placeholder(tf.float32, [self.batch_size, 42], name='landmarks') 40 | self.visibility = tf.placeholder(tf.float32, [self.batch_size,21], name='visibility') 41 | self.pose = tf.placeholder(tf.float32, [self.batch_size,3], name='pose') 42 | self.gender = tf.placeholder(tf.float32, [self.batch_size,2], name='gender') 43 | 44 | 45 | theta = self.localization_squeezenet(self.X) 46 | self.T_mat = tf.reshape(theta, [-1, 2,3]) 47 | self.cropped = transformer(self.X, self.T_mat, [self.out_height, self.out_width]) 48 | 49 | net_output = self.hyperface(self.cropped) # (out_detection, out_landmarks, out_visibility, out_pose, out_gender) 50 | 51 | 52 | loss_detection = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(net_output[0], self.detection)) 53 | 54 | visibility_mask = tf.reshape(tf.tile(tf.expand_dims(self.visibility, axis=2), [1,1,2]), [self.batch_size, -1]) 55 | loss_landmarks = 
tf.reduce_mean(tf.square(visibility_mask*(net_output[1] - self.landmarks))) 56 | 57 | loss_visibility = tf.reduce_mean(tf.square(net_output[2] - self.visibility)) 58 | loss_pose = tf.reduce_mean(tf.square(net_output[3] - self.pose)) 59 | loss_gender = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(net_output[4], self.gender)) 60 | 61 | self.loss = self.weight_detect*loss_detection + self.weight_landmarks*loss_landmarks \ 62 | + self.weight_visibility*loss_visibility + self.weight_pose*loss_pose \ 63 | + self.weight_gender*loss_gender 64 | 65 | 66 | 67 | def get_transformation_matrix(self, theta): 68 | with tf.name_scope('T_matrix'): 69 | theta = tf.expand_dims(theta, 2) 70 | mat = tf.constant(np.repeat(np.array([[[1,0,0],[0,0,0],[0,1,0],[0,0,0],[0,1,0],[0,0,1]]]), 71 | self.batch_size, axis=0), dtype=tf.float32) 72 | tr_matrix = tf.squeeze(tf.matmul(mat, theta)) 73 | 74 | return tr_matrix 75 | 76 | 77 | 78 | def train(self): 79 | 80 | optimizer = tf.train.AdamOptimizer().minimize(self.loss) 81 | 82 | writer = tf.summary.FileWriter('./logs', self.sess.graph) 83 | loss_summ = tf.summary.scalar('loss', self.loss) 84 | img_summ = tf.summary.image('cropped_image', self.cropped) 85 | 86 | 87 | tic = time.time() 88 | print self.sess.run(self.T_mat, feed_dict={self.X: np.random.randn(self.batch_size, self.img_height, self.img_width, self.channel)}) 89 | toc = time.time() 90 | print toc-tic 91 | images = self.load_from_tfRecord(self.filename_queue) 92 | 93 | coord = tf.train.Coordinator() 94 | threads = tf.train.start_queue_runners(sess = self.sess, coord = coord) 95 | 96 | for i in xrange(2): 97 | img_batch = self.sess.run(images) 98 | print img_batch.shape 99 | 100 | 101 | def hyperface(self,inputs, reuse = False): 102 | 103 | if reuse: 104 | tf.get_variable_scope().reuse_variables() 105 | 106 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 107 | activation_fn = tf.nn.relu, 108 | weights_initializer = tf.truncated_normal_initializer(0.0, 0.01) ): 109 | 110 | conv1 = slim.conv2d(inputs, 96, [11,11], 4, padding= 'VALID', scope='conv1') 111 | max1 = slim.max_pool2d(conv1, [3,3], 2, padding= 'VALID', scope='max1') 112 | 113 | conv1a = slim.conv2d(max1, 256, [4,4], 4, padding= 'VALID', scope='conv1a') 114 | 115 | conv2 = slim.conv2d(max1, 256, [5,5], 1, scope='conv2') 116 | max2 = slim.max_pool2d(conv2, [3,3], 2, padding= 'VALID', scope='max2') 117 | conv3 = slim.conv2d(max2, 384, [3,3], 1, scope='conv3') 118 | 119 | conv3a = slim.conv2d(conv3, 256, [2,2], 2, padding= 'VALID', scope='conv3a') 120 | 121 | conv4 = slim.conv2d(conv3, 384, [3,3], 1, scope='conv4') 122 | conv5 = slim.conv2d(conv4, 256, [3,3], 1, scope='conv5') 123 | pool5 = slim.max_pool2d(conv5, [3,3], 2, padding= 'VALID', scope='pool5') 124 | 125 | concat_feat = tf.concat(3, [conv1a, conv3a, pool5]) 126 | conv_all = slim.conv2d(concat_feat, 192, [1,1], 1, padding= 'VALID', scope='conv_all') 127 | 128 | shape = int(np.prod(conv_all.get_shape()[1:])) 129 | fc_full = slim.fully_connected(tf.reshape(conv_all, [-1, shape]), 3072, scope='fc_full') 130 | 131 | fc_detection = slim.fully_connected(fc_full, 512, scope='fc_detection1') 132 | fc_landmarks = slim.fully_connected(fc_full, 512, scope='fc_landmarks1') 133 | fc_visibility = slim.fully_connected(fc_full, 512, scope='fc_visibility1') 134 | fc_pose = slim.fully_connected(fc_full, 512, scope='fc_pose1') 135 | fc_gender = slim.fully_connected(fc_full, 512, scope='fc_gender1') 136 | 137 | out_detection = slim.fully_connected(fc_detection, 2, scope='fc_detection2', 
activation_fn = None) 138 | out_landmarks = slim.fully_connected(fc_landmarks, 42, scope='fc_landmarks2', activation_fn = None) 139 | out_visibility = slim.fully_connected(fc_visibility, 21, scope='fc_visibility2', activation_fn = None) 140 | out_pose = slim.fully_connected(fc_pose, 3, scope='fc_pose2', activation_fn = None) 141 | out_gender = slim.fully_connected(fc_gender, 2, scope='fc_gender2', activation_fn = None) 142 | 143 | return [tf.nn.softmax(out_detection), out_landmarks, out_visibility, out_pose, tf.nn.softmax(out_gender)] 144 | 145 | 146 | 147 | def localization_VGG16(self,inputs): 148 | 149 | with tf.variable_scope('localization_network'): 150 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 151 | activation_fn = tf.nn.relu, 152 | weights_initializer = tf.constant_initializer(0.0)): 153 | 154 | net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') 155 | net = slim.max_pool2d(net, [2, 2], scope='pool1') 156 | net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2') 157 | net = slim.max_pool2d(net, [2, 2], scope='pool2') 158 | net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3') 159 | net = slim.max_pool2d(net, [2, 2], scope='pool3') 160 | net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4') 161 | net = slim.max_pool2d(net, [2, 2], scope='pool4') 162 | net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5') 163 | net = slim.max_pool2d(net, [2, 2], scope='pool5') 164 | shape = int(np.prod(net.get_shape()[1:])) 165 | 166 | net = slim.fully_connected(tf.reshape(net, [-1, shape]), 4096, scope='fc6') 167 | net = slim.fully_connected(net, 1024, scope='fc7') 168 | identity = np.array([[1., 0., 0.], 169 | [0., 1., 0.]]) 170 | identity = identity.flatten() 171 | net = slim.fully_connected(net, 6, biases_initializer = tf.constant_initializer(identity) , scope='fc8') 172 | 173 | return net 174 | 175 | 176 | def localization_squeezenet(self, inputs): 177 | 178 | with tf.variable_scope('localization_network'): 179 | with slim.arg_scope([slim.conv2d], activation_fn = tf.nn.relu, 180 | padding = 'SAME', 181 | weights_initializer = tf.constant_initializer(0.0)): 182 | 183 | conv1 = slim.conv2d(inputs, 64, [3,3], 2, padding = 'VALID', scope='conv1') 184 | pool1 = slim.max_pool2d(conv1, [2,2], 2, scope='pool1') 185 | fire2 = self.fire_module(pool1, 16, 64, scope = 'fire2') 186 | fire3 = self.fire_module(fire2, 16, 64, scope = 'fire3', res_connection=True) 187 | fire4 = self.fire_module(fire3, 32, 128, scope = 'fire4') 188 | pool4 = slim.max_pool2d(fire4, [2,2], 2, scope='pool4') 189 | fire5 = self.fire_module(pool4, 32, 128, scope = 'fire5', res_connection=True) 190 | fire6 = self.fire_module(fire5, 48, 192, scope = 'fire6') 191 | fire7 = self.fire_module(fire6, 48, 192, scope = 'fire7', res_connection=True) 192 | fire8 = self.fire_module(fire7, 64, 256, scope = 'fire8') 193 | pool8 = slim.max_pool2d(fire8, [2,2], 2, scope='pool8') 194 | fire9 = self.fire_module(pool8, 64, 256, scope = 'fire9', res_connection=True) 195 | conv10 = slim.conv2d(fire9, 128, [1,1], 1, scope='conv10') 196 | shape = int(np.prod(conv10.get_shape()[1:])) 197 | identity = np.array([[1., 0., 0.], 198 | [0., 1., 0.]]) 199 | identity = identity.flatten() 200 | fc11 = slim.fully_connected(tf.reshape(conv10, [-1, shape]), 6, biases_initializer = tf.constant_initializer(identity), scope='fc11') 201 | return fc11 202 | 203 | 204 | def fire_module(self, inputs, s_channels, e_channels, scope, res_connection = False): 205 | with tf.variable_scope(scope): 206 
|		sq = self.squeeze(inputs, s_channels, 'squeeze')
207 | 		ex = self.expand(sq, e_channels, 'expand')
208 | 		if res_connection:
209 | 			ret = tf.nn.relu(tf.add(inputs,ex))
210 | 		else:
211 | 			ret = tf.nn.relu(ex)
212 | 		return ret
213 | 
214 | 
215 | 	def squeeze(self, inputs, channels, scope):
216 | 		with slim.arg_scope([slim.conv2d], activation_fn = None,
217 | 							padding = 'SAME',
218 | 							weights_initializer = tf.truncated_normal_initializer(0.0, 0.01)):
219 | 			sq = slim.conv2d(inputs, channels, [1,1], 1, scope = scope)
220 | 		return sq
221 | 
222 | 	def expand(self, inputs, channels, scope):
223 | 		with slim.arg_scope([slim.conv2d], activation_fn = None,
224 | 							padding = 'SAME',
225 | 							weights_initializer = tf.truncated_normal_initializer(0.0, 0.01)):
226 | 			with tf.variable_scope(scope):
227 | 				e1x1 = slim.conv2d(inputs, channels, [1,1], 1, scope='e1x1')
228 | 				e3x3 = slim.conv2d(inputs, channels, [3,3], 1, scope='e3x3')
229 | 				expand = tf.concat(3, [e1x1, e3x3])
230 | 
231 | 		return expand
232 | 
233 | 
234 | 
235 | 	def predict(self, imgs_path):
236 | 		print 'Running inference...'
237 | 		np.set_printoptions(suppress=True)
238 | 		imgs = (np.load(imgs_path) - 127.5)/128.0
239 | 		shape = imgs.shape
240 | 		self.X = tf.placeholder(tf.float32, [shape[0], self.img_height, self.img_width, self.channel], name='images')
241 | 		pred = self.hyperface(self.X, reuse = True)  # the network is built by hyperface(); this class has no network() method
242 | 
243 | 		net_preds = self.sess.run(pred, feed_dict={self.X: imgs})
244 | 
245 | 		print net_preds[-1]
246 | 		import matplotlib.pyplot as plt
247 | 		plt.imshow(imgs[-1]);plt.show()
248 | 
249 | 		brk()
250 | 
251 | 	def load_from_tfRecord(self, filename_queue, resize_size=(500, 500)):  # (width, height); default assumed to match the 500x500 input placeholder, since train() passes no size
252 | 
253 | 		reader = tf.TFRecordReader()
254 | 		_, serialized_example = reader.read(filename_queue)
255 | 
256 | 		features = tf.parse_single_example(
257 | 			serialized_example,
258 | 			features={
259 | 				'image_raw':tf.FixedLenFeature([], tf.string),
260 | 				'width': tf.FixedLenFeature([], tf.int64),
261 | 				'height': tf.FixedLenFeature([], tf.int64)
262 | 			})
263 | 
264 | 		image = tf.decode_raw(features['image_raw'], tf.uint8)  # the TFRecord stores raw uint8 bytes (see data_prep.py)
265 | 		orig_height = tf.cast(features['height'], tf.int32)
266 | 		orig_width = tf.cast(features['width'], tf.int32)
267 | 
268 | 		image_shape = tf.pack([orig_height,orig_width,3])
269 | 		image_tf = tf.reshape(image,image_shape)
270 | 		print image_shape
271 | 		resized_image = tf.image.resize_image_with_crop_or_pad(image_tf,target_height=resize_size[1],target_width=resize_size[0])
272 | 
273 | 		images = tf.train.shuffle_batch([resized_image],batch_size=self.batch_size,num_threads=1,capacity=50,min_after_dequeue=10)
274 | 
275 | 		return images
276 | 
277 | 
278 | 
279 | 	def load_weights(self, path):
280 | 		variables = slim.get_model_variables()
281 | 		print 'Loading weights...'
282 | 		for var in tqdm(variables):
283 | 			if ('conv' in var.name) and ('weights' in var.name):
284 | 				self.sess.run(var.assign(np.load(path+var.name.split('/')[0]+'/W.npy').transpose((2,3,1,0))))  # presumably Caffe-ordered (out_ch, in_ch, kH, kW) -> TF (kH, kW, in_ch, out_ch)
285 | 			elif ('fc' in var.name) and ('weights' in var.name):
286 | 				self.sess.run(var.assign(np.load(path+var.name.split('/')[0]+'/W.npy').T))  # FC weights stored as (out, in); TF expects (in, out)
287 | 			elif 'biases' in var.name:
288 | 				self.sess.run(var.assign(np.load(path+var.name.split('/')[0]+'/b.npy')))
289 | 		print 'Weights loaded!!' 
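	# How the two helpers above are presumably meant to be used (a sketch; nothing in
	# this file calls them): `with SPN/main.py` defines `weights_path` (a folder of
	# per-layer W.npy / b.npy files) and `imgs_path` (the truth_data.npy produced by
	# data_prep.py) but never uses them, so the intended wiring is likely
	#
	#     model.load_weights(weights_path)
	#     model.predict(imgs_path)
	#
	# run inside the session after the variables have been initialized.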
290 | 291 | def print_variables(self): 292 | variables = slim.get_model_variables() 293 | print 'Model Variables:\n' 294 | for var in variables: 295 | print var.name, ' ', var.get_shape() 296 | 297 | 298 | 299 | 300 | -------------------------------------------------------------------------------- /with SPN/model.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashanktyagi/HyperFace-TensorFlow-implementation/6ebc0a96fa952fbf2eb8c4eda56f92fa313fa3b9/with SPN/model.pyc -------------------------------------------------------------------------------- /with SPN/spatial_transformer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | import tensorflow as tf 16 | 17 | 18 | def transformer(U, theta, out_size, name='SpatialTransformer', **kwargs): 19 | """Spatial Transformer Layer 20 | 21 | Implements a spatial transformer layer as described in [1]_. 22 | Based on [2]_ and edited by David Dao for Tensorflow. 23 | 24 | Parameters 25 | ---------- 26 | U : float 27 | The output of a convolutional net should have the 28 | shape [num_batch, height, width, num_channels]. 29 | theta: float 30 | The output of the 31 | localisation network should be [num_batch, 6]. 32 | out_size: tuple of two ints 33 | The size of the output of the network (height, width) 34 | 35 | References 36 | ---------- 37 | .. [1] Spatial Transformer Networks 38 | Max Jaderberg, Karen Simonyan, Andrew Zisserman, Koray Kavukcuoglu 39 | Submitted on 5 Jun 2015 40 | .. 
[2] https://github.com/skaae/transformer_network/blob/master/transformerlayer.py 41 | 42 | Notes 43 | ----- 44 | To initialize the network to the identity transform init 45 | ``theta`` to : 46 | identity = np.array([[1., 0., 0.], 47 | [0., 1., 0.]]) 48 | identity = identity.flatten() 49 | theta = tf.Variable(initial_value=identity) 50 | 51 | """ 52 | 53 | def _repeat(x, n_repeats): 54 | with tf.variable_scope('_repeat'): 55 | rep = tf.transpose(tf.expand_dims(tf.ones(shape=tf.pack([n_repeats, ])), 1), [1, 0]) 56 | rep = tf.cast(rep, 'int32') 57 | x = tf.matmul(tf.reshape(x, (-1, 1)), rep) 58 | return tf.reshape(x, [-1]) 59 | 60 | def _interpolate(im, x, y, out_size): 61 | with tf.variable_scope('_interpolate'): 62 | # constants 63 | # num_batch = tf.shape(im)[0] 64 | # height = tf.shape(im)[1] 65 | # width = tf.shape(im)[2] 66 | # channels = tf.shape(im)[3] 67 | 68 | shape = im.get_shape() 69 | num_batch = shape[0] 70 | height = shape[1] 71 | width = shape[2] 72 | channels = shape[3] 73 | 74 | 75 | 76 | 77 | x = tf.cast(x, 'float32') 78 | y = tf.cast(y, 'float32') 79 | height_f = tf.cast(height, 'float32') 80 | width_f = tf.cast(width, 'float32') 81 | out_height = out_size[0] 82 | out_width = out_size[1] 83 | zero = tf.zeros([], dtype='int32') 84 | max_y = tf.cast(tf.shape(im)[1] - 1, 'int32') 85 | max_x = tf.cast(tf.shape(im)[2] - 1, 'int32') 86 | 87 | # scale indices from [-1, 1] to [0, width/height] 88 | x = (x + 1.0)*(width_f) / 2.0 89 | y = (y + 1.0)*(height_f) / 2.0 90 | 91 | # do sampling 92 | x0 = tf.cast(tf.floor(x), 'int32') 93 | x1 = x0 + 1 94 | y0 = tf.cast(tf.floor(y), 'int32') 95 | y1 = y0 + 1 96 | 97 | x0 = tf.clip_by_value(x0, zero, max_x) 98 | x1 = tf.clip_by_value(x1, zero, max_x) 99 | y0 = tf.clip_by_value(y0, zero, max_y) 100 | y1 = tf.clip_by_value(y1, zero, max_y) 101 | dim2 = width 102 | dim1 = width*height 103 | base = _repeat(tf.range(num_batch)*dim1, out_height*out_width) 104 | base_y0 = base + y0*dim2 105 | base_y1 = base + y1*dim2 106 | idx_a = base_y0 + x0 107 | idx_b = base_y1 + x0 108 | idx_c = base_y0 + x1 109 | idx_d = base_y1 + x1 110 | 111 | # use indices to lookup pixels in the flat image and restore 112 | # channels dim 113 | im_flat = tf.reshape(im, tf.pack([-1, channels])) 114 | im_flat = tf.cast(im_flat, 'float32') 115 | Ia = tf.gather(im_flat, idx_a) 116 | Ib = tf.gather(im_flat, idx_b) 117 | Ic = tf.gather(im_flat, idx_c) 118 | Id = tf.gather(im_flat, idx_d) 119 | 120 | # and finally calculate interpolated values 121 | x0_f = tf.cast(x0, 'float32') 122 | x1_f = tf.cast(x1, 'float32') 123 | y0_f = tf.cast(y0, 'float32') 124 | y1_f = tf.cast(y1, 'float32') 125 | wa = tf.expand_dims(((x1_f-x) * (y1_f-y)), 1) 126 | wb = tf.expand_dims(((x1_f-x) * (y-y0_f)), 1) 127 | wc = tf.expand_dims(((x-x0_f) * (y1_f-y)), 1) 128 | wd = tf.expand_dims(((x-x0_f) * (y-y0_f)), 1) 129 | output = tf.add_n([wa*Ia, wb*Ib, wc*Ic, wd*Id]) 130 | return output 131 | 132 | def _meshgrid(height, width): 133 | with tf.variable_scope('_meshgrid'): 134 | # This should be equivalent to: 135 | # x_t, y_t = np.meshgrid(np.linspace(-1, 1, width), 136 | # np.linspace(-1, 1, height)) 137 | # ones = np.ones(np.prod(x_t.shape)) 138 | # grid = np.vstack([x_t.flatten(), y_t.flatten(), ones]) 139 | x_t = tf.matmul(tf.ones(shape=tf.pack([height, 1])), 140 | tf.transpose(tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), [1, 0])) 141 | y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1), 142 | tf.ones(shape=tf.pack([1, width]))) 143 | 144 | x_t_flat = tf.reshape(x_t, (1, -1)) 145 | 
y_t_flat = tf.reshape(y_t, (1, -1)) 146 | 147 | ones = tf.ones_like(x_t_flat) 148 | grid = tf.concat(0, [x_t_flat, y_t_flat, ones]) 149 | return grid 150 | 151 | def _transform(theta, input_dim, out_size): 152 | with tf.variable_scope('_transform'): 153 | # num_batch = tf.shape(input_dim)[0] 154 | # height = tf.shape(input_dim)[1] 155 | # width = tf.shape(input_dim)[2] 156 | # num_channels = tf.shape(input_dim)[3] 157 | 158 | shape = input_dim.get_shape() 159 | num_batch = shape[0] 160 | height = shape[1] 161 | width = shape[2] 162 | num_channels = shape[3] 163 | 164 | 165 | 166 | 167 | theta = tf.reshape(theta, (-1, 2, 3)) 168 | theta = tf.cast(theta, 'float32') 169 | 170 | # grid of (x_t, y_t, 1), eq (1) in ref [1] 171 | height_f = tf.cast(height, 'float32') 172 | width_f = tf.cast(width, 'float32') 173 | out_height = out_size[0] 174 | out_width = out_size[1] 175 | grid = _meshgrid(out_height, out_width) 176 | grid = tf.expand_dims(grid, 0) 177 | grid = tf.reshape(grid, [-1]) 178 | grid = tf.tile(grid, tf.pack([num_batch])) 179 | grid = tf.reshape(grid, tf.pack([num_batch, 3, -1])) 180 | 181 | # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s) 182 | T_g = tf.batch_matmul(theta, grid) 183 | x_s = tf.slice(T_g, [0, 0, 0], [-1, 1, -1]) 184 | y_s = tf.slice(T_g, [0, 1, 0], [-1, 1, -1]) 185 | x_s_flat = tf.reshape(x_s, [-1]) 186 | y_s_flat = tf.reshape(y_s, [-1]) 187 | 188 | input_transformed = _interpolate( 189 | input_dim, x_s_flat, y_s_flat, 190 | out_size) 191 | 192 | output = tf.reshape( 193 | input_transformed, tf.pack([num_batch, out_height, out_width, num_channels])) 194 | return output 195 | 196 | with tf.variable_scope(name): 197 | output = _transform(theta, U, out_size) 198 | return output 199 | 200 | 201 | def batch_transformer(U, thetas, out_size, name='BatchSpatialTransformer'): 202 | """Batch Spatial Transformer Layer 203 | 204 | Parameters 205 | ---------- 206 | 207 | U : float 208 | tensor of inputs [num_batch,height,width,num_channels] 209 | thetas : float 210 | a set of transformations for each input [num_batch,num_transforms,6] 211 | out_size : int 212 | the size of the output [out_height,out_width] 213 | 214 | Returns: float 215 | Tensor of size [num_batch*num_transforms,out_height,out_width,num_channels] 216 | """ 217 | with tf.variable_scope(name): 218 | num_batch, num_transforms = map(int, thetas.get_shape().as_list()[:2]) 219 | indices = [[i]*num_transforms for i in xrange(num_batch)] 220 | input_repeated = tf.gather(U, tf.reshape(indices, [-1])) 221 | return transformer(input_repeated, thetas, out_size) 222 | -------------------------------------------------------------------------------- /with SPN/spatial_transformer.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashanktyagi/HyperFace-TensorFlow-implementation/6ebc0a96fa952fbf2eb8c4eda56f92fa313fa3b9/with SPN/spatial_transformer.pyc --------------------------------------------------------------------------------
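For reference, a minimal sketch of how the transformer layer is driven. It mirrors the wiring in `with SPN/model.py`, where a localization network predicts the 6 affine parameters per image; here an identity theta is hard-coded purely for illustration, and the snippet assumes the same pre-1.0 TensorFlow / Python 2 environment as the rest of the repo:

import numpy as np
import tensorflow as tf
from spatial_transformer import transformer

batch_size = 2
images = tf.placeholder(tf.float32, [batch_size, 500, 500, 3], name='images')

# identity affine transform [[1, 0, 0], [0, 1, 0]] for every image in the batch;
# in model.py this comes from localization_squeezenet(images) instead
identity = np.array([[1., 0., 0.],
                     [0., 1., 0.]], dtype=np.float32).flatten()
theta = tf.constant(np.tile(identity[np.newaxis, :], (batch_size, 1)))

T_mat = tf.reshape(theta, [-1, 2, 3])             # [batch_size, 2, 3]
cropped = transformer(images, T_mat, [227, 227])  # [batch_size, 227, 227, 3]

with tf.Session() as sess:
    out = sess.run(cropped, feed_dict={images: np.random.randn(batch_size, 500, 500, 3)})
    print out.shape  # (2, 227, 227, 3); the identity theta just resamples the full image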