├── README.md ├── data_prep.py ├── main.py ├── main_prediction.py ├── model.py ├── model_prediction.py ├── new_files ├── data_prep.py ├── irp.py ├── lnms.py ├── main.py ├── model.py ├── selective_search.py ├── split_tf_record.py └── vis.py ├── results_analysis.py ├── selective_search.py ├── split_tf_record.py ├── test_results.npy ├── truth.npy ├── version_0.0.txt └── with SPN ├── logs └── events.out.tfevents.1494397553.shashanks-mbp.dynamic.ucsd.edu ├── main.py ├── model.py ├── model.pyc ├── spatial_transformer.py └── spatial_transformer.pyc /README.md: -------------------------------------------------------------------------------- 1 | # HyperFace 2 | 3 | A TensorFlow implementation of the following paper: 4 | 5 | HyperFace: A Deep Multi-task Learning Framework for Face Detection, Landmark Localization, Pose Estimation, and Gender Recognition (https://arxiv.org/abs/1603.01249) 6 | 7 | -------------------------------------------------------------------------------- /data_prep.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | #from skimage import io 4 | import sqlite3 5 | #import cv2 6 | import matplotlib.pyplot as plt 7 | import os 8 | import random 9 | from tqdm import tqdm 10 | 11 | # select_string = "faceimages.filepath, faces.face_id, facepose.roll, facepose.pitch, facepose.yaw, facerect.x, facerect.y, facerect.w, facerect.h" 12 | # from_string = "faceimages, faces, facepose, facerect" 13 | # where_string = "faces.face_id = facepose.face_id and faces.file_id = faceimages.file_id and faces.face_id = facerect.face_id" 14 | # query_string = "SELECT " + select_string + " FROM " + from_string + " WHERE " + where_string 15 | 16 | # conn = sqlite3.connect('/home/shashank/Documents/CSE-252C/AFLW/aflw/data/aflw.sqlite') 17 | # c = conn.cursor() 18 | 19 | img_path = '/home/shashank/Documents/CSE-252C/AFLW/' 20 | 21 | # tfrecords_train_filename = 'aflw_train.tfrecords' 22 | # tfrecords_test_filename = 'aflw_test.tfrecords' 23 | tfrecords_filename = 'aflw_train.tfrecords' 24 | # writer_train = tf.python_io.TFRecordWriter(tfrecords_train_filename) 25 | # writer_test = tf.python_io.TFRecordWriter(tfrecords_test_filename) 26 | 27 | def _bytes_feature(value): 28 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 29 | 30 | def _float_feature(value): 31 | return tf.train.Feature(float_list=tf.train.FloatList(value=[value])) 32 | 33 | def _int64_feature(value): 34 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 35 | 36 | def test_names(): 37 | l=[] 38 | names = os.listdir(img_path+'0') 39 | random.shuffle(names) 40 | l.append(['0/'+name for name in names[:300]]) 41 | 42 | names = os.listdir(img_path+'2') 43 | random.shuffle(names) 44 | l.append(['2/'+name for name in names[:300]]) 45 | 46 | names = os.listdir(img_path+'3') 47 | random.shuffle(names) 48 | l.append(['3/'+name for name in names[:400]]) 49 | 50 | return l[0]+l[1]+l[2] 51 | 52 | def make_tfrecord(test_images): 53 | 54 | it_test =0 55 | it_train = 0 56 | 57 | for row in c.execute(query_string): 58 | ''' 59 | row[0] = image path str 60 | row[1] = face id int 61 | row[2] = roll float 62 | row[3] = pitch float 63 | row[4] = yaw float 64 | row[5] = x int 65 | row[6] = y int 66 | row[7] = w int 67 | row[8] = h int 68 | ''' 69 | 70 | try: 71 | img_raw = np.asarray(io.imread(img_path+row[0])) 72 | w = img_raw.shape[1] 73 | h = img_raw.shape[0] 74 | 75 | img_raw = img_raw.tostring() 76 | example = 
tf.train.Example(features=tf.train.Features(feature={ 77 | 'image_raw':_bytes_feature(img_raw), 78 | 'width': _int64_feature(w), 79 | 'height': _int64_feature(h), 80 | 'face_id': _int64_feature(row[1]), 81 | 'roll': _float_feature(row[2]), 82 | 'pitch': _float_feature(row[3]), 83 | 'yaw': _float_feature(row[4]), 84 | 'loc_x': _int64_feature(row[5]), 85 | 'loc_y': _int64_feature(row[6]), 86 | 'loc_w': _int64_feature(row[7]), 87 | 'loc_h': _int64_feature(row[8]) 88 | })) 89 | 90 | if row[0] in test_images: 91 | writer_test.write(example.SerializeToString()) 92 | it_test += 1 93 | else: 94 | writer_train.write(example.SerializeToString()) 95 | it_train += 1 96 | 97 | except: 98 | print row[0] 99 | 100 | if it_train > 50: 101 | break 102 | print it_test,it_train 103 | c.close() 104 | writer_train.close() 105 | writer_test.close() 106 | 107 | def extract_tfrecord(session): 108 | record_iterator = tf.python_io.tf_record_iterator(path=tfrecords_filename) 109 | save_data = None 110 | save_euler = [] 111 | for string_record in record_iterator: 112 | example = tf.train.Example() 113 | example.ParseFromString(string_record) 114 | 115 | img_string = example.features.feature['image_raw'].bytes_list.value[0] 116 | img_width = int(example.features.feature['width'].int64_list.value[0]) 117 | img_height = int(example.features.feature['height'].int64_list.value[0]) 118 | img_1d = np.fromstring(img_string, dtype=np.uint8).reshape(img_height,img_width,3) 119 | loc_x = int(example.features.feature['loc_x'].int64_list.value[0]) 120 | loc_y = int(example.features.feature['loc_y'].int64_list.value[0]) 121 | loc_w = int(example.features.feature['loc_w'].int64_list.value[0]) 122 | loc_h = int(example.features.feature['loc_h'].int64_list.value[0]) 123 | roll = float(example.features.feature['roll'].float_list.value[0]) 124 | yaw = float(example.features.feature['yaw'].float_list.value[0]) 125 | pitch = float(example.features.feature['pitch'].float_list.value[0]) 126 | 127 | boxes = np.asarray([[loc_y/float(img_height),loc_x/float(img_width),(loc_y+loc_h)/float(img_height),(loc_x+loc_w)/float(img_width)]]) 128 | resized_and_cropped_image = tf.image.crop_and_resize(img_1d[np.newaxis,:,:,:].astype(np.float32), boxes.astype(np.float32), [0]*1, crop_size=[227,227]) 129 | if save_data is not None: 130 | save_data = np.concatenate([save_data,resized_and_cropped_image.eval(session=session)],axis=0) 131 | else: 132 | save_data = resized_and_cropped_image.eval(session=session) 133 | save_euler.append([roll,yaw,pitch]) 134 | 135 | np.save('truth_data.npy',save_data) 136 | np.save('annotations.npy',np.asarray(save_euler)) 137 | 138 | # cv2.rectangle(img_1d,(loc_x,loc_y),(loc_x+loc_w,loc_y+loc_h),(0,255,0),3) 139 | # cv2.imshow('result',img_1d) 140 | # cv2.waitKey(0) 141 | 142 | 143 | if __name__ == '__main__': 144 | #test_images = test_names() 145 | #make_tfrecord(test_images) 146 | session = tf.Session() 147 | extract_tfrecord(session) 148 | 149 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os 3 | from model import * 4 | 5 | 6 | 7 | if not os.path.exists('../logs'): 8 | os.makedirs('../logs') 9 | 10 | if not os.path.exists('../checkpoint'): 11 | os.makedirs('../checkpoint') 12 | 13 | if not os.path.exists('../best_checkpoint'): 14 | os.makedirs('../best_checkpoint') 15 | 16 | map(os.unlink, (os.path.join( '../logs',f) for f in os.listdir('../logs')) ) 17 | 18 | 
net = HyperFace(True, tf_record_file_path='../../aflw_train_new.tfrecords',model_save_path='../checkpoint/',best_model_save_path='../best_checkpoint/', 19 | restore_model_path='../full_best_checkpoint/') 20 | 21 | with tf.Session() as sess: 22 | print 'Building Graph...' 23 | net.build_network(sess) 24 | print 'Graph Built!' 25 | # net.print_variables() 26 | # net.load_weights(weights_path) 27 | net.train() 28 | 29 | -------------------------------------------------------------------------------- /main_prediction.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os 3 | from model_prediction import * 4 | 5 | 6 | 7 | if not os.path.exists('../logs'): 8 | os.makedirs('../logs') 9 | 10 | if not os.path.exists('../checkpoint'): 11 | os.makedirs('../checkpoint') 12 | 13 | if not os.path.exists('../best_checkpoint'): 14 | os.makedirs('../best_checkpoint') 15 | 16 | map(os.unlink, (os.path.join( '../logs',f) for f in os.listdir('../logs')) ) 17 | 18 | net = HyperFace(True, tf_record_file_path='./aflw_train_small_check.tfrecords',model_save_path='../checkpoint/',best_model_save_path='../best_checkpoint/', 19 | restore_model_path='../full_best_checkpoint/') 20 | 21 | with tf.Session() as sess: 22 | print 'Building Graph...' 23 | net.build_network(sess) 24 | print 'Graph Built!' 25 | # net.print_variables() 26 | # net.load_weights('/Users/shashank/TensorFlow/SPN/weights/') 27 | net.predict() 28 | # net.train() 29 | 30 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | import numpy as np 4 | from tqdm import tqdm 5 | from pdb import set_trace as brk 6 | import sys 7 | 8 | class HyperFace(object): 9 | 10 | def __init__(self,load_model,tf_record_file_path=None,model_save_path=None,best_model_save_path=None,restore_model_path=None): 11 | 12 | self.batch_size = 32 13 | self.img_height = 227 14 | self.img_width = 227 15 | self.channel = 3 16 | 17 | self.num_epochs =10 18 | 19 | # Hyperparameters 1,5,0.5,5,2 20 | self.weight_detect = 1 21 | self.weight_landmarks = 5 22 | self.weight_visibility = 0.5 23 | self.weight_pose = 5 24 | self.weight_gender = 2 25 | 26 | #tf_Record Paramters 27 | self.tf_record_file_path = tf_record_file_path 28 | self.filename_queue = tf.train.string_input_producer([self.tf_record_file_path], num_epochs=self.num_epochs) 29 | self.images, self.labels, self.land, self.vis, self.po, self.gen= self.load_from_tfRecord(self.filename_queue) 30 | 31 | self.model_save_path = model_save_path 32 | self.best_model_save_path = best_model_save_path 33 | self.restore_model_path = restore_model_path 34 | 35 | self.save_after_steps = 200 36 | self.print_after_steps = 50 37 | self.load_model = load_model 38 | 39 | 40 | def build_network(self, sess): 41 | 42 | self.sess = sess 43 | 44 | self.X = tf.placeholder(tf.float32, [self.batch_size, self.img_height, self.img_width, self.channel], name='images') 45 | self.detection = tf.placeholder(tf.int32, [self.batch_size], name='detection') 46 | self.landmarks = tf.placeholder(tf.float32, [self.batch_size, 42], name='landmarks') 47 | self.visibility = tf.placeholder(tf.float32, [self.batch_size,21], name='visibility') 48 | self.pose = tf.placeholder(tf.float32, [self.batch_size,3], name='pose') 49 | self.gender = tf.placeholder(tf.int32, [self.batch_size], name='gender') 50 | 51 | 
net_output = self.network(self.X) # (out_detection, out_landmarks, out_visibility, out_pose, out_gender) 52 | self.test_model = net_output 53 | self.loss_detection = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=net_output[0], labels=tf.one_hot(self.detection, 2))) 54 | 55 | detection_mask = tf.cast(tf.expand_dims(self.detection, axis=1),tf.float32) 56 | 57 | visibility_mask = tf.reshape(tf.tile(tf.expand_dims(self.visibility, axis=2), [1,1,2]), [self.batch_size, -1]) 58 | self.loss_landmarks = tf.reduce_mean(tf.square(detection_mask*visibility_mask*(net_output[1] - self.landmarks))) 59 | 60 | self.loss_visibility = tf.reduce_mean(tf.square(detection_mask*(net_output[2] - self.visibility))) 61 | self.loss_pose = tf.reduce_mean(tf.square(detection_mask*(net_output[3] - self.pose))) 62 | self.loss_gender = tf.reduce_mean(detection_mask*tf.nn.sigmoid_cross_entropy_with_logits(logits=net_output[4], labels=tf.one_hot(self.gender,2))) 63 | 64 | 65 | self.loss = self.weight_detect*self.loss_detection + self.weight_landmarks*self.loss_landmarks \ 66 | + self.weight_visibility*self.loss_visibility + self.weight_pose*self.loss_pose \ 67 | + self.weight_gender*self.loss_gender 68 | 69 | self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.cast(tf.argmax(net_output[0],1),tf.int32),self.detection),tf.float32)) 70 | 71 | #self.loss = self.loss_detection 72 | #self.optimizer = tf.train.AdamOptimizer(1e-7).minimize(self.loss) 73 | self.optimizer = tf.train.MomentumOptimizer(1e-3,0.9,use_nesterov=True).minimize(self.loss) 74 | self.saver = tf.train.Saver(max_to_keep=4, keep_checkpoint_every_n_hours=4) 75 | self.best_saver = tf.train.Saver(max_to_keep=10, keep_checkpoint_every_n_hours=4) 76 | 77 | 78 | def train(self): 79 | 80 | 81 | if self.load_model: 82 | print "Restoring Model" 83 | ckpt = tf.train.get_checkpoint_state(self.restore_model_path) 84 | if ckpt and ckpt.model_checkpoint_path: 85 | self.saver.restore(self.sess,ckpt.model_checkpoint_path) 86 | self.sess.run(tf.local_variables_initializer()) 87 | else: 88 | print "Initializing Model" 89 | self.sess.run(tf.group(tf.global_variables_initializer(),tf.local_variables_initializer())) 90 | 91 | #self.load_det_weights(self.restore_model_path+'weights.npy') 92 | 93 | 94 | coord = tf.train.Coordinator() 95 | threads = tf.train.start_queue_runners(sess=self.sess,coord=coord) 96 | 97 | writer = tf.summary.FileWriter('../logs', self.sess.graph) 98 | loss_summ = tf.summary.scalar('loss', self.loss) 99 | img_summ = tf.summary.image('images', self.images, max_outputs=5) 100 | label_summ = tf.summary.histogram('labels', self.detection) 101 | detect_summ = tf.summary.scalar('det_loss', self.loss_detection) 102 | landmarks_summ = tf.summary.scalar('landmarks_loss', self.loss_landmarks) 103 | vis_summ = tf.summary.scalar('visibility_loss', self.loss_visibility) 104 | pose_summ = tf.summary.scalar('pose_loss', self.loss_pose) 105 | gender_summ = tf.summary.scalar('gender_loss', self.loss_gender) 106 | 107 | summ_op = tf.summary.merge_all() 108 | 109 | counter = 0 110 | best_loss = sys.maxint 111 | try: 112 | while not coord.should_stop(): 113 | batch_imgs, batch_labels, batch_landmarks, batch_visibility, batch_pose, batch_gender = self.sess.run([self.images,self.labels,self.land, self.vis, self.po, self.gen]) 114 | batch_imgs = (batch_imgs - 127.5) / 128.0 115 | input_feed={self.X: batch_imgs, self.detection: batch_labels, self.landmarks: batch_landmarks, self.visibility: batch_visibility, self.pose: batch_pose, self.gender: np.squeeze(batch_gender)} 
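# [annotation, not in the original source] Shapes being fed above, as implied by the placeholders in build_network():
#   batch_imgs       -> self.X          [batch_size, 227, 227, 3], scaled to roughly [-1, 1] by (x - 127.5)/128.0
#   batch_labels     -> self.detection  [batch_size]      1 = face window, 0 = background window
#   batch_landmarks  -> self.landmarks  [batch_size, 42]  21 (x, y) pairs, normalized to the proposal window (per new_files/data_prep.py)
#   batch_visibility -> self.visibility [batch_size, 21]  1 where the corresponding landmark falls inside the window
#   batch_pose       -> self.pose       [batch_size, 3]   Euler angles (roll, pitch, yaw as stored by data_prep)
#   batch_gender     -> self.gender     [batch_size]      np.squeeze drops the trailing singleton of the [batch_size, 1] batch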
116 | #input_feed={self.X: batch_imgs, self.detection: batch_labels} 117 | 118 | _,model_op,loss,l_d,l_l,l_v,l_p,l_g, summ, accuracy = self.sess.run([self.optimizer,self.test_model,self.loss,self.loss_detection, 119 | self.loss_landmarks,self.loss_visibility,self.loss_pose,self.loss_gender, summ_op, self.accuracy], input_feed) 120 | 121 | writer.add_summary(summ, counter) 122 | 123 | if counter % self.save_after_steps == 0: 124 | self.saver.save(self.sess,self.model_save_path+'hyperface_model',global_step=int(counter),write_meta_graph=False) 125 | 126 | 127 | if loss <= best_loss: 128 | best_loss = loss 129 | self.best_saver.save(self.sess,self.best_model_save_path+'hyperface_best_model',global_step=int(counter),write_meta_graph=False) 130 | #self.save_weights(self.best_model_save_path) 131 | 132 | if counter % self.print_after_steps == 0: 133 | print "Iteration:{},Total Loss:{},Detection loss:{},Landmark loss:{},Visbility Loss :{},Pose Loss:{},Gender Loss:{},Accuracy:{}".format(counter,loss,l_d,l_l,l_v,l_p,l_g,accuracy) 134 | 135 | counter += 1 136 | 137 | except tf.errors.OutOfRangeError: 138 | print('Done training -- epoch limit reached') 139 | finally: 140 | coord.request_stop() 141 | 142 | coord.join(threads) 143 | 144 | 145 | 146 | 147 | 148 | def network_det(self,inputs,reuse=False): 149 | 150 | if reuse: 151 | tf.get_variable_scope().reuse_variables() 152 | 153 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 154 | activation_fn = tf.nn.relu, 155 | weights_initializer = tf.truncated_normal_initializer(0.0, 0.01)): 156 | 157 | conv1 = slim.conv2d(inputs, 96, [11,11], 4, padding= 'VALID', scope='conv1') 158 | max1 = slim.max_pool2d(conv1, [3,3], 2, padding= 'VALID', scope='max1') 159 | 160 | conv2 = slim.conv2d(max1, 256, [5,5], 1, scope='conv2') 161 | max2 = slim.max_pool2d(conv2, [3,3], 2, padding= 'VALID', scope='max2') 162 | conv3 = slim.conv2d(max2, 384, [3,3], 1, scope='conv3') 163 | 164 | conv4 = slim.conv2d(conv3, 384, [3,3], 1, scope='conv4') 165 | conv5 = slim.conv2d(conv4, 256, [3,3], 1, scope='conv5') 166 | pool5 = slim.max_pool2d(conv5, [3,3], 2, padding= 'VALID', scope='pool5') 167 | 168 | shape = int(np.prod(pool5.get_shape()[1:])) 169 | fc6 = slim.fully_connected(tf.reshape(pool5, [-1, shape]), 4096, scope='fc6') 170 | 171 | fc_detection = slim.fully_connected(fc6, 512, scope='fc_det1') 172 | out_detection = slim.fully_connected(fc_detection, 2, scope='fc_det2', activation_fn = None) 173 | 174 | return out_detection 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | def network(self,inputs,reuse=False): 183 | 184 | if reuse: 185 | tf.get_variable_scope().reuse_variables() 186 | 187 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 188 | activation_fn = tf.nn.relu, 189 | weights_initializer = tf.truncated_normal_initializer(0.0, 0.01) ): 190 | 191 | conv1 = slim.conv2d(inputs, 96, [11,11], 4, padding= 'VALID', scope='conv1') 192 | max1 = slim.max_pool2d(conv1, [3,3], 2, padding= 'VALID', scope='max1') 193 | 194 | conv1a = slim.conv2d(max1, 256, [4,4], 4, padding= 'VALID', scope='conv1a') 195 | 196 | conv2 = slim.conv2d(max1, 256, [5,5], 1, scope='conv2') 197 | max2 = slim.max_pool2d(conv2, [3,3], 2, padding= 'VALID', scope='max2') 198 | conv3 = slim.conv2d(max2, 384, [3,3], 1, scope='conv3') 199 | 200 | conv3a = slim.conv2d(conv3, 256, [2,2], 2, padding= 'VALID', scope='conv3a') 201 | 202 | conv4 = slim.conv2d(conv3, 384, [3,3], 1, scope='conv4') 203 | conv5 = slim.conv2d(conv4, 256, [3,3], 1, scope='conv5') 204 | pool5 = slim.max_pool2d(conv5, [3,3], 
2, padding= 'VALID', scope='pool5') 205 | 206 | concat_feat = tf.concat([conv1a, conv3a, pool5],3) 207 | conv_all = slim.conv2d(concat_feat, 192, [1,1], 1, padding= 'VALID', scope='conv_all') 208 | 209 | shape = int(np.prod(conv_all.get_shape()[1:])) 210 | fc_full = slim.fully_connected(tf.reshape(conv_all, [-1, shape]), 3072, scope='fc_full') 211 | 212 | fc_detection = slim.fully_connected(fc_full, 512, scope='fc_detection1') 213 | fc_landmarks = slim.fully_connected(fc_full, 512, scope='fc_landmarks1') 214 | fc_visibility = slim.fully_connected(fc_full, 512, scope='fc_visibility1') 215 | fc_pose = slim.fully_connected(fc_full, 512, scope='fc_pose1') 216 | fc_gender = slim.fully_connected(fc_full, 512, scope='fc_gender1') 217 | 218 | out_detection = slim.fully_connected(fc_detection, 2, scope='fc_detection2', activation_fn = None) 219 | out_landmarks = slim.fully_connected(fc_landmarks, 42, scope='fc_landmarks2', activation_fn = None ) 220 | out_visibility = slim.fully_connected(fc_visibility, 21, scope='fc_visibility2', activation_fn = None) 221 | out_pose = slim.fully_connected(fc_pose, 3, scope='fc_pose2', activation_fn = None) 222 | out_gender = slim.fully_connected(fc_gender, 2, scope='fc_gender2', activation_fn = None) 223 | 224 | return [out_detection, out_landmarks, out_visibility, out_pose, out_gender] 225 | 226 | 227 | 228 | def predict(self, imgs_path): 229 | print 'Running inference...' 230 | np.set_printoptions(suppress=True) 231 | imgs = (np.load(imgs_path) - 127.5)/128.0 232 | shape = imgs.shape 233 | self.X = tf.placeholder(tf.float32, [shape[0], self.img_height, self.img_width, self.channel], name='images') 234 | pred = self.network(self.X, reuse = True) 235 | 236 | net_preds = self.sess.run(pred, feed_dict={self.X: imgs}) 237 | 238 | print 'gender: \n', net_preds[-1] 239 | import matplotlib.pyplot as plt 240 | plt.imshow(imgs[-1]);plt.show() 241 | 242 | 243 | 244 | def load_from_tfRecord(self,filename_queue): 245 | 246 | reader = tf.TFRecordReader() 247 | _, serialized_example = reader.read(filename_queue) 248 | 249 | features = tf.parse_single_example( 250 | serialized_example, 251 | features={ 252 | 'image_raw':tf.FixedLenFeature([], tf.string), 253 | 'width': tf.FixedLenFeature([], tf.int64), 254 | 'height': tf.FixedLenFeature([], tf.int64), 255 | 'pos_locs':tf.FixedLenFeature([], tf.string), 256 | 'neg_locs':tf.FixedLenFeature([], tf.string), 257 | 'n_pos_locs':tf.FixedLenFeature([], tf.int64), 258 | 'n_neg_locs':tf.FixedLenFeature([], tf.int64), 259 | 'gender':tf.FixedLenFeature([], tf.int64), 260 | 'pose': tf.FixedLenFeature([], tf.string), 261 | 'landmarks':tf.FixedLenFeature([], tf.string), 262 | 'visibility':tf.FixedLenFeature([], tf.string), 263 | 264 | }) 265 | 266 | landmarks = tf.decode_raw(features['landmarks'], tf.float32) 267 | pose = tf.decode_raw(features['pose'], tf.float32) 268 | visibility = tf.decode_raw(features['visibility'], tf.int32) 269 | gender = tf.cast(features['gender'], tf.int32) 270 | 271 | landmarks_shape = tf.stack([1,21*2]) 272 | pose_shape = tf.stack([1,3]) 273 | visibility_shape = tf.stack([1,21]) 274 | gender_shape = tf.stack([1,1]) 275 | 276 | landmarks = tf.reshape(landmarks,landmarks_shape) 277 | visibility = tf.reshape(visibility,visibility_shape) 278 | pose = tf.reshape(pose,pose_shape) 279 | gender = tf.reshape(gender,gender_shape) 280 | 281 | image = tf.decode_raw(features['image_raw'], tf.uint8) 282 | pos_locs = tf.decode_raw(features['pos_locs'], tf.float32) 283 | neg_locs = tf.decode_raw(features['neg_locs'], tf.float32) 
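# [annotation, not in the original source] pos_locs / neg_locs are the positive (face) and negative
# (background) candidate windows written by new_files/data_prep.py (positive.npy / negative.npy,
# presumably generated via selective_search.py). They are stored as flat float32 buffers and reshaped
# below to [n_pos_locs, 4] / [n_neg_locs, 4], one box per row in [y1, x1, y2, x2] order, normalized by
# image height and width, which is the box format tf.image.crop_and_resize expects.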
284 | 285 | orig_height = tf.cast(features['height'], tf.int32) 286 | orig_width = tf.cast(features['width'], tf.int32) 287 | n_pos_locs = tf.cast(features['n_pos_locs'], tf.int32) 288 | n_neg_locs = tf.cast(features['n_neg_locs'], tf.int32) 289 | 290 | image_shape = tf.stack([1,orig_height,orig_width,3]) 291 | image = tf.cast(tf.reshape(image,image_shape),tf.float32) 292 | 293 | pos_locs_shape = tf.stack([n_pos_locs,4]) 294 | pos_locs = tf.reshape(pos_locs,pos_locs_shape) 295 | 296 | neg_locs_shape = tf.stack([n_neg_locs,4]) 297 | neg_locs = tf.reshape(neg_locs,neg_locs_shape) 298 | 299 | positive_cropped = tf.image.crop_and_resize(image,pos_locs,tf.zeros([n_pos_locs],dtype=tf.int32),[227,227]) 300 | negative_cropped = tf.image.crop_and_resize(image,neg_locs,tf.zeros([n_neg_locs],dtype=tf.int32),[227,227]) 301 | 302 | all_images = tf.concat([positive_cropped,negative_cropped],axis=0) 303 | 304 | positive_labels = tf.ones([n_pos_locs]) 305 | negative_labels = tf.zeros([n_neg_locs]) 306 | 307 | 308 | positive_landmarks = tf.tile(landmarks,[n_pos_locs,1]) 309 | negative_landmarks = tf.tile(landmarks,[n_neg_locs,1]) 310 | 311 | positive_visibility = tf.tile(visibility,[n_pos_locs,1]) 312 | negative_visibility = tf.tile(visibility,[n_neg_locs,1]) 313 | 314 | positive_pose = tf.tile(pose,[n_pos_locs,1]) 315 | negative_pose = tf.tile(pose,[n_neg_locs,1]) 316 | 317 | positive_gender = tf.tile(gender,[n_pos_locs,1]) 318 | negative_gender = tf.tile(gender,[n_neg_locs,1]) 319 | 320 | all_landmarks = tf.concat([positive_landmarks,negative_landmarks],axis=0) 321 | all_visibility = tf.concat([positive_visibility,negative_visibility],axis=0) 322 | all_pose = tf.concat([positive_pose,negative_pose],axis=0) 323 | 324 | all_labels = tf.concat([positive_labels,negative_labels],axis=0) 325 | all_gender = tf.concat([positive_gender,negative_gender],axis=0) 326 | 327 | tf.random_shuffle(all_images,seed=7) 328 | tf.random_shuffle(all_labels,seed=7) 329 | tf.random_shuffle(all_landmarks,seed=7) 330 | tf.random_shuffle(all_visibility,seed=7) 331 | tf.random_shuffle(all_pose,seed=7) 332 | tf.random_shuffle(all_gender,seed=7) 333 | 334 | images,labels,landmarks_,visibility_,pose_,gender_ = tf.train.shuffle_batch([all_images,all_labels,all_landmarks,all_visibility,all_pose,all_gender] 335 | ,enqueue_many=True,batch_size=self.batch_size,num_threads=1,capacity=1000,min_after_dequeue=500) 336 | 337 | return images,labels,landmarks_,visibility_,pose_,gender_ 338 | 339 | 340 | def load_weights(self, path): 341 | variables = slim.get_model_variables() 342 | print 'Loading weights...' 343 | for var in tqdm(variables): 344 | if ('conv' in var.name) and ('weights' in var.name): 345 | self.sess.run(var.assign(np.load(path+var.name.split('/')[0]+'/W.npy').transpose((2,3,1,0)))) 346 | elif ('fc' in var.name) and ('weights' in var.name): 347 | self.sess.run(var.assign(np.load(path+var.name.split('/')[0]+'/W.npy').T)) 348 | elif 'biases' in var.name: 349 | self.sess.run(var.assign(np.load(path+var.name.split('/')[0]+'/b.npy'))) 350 | print 'Weights loaded!!' 
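# [annotation, not in the original source] load_weights() above assumes a Caffe-style dump on disk with
# one sub-directory per variable scope, each holding W.npy and b.npy. As implied by the transposes, a
# compatible layout would look like this (directory names illustrative only):
#
#   <path>/conv1/W.npy    shape (96, 3, 11, 11)   # (out, in, kh, kw); transpose((2,3,1,0)) gives the (11, 11, 3, 96) slim expects
#   <path>/conv1/b.npy    shape (96,)
#   <path>/fc_full/W.npy  shape (3072, 6912)      # (out, in); .T gives (6912, 3072); 6912 = 6*6*192 from conv_all at a 227x227 input
#   <path>/fc_full/b.npy  shape (3072,)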
351 | 352 | def print_variables(self): 353 | variables = slim.get_model_variables() 354 | print 'Model Variables:' 355 | for var in variables: 356 | print var.name, ' ', var.get_shape() 357 | 358 | 359 | def save_weights(self, path): 360 | variables = slim.get_model_variables() 361 | weights = {} 362 | for var in variables: 363 | weights[var.name] = self.sess.run(var) 364 | 365 | np.save(path+ '/weights', weights) 366 | 367 | def load_det_weights(self, path): 368 | variables = slim.get_model_variables() 369 | weights = np.load(path) 370 | for var in variables: 371 | if var.name in weights.item(): 372 | print var.name 373 | self.sess.run(var.assign(weights.item()[var.name])) 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | -------------------------------------------------------------------------------- /model_prediction.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | import numpy as np 4 | from tqdm import tqdm 5 | from pdb import set_trace as brk 6 | import sys 7 | 8 | class HyperFace(object): 9 | 10 | def __init__(self,load_model,tf_record_file_path=None,model_save_path=None,best_model_save_path=None,restore_model_path=None): 11 | 12 | self.batch_size = 32 13 | self.img_height = 227 14 | self.img_width = 227 15 | self.channel = 3 16 | 17 | self.num_epochs =1 18 | 19 | # Hyperparameters 1,5,0.5,5,2 20 | self.weight_detect = 1 21 | self.weight_landmarks = 5 22 | self.weight_visibility = 0.5 23 | self.weight_pose = 5 24 | self.weight_gender = 2 25 | 26 | #tf_Record Paramters 27 | self.tf_record_file_path = tf_record_file_path 28 | self.filename_queue = tf.train.string_input_producer([self.tf_record_file_path], num_epochs=self.num_epochs) 29 | self.images, self.labels, self.land, self.vis, self.po, self.gen= self.load_from_tfRecord(self.filename_queue) 30 | 31 | self.model_save_path = model_save_path 32 | self.best_model_save_path = best_model_save_path 33 | self.restore_model_path = restore_model_path 34 | 35 | self.save_after_steps = 200 36 | self.print_after_steps = 50 37 | self.load_model = load_model 38 | 39 | 40 | def build_network(self, sess): 41 | 42 | self.sess = sess 43 | 44 | self.X = tf.placeholder(tf.float32, [self.batch_size, self.img_height, self.img_width, self.channel], name='images') 45 | self.detection = tf.placeholder(tf.int32, [self.batch_size], name='detection') 46 | self.landmarks = tf.placeholder(tf.float32, [self.batch_size, 42], name='landmarks') 47 | self.visibility = tf.placeholder(tf.float32, [self.batch_size,21], name='visibility') 48 | self.pose = tf.placeholder(tf.float32, [self.batch_size,3], name='pose') 49 | self.gender = tf.placeholder(tf.int32, [self.batch_size], name='gender') 50 | 51 | self.net_output = self.network(self.X) # (out_detection, out_landmarks, out_visibility, out_pose, out_gender) 52 | self.loss_detection = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.net_output[0], labels=tf.one_hot(self.detection, 2))) 53 | 54 | detection_mask = tf.cast(tf.expand_dims(self.detection, axis=1),tf.float32) 55 | 56 | visibility_mask = tf.reshape(tf.tile(tf.expand_dims(self.visibility, axis=2), [1,1,2]), [self.batch_size, -1]) 57 | self.loss_landmarks = tf.reduce_mean(tf.square(detection_mask*visibility_mask*(self.net_output[1] - self.landmarks))) 58 | 59 | self.loss_visibility = tf.reduce_mean(tf.square(detection_mask*(self.net_output[2] - self.visibility))) 60 | self.loss_pose = 
tf.reduce_mean(tf.square(detection_mask*(self.net_output[3] - self.pose))) 61 | self.loss_gender = tf.reduce_mean(detection_mask*tf.nn.sigmoid_cross_entropy_with_logits(logits=self.net_output[4], labels=tf.one_hot(self.gender,2))) 62 | 63 | 64 | self.loss = self.weight_detect*self.loss_detection + self.weight_landmarks*self.loss_landmarks \ 65 | + self.weight_visibility*self.loss_visibility + self.weight_pose*self.loss_pose \ 66 | + self.weight_gender*self.loss_gender 67 | 68 | self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.cast(tf.argmax(self.net_output[0],1),tf.int32),self.detection),tf.float32)) 69 | 70 | #self.loss = self.loss_detection 71 | #self.optimizer = tf.train.AdamOptimizer(1e-7).minimize(self.loss) 72 | self.optimizer = tf.train.MomentumOptimizer(1e-3,0.9,use_nesterov=True).minimize(self.loss) 73 | self.saver = tf.train.Saver(max_to_keep=4, keep_checkpoint_every_n_hours=4) 74 | self.best_saver = tf.train.Saver(max_to_keep=10, keep_checkpoint_every_n_hours=4) 75 | 76 | 77 | def train(self): 78 | 79 | 80 | if self.load_model: 81 | print "Restoring Model" 82 | ckpt = tf.train.get_checkpoint_state(self.restore_model_path) 83 | if ckpt and ckpt.model_checkpoint_path: 84 | self.saver.restore(self.sess,ckpt.model_checkpoint_path) 85 | self.sess.run(tf.local_variables_initializer()) 86 | else: 87 | print "Initializing Model" 88 | self.sess.run(tf.group(tf.global_variables_initializer(),tf.local_variables_initializer())) 89 | 90 | #self.load_det_weights(self.restore_model_path+'weights.npy') 91 | 92 | 93 | coord = tf.train.Coordinator() 94 | threads = tf.train.start_queue_runners(sess=self.sess,coord=coord) 95 | 96 | writer = tf.summary.FileWriter('../logs', self.sess.graph) 97 | loss_summ = tf.summary.scalar('loss', self.loss) 98 | img_summ = tf.summary.image('images', self.images, max_outputs=5) 99 | label_summ = tf.summary.histogram('labels', self.detection) 100 | detect_summ = tf.summary.scalar('det_loss', self.loss_detection) 101 | landmarks_summ = tf.summary.scalar('landmarks_loss', self.loss_landmarks) 102 | vis_summ = tf.summary.scalar('visibility_loss', self.loss_visibility) 103 | pose_summ = tf.summary.scalar('pose_loss', self.loss_pose) 104 | gender_summ = tf.summary.scalar('gender_loss', self.loss_gender) 105 | 106 | summ_op = tf.summary.merge_all() 107 | 108 | counter = 0 109 | best_loss = sys.maxint 110 | try: 111 | while not coord.should_stop(): 112 | batch_imgs, batch_labels, batch_landmarks, batch_visibility, batch_pose, batch_gender = self.sess.run([self.images,self.labels,self.land, self.vis, self.po, self.gen]) 113 | batch_imgs = (batch_imgs - 127.5) / 128.0 114 | input_feed={self.X: batch_imgs, self.detection: batch_labels, self.landmarks: batch_landmarks, self.visibility: batch_visibility, self.pose: batch_pose, self.gender: np.squeeze(batch_gender)} 115 | #input_feed={self.X: batch_imgs, self.detection: batch_labels} 116 | 117 | _,model_op,loss,l_d,l_l,l_v,l_p,l_g, summ, accuracy = self.sess.run([self.optimizer,self.test_model,self.loss,self.loss_detection, 118 | self.loss_landmarks,self.loss_visibility,self.loss_pose,self.loss_gender, summ_op, self.accuracy], input_feed) 119 | 120 | writer.add_summary(summ, counter) 121 | 122 | if counter % self.save_after_steps == 0: 123 | self.saver.save(self.sess,self.model_save_path+'hyperface_model',global_step=int(counter),write_meta_graph=False) 124 | 125 | 126 | if loss <= best_loss: 127 | best_loss = loss 128 | 
self.best_saver.save(self.sess,self.best_model_save_path+'hyperface_best_model',global_step=int(counter),write_meta_graph=False) 129 | #self.save_weights(self.best_model_save_path) 130 | 131 | if counter % self.print_after_steps == 0: 132 | print "Iteration:{},Total Loss:{},Detection loss:{},Landmark loss:{},Visbility Loss :{},Pose Loss:{},Gender Loss:{},Accuracy:{}".format(counter,loss,l_d,l_l,l_v,l_p,l_g,accuracy) 133 | 134 | counter += 1 135 | 136 | except tf.errors.OutOfRangeError: 137 | print('Done training -- epoch limit reached') 138 | finally: 139 | coord.request_stop() 140 | 141 | coord.join(threads) 142 | 143 | 144 | 145 | 146 | 147 | def network_det(self,inputs,reuse=False): 148 | 149 | if reuse: 150 | tf.get_variable_scope().reuse_variables() 151 | 152 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 153 | activation_fn = tf.nn.relu, 154 | weights_initializer = tf.truncated_normal_initializer(0.0, 0.01)): 155 | 156 | conv1 = slim.conv2d(inputs, 96, [11,11], 4, padding= 'VALID', scope='conv1') 157 | max1 = slim.max_pool2d(conv1, [3,3], 2, padding= 'VALID', scope='max1') 158 | 159 | conv2 = slim.conv2d(max1, 256, [5,5], 1, scope='conv2') 160 | max2 = slim.max_pool2d(conv2, [3,3], 2, padding= 'VALID', scope='max2') 161 | conv3 = slim.conv2d(max2, 384, [3,3], 1, scope='conv3') 162 | 163 | conv4 = slim.conv2d(conv3, 384, [3,3], 1, scope='conv4') 164 | conv5 = slim.conv2d(conv4, 256, [3,3], 1, scope='conv5') 165 | pool5 = slim.max_pool2d(conv5, [3,3], 2, padding= 'VALID', scope='pool5') 166 | 167 | shape = int(np.prod(pool5.get_shape()[1:])) 168 | fc6 = slim.fully_connected(tf.reshape(pool5, [-1, shape]), 4096, scope='fc6') 169 | 170 | fc_detection = slim.fully_connected(fc6, 512, scope='fc_det1') 171 | out_detection = slim.fully_connected(fc_detection, 2, scope='fc_det2', activation_fn = None) 172 | 173 | return out_detection 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | def network(self,inputs,reuse=False): 182 | 183 | if reuse: 184 | tf.get_variable_scope().reuse_variables() 185 | 186 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 187 | activation_fn = tf.nn.relu, 188 | weights_initializer = tf.truncated_normal_initializer(0.0, 0.01) ): 189 | 190 | conv1 = slim.conv2d(inputs, 96, [11,11], 4, padding= 'VALID', scope='conv1') 191 | max1 = slim.max_pool2d(conv1, [3,3], 2, padding= 'VALID', scope='max1') 192 | 193 | conv1a = slim.conv2d(max1, 256, [4,4], 4, padding= 'VALID', scope='conv1a') 194 | 195 | conv2 = slim.conv2d(max1, 256, [5,5], 1, scope='conv2') 196 | max2 = slim.max_pool2d(conv2, [3,3], 2, padding= 'VALID', scope='max2') 197 | conv3 = slim.conv2d(max2, 384, [3,3], 1, scope='conv3') 198 | 199 | conv3a = slim.conv2d(conv3, 256, [2,2], 2, padding= 'VALID', scope='conv3a') 200 | 201 | conv4 = slim.conv2d(conv3, 384, [3,3], 1, scope='conv4') 202 | conv5 = slim.conv2d(conv4, 256, [3,3], 1, scope='conv5') 203 | pool5 = slim.max_pool2d(conv5, [3,3], 2, padding= 'VALID', scope='pool5') 204 | 205 | concat_feat = tf.concat([conv1a, conv3a, pool5],3) 206 | conv_all = slim.conv2d(concat_feat, 192, [1,1], 1, padding= 'VALID', scope='conv_all') 207 | 208 | shape = int(np.prod(conv_all.get_shape()[1:])) 209 | fc_full = slim.fully_connected(tf.reshape(tf.transpose(conv_all, [0,3,1,2]), [-1, shape]), 3072, scope='fc_full') 210 | 211 | fc_detection = slim.fully_connected(fc_full, 512, scope='fc_detection1') 212 | fc_landmarks = slim.fully_connected(fc_full, 512, scope='fc_landmarks1') 213 | fc_visibility = slim.fully_connected(fc_full, 512, scope='fc_visibility1') 
214 | fc_pose = slim.fully_connected(fc_full, 512, scope='fc_pose1') 215 | fc_gender = slim.fully_connected(fc_full, 512, scope='fc_gender1') 216 | 217 | out_detection = slim.fully_connected(fc_detection, 2, scope='fc_detection2', activation_fn = None) 218 | out_landmarks = slim.fully_connected(fc_landmarks, 42, scope='fc_landmarks2', activation_fn = None ) 219 | out_visibility = slim.fully_connected(fc_visibility, 21, scope='fc_visibility2', activation_fn = None) 220 | out_pose = slim.fully_connected(fc_pose, 3, scope='fc_pose2', activation_fn = None) 221 | out_gender = slim.fully_connected(fc_gender, 2, scope='fc_gender2', activation_fn = None) 222 | 223 | return [out_detection, out_landmarks, out_visibility, out_pose, out_gender] 224 | 225 | 226 | 227 | def predict(self): 228 | print 'Running inference...' 229 | self.sess.run(tf.group(tf.global_variables_initializer(),tf.local_variables_initializer())) 230 | self.load_weights('/Users/shashank/TensorFlow/SPN/weights/') 231 | coord = tf.train.Coordinator() 232 | threads = tf.train.start_queue_runners(sess=self.sess,coord=coord) 233 | 234 | result = [] 235 | truth = [] 236 | count =0 237 | try: 238 | while not coord.should_stop(): 239 | print count 240 | batch_imgs, batch_labels, batch_landmarks, batch_visibility, batch_pose, batch_gender = self.sess.run([self.images,self.labels,self.land, self.vis, self.po, self.gen]) 241 | batch_imgs = (batch_imgs - 127.5) / 128.0 242 | 243 | net_preds = self.sess.run(self.net_output, feed_dict={self.X: batch_imgs}) 244 | result.append(np.concatenate(net_preds, axis=1)) 245 | truth.append(np.concatenate([batch_labels[:, np.newaxis], batch_landmarks, batch_visibility, batch_pose, batch_gender], axis=1)) 246 | count += 1 247 | 248 | except tf.errors.OutOfRangeError: 249 | print('Done training -- epoch limit reached') 250 | finally: 251 | coord.request_stop() 252 | 253 | coord.join(threads) 254 | np.save('test_results', np.concatenate(result, axis = 0)) 255 | np.save('truth', np.concatenate(truth, axis = 0)) 256 | 257 | def load_from_tfRecord(self,filename_queue): 258 | 259 | reader = tf.TFRecordReader() 260 | _, serialized_example = reader.read(filename_queue) 261 | 262 | features = tf.parse_single_example( 263 | serialized_example, 264 | features={ 265 | 'image_raw':tf.FixedLenFeature([], tf.string), 266 | 'width': tf.FixedLenFeature([], tf.int64), 267 | 'height': tf.FixedLenFeature([], tf.int64), 268 | 'pos_locs':tf.FixedLenFeature([], tf.string), 269 | 'neg_locs':tf.FixedLenFeature([], tf.string), 270 | 'n_pos_locs':tf.FixedLenFeature([], tf.int64), 271 | 'n_neg_locs':tf.FixedLenFeature([], tf.int64), 272 | 'gender':tf.FixedLenFeature([], tf.int64), 273 | 'pose': tf.FixedLenFeature([], tf.string), 274 | 'landmarks':tf.FixedLenFeature([], tf.string), 275 | 'visibility':tf.FixedLenFeature([], tf.string), 276 | 277 | }) 278 | 279 | landmarks = tf.decode_raw(features['landmarks'], tf.float32) 280 | pose = tf.decode_raw(features['pose'], tf.float32) 281 | visibility = tf.decode_raw(features['visibility'], tf.int32) 282 | gender = tf.cast(features['gender'], tf.int32) 283 | 284 | landmarks_shape = tf.stack([1,21*2]) 285 | pose_shape = tf.stack([1,3]) 286 | visibility_shape = tf.stack([1,21]) 287 | gender_shape = tf.stack([1,1]) 288 | 289 | landmarks = tf.reshape(landmarks,landmarks_shape) 290 | visibility = tf.reshape(visibility,visibility_shape) 291 | pose = tf.reshape(pose,pose_shape) 292 | gender = tf.reshape(gender,gender_shape) 293 | 294 | image = tf.decode_raw(features['image_raw'], tf.uint8) 295 | 
pos_locs = tf.decode_raw(features['pos_locs'], tf.float32) 296 | neg_locs = tf.decode_raw(features['neg_locs'], tf.float32) 297 | 298 | orig_height = tf.cast(features['height'], tf.int32) 299 | orig_width = tf.cast(features['width'], tf.int32) 300 | n_pos_locs = tf.cast(features['n_pos_locs'], tf.int32) 301 | n_neg_locs = tf.cast(features['n_neg_locs'], tf.int32) 302 | 303 | image_shape = tf.stack([1,orig_height,orig_width,3]) 304 | image = tf.cast(tf.reshape(image,image_shape),tf.float32) 305 | 306 | pos_locs_shape = tf.stack([n_pos_locs,4]) 307 | pos_locs = tf.reshape(pos_locs,pos_locs_shape) 308 | 309 | neg_locs_shape = tf.stack([n_neg_locs,4]) 310 | neg_locs = tf.reshape(neg_locs,neg_locs_shape) 311 | 312 | positive_cropped = tf.image.crop_and_resize(image,pos_locs,tf.zeros([n_pos_locs],dtype=tf.int32),[227,227]) 313 | negative_cropped = tf.image.crop_and_resize(image,neg_locs,tf.zeros([n_neg_locs],dtype=tf.int32),[227,227]) 314 | 315 | all_images = tf.concat([positive_cropped,negative_cropped],axis=0) 316 | 317 | positive_labels = tf.ones([n_pos_locs]) 318 | negative_labels = tf.zeros([n_neg_locs]) 319 | 320 | 321 | positive_landmarks = tf.tile(landmarks,[n_pos_locs,1]) 322 | negative_landmarks = tf.tile(landmarks,[n_neg_locs,1]) 323 | 324 | positive_visibility = tf.tile(visibility,[n_pos_locs,1]) 325 | negative_visibility = tf.tile(visibility,[n_neg_locs,1]) 326 | 327 | positive_pose = tf.tile(pose,[n_pos_locs,1]) 328 | negative_pose = tf.tile(pose,[n_neg_locs,1]) 329 | 330 | positive_gender = tf.tile(gender,[n_pos_locs,1]) 331 | negative_gender = tf.tile(gender,[n_neg_locs,1]) 332 | 333 | all_landmarks = tf.concat([positive_landmarks,negative_landmarks],axis=0) 334 | all_visibility = tf.concat([positive_visibility,negative_visibility],axis=0) 335 | all_pose = tf.concat([positive_pose,negative_pose],axis=0) 336 | 337 | all_labels = tf.concat([positive_labels,negative_labels],axis=0) 338 | all_gender = tf.concat([positive_gender,negative_gender],axis=0) 339 | 340 | tf.random_shuffle(all_images,seed=7) 341 | tf.random_shuffle(all_labels,seed=7) 342 | tf.random_shuffle(all_landmarks,seed=7) 343 | tf.random_shuffle(all_visibility,seed=7) 344 | tf.random_shuffle(all_pose,seed=7) 345 | tf.random_shuffle(all_gender,seed=7) 346 | 347 | images,labels,landmarks_,visibility_,pose_,gender_ = tf.train.shuffle_batch([all_images,all_labels,all_landmarks,all_visibility,all_pose,all_gender] 348 | ,enqueue_many=True,batch_size=self.batch_size,num_threads=1,capacity=1000,min_after_dequeue=500) 349 | 350 | return images,labels,landmarks_,visibility_,pose_,gender_ 351 | 352 | 353 | def load_weights(self, path): 354 | variables = slim.get_model_variables() 355 | print 'Loading weights...' 356 | for var in tqdm(variables): 357 | if ('conv' in var.name) and ('weights' in var.name): 358 | self.sess.run(var.assign(np.load(path+var.name.split('/')[0]+'/W.npy').transpose((2,3,1,0)))) 359 | elif ('fc' in var.name) and ('weights' in var.name): 360 | self.sess.run(var.assign(np.load(path+var.name.split('/')[0]+'/W.npy').T)) 361 | elif 'biases' in var.name: 362 | self.sess.run(var.assign(np.load(path+var.name.split('/')[0]+'/b.npy'))) 363 | print 'Weights loaded!!' 
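# [annotation, not in the original source] Two details specific to this prediction copy of the model:
#
# 1) network() here flattens conv_all as tf.reshape(tf.transpose(conv_all, [0,3,1,2]), ...), i.e. in
#    channel-major (NCHW) order, presumably so the flattened features line up with the (out, in)-ordered
#    fc_full weights loaded by load_weights() above; model.py reshapes conv_all directly in NHWC order.
#
# 2) predict() saves two arrays whose columns follow its concatenation order:
#      test_results.npy : [det logits (2) | landmarks (42) | visibility (21) | pose (3) | gender logits (2)] -> 70 cols
#      truth.npy        : [label (1)      | landmarks (42) | visibility (21) | pose (3) | gender (1)]        -> 68 cols
#    A minimal sketch for reading them back (hypothetical helper, not part of this repo):
#
#      import numpy as np
#      preds, truth = np.load('test_results.npy'), np.load('truth.npy')
#      det_logits, lm_pred = preds[:, 0:2], preds[:, 2:44]
#      vis_pred, pose_pred, gender_logits = preds[:, 44:65], preds[:, 65:68], preds[:, 68:70]
#      labels, lm_true = truth[:, 0], truth[:, 1:43]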
364 | 365 | def print_variables(self): 366 | variables = slim.get_model_variables() 367 | print 'Model Variables:' 368 | for var in variables: 369 | print var.name, ' ', var.get_shape() 370 | 371 | 372 | def save_weights(self, path): 373 | variables = slim.get_model_variables() 374 | weights = {} 375 | for var in variables: 376 | weights[var.name] = self.sess.run(var) 377 | 378 | np.save(path+ '/weights', weights) 379 | 380 | def load_det_weights(self, path): 381 | variables = slim.get_model_variables() 382 | weights = np.load(path) 383 | for var in variables: 384 | if var.name in weights.item(): 385 | print var.name 386 | self.sess.run(var.assign(weights.item()[var.name])) 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | -------------------------------------------------------------------------------- /new_files/data_prep.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from skimage import io 4 | from skimage import color 5 | import sqlite3 6 | import cv2 7 | import matplotlib.pyplot as plt 8 | import os 9 | import random 10 | from tqdm import tqdm 11 | from pdb import set_trace as brk 12 | import sys 13 | # The following are the database properties available (last updated version 2012-11-28): 14 | # 15 | # databases: db_id, path, description 16 | # faceellipse: face_id, x, y, ra, rb, theta, annot_type_id, upsidedown 17 | # faceimages: image_id, db_id, file_id, filepath, bw, widht, height 18 | # facemetadata: face_id, sex, occluded, glasses, bw, annot_type_id 19 | # facepose: face_id, roll, pitch, yaw, annot_type_id 20 | # facerect: face_id, x, y, w, h, annot_type_id 21 | # faces: face_id, file_id, db_id 22 | # featurecoords: face_id, feature_id, x, y 23 | # featurecoordtype: feature_id, descr, code, x, y, z 24 | # AFLW 21 points landmark 25 | # 0|LeftBrowLeftCorner 26 | # 1|LeftBrowCenter 27 | # 2|LeftBrowRightCorner 28 | # 3|RightBrowLeftCorner 29 | # 4|RightBrowCenter 30 | # 5|RightBrowRightCorner 31 | # 6|LeftEyeLeftCorner 32 | # 7|LeftEyeCenter 33 | # 8|LeftEyeRightCorner 34 | # 9|RightEyeLeftCorner 35 | # 10|RightEyeCenter 36 | # 11|RightEyeRightCorner 37 | # 12|LeftEar 38 | # 13|NoseLeft 39 | # 14|NoseCenter 40 | # 15|NoseRight 41 | # 16|RightEar 42 | # 17|MouthLeftCorner 43 | # 18|MouthCenter 44 | # 19|MouthRightCorner 45 | # 20|ChinCenter 46 | 47 | select_string = "faceimages.filepath, faces.face_id, facepose.roll, facepose.pitch, facepose.yaw, facerect.x, facerect.y, facerect.w, facerect.h,faceimages.image_id,facemetadata.sex" 48 | from_string = "faceimages, faces, facepose, facerect,facemetadata" 49 | where_string = "faces.face_id = facepose.face_id and faces.file_id = faceimages.file_id and faces.face_id = facerect.face_id and faces.face_id = facemetadata.face_id" 50 | query_string = "SELECT " + select_string + " FROM " + from_string + " WHERE " + where_string 51 | 52 | 53 | 54 | 55 | conn = sqlite3.connect('/home/shashank/Documents/CSE-252C/AFLW/aflw/data/aflw.sqlite') 56 | c = conn.cursor() 57 | 58 | img_path = '/home/shashank/Documents/CSE-252C/AFLW/' 59 | loc_file_path = '/home/shashank/Documents/CSE-252C/hyperface/code/locations_test/' 60 | tfrecords_train_filename = 'test_check.tfrecords' 61 | tfrecords_test_filename = 'aflw_test_new.tfrecords' 62 | 63 | writer_train = tf.python_io.TFRecordWriter(tfrecords_train_filename) 64 | writer_test = tf.python_io.TFRecordWriter(tfrecords_test_filename) 65 | 66 | def _bytes_feature(value): 67 | return 
tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 68 | 69 | def _float_feature(value): 70 | return tf.train.Feature(float_list=tf.train.FloatList(value=[value])) 71 | 72 | def _int64_feature(value): 73 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 74 | 75 | def test_names(): 76 | l=[] 77 | names = os.listdir(img_path+'0') 78 | random.shuffle(names) 79 | l.append(['0/'+name for name in names[:300]]) 80 | 81 | names = os.listdir(img_path+'2') 82 | random.shuffle(names) 83 | l.append(['2/'+name for name in names[:300]]) 84 | 85 | names = os.listdir(img_path+'3') 86 | random.shuffle(names) 87 | l.append(['3/'+name for name in names[:400]]) 88 | 89 | return l[0]+l[1]+l[2] 90 | 91 | def make_tfrecord(test_images): 92 | 93 | it_test =0 94 | it_train = 0 95 | gender_dict={'m':1,'f':0} 96 | 97 | for row in (c.execute(query_string)): 98 | ''' 99 | row[0] = image path str 100 | row[1] = face id int 101 | row[2] = roll float 102 | row[3] = pitch float 103 | row[4] = yaw float 104 | row[5] = x int 105 | row[6] = y int 106 | row[7] = w int 107 | row[8] = h int 108 | ''' 109 | 110 | 111 | center_x = float(row[5]) + float(row[7])/2 112 | center_y = float(row[6]) + float(row[8])/2 113 | 114 | 115 | if not os.path.exists(loc_file_path+str(row[1])): 116 | continue 117 | 118 | select_str = "coords.feature_id, coords.x, coords.y" 119 | from_str = "featurecoords coords" 120 | where_str = "coords.face_id = {}".format(row[1]) 121 | query_str = "SELECT " + select_str + " FROM " + from_str + " WHERE " + where_str 122 | landmark = np.zeros((21,2)).astype(np.float32) 123 | visibility = np.zeros((21,1)).astype(np.int32) 124 | 125 | c2 = conn.cursor() 126 | 127 | for xx in c2.execute(query_str): 128 | landmark[xx[0]-1][0] = xx[1]#(xx[1] - center_x)/float(row[7]) 129 | landmark[xx[0]-1][1] = xx[2]#(xx[2] - center_y)/float(row[8]) 130 | visibility[xx[0]-1] = 1 131 | landmark = landmark.reshape(-1,42) 132 | 133 | c2.close() 134 | 135 | try: 136 | 137 | img_raw = (np.asarray(cv2.imread(img_path+row[0])).astype(np.float32))/255.0 138 | cv2.imwrite('save_im.jpg',img_raw*255) 139 | landmark_pos = None 140 | 141 | if len(img_raw.shape) !=3: 142 | continue#img_raw = color.gray2rgb(img_raw) 143 | if len(img_raw.shape) !=3 or img_raw.shape[2] != 3: 144 | continue 145 | print row[1] 146 | 147 | w = img_raw.shape[1] 148 | h = img_raw.shape[0] 149 | if os.path.isfile(loc_file_path+str(row[1])+'/positive.npy'): 150 | pos_locs = np.load(loc_file_path+str(row[1])+'/positive.npy')[:,:4] 151 | cof_locs = np.tile(np.load(loc_file_path+str(row[1])+'/positive.npy')[:,4:6],(1,21)) 152 | dim_locs = np.tile(np.load(loc_file_path+str(row[1])+'/positive.npy')[:,6:8],(1,21)) 153 | n_pos_locs = pos_locs.shape[0] 154 | 155 | landmark_pos = (landmark - cof_locs)/dim_locs 156 | visibility_pos = np.ones((landmark_pos.shape[0],21)) 157 | visibility_pos[(np.where(landmark_pos > 0.5)[0],np.where(landmark_pos > 0.5)[1]/2)] = 0 158 | visibility_pos[(np.where(landmark_pos < -0.5)[0],np.where(landmark_pos < -0.5)[1]/2)] = 0 159 | 160 | # visibility_pos[np.where(landmark_pos)] 161 | pos_locs = pos_locs.astype(np.float32).tostring() 162 | 163 | # if pos_locs.shape[0] > 0: 164 | # pos_locs = np.concatenate([pos_locs,np.asarray([row[6]/float(h),row[5]/float(w), 165 | # (row[6]+row[8])/float(h),(row[5]+row[7])/float(w)]).reshape(1,4)],axis=0) 166 | 167 | # n_pos_locs = pos_locs.shape[0] 168 | 169 | # pos_locs = pos_locs.astype(np.float32).tostring() 170 | # else: 171 | # pos_locs = 
np.asarray([[row[6]/float(h),row[5]/float(w),(row[6]+row[8])/float(h),(row[5]+row[7])/float(w)]]).reshape(1,4) 172 | # n_pos_locs = pos_locs.shape[0] 173 | # pos_locs = pos_locs.astype(np.float32).tostring() 174 | 175 | # else: 176 | # pos_locs = np.asarray([[row[6]/float(h),row[5]/float(w),(row[6]+row[8])/float(h),(row[5]+row[7])/float(w)]]).reshape(1,4) 177 | # n_pos_locs = pos_locs.shape[0] 178 | # pos_locs = pos_locs.astype(np.float32).tostring() 179 | 180 | 181 | 182 | if os.path.isfile(loc_file_path+str(row[1])+'/negative.npy'): 183 | neg_locs = np.load(loc_file_path+str(row[1])+'/negative.npy')[:,:4] 184 | n_neg_locs = neg_locs.shape[0] 185 | cof_locs = np.tile(np.load(loc_file_path+str(row[1])+'/negative.npy')[:,4:6],(1,21)) 186 | dim_locs = np.tile(np.load(loc_file_path+str(row[1])+'/negative.npy')[:,6:8],(1,21)) 187 | 188 | landmark_neg = (landmark - cof_locs)/dim_locs 189 | visibility_neg = np.zeros((landmark_neg.shape[0],21)) 190 | 191 | # visibility_pos[np.where(landmark_pos)] 192 | neg_locs = neg_locs.astype(np.float32).tostring() 193 | 194 | all_landmarks = np.concatenate([landmark_pos,landmark_neg],axis=0) 195 | all_visibilities = np.concatenate([visibility_pos,visibility_neg],axis=0) 196 | all_landmarks = all_landmarks.astype(np.float32).tostring() 197 | all_visibilities = all_visibilities.astype(np.int32).tostring() 198 | 199 | img_raw = img_raw.tostring() 200 | 201 | print "{},{}".format(n_pos_locs,n_neg_locs) 202 | 203 | pose_array = np.asarray([row[2],row[3],row[4]]).astype(np.float32) 204 | 205 | 206 | pose_array = pose_array.tostring() 207 | # landmark = landmark.tostring() 208 | # visibility=visibility.tostring() 209 | 210 | 211 | example = tf.train.Example(features=tf.train.Features(feature={ 212 | 'image_raw':_bytes_feature(img_raw), 213 | 'width': _int64_feature(w), 214 | 'height': _int64_feature(h), 215 | 'face_id': _int64_feature(row[1]), 216 | 'pose': _bytes_feature(pose_array), 217 | 'loc_x': _int64_feature(row[5]), 218 | 'loc_y': _int64_feature(row[6]), 219 | 'loc_w': _int64_feature(row[7]), 220 | 'loc_h': _int64_feature(row[8]), 221 | 'gender':_int64_feature(gender_dict[row[10]]), 222 | 'landmarks':_bytes_feature(all_landmarks), 223 | 'visibility':_bytes_feature(all_visibilities), 224 | 'pos_locs':_bytes_feature(pos_locs), 225 | 'neg_locs':_bytes_feature(neg_locs), 226 | 'n_pos_locs':_int64_feature(n_pos_locs), 227 | 'n_neg_locs':_int64_feature(n_neg_locs) 228 | })) 229 | 230 | writer_train.write(example.SerializeToString()) 231 | it_train += 1 232 | break 233 | # if it_train >= 1: 234 | # break 235 | # if row[0] in test_images: 236 | # writer_test.write(example.SerializeToString()) 237 | # it_test += 1 238 | # else: 239 | # writer_train.write(example.SerializeToString()) 240 | # it_train += 1 241 | 242 | except Exception as e: 243 | exc_type, exc_obj, exc_tb = sys.exc_info() 244 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 245 | print(exc_type, fname, exc_tb.tb_lineno) 246 | 247 | 248 | print it_test,it_train 249 | c.close() 250 | writer_train.close() 251 | writer_test.close() 252 | 253 | def extract_tfrecord(): 254 | record_iterator = tf.python_io.tf_record_iterator(path=tfrecords_train_filename) 255 | count =0 256 | for string_record in tqdm(record_iterator): 257 | 258 | count += 1 259 | example = tf.train.Example() 260 | example.ParseFromString(string_record) 261 | 262 | img_string = example.features.feature['image_raw'].bytes_list.value[0] 263 | landmark_string = example.features.feature['landmarks'].bytes_list.value[0] 264 | 
landmarks = np.fromstring(landmark_string, dtype=np.float32).reshape(21,2) 265 | img_width = int(example.features.feature['width'].int64_list.value[0]) 266 | img_height = int(example.features.feature['height'].int64_list.value[0]) 267 | 268 | img_2 = np.fromstring(img_string, dtype=np.uint8).reshape(-1,1) 269 | 270 | img_1d = np.fromstring(img_string, dtype=np.uint8).reshape(img_height,img_width,3) 271 | print img_1d.shape 272 | loc_x = int(example.features.feature['loc_x'].int64_list.value[0]) 273 | loc_y = int(example.features.feature['loc_y'].int64_list.value[0]) 274 | loc_w = int(example.features.feature['loc_w'].int64_list.value[0]) 275 | loc_h = int(example.features.feature['loc_h'].int64_list.value[0]) 276 | sex = int(example.features.feature['gender'].int64_list.value[0]) 277 | 278 | 279 | # center_x = img_width/2.0 280 | # center_y = img_height/2.0 281 | 282 | # centers = np.tile(np.array([center_x,center_y]).reshape(1,2),(21,1)) 283 | # normalized = landmarks - centers 284 | # w_h = np.tile(np.array([img_width,img_height]).reshape(1,2),(21,1)) 285 | 286 | # normalized = normalized/w_h 287 | 288 | # for i in range(normalized.shape[0]): 289 | # if i == 5 or i == 9 or i==15 or i==16: 290 | # continue 291 | # point_x = normalized[i][0]*img_width + img_width/2.0 292 | # point_y = normalized[i][1]*img_height + img_height/2.0 293 | 294 | # cv2.circle(img_1d,(int(point_x),int(point_y)), 1, (0,0,255), 2) 295 | 296 | # cv2.rectangle(img_1d,(loc_x,loc_y),(loc_x+loc_w,loc_y+loc_h),(0,255,0),3) 297 | # cv2.imshow('result',img_1d) 298 | # cv2.waitKey(0) 299 | 300 | 301 | 302 | if __name__ == '__main__': 303 | test_images = test_names() 304 | print len(test_images) 305 | make_tfrecord(test_images) 306 | #extract_tfrecord() 307 | 308 | -------------------------------------------------------------------------------- /new_files/irp.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import tensorflow as tf 4 | # import pdb 5 | # pdb.set_trace() 6 | aflw_template_landmark_coords=np.array([[-0.479962468147, 0.471864163876],[-0.30303606391, 0.508996844292],[-0.106451146305, 0.498075485229],[0.106451146305, 0.498075485229],[0.30303606391, 0.508996844292],[0.479962468147, 0.471864163876],[-0.447198301554, 0.321149080992],[-0.318325966597, 0.325517624617],[-0.163242310286, 0.308043420315],[0.163242310286, 0.308043420315],[0.318325966597, 0.325517624617],[0.447198301554, 0.321149080992],[-0.674257874489, -0.151652157307],[-0.170000001788, -0.075740583241],[0.0, 0.0],[0.170000001788, -0.075740583241],[0.674257874489, -0.151652157307],[-0.272456139326, -0.347239643335],[0.0, -0.336318254471],[0.272456139326, -0.347239643335],[0.0, -0.737950384617]], dtype=np.float32) 7 | # tfrecords_train_filename = '/home/shashank/Documents/CSE-252C/hyperface/code/aflw_train.tfrecords' 8 | 9 | def region_proposal(landmark_pts,visible_landmark_index,image_size,pad=0.1): 10 | 11 | x_template,y_template,w_template,h_template = cv2.boundingRect(aflw_template_landmark_coords) 12 | 13 | x_selective,y_selective,w_selective,h_selective = cv2.boundingRect(landmark_pts.astype(np.float32)) 14 | 15 | x_selective = x_selective - (pad*w_selective)/2.0 16 | y_selective = y_selective - (pad*h_selective)/2.0 17 | w_selective = w_selective *(1+ pad) 18 | h_selective = h_selective *(1+ pad) 19 | 20 | visible_template_landmarks = aflw_template_landmark_coords[visible_landmark_index,:] 21 | 22 | #Now we have got the corresponding points or features in the two images. 
Using 2D Homography, find the projection matrix. 23 | #For the homography we need at least 4 features,hence 24 | if len(visible_landmark_index) < 4: 25 | return (0,0,0,0) 26 | 27 | H,__ = cv2.findHomography(visible_template_landmarks,landmark_pts,cv2.RANSAC) 28 | 29 | if H is None: 30 | return (0,0,0,0) 31 | source_pts = np.asarray([ [x_template,y_template,1.0],[x_template,y_template+h_template,1.0],[x_template+w_template,y_template,1.0],[x_template+w_template,y_template+h_template,1.0] ]).astype(np.float32) 32 | 33 | dst_points = np.dot(H,source_pts.T) 34 | dst_points = dst_points/dst_points[2,:] 35 | dst_points = dst_points[:2,:] 36 | 37 | 38 | min_x_proposed = np.min(dst_points[0,:]) 39 | min_y_proposed = np.min(dst_points[1,:]) 40 | 41 | max_x_proposed = np.max(dst_points[0,:]) 42 | max_y_proposed = np.max(dst_points[1,:]) 43 | 44 | w_proposed = max_x_proposed - min_x_proposed 45 | h_proposed = max_y_proposed - min_y_proposed 46 | 47 | final_x1 = min(min_x_proposed,x_selective) 48 | final_y1 = min(min_y_proposed,y_selective) 49 | 50 | final_x2 = max(max_x_proposed,x_selective+w_selective) 51 | final_y2 = max(max_y_proposed,y_selective+h_selective) 52 | 53 | final_x1 = max(final_x1,0) 54 | final_y1 = max(final_y1,0) 55 | 56 | final_x2 = min(image_size[1],final_x2) 57 | final_y2 = min(image_size[0],final_y2) 58 | 59 | return (final_y1,final_x1,final_y2,final_x2) 60 | 61 | # def extract_tfrecord(): 62 | # record_iterator = tf.python_io.tf_record_iterator(path=tfrecords_train_filename) 63 | 64 | # for string_record in record_iterator: 65 | # example = tf.train.Example() 66 | # example.ParseFromString(string_record) 67 | 68 | # img_string = example.features.feature['image_raw'].bytes_list.value[0] 69 | # landmark_string = example.features.feature['landmarks'].bytes_list.value[0] 70 | # landmarks = np.fromstring(landmark_string, dtype=np.float32).reshape(21,2) 71 | # img_width = int(example.features.feature['width'].int64_list.value[0]) 72 | # img_height = int(example.features.feature['height'].int64_list.value[0]) 73 | # img_1d = np.fromstring(img_string, dtype=np.uint8).reshape(img_height,img_width,3) 74 | # loc_x = int(example.features.feature['loc_x'].int64_list.value[0]) 75 | # loc_y = int(example.features.feature['loc_y'].int64_list.value[0]) 76 | # loc_w = int(example.features.feature['loc_w'].int64_list.value[0]) 77 | # loc_h = int(example.features.feature['loc_h'].int64_list.value[0]) 78 | # sex = int(example.features.feature['sex'].int64_list.value[0]) 79 | 80 | # center_x = loc_x + (loc_w/2.0) 81 | # center_y = loc_y + (loc_h/2.0) 82 | 83 | # centers = np.tile(np.array([center_x,center_y]).reshape(1,2),(21,1)) 84 | # normalized = landmarks - centers 85 | # w_h = np.tile(np.array([loc_w,loc_h]).reshape(1,2),(21,1)) 86 | 87 | # normalized = normalized/w_h 88 | # landmarks_for_irp =[] 89 | # visibility_for_irp=[] 90 | 91 | # for i in range(normalized.shape[0]): 92 | # if (landmarks[i][0] == 0.0) and (landmarks[i][0] == 0.0) : 93 | # visibility_for_irp.append([0]) 94 | # continue 95 | # else: 96 | # visibility_for_irp.append([1]) 97 | # point_x = normalized[i][0]*loc_w + center_x 98 | # point_y = normalized[i][1]*loc_h + center_y 99 | # landmarks_for_irp.append([point_x,point_y]) 100 | 101 | # #cv2.circle(img_1d,(int(point_x),int(point_y)), 1, (0,0,255), 2) 102 | # landmarks_for_irp = np.asarray(landmarks_for_irp) 103 | # visibility_for_irp = np.asarray(visibility_for_irp) 104 | # l1,l2,l3,l4 = region_proposal(landmarks_for_irp,visibility_for_irp,(img_width,img_height)) 105 | # 
cv2.rectangle(img_1d,(int(l1),int(l2)),(int(l3),int(l4)),(0,255,0),3) 106 | # cv2.imshow('result',img_1d) 107 | # cv2.waitKey(0) 108 | 109 | # if __name__ == '__main__': 110 | # extract_tfrecord() 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /new_files/lnms.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | # import pdb 5 | # pdb.set_trace() 6 | def fast_nms(ip_boxes, ov_threshold): 7 | 8 | if len(ip_boxes) == 0: 9 | return None 10 | 11 | #Save the Area Computation 12 | area = (ip_boxes[:,0] - ip_boxes[:,2])*(ip_boxes[:,1] - ip_boxes[:,3]) 13 | area = area.reshape(-1,1) 14 | 15 | #sorted_y_index = np.argsort(ip_boxes[:,3]) 16 | sorted_y_index = np.argsort(area[:,0]) 17 | keep = {} 18 | 19 | while len(sorted_y_index) > 0: 20 | index = sorted_y_index[-1] 21 | 22 | to_find = sorted_y_index[:-1] 23 | x1 = np.maximum(ip_boxes[to_find,0],ip_boxes[index,0]) 24 | x2 = np.maximum(np.minimum(ip_boxes[to_find,2],ip_boxes[index,2]),x1) 25 | y1 = np.maximum(ip_boxes[to_find,1],ip_boxes[index,1]) 26 | y2 = np.maximum(np.minimum(ip_boxes[to_find,3],ip_boxes[index,3]),y1) 27 | w = x2 - x1 28 | h = y2 - y1 29 | intersection_area = (w*h).reshape(-1,1) 30 | total_area = (ip_boxes[to_find,2] - ip_boxes[to_find,0]).reshape(-1,1)*(ip_boxes[to_find,3] - ip_boxes[to_find,1]).reshape(-1,1) + (ip_boxes[index,2] - ip_boxes[index,0]).reshape(-1,1)*(ip_boxes[index,3] - ip_boxes[index,1]).reshape(-1,1) - intersection_area 31 | #overlap = intersection_area/(area[to_find,:]+1e-5) 32 | overlap = intersection_area/total_area 33 | keep[index]=list(to_find[np.where(overlap >ov_threshold)[0]]) 34 | keep[index].append(index) 35 | 36 | sorted_y_index= np.delete(sorted_y_index,np.concatenate([[len(sorted_y_index)-1],np.where(overlap > ov_threshold)[0]])) 37 | 38 | return keep 39 | 40 | # if __name__ == '__main__': 41 | # a = np.load('/home/shashank/Documents/CSE-252C/chainer_ref/hyperface/ip1.npy') 42 | # x1 = a[:,0].reshape(-1,1) 43 | # y1 = a[:,1].reshape(-1,1) 44 | # x2 = a[:,2].reshape(-1,1) 45 | # y2 = a[:,3].reshape(-1,1) 46 | # x2 = x2.reshape(-1,1) + x1 47 | # y2 = y2.reshape(-1,1) + y1 48 | # a = np.concatenate([x1,y1,x2,y2],axis=1) 49 | # fast_nms(a,0.2) 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /new_files/main.py: -------------------------------------------------------------------------------- 1 | import vis 2 | import cv2 3 | import tensorflow as tf 4 | import os 5 | import argparse 6 | from skimage import io 7 | from model import * 8 | # import pdb 9 | # pdb.set_trace() 10 | 11 | if not os.path.exists('./logs'): 12 | os.makedirs('./logs') 13 | 14 | map(os.unlink, (os.path.join( './logs',f) for f in os.listdir('./logs')) ) 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('-f','--forward_only',dest='forward_only',help='Test/Train Mode Flag',default=0,type=int) 19 | parser.add_argument('-b','--batchsize',dest='batch_size',help='Batch Size to calculate the number of iterations per epoch',default=32,type=int) 20 | parser.add_argument('-e','--n_epochs',dest='num_epochs',help='Number of Epochs for Training',default=10,type=int) 21 | parser.add_argument('-p','--model_path',dest='model_path',help='Enter the path for the model to use for testing',default=None,type=str) 22 | parser.add_argument('-t','--tf_record_path',dest='tf_record_file_path',help='Enter the path for the Tf 
Record File to use for training',default=None,type=str) 23 | parser.add_argument('-i','--test_image_path',dest='test_image_path',help='Enter the test image path',default=None,type=str) 24 | args = parser.parse_args() 25 | return args 26 | 27 | with tf.Session() as sess: 28 | print "Parsing Argument" 29 | args = parse_args() 30 | print 'Building Graph...' 31 | net = HyperFace(sess,batch_size=args.batch_size,num_epochs=args.num_epochs,forward_only=args.forward_only) 32 | print 'Graph Built!' 33 | sess.run(tf.global_variables_initializer()) 34 | if args.forward_only == 1: 35 | print "Loading Model" 36 | net.load_model(args.model_path) 37 | print "Start Testing" 38 | #img_raw = np.asarray() 39 | img_raw = np.asarray(cv2.imread(args.test_image_path)) 40 | print img_raw.shape 41 | output_set = net.test_hyperface(img_raw) 42 | vis.vis_results(img_raw,output_set) 43 | else: 44 | filename_queue = tf.train.string_input_producer([args.tf_record_file_path], num_epochs=args.num_epochs) 45 | #net.train() 46 | print "Start Training" 47 | 48 | # net.train() 49 | 50 | -------------------------------------------------------------------------------- /new_files/model.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import irp 3 | import lnms 4 | import selective_search 5 | import tensorflow as tf 6 | import tensorflow.contrib.slim as slim 7 | import numpy as np 8 | from ipdb import set_trace as brk 9 | 10 | class HyperFace(object): 11 | 12 | def __init__(self, sess,batch_size=None,num_epochs=None,forward_only=None): 13 | 14 | self.sess = sess 15 | self.forward_only = forward_only 16 | 17 | if self.forward_only == 1: 18 | self.batch_size = None 19 | else: 20 | self.batch_size = batch_size 21 | 22 | self.img_height = 227 23 | self.img_width = 227 24 | self.channel = 3 25 | 26 | self.num_epochs = num_epochs 27 | 28 | # Hyperparameters 29 | self.weight_detect = 1 30 | self.weight_landmarks = 5 31 | self.weight_visibility = 0.5 32 | self.weight_pose = 5 33 | self.weight_gender = 2 34 | self.build_network() 35 | 36 | 37 | def build_network(self): 38 | 39 | self.X = tf.placeholder(tf.float32, [self.batch_size, self.img_height, self.img_width, self.channel], name='images') 40 | self.detection = tf.placeholder(tf.float32, [self.batch_size,2], name='detection') 41 | self.landmarks = tf.placeholder(tf.float32, [self.batch_size, 42], name='landmarks') 42 | self.visibility = tf.placeholder(tf.float32, [self.batch_size,21], name='visibility') 43 | self.pose = tf.placeholder(tf.float32, [self.batch_size,3], name='pose') 44 | self.gender = tf.placeholder(tf.float32, [self.batch_size,2], name='gender') 45 | 46 | self.net_output = self.network(self.X) # (out_detection, out_landmarks, out_visibility, out_pose, out_gender) 47 | if self.forward_only == 0: 48 | loss_detection = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(self.net_output[0], self.detection)) 49 | 50 | visibility_mask = tf.reshape(tf.tile(tf.expand_dims(self.visibility, axis=2), [1,1,2]), [self.batch_size, -1]) 51 | loss_landmarks = tf.reduce_mean(tf.square(visibility_mask*(self.net_output[1] - self.landmarks))) 52 | 53 | loss_visibility = tf.reduce_mean(tf.square(self.net_output[2] - self.visibility)) 54 | loss_pose = tf.reduce_mean(tf.square(self.net_output[3] - self.pose)) 55 | loss_gender = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(self.net_output[4], self.gender)) 56 | 57 | self.loss = self.weight_detect*loss_detection + self.weight_landmarks*loss_landmarks \ 58 | + 
self.weight_visibility*loss_visibility + self.weight_pose*loss_pose \ 59 | + self.weight_gender*loss_gender 60 | 61 | 62 | def train(self): 63 | 64 | optimizer = tf.train.AdamOptimizer().minimize(self.loss) 65 | writer = tf.summary.FileWriter('./logs', self.sess.graph) 66 | loss_summ = tf.summary.scalar('loss', self.loss) 67 | 68 | def network(self,inputs): 69 | 70 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 71 | activation_fn = tf.nn.relu, 72 | weights_initializer = tf.truncated_normal_initializer(0.0, 0.01) ): 73 | 74 | conv1 = slim.conv2d(inputs, 96, [11,11], 4, padding= 'VALID', scope='conv1') 75 | max1 = slim.max_pool2d(conv1, [3,3], 2, padding= 'VALID', scope='max1') 76 | 77 | conv1a = slim.conv2d(max1, 256, [4,4], 4, padding= 'VALID', scope='conv1a') 78 | 79 | conv2 = slim.conv2d(max1, 256, [5,5], 1, scope='conv2') 80 | max2 = slim.max_pool2d(conv2, [3,3], 2, padding= 'VALID', scope='max2') 81 | conv3 = slim.conv2d(max2, 384, [3,3], 1, scope='conv3') 82 | 83 | conv3a = slim.conv2d(conv3, 256, [2,2], 2, padding= 'VALID', scope='conv3a') 84 | 85 | conv4 = slim.conv2d(conv3, 384, [3,3], 1, scope='conv4') 86 | conv5 = slim.conv2d(conv4, 256, [3,3], 1, scope='conv5') 87 | pool5 = slim.max_pool2d(conv5, [3,3], 2, padding= 'VALID', scope='pool5') 88 | 89 | concat_feat = tf.concat( [conv1a, conv3a, pool5],3) 90 | conv_all = slim.conv2d(concat_feat, 192, [1,1], 1, padding= 'VALID', scope='conv_all') 91 | 92 | shape = int(np.prod(conv_all.get_shape()[1:])) 93 | fc_full = slim.fully_connected(tf.reshape(tf.transpose(conv_all, [0,3,1,2]), [-1, shape]), 3072, scope='fc_full') 94 | #fc_full = slim.fully_connected(tf.reshape(conv_all, [-1, shape]), 3072, scope='fc_full') 95 | fc_detection = slim.fully_connected(fc_full, 512, scope='fc_detection') 96 | fc_landmarks = slim.fully_connected(fc_full, 512, scope='fc_landmarks') 97 | fc_visibility = slim.fully_connected(fc_full, 512, scope='fc_visibility') 98 | fc_pose = slim.fully_connected(fc_full, 512, scope='fc_pose') 99 | fc_gender = slim.fully_connected(fc_full, 512, scope='fc_gender') 100 | 101 | out_detection = slim.fully_connected(fc_detection, 2, scope='out_detection',activation_fn = None) 102 | out_landmarks = slim.fully_connected(fc_landmarks, 42, scope='out_landmarks',activation_fn = None) 103 | out_visibility = slim.fully_connected(fc_visibility, 21, scope='out_visibility',activation_fn = None) 104 | out_pose = slim.fully_connected(fc_pose, 3, scope='out_pose',activation_fn = None) 105 | out_gender = slim.fully_connected(fc_gender, 2, scope='out_gender',activation_fn = None) 106 | 107 | 108 | return [out_detection, out_landmarks, out_visibility, out_pose, tf.nn.softmax(out_gender),conv_all] 109 | 110 | def load_from_tfRecord(self,filename_queue): 111 | 112 | reader = tf.TFRecordReader() 113 | _, serialized_example = reader.read(filename_queue) 114 | 115 | features = tf.parse_single_example( 116 | serialized_example, 117 | features={ 118 | 'image_raw':tf.FixedLenFeature([], tf.string), 119 | 'width': tf.FixedLenFeature([], tf.int64), 120 | 'height': tf.FixedLenFeature([], tf.int64), 121 | 'batch_size':tf.FixedLenFeature([], tf.int64) 122 | }) 123 | 124 | image = tf.decode_raw(features['image_raw'], tf.float32) 125 | orig_height = tf.cast(features['height'], tf.int32) 126 | orig_width = tf.cast(features['width'], tf.int32) 127 | batch_size = tf.cast(features['batch_size'], tf.int32) 128 | 129 | image_shape = tf.pack([batch_size,227,227,3]) 130 | image_tf = tf.reshape(image,image_shape) 131 | 132 | images = 
tf.train.shuffle_batch([image_tf],batch_size=self.batch_size,enqueue_many=True,num_threads=1,capacity=50,min_after_dequeue=10) 133 | 134 | return images 135 | 136 | def load_model(self,model_path): 137 | for var in tf.all_variables(): 138 | if var.name.find('weights') != -1: 139 | if var.name.find('conv') != -1: 140 | self.sess.run(var.assign(np.load(model_path+'/'+var.name.split('/')[0]+'/W.npy').transpose(2,3,1,0))) 141 | else: 142 | self.sess.run(var.assign(np.load(model_path+'/'+var.name.split('/')[0]+'/W.npy').T)) 143 | if var.name.find('biases') != -1: 144 | self.sess.run(var.assign(np.load(model_path+'/'+var.name.split('/')[0]+'/b.npy'))) 145 | 146 | print "Done Loading" 147 | 148 | def test_hyperface(self,ip_img,nms_threshold=0.2,irp_count=2): 149 | # 1) Take the input as image 150 | # 2) Run DLIB's selective search on that 151 | # 3) Pass the regions to the trained model 152 | # 4) For all the regions having detection score greater than a threshold. 153 | # 4.1) Perform Iterative Region Proposal on it. 154 | # 5) Use the new localized boxes to perform landmark based LMS 155 | # 6) Again run the network on the localized boxes from the IRP 156 | # 7) Find precision boxes as the min and max of the fids 157 | # 8) Run NMS 158 | # 9) Keep the top k boxes and use the median of each to give the final output 159 | # 10) Apply Face Rect Calculator on the final fids 160 | 161 | ip_img_size = ip_img.shape[0:-1] 162 | total_boxes = None 163 | ip_img = ip_img.astype(np.float32)/255.0 164 | 165 | for i in range(1+irp_count): 166 | if i ==0: 167 | boxes_op,iou_dump,coords_dump = selective_search.perform_selective_search(ip_img.astype(np.float32),ground_truth=None) 168 | 169 | cropped_imgs = tf.image.crop_and_resize(ip_img[np.newaxis,:].astype(np.float32),boxes_op, [0]*boxes_op.shape[0], crop_size=[227,227]).eval(session=self.sess) 170 | # cropped_imgs = np.load('db_imgs.npy') 171 | # cropped_labels = np.load('db_labels.npy') 172 | # cropped_landmarks = np.load('db_landmarks.npy') 173 | brk() 174 | normalized_imgs = cropped_imgs - 0.5 175 | 176 | # a = np.load('/home/shashank/Documents/CSE-252C/chainer_ref/hyperface/gt_ip.npy') 177 | # a = a.transpose(0,2,3,1) 178 | 179 | input_feed={self.X:normalized_imgs} 180 | net_op = self.sess.run([self.net_output],feed_dict=input_feed) 181 | 182 | all_landmarks = np.asarray(net_op[0][1]).reshape(-1,42) 183 | all_landmarks_x = all_landmarks[:,::2].reshape(-1,21) 184 | all_landmarks_y = all_landmarks[:,1::2].reshape(-1,21) 185 | loc_w = (boxes_op[:,3] - boxes_op[:,1])*ip_img_size[1] 186 | loc_h = (boxes_op[:,2] - boxes_op[:,0])*ip_img_size[0] 187 | c_x = boxes_op[:,1]*ip_img_size[1] + loc_w/2.0 188 | c_y = boxes_op[:,0]*ip_img_size[0] + loc_h/2.0 189 | all_landmarks_x = all_landmarks_x*loc_w.reshape(-1,1) + c_x.reshape(-1,1) 190 | all_landmarks_y = all_landmarks_y*loc_h.reshape(-1,1) + c_y.reshape(-1,1) 191 | all_landmarks_x = all_landmarks_x[:,np.newaxis,:] 192 | all_landmarks_y = all_landmarks_y[:,np.newaxis,:] 193 | 194 | all_landmarks = np.concatenate([all_landmarks_x,all_landmarks_y],axis=1) 195 | 196 | detections = np.exp(np.asarray(net_op[0][0]).reshape(-1,2)) 197 | detections = (detections/(np.sum(detections,axis=1).reshape(-1,1)))[:,1].reshape(-1,1) 198 | 199 | interests = np.where(detections>0.25)[0] 200 | 201 | visibility = np.asarray(net_op[0][2]).reshape(-1,21)[interests,:] 202 | visibility_mask = np.zeros_like(visibility) 203 | visibility_mask[np.where(visibility>0.5)] = 1 204 | 205 | landmarks = all_landmarks[interests,:,:].reshape(-1,2,21) 206 
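# --- Iterative Region Proposal (IRP) ---
# For every region whose detection score cleared the 0.25 threshold above, the
# landmarks predicted as visible are fit against the AFLW template landmarks
# with a homography (irp.region_proposal), and the re-projected template box
# becomes a tighter face proposal for the next pass of the network. Proposals
# that collapse to zero width or height are dropped. A rough stand-alone
# sketch of the call, assuming `pts` is a (num_visible, 2) array of landmark
# pixel coordinates and `vis_idx` holds the indices of the visible landmarks:
#
#     y1, x1, y2, x2 = irp.region_proposal(pts, vis_idx, ip_img_size)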
| 207 | detected_boxes =[] 208 | for i in range(len(interests)): 209 | mask = np.where(visibility_mask[i,:]==1)[0] 210 | y1,x1,y2,x2 = irp.region_proposal(landmarks[i,:,mask],mask,ip_img_size) 211 | if (y1 == y2) or (x1 == x2): 212 | continue 213 | detected_boxes.append([y1/float(ip_img_size[0]),x1/float(ip_img_size[1]),y2/float(ip_img_size[0]),x2/float(ip_img_size[1])]) 214 | boxes_op = np.asarray(detected_boxes).astype(np.float32) 215 | 216 | 217 | #DO the final model run 218 | 219 | # cropped_imgs = tf.image.crop_and_resize(ip_img[np.newaxis,:].astype(np.float32),boxes_op, [0]*boxes_op.shape[0], crop_size=[227,227]).eval(session=self.sess) 220 | # normalized_imgs = (cropped_imgs - 127.5)/128.0 221 | # input_feed={self.X:normalized_imgs} 222 | # net_op = self.sess.run([self.net_output],feed_dict=input_feed) 223 | 224 | interests = np.where(detections>0.5)[0] 225 | landmarks = all_landmarks[interests,:,:].reshape(-1,2,21) 226 | visibility = np.asarray(net_op[0][2]).reshape(-1,21)[interests,:] 227 | poses = np.asarray(net_op[0][3]).reshape(-1,3)[interests,:] 228 | genders = np.asarray(net_op[0][4])[:,1].reshape(-1,1)[interests,:] 229 | 230 | visibility_mask = np.zeros_like(visibility) 231 | visibility_mask[np.where(visibility>0.5)] = 1 232 | 233 | # min_x = np.min(landmarks[:,0,visibility_mask],axis=1).reshape(-1,1) 234 | # min_y = np.min(landmarks[:,1,visibility_mask],axis=1).reshape(-1,1) 235 | # max_x = np.max(landmarks[:,0,visibility_mask],axis=1).reshape(-1,1) 236 | # max_y = np.max(landmarks[:,1,visibility_mask],axis=1).reshape(-1,1) 237 | precise_boxes = [] 238 | for i in range(landmarks.shape[0]): 239 | min_x = np.min(landmarks[i,0,np.where(visibility_mask[i,:]==1)[0]]) 240 | min_y = np.min(landmarks[i,1,np.where(visibility_mask[i,:]==1)[0]]) 241 | max_x = np.max(landmarks[i,0,np.where(visibility_mask[i,:]==1)[0]]) 242 | max_y = np.max(landmarks[i,1,np.where(visibility_mask[i,:]==1)[0]]) 243 | precise_boxes.append([min_x,min_y,max_x,max_y]) 244 | precise_boxes = np.asarray(precise_boxes) 245 | 246 | #precise_boxes = np.concatenate([min_x,min_y,max_x,max_y],axis=1) 247 | nms_op_dict = lnms.fast_nms(precise_boxes,nms_threshold) 248 | final_res = {'landmarks':[],'gender':[],'location':[],'pose':[]} 249 | for key in nms_op_dict: 250 | value = nms_op_dict[key] 251 | final_res['gender'].append(np.median(genders[value,:],axis=0)) 252 | final_res['pose'].append(np.median(poses[value,:],axis=0)) 253 | temp = np.median(landmarks[value,:,:],axis=0).T[np.where(np.median(visibility[value,:],axis=0)>0.5)[0],:] 254 | brk() 255 | final_res['landmarks'].append(temp) 256 | y1,x1,y2,x2 = irp.region_proposal(temp,np.where(np.median(visibility[value,:],axis=0)>0.5)[0],ip_img_size) 257 | final_res['location'].append([x1,y1,x2,y2]) 258 | 259 | print "Done" 260 | return final_res 261 | 262 | 263 | 264 | 265 | def print_variables(self): 266 | variables = slim.get_model_variables() 267 | print 'Model Variables:' 268 | for var in variables: 269 | print var.name, ' ', var.get_shape() 270 | 271 | 272 | 273 | 274 | -------------------------------------------------------------------------------- /new_files/selective_search.py: -------------------------------------------------------------------------------- 1 | #!/home/shashank/anaconda2/bin 2 | import dlib 3 | from skimage import io 4 | import cv2 5 | import numpy as np 6 | import tensorflow as tf 7 | import csv 8 | # from multiprocessing import Pool 9 | # from multiprocessing import Manager 10 | # from multiprocessing import Queue 11 | # from 
multiprocessing.dummy import Pool as ThreadPool 12 | import os 13 | import math 14 | import time 15 | from tqdm import tqdm 16 | from pdb import set_trace as brk 17 | # import pdb 18 | # pdb.set_trace() 19 | 20 | DEBUG_FLAG = False 21 | VIS_FLAG = False 22 | MAKE_TF_RECORD = False 23 | tfrecords_full_filename = 'aflw_test.tfrecords' 24 | 25 | record_iterator = tf.python_io.tf_record_iterator(path=tfrecords_full_filename) 26 | tfrecords_training_pos_filename = 'aflw_training_pos.tfrecords' 27 | tfrecords_training_neg_filename = 'aflw_training_neg.tfrecords' 28 | writer_pos = tf.python_io.TFRecordWriter(tfrecords_training_pos_filename) 29 | writer_neg = tf.python_io.TFRecordWriter(tfrecords_training_neg_filename) 30 | 31 | N_TRAIN = 20000 32 | N_TEST = 1000 33 | 34 | if DEBUG_FLAG: 35 | debug_fp = open('debug.csv','wb') 36 | debug_fp_csv = csv.writer(debug_fp) 37 | 38 | def _bytes_feature(value): 39 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 40 | 41 | def _float_feature(value): 42 | return tf.train.Feature(float_list=tf.train.FloatList(value=[value])) 43 | 44 | def _int64_feature(value): 45 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 46 | 47 | 48 | 49 | 50 | def calc_2D_IOU(bb1,bb2): 51 | top_left_x1 = bb1[0] 52 | top_left_y1 = bb1[1] 53 | bottom_right_x1 = bb1[2] 54 | bottom_right_y1 = bb1[3] 55 | 56 | top_left_x2 = bb2[0] 57 | top_left_y2 = bb2[1] 58 | bottom_right_x2 = bb2[2] 59 | bottom_right_y2 = bb2[3] 60 | 61 | intersect_top_left_x = max(bb1[0],bb2[0]) 62 | intersect_top_left_y = max(bb1[1],bb2[1]) 63 | intersect_bottom_right_x = max(min(bb1[2],bb2[2]),intersect_top_left_x) 64 | intersect_bottom_right_y = max(min(bb1[3],bb2[3]),intersect_top_left_y) 65 | 66 | intersect_area = (intersect_bottom_right_x-intersect_top_left_x)*(intersect_bottom_right_y-intersect_top_left_y) 67 | total_area = (bottom_right_x1-top_left_x1)*(bottom_right_y1-top_left_y1) + (bottom_right_x2-top_left_x2)*(bottom_right_y2-top_left_y2) - intersect_area 68 | iou = float(intersect_area)/float(total_area+0.0) 69 | return iou 70 | 71 | def perform_scale_down(image,max_size_allowed): 72 | 73 | orig_h = image.shape[0] 74 | orig_w = image.shape[1] 75 | 76 | new_h = orig_h 77 | new_w = orig_w 78 | 79 | if new_h > max_size_allowed[1]: 80 | new_w = float(new_w*max_size_allowed[1]) / float(new_h) 81 | new_h = max_size_allowed[1] 82 | 83 | if new_w > max_size_allowed[0]: 84 | new_h = float(new_h*max_size_allowed[0]) / float(new_w) 85 | new_w = max_size_allowed[0] 86 | 87 | if new_h != orig_h or new_w != orig_w: 88 | return cv2.resize(image, (int(new_w), int(new_h))),float(orig_h)/float(new_h) 89 | else: 90 | return image,1.0 91 | 92 | def perform_selective_search(img,ground_truth,gt2): 93 | 94 | rects=[] 95 | 96 | max_size = (500,500) 97 | h = float(img.shape[0]) 98 | w = float(img.shape[1]) 99 | img,scale = perform_scale_down(img,max_size) 100 | dlib.find_candidate_object_locations(img, rects, kvals=(50, 200, 2), min_size=1200) 101 | filter_positive_rects=[] 102 | filter_negative_rects_hard=[] 103 | filter_negative_rects_easy=[] 104 | max_negatives = 50 105 | hard_negative_ratio = 0.6 106 | iou_list = [] 107 | filter_negative_rects=[] 108 | for rect in rects: 109 | descaled_top_x = (rect.left()*scale) 110 | descaled_top_y = (rect.top()*scale) 111 | descaled_bottom_x = (rect.right()*scale) 112 | descaled_bottom_y = (rect.bottom()*scale) 113 | descaled_width = descaled_bottom_x - descaled_top_x#int(rect.width()*scale) 114 | descaled_height = descaled_bottom_y - 
descaled_top_y #int(rect.height()*scale) 115 | descaled_center_x = descaled_top_x + (descaled_width/2.0) 116 | descaled_center_y = descaled_top_y + (descaled_height/2.0) 117 | 118 | #iou,a1,a2 = rect_overlap_rate(gt2,(descaled_top_x,descaled_top_y,descaled_width,descaled_height)) 119 | iou = calc_2D_IOU(ground_truth,(descaled_top_x,descaled_top_y,descaled_bottom_x,descaled_bottom_y)) 120 | 121 | iou_list.append(iou) 122 | if DEBUG_FLAG: 123 | debug_fp_csv.writerow([iou,ground_truth[0],ground_truth[1],ground_truth[2],ground_truth[3],rect.left(),rect.top(),rect.right(),rect.bottom()]) 124 | if iou > 0.50: 125 | if VIS_FLAG: 126 | filter_positive_rects.append([int(descaled_top_x),int(descaled_top_y),int(descaled_bottom_x),int(descaled_bottom_y)]) 127 | else: 128 | filter_positive_rects.append([descaled_top_y/h,descaled_top_x/w,descaled_bottom_y/h,descaled_bottom_x/w, 129 | descaled_center_x,descaled_center_y,descaled_width,descaled_height]) 130 | elif iou <= 0.0: 131 | if VIS_FLAG: 132 | filter_negative_rects.append([int(descaled_top_x),int(descaled_top_y),int(descaled_bottom_x),int(descaled_bottom_y)]) 133 | else: 134 | filter_negative_rects.append([descaled_top_y/h,descaled_top_x/w,descaled_bottom_y/h,descaled_bottom_x/w, 135 | descaled_center_x,descaled_center_y,descaled_width,descaled_height]) 136 | # elif 0.25 <= iou < 0.35: 137 | # if VIS_FLAG: 138 | # filter_negative_rects_hard.append([int(descaled_top_x),int(descaled_top_y),int(descaled_bottom_x),int(descaled_bottom_y)]) 139 | # else: 140 | # filter_negative_rects_hard.append([descaled_top_y/h,descaled_top_x/w,descaled_bottom_y/h,descaled_bottom_x/w]) 141 | # elif iou < 0.25: 142 | # if VIS_FLAG: 143 | # filter_negative_rects_easy.append([int(descaled_top_x),int(descaled_top_y),int(descaled_bottom_x),int(descaled_bottom_y)]) 144 | # else: 145 | # filter_negative_rects_easy.append([descaled_top_y/h,descaled_top_x/w,descaled_bottom_y/h,descaled_bottom_x/w]) 146 | 147 | # if len(filter_negative_rects_easy) + len(filter_negative_rects_hard) < max_negatives: 148 | # filter_negative_rects = np.concatenate([np.asarray(filter_negative_rects_easy),np.asarray(filter_negative_rects_hard)],axis=0) 149 | # filter_negative_rects = filter_negative_rects.tolist() 150 | # else: 151 | # if len(filter_negative_rects_hard) < int(hard_negative_ratio*max_negatives): 152 | # index = np.random.choice(np.arange(len(filter_negative_rects_easy)),max_negatives -len(filter_negative_rects_hard) 153 | # ,replace=False) 154 | # filter_negative_rects_easy = np.asarray(filter_negative_rects_easy)[index,:].tolist() 155 | # elif len(filter_negative_rects_easy) < int((1-hard_negative_ratio)*max_negatives): 156 | # index = np.random.choice(np.arange(len(filter_negative_rects_hard)),max_negatives -len(filter_negative_rects_easy), 157 | # replace=False) 158 | # filter_negative_rects_hard = np.asarray(filter_negative_rects_hard)[index,:].tolist() 159 | # else: 160 | # index = np.random.choice(np.arange(len(filter_negative_rects_hard)),int(hard_negative_ratio*max_negatives),replace=False) 161 | # filter_negative_rects_hard = np.asarray(filter_negative_rects_hard)[index,:].tolist() 162 | # index = np.random.choice(np.arange(len(filter_negative_rects_easy)),int((1-hard_negative_ratio)*max_negatives),replace=False) 163 | # filter_negative_rects_easy = np.asarray(filter_negative_rects_easy)[index,:].tolist() 164 | # filter_negative_rects = np.concatenate([np.asarray(filter_negative_rects_easy),np.asarray(filter_negative_rects_hard)],axis=0) 165 | # filter_negative_rects = 
filter_negative_rects.tolist() 166 | 167 | # Jittering the ground truth 168 | 169 | gt_top_x1 = ground_truth[0] 170 | gt_top_y1 = ground_truth[1] 171 | gt_bottom_x2 = ground_truth[2] 172 | gt_bottom_y2 = ground_truth[3] 173 | 174 | gt_w = gt_bottom_x2 - gt_top_x1 175 | gt_h = gt_bottom_y2 - gt_top_y1 176 | 177 | w_list = np.arange(-0.5*gt_w,0.5*gt_w,0.1*gt_w).tolist() 178 | h_list = np.arange(-0.5*gt_h,0.5*gt_h,0.1*gt_h).tolist() 179 | 180 | for w_shift in w_list: 181 | for h_shift in h_list: 182 | new_x1 = gt_top_x1 + w_shift 183 | new_y1 = gt_top_y1 + h_shift 184 | new_x2 = gt_bottom_x2 + w_shift 185 | new_y2 = gt_bottom_y2 + h_shift 186 | 187 | if new_x1 < 0.0: 188 | new_x1 = 0.0 189 | elif new_x1 > w : 190 | new_x1 = w 191 | 192 | if new_y1 < 0.0: 193 | new_y1 = 0.0 194 | elif new_y1 > h : 195 | new_y1 = h 196 | 197 | if new_x2 < 0.0: 198 | new_x2 = 0.0 199 | elif new_x2 > w : 200 | new_x2 = w 201 | 202 | if new_y2 < 0.0: 203 | new_y2 = 0.0 204 | elif new_y2 > h : 205 | new_y2 = h 206 | 207 | iou = calc_2D_IOU(ground_truth,(new_x1,new_y1,new_x2,new_y2)) 208 | if iou > 0.50: 209 | if VIS_FLAG: 210 | filter_positive_rects.append([int(new_x1),int(new_y1),int(new_x2),int(new_y2)]) 211 | else: 212 | descaled_width = new_x2 - new_x1#int(rect.width()*scale) 213 | descaled_height = new_y2 - new_y1#int(rect.height()*scale) 214 | descaled_center_x = new_x1 + (descaled_width/2.0) 215 | descaled_center_y = new_y1 + (descaled_height/2.0) 216 | filter_positive_rects.append([(new_y1)/h,(new_x1)/w,(new_y2)/h,(new_x2)/w, 217 | descaled_center_x,descaled_center_y,descaled_width,descaled_height]) 218 | 219 | if VIS_FLAG: 220 | return filter_positive_rects,filter_negative_rects 221 | else: 222 | return np.asarray(filter_positive_rects).astype(np.float32),np.asarray(filter_negative_rects).astype(np.float32) 223 | 224 | def visualise(img,rects,gt): 225 | 226 | 227 | for rect in rects: 228 | #new_img = img 229 | r,g,b = np.random.randint(0,255,3) 230 | 231 | cv2.rectangle(img,(rect[0],rect[1]),(rect[2],rect[3]),(b,g,r),2) 232 | cv2.imshow('result',img) 233 | cv2.namedWindow('result', cv2.WINDOW_NORMAL) 234 | cv2.resizeWindow('result', 320,240) 235 | 236 | 237 | cv2.rectangle(img,(gt[0],gt[1]),(gt[2],gt[3]),(0,255,0),1) 238 | cv2.imshow('result',img/255.0) 239 | cv2.waitKey(0) 240 | 241 | 242 | 243 | def extract_tfrecord(it): 244 | try: 245 | example = tf.train.Example() 246 | example.ParseFromString(it) 247 | session = tf.Session() 248 | img_string = example.features.feature['image_raw'].bytes_list.value[0] 249 | img_width = int(example.features.feature['width'].int64_list.value[0]) 250 | img_height = int(example.features.feature['height'].int64_list.value[0]) 251 | 252 | img_2d = np.fromstring(img_string, dtype=np.uint8).reshape(img_height,img_width,3) 253 | 254 | loc_x = int(example.features.feature['loc_x'].int64_list.value[0]) 255 | loc_y = int(example.features.feature['loc_y'].int64_list.value[0]) 256 | loc_w = int(example.features.feature['loc_w'].int64_list.value[0]) 257 | loc_h = int(example.features.feature['loc_h'].int64_list.value[0]) 258 | face_id = int(example.features.feature['face_id'].int64_list.value[0]) 259 | 260 | landmark_string = example.features.feature['landmarks'].bytes_list.value[0] 261 | landmarks = np.fromstring(landmark_string, dtype=np.float32).reshape(21,2) 262 | sex = int(example.features.feature['sex'].int64_list.value[0]) 263 | roll = float(example.features.feature['roll'].float_list.value[0]) 264 | pitch = float(example.features.feature['pitch'].float_list.value[0]) 265 | 
yaw = float(example.features.feature['yaw'].float_list.value[0]) 266 | 267 | hard_postives,hard_negatives = perform_selective_search(img_2d,(loc_x,loc_y,loc_x+loc_w,loc_y+loc_h),(loc_x,loc_y,loc_w,loc_h)) 268 | print "****************************" 269 | if os.path.exists('locations_test/'+str(face_id)): 270 | print face_id 271 | np.save('locations_test/'+str(face_id)+'/positive.npy',hard_postives) 272 | np.save('locations_test/'+str(face_id)+'/negative.npy',hard_negatives) 273 | else: 274 | os.mkdir('locations_test/'+str(face_id)) 275 | np.save('locations_test/'+str(face_id)+'/positive.npy',hard_postives) 276 | np.save('locations_test/'+str(face_id)+'/negative.npy',hard_negatives) 277 | 278 | if VIS_FLAG: 279 | 280 | visualise(img_2d,hard_postives,(loc_x,loc_y,loc_x+loc_w,loc_y+loc_h)) 281 | 282 | if MAKE_TF_RECORD: 283 | 284 | if hard_postives.shape[0] > 0: 285 | 286 | resized_and_cropped_image_pos = tf.image.crop_and_resize(img_2d[np.newaxis,:].astype(np.float32),hard_postives, [0]*hard_postives.shape[0], crop_size=[227,227]).eval(session=session) 287 | np.save('positive.npy',resized_and_cropped_image_pos) 288 | # np.random.shuffle(resized_and_cropped_image_pos) 289 | # resized_and_cropped_image_pos = resized_and_cropped_image_pos[:40,:,:,:] 290 | 291 | # example_pos = tf.train.Example(features=tf.train.Features(feature={ 292 | # 'image_raw':_bytes_feature(resized_and_cropped_image_pos.astype(np.uint8).tostring()), 293 | # 'width': _int64_feature(img_2d.shape[1]), 294 | # 'height': _int64_feature(img_2d.shape[0]), 295 | # 'batch_size': _int64_feature(resized_and_cropped_image_pos.shape[0]), 296 | # 'roll': _float_feature(roll), 297 | # 'pitch':_float_feature(pitch), 298 | # 'yaw':_float_feature(yaw), 299 | # 'landmarks':_bytes_feature(landmarks.tostring()), 300 | # 'gender':_int64_feature(sex), 301 | # 'locations':_bytes_feature(hard_postives.tostring()) 302 | # })) 303 | # writer_pos.write(example_pos.SerializeToString()) 304 | #np.save('pos_files/'+str(face_id)+'.npy',resized_and_cropped_image_pos) 305 | 306 | if hard_negatives.shape[0] > 0: 307 | 308 | resized_and_cropped_image_neg = tf.image.crop_and_resize(img_2d[np.newaxis,:].astype(np.float32),hard_negatives, [0]*hard_negatives.shape[0], crop_size=[227,227]).eval(session=session) 309 | np.save('negative.npy',resized_and_cropped_image_neg) 310 | 311 | # np.random.shuffle(resized_and_cropped_image_neg) 312 | # resized_and_cropped_image_neg = resized_and_cropped_image_neg[:40,:,:,:] 313 | 314 | # example_neg = tf.train.Example(features=tf.train.Features(feature={ 315 | # 'image_raw':_bytes_feature(resized_and_cropped_image_neg.astype(np.uint8).tostring()), 316 | # 'width': _int64_feature(img_2d.shape[1]), 317 | # 'height': _int64_feature(img_2d.shape[0]), 318 | # 'batch_size': _int64_feature(resized_and_cropped_image_neg.shape[0]), 319 | # 'roll': _float_feature(roll), 320 | # 'pitch':_float_feature(pitch), 321 | # 'yaw':_float_feature(yaw), 322 | # 'landmarks':_bytes_feature(landmarks.tostring()), 323 | # 'gender':_int64_feature(sex), 324 | # 'locations':_bytes_feature(hard_negatives.tostring()) 325 | # })) 326 | # writer_neg.write(example_neg.SerializeToString()) 327 | return 1 328 | except Exception as e: 329 | print e 330 | return 0 331 | #np.save('neg_files/'+str(face_id)+'.npy',resized_and_cropped_image_neg) 332 | 333 | def listener(q): 334 | tfrecords_training_pos_filename = 'aflw_training_pos.tfrecords' 335 | tfrecords_training_neg_filename = 'aflw_training_neg.tfrecords' 336 | 337 | writer_pos = 
tf.python_io.TFRecordWriter(tfrecords_training_pos_filename) 338 | writer_neg = tf.python_io.TFRecordWriter(tfrecords_training_neg_filename) 339 | #f = open('check.txt','wb') 340 | while(1): 341 | m = q.get() 342 | if m == 'kill': 343 | break 344 | writer_pos.write(m.SerializeToString()) 345 | #f.close() 346 | writer_neg.close() 347 | writer_pos.close() 348 | 349 | if __name__ == '__main__': 350 | 351 | #pool = Pool(processes=4) 352 | 353 | # manager = Manager() 354 | # q = manager.Queue() 355 | # watcher = pool.apply_async(listener, (q,)) 356 | 357 | #start_time = time.clock() 358 | # jobs =[] 359 | # for i in range(10): 360 | # job = pool.apply_async(extract_tfrecord,(record_iterator.next(),q)) 361 | # jobs.append(job) 362 | # for job in jobs: 363 | # job.wait() 364 | # job.get() 365 | # q.put('kill') 366 | # pool.close() 367 | # results = [pool.apply_async(extract_tfrecord,args=(record_iterator.next())) for i in range(100)] 368 | # pool.close() 369 | # pool.join() 370 | # results = [p.get() for p in results] 371 | #jobs.append(job) 372 | # for job in jobs: 373 | # job.wait() 374 | # job.get() 375 | 376 | # result = pool.map_async(extract_tfrecord,[record_iterator.next() for i in range(100)]) 377 | # result.get() 378 | #print "Done in {}".format(time.clock() - start_time) 379 | 380 | 381 | while (1): 382 | try: 383 | extract_tfrecord(record_iterator.next()) 384 | 385 | except Exception as e: 386 | print e 387 | break 388 | writer_pos.close() 389 | writer_neg.close() 390 | 391 | 392 | 393 | 394 | 395 | 396 | -------------------------------------------------------------------------------- /new_files/split_tf_record.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import dlib 4 | import cv2 5 | 6 | tf_record_file = 'aflw_train.tfrecords' 7 | 8 | def calc_2D_IOU(bb1,bb2): 9 | top_left_x1 = bb1[0] 10 | top_left_y1 = bb1[1] 11 | bottom_right_x1 = bb1[2] 12 | bottom_right_y1 = bb1[3] 13 | 14 | top_left_x2 = bb2[0] 15 | top_left_y2 = bb2[1] 16 | bottom_right_x2 = bb2[2] 17 | bottom_right_y2 = bb2[3] 18 | 19 | intersect_top_left_x = max(bb1[0],bb2[0]) 20 | intersect_top_left_y = max(bb1[1],bb2[1]) 21 | intersect_bottom_right_x = max(min(bb1[2],bb2[2]),intersect_top_left_x) 22 | intersect_bottom_right_y = max(min(bb1[3],bb2[3]),intersect_top_left_y) 23 | 24 | intersect_area = (intersect_bottom_right_x-intersect_top_left_x+1)*(intersect_bottom_right_y-intersect_top_left_y+1) 25 | total_area = (bottom_right_x1-top_left_x1+1)*(bottom_right_y1-top_left_y1+1) + (bottom_right_x2-top_left_x2+1)*(bottom_right_y2-top_left_y2+1) - intersect_area 26 | iou = float(intersect_area)/float(total_area+0.0) 27 | return iou 28 | 29 | def perform_scale_down(image,max_size_allowed): 30 | 31 | orig_h = image.shape[0] 32 | orig_w = image.shape[1] 33 | 34 | new_h = orig_h 35 | new_w = orig_w 36 | 37 | if new_h > max_size_allowed(1): 38 | new_w = (new_w*max_size_allowed(1)) / (new_h+0.0) 39 | new_h = max_size_allowed(1) 40 | 41 | if new_w > max_size_allowed(0): 42 | new_h = (new_h*max_size_allowed(0)) / (new_w+0.0) 43 | new_w = max_size_allowed(0) 44 | 45 | if new_h != orig_h or new_w != orig_w: 46 | return cv2.resize(image, (int(new_w), int(new_h))) 47 | else: 48 | return image 49 | 50 | 51 | def perform_selective_search(img,w,h,ground_truth): 52 | rects=[] 53 | max_size=(500,500) 54 | img = perform_scale_down(img,max_size) 55 | 56 | dlib.find_candidate_object_locations(img, rects, kvals=(50, 200, 2), min_size=2200) 57 | 
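# dlib.find_candidate_object_locations is dlib's selective-search routine: it
# segments the image at several granularities (controlled by kvals) and keeps
# candidate regions of at least min_size pixels. Each candidate rectangle is
# labelled below by its overlap with the ground-truth face box: IoU > 0.5 is
# treated as a positive sample, IoU < 0.35 as a negative, and everything in
# between is discarded as ambiguous.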
filter_positive_rects=[] 58 | filter_negative_rects=[] 59 | 60 | for rect in rects: 61 | iou = calc_2D_IOU(ground_truth,(rect.left(),rect.top(),rect.right(),rect.bottom())) 62 | 63 | if DEBUG_FLAG: 64 | debug_fp_csv.writerow([iou,ground_truth[0],ground_truth[1],ground_truth[2],ground_truth[3],rect.left(),rect.top(),rect.right(),rect.bottom()]) 65 | if iou > 0.5: 66 | filter_positive_rects.append([rect.top()/h,rect.left()/w,rect.bottom()/h,rect.right()/w]) 67 | elif iou < 0.35: 68 | filter_negative_rects.append([rect.top()/h,rect.left()/w,rect.bottom()/h,rect.right()/w]) 69 | 70 | return np.asarray(filter_positive_rects),np.asarray(filter_negative_rects) 71 | 72 | def split_(filename_queue): 73 | reader = tf.TFRecordReader() 74 | _, serialized_example = reader.read(filename_queue) 75 | 76 | features = tf.parse_single_example( 77 | serialized_example, 78 | features={ 79 | 'image_raw':tf.FixedLenFeature([], tf.string), 80 | 'width': tf.FixedLenFeature([], tf.int64), 81 | 'height': tf.FixedLenFeature([], tf.int64), 82 | 'batch_size':tf.FixedLenFeature([], tf.int64) 83 | # 'roll':tf.FixedLenFeature([], tf.float32), 84 | # 'pitch':tf.FixedLenFeature([], tf.float32), 85 | # 'yaw':tf.FixedLenFeature([], tf.float32), 86 | # 'gender':tf.FixedLenFeature([], tf.int64), 87 | # 'roll':tf.FixedLenFeature([], tf.float32), 88 | # 'roll':tf.FixedLenFeature([], tf.float32), 89 | # 'landmarks':tf.FixedLenFeature([], tf.string), 90 | # 'locations':tf.FixedLenFeature([], tf.string) 91 | }) 92 | 93 | image = tf.decode_raw(features['image_raw'], tf.uint8) 94 | # locations = tf.decode_raw(features['locations'], tf.float32) 95 | # landmarks = tf.decode_raw(features['landmarks'], tf.float32) 96 | 97 | batch_size = tf.cast(features['batch_size'], tf.int32) 98 | orig_height = tf.cast(features['height'], tf.int32) 99 | orig_width = tf.cast(features['width'], tf.int32) 100 | 101 | image_shape = tf.pack([batch_size,227,227,3]) 102 | 103 | image_tf = tf.reshape(image,image_shape) 104 | 105 | #resized_image = tf.image.resize_image_with_crop_or_pad(image_tf,target_height=500,target_width=500) 106 | 107 | # image_shape = tf.pack([height, width, 3]) 108 | # image = tf.reshape(image, image_shape) 109 | # boxes,box_ind = perform_selective_search(,tf.cast(width,tf.float32),tf.cast(height,tf.float32),(loc_x,loc_y,loc_x+loc_w,loc_y+loc_h)) 110 | 111 | # resized_and_cropped_image = tf.image.crop_and_resize(image, boxes, box_ind, crop_size=[227,227]) 112 | 113 | images = tf.train.shuffle_batch([image_tf],enqueue_many=True,batch_size=32,num_threads=1,capacity=50000,min_after_dequeue=10000) 114 | 115 | return images 116 | def split_spn(filename_queue): 117 | reader = tf.TFRecordReader() 118 | _, serialized_example = reader.read(filename_queue) 119 | 120 | features = tf.parse_single_example( 121 | serialized_example, 122 | features={ 123 | 'image_raw':tf.FixedLenFeature([], tf.string), 124 | 'width': tf.FixedLenFeature([], tf.int64), 125 | 'height': tf.FixedLenFeature([], tf.int64), 126 | 'loc_x': tf.FixedLenFeature([], tf.int64), 127 | 'loc_y': tf.FixedLenFeature([], tf.int64), 128 | 'loc_w': tf.FixedLenFeature([], tf.int64), 129 | 'loc_h': tf.FixedLenFeature([], tf.int64) 130 | }) 131 | 132 | image = tf.decode_raw(features['image_raw'], tf.uint8) 133 | 134 | height = tf.cast(features['height'], tf.int32) 135 | width = tf.cast(features['width'], tf.int32) 136 | loc_x = tf.cast(features['loc_x'], tf.float32) 137 | loc_y = tf.cast(features['loc_y'], tf.float32) 138 | loc_w = tf.cast(features['loc_w'], tf.float32) 139 | loc_h = 
tf.cast(features['loc_h'], tf.float32) 140 | 141 | image_shape = tf.pack([height, width, 3]) 142 | image_1 = tf.reshape(image, image_shape) 143 | image_shape = tf.pack([1,height, width, 3]) 144 | image_2 = tf.cast(tf.reshape(image, image_shape),tf.float32) 145 | height = tf.cast(features['height'], tf.float32) 146 | width = tf.cast(features['width'], tf.float32) 147 | crop_index = tf.pack([[tf.divide(loc_y,height),tf.divide(loc_x,width),tf.divide(loc_y+loc_h,height),tf.divide(loc_w+loc_x,width)]]) 148 | #boxes,box_ind = perform_selective_search(,tf.cast(width,tf.float32),tf.cast(height,tf.float32),(loc_x,loc_y,loc_x+loc_w,loc_y+loc_h)) 149 | 150 | resized_image = tf.image.resize_image_with_crop_or_pad(image=image_1,target_height=500,target_width=500) 151 | resized_and_cropped_image = tf.image.crop_and_resize(image_2,crop_index,[0]*1,crop_size=[227,227]) 152 | orig_images,cropped_images = tf.train.shuffle_batch([resized_image,resized_and_cropped_image],batch_size=10,num_threads=1,capacity=50,min_after_dequeue=10) 153 | 154 | return orig_images,cropped_images 155 | filename_queue = tf.train.string_input_producer([tf_record_file], num_epochs=1) 156 | 157 | ip1,ip2 = split_spn(filename_queue) 158 | 159 | init_op = tf.group(tf.global_variables_initializer(),tf.local_variables_initializer()) 160 | 161 | print "Model Done" 162 | with tf.Session() as sess: 163 | sess.run(init_op) 164 | coord = tf.train.Coordinator() 165 | threads = tf.train.start_queue_runners(coord=coord) 166 | op = sess.run([ip1,ip2]) 167 | 168 | output = np.asarray(op[0]) 169 | #print output.shape 170 | for i in range(output.shape[0]): 171 | cv2.imshow('result',output[i,:,:,:]/255.0) 172 | cv2.waitKey(0) 173 | break 174 | output = np.asarray(op[1]) 175 | #print output.shape 176 | for i in range(output.shape[0]): 177 | cv2.imshow('result',output[i,0,:,:,:]/255.0) 178 | cv2.waitKey(0) 179 | break 180 | coord.request_stop() 181 | coord.join(threads) -------------------------------------------------------------------------------- /new_files/vis.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from pdb import set_trace as brk 3 | def vis_results(img,res_dict): 4 | 5 | for i in range(len(res_dict['location'])): 6 | cv2.rectangle(img,(int(res_dict['location'][i][0]),int(res_dict['location'][i][1])),(int(res_dict['location'][i][2]), 7 | int(res_dict['location'][i][3])),(0,255,0),2) 8 | for j in range(res_dict['landmarks'][i].shape[0]): 9 | print (int(res_dict['landmarks'][i][j,0]),int(res_dict['landmarks'][i][j,1])) 10 | cv2.circle(img,(int(res_dict['landmarks'][i][j,0]),int(res_dict['landmarks'][i][j,1])), 1, (0,0,255), 2) 11 | #Write M for male, F for Female 12 | center_x = int(int(res_dict['location'][i][0]) + (int(res_dict['location'][i][2]) - int(res_dict['location'][i][0]))) 13 | center_y = int(int(res_dict['location'][i][1]) + (int(res_dict['location'][i][3]) - int(res_dict['location'][i][1]))) 14 | 15 | if res_dict['gender'][i][0] < 0.5: 16 | #cv2.putText(img,'M',(center_x,center_y), cv2.FONT_HERSHEY_SIMPLEX, 1,(153,0,76),2,cv2.LINE_AA) 17 | cv2.putText(img,'M',(center_x,center_y), cv2.FONT_HERSHEY_SIMPLEX, 1,(127,0,255),2,cv2.LINE_AA) 18 | elif res_dict['gender'][i][0] >= 0.5: 19 | cv2.putText(img,'F',(center_x,center_y), cv2.FONT_HERSHEY_SIMPLEX, 1,(127,0,255),2,cv2.LINE_AA) 20 | 21 | cv2.imshow('result',img/255.0) 22 | cv2.waitKey(0) -------------------------------------------------------------------------------- /results_analysis.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | np.set_printoptions(linewidth=200) 3 | from sklearn.metrics import roc_curve, precision_recall_curve 4 | import matplotlib.pyplot as plt 5 | 6 | 7 | def softmax(x): 8 | return np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True) 9 | 10 | 11 | pred = np.load('test_results.npy') 12 | truth = np.load('truth.npy') 13 | 14 | print pred.shape, truth.shape 15 | 16 | 17 | prob = softmax(pred[:,:2]) 18 | # print prob.sum(axis = 1) 19 | print pred 20 | 21 | 22 | print 'Detection accuracy: ', np.sum(np.argmax(prob, axis = 1) == truth[:,0].astype(np.bool))/float(prob.shape[0]) 23 | 24 | print 'Gender accuracy: ', np.sum(np.argmax(softmax(pred[:,68:70]), axis = 1) == truth[:,-1].astype(np.bool))/float(prob.shape[0]) 25 | 26 | # detection 27 | fpr, tpr, thresholds = roc_curve(truth[:,0], prob[:,1]) 28 | precision, recall, th = precision_recall_curve(truth[:,0], prob[:,1]) 29 | 30 | # plt.plot(recall, precision) 31 | # plt.xlabel('Recall') 32 | # plt.ylabel('Precision') 33 | # # plt.savefig('detection_pr_re.eps', format='eps', dpi=1000) 34 | # plt.show() 35 | 36 | 37 | # plt.plot(fpr, tpr) 38 | # plt.xlabel('False positive rate') 39 | # plt.ylabel('True positive rate') 40 | # # plt.savefig('detection_roc.eps', format='eps', dpi=1000) 41 | # plt.show() 42 | 43 | -------------------------------------------------------------------------------- /selective_search.py: -------------------------------------------------------------------------------- 1 | #!/home/shashank/anaconda2/bin 2 | import dlib 3 | from skimage import io 4 | import cv2 5 | import numpy as np 6 | import tensorflow as tf 7 | import csv 8 | from multiprocessing import Pool 9 | from multiprocessing.dummy import Pool as ThreadPool 10 | 11 | pool = ThreadPool(4) 12 | DEBUG_FLAG = False 13 | tfrecords_full_filename = 'aflw.tfrecords' 14 | tfrecords_training_filename = 'aflw_training.tfrecords' 15 | writer = tf.python_io.TFRecordWriter(tfrecords_training_filename) 16 | 17 | if DEBUG_FLAG: 18 | debug_fp = open('debug.csv','wb') 19 | debug_fp_csv = csv.writer(debug_fp) 20 | 21 | def calc_2D_IOU(bb1,bb2): 22 | top_left_x1 = bb1[0] 23 | top_left_y1 = bb1[1] 24 | bottom_right_x1 = bb1[2] 25 | bottom_right_y1 = bb1[3] 26 | 27 | top_left_x2 = bb2[0] 28 | top_left_y2 = bb2[1] 29 | bottom_right_x2 = bb2[2] 30 | bottom_right_y2 = bb2[3] 31 | 32 | intersect_top_left_x = max(bb1[0],bb2[0]) 33 | intersect_top_left_y = max(bb1[1],bb2[1]) 34 | intersect_bottom_right_x = min(bb1[2],bb2[2]) 35 | intersect_bottom_right_y = min(bb1[3],bb2[3]) 36 | 37 | intersect_area = (intersect_bottom_right_x-intersect_top_left_x+1)*(intersect_bottom_right_y-intersect_top_left_y+1) 38 | total_area = (bottom_right_x1-top_left_x1+1)*(bottom_right_y1-top_left_y1+1) + (bottom_right_x2-top_left_x2+1)*(bottom_right_y2-top_left_y2+1) - intersect_area 39 | iou = float(intersect_area)/float(total_area+0.0) 40 | return iou 41 | 42 | 43 | def perform_selective_search(img,ground_truth): 44 | rects=[] 45 | dlib.find_candidate_object_locations(img, rects, min_size=500) 46 | filter_positive_rects=[] 47 | filter_negative_rects=[] 48 | 49 | for rect in rects: 50 | iou = calc_2D_IOU(ground_truth,(rect.left(),rect.top(),rect.right(),rect.bottom())) 51 | 52 | if DEBUG_FLAG: 53 | debug_fp_csv.writerow([iou,ground_truth[0],ground_truth[1],ground_truth[2],ground_truth[3],rect.left(),rect.top(),rect.right(),rect.bottom()]) 54 | if iou > 0.5: 55 | 
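# Positive proposals are stored in the normalized [top/h, left/w, bottom/h,
# right/w] order expected by tf.image.crop_and_resize; h and w are assumed
# here to hold the image height and width in pixels.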
filter_positive_rects.append([rect.top()/h,rect.left()/w,rect.bottom()/h,rect.right()/w]) 56 | elif iou < 0.35: 57 | filter_negative_rects.append([rect.top()/h,rect.left()/w,rect.bottom()/h,rect.right()/w]) 58 | 59 | return filter_positive_rects,filter_negative_rects 60 | 61 | def visualise(img,rects): 62 | cv2.namedWindow('result', cv2.WINDOW_NORMAL) 63 | cv2.resizeWindow('result', 600,600) 64 | for rect in rects: 65 | cv2.rectangle(img,(rect.left(),rect.top()),(rect.right(),rect.bottom()),(0,255,0),2) 66 | 67 | cv2.imshow('result',img) 68 | cv2.waitKey(0) 69 | 70 | 71 | def extract_tfrecord(): 72 | record_iterator = tf.python_io.tf_record_iterator(path=tfrecords_full_filename) 73 | 74 | for string_record in record_iterator: 75 | example = tf.train.Example() 76 | example.ParseFromString(string_record) 77 | 78 | img_string = example.features.feature['image_raw'].bytes_list.value[0] 79 | img_width = int(example.features.feature['width'].int64_list.value[0]) 80 | img_height = int(example.features.feature['height'].int64_list.value[0]) 81 | img_2d = np.fromstring(img_string, dtype=np.uint8).reshape(img_height,img_width,3) 82 | loc_x = int(example.features.feature['loc_x'].int64_list.value[0]) 83 | loc_y = int(example.features.feature['loc_y'].int64_list.value[0]) 84 | loc_w = int(example.features.feature['loc_w'].int64_list.value[0]) 85 | loc_h = int(example.features.feature['loc_h'].int64_list.value[0]) 86 | hard_postives,hard_negatives = perform_selective_search(img_2d,(loc_x,loc_y,loc_x+loc_w,loc_y+loc_h)) 87 | 88 | resized_and_cropped_image = tf.image.crop_and_resize(img_2d[np.newaxis,:], boxes, [0]*hard_postives.shape[0], crop_size=[227,227]) 89 | break 90 | #visualise(img_2d,hard_postives) 91 | break 92 | 93 | 94 | if __name__ == '__main__': 95 | extract_tfrecord() 96 | 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /split_tf_record.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import dlib 4 | from pdb import set_trace as brk 5 | 6 | tf_record_file = 'aflw_train.tfrecords' 7 | 8 | def calc_2D_IOU(bb1,bb2): 9 | top_left_x1 = bb1[0] 10 | top_left_y1 = bb1[1] 11 | bottom_right_x1 = bb1[2] 12 | bottom_right_y1 = bb1[3] 13 | 14 | top_left_x2 = bb2[0] 15 | top_left_y2 = bb2[1] 16 | bottom_right_x2 = bb2[2] 17 | bottom_right_y2 = bb2[3] 18 | 19 | intersect_top_left_x = max(bb1[0],bb2[0]) 20 | intersect_top_left_y = max(bb1[1],bb2[1]) 21 | intersect_bottom_right_x = min(bb1[2],bb2[2]) 22 | intersect_bottom_right_y = min(bb1[3],bb2[3]) 23 | 24 | intersect_area = (intersect_bottom_right_x-intersect_top_left_x+1)*(intersect_bottom_right_y-intersect_top_left_y+1) 25 | total_area = (bottom_right_x1-top_left_x1+1)*(bottom_right_y1-top_left_y1+1) + (bottom_right_x2-top_left_x2+1)*(bottom_right_y2-top_left_y2+1) - intersect_area 26 | iou = float(intersect_area)/float(total_area+0.0) 27 | return iou 28 | 29 | 30 | def perform_selective_search(img,w,h,ground_truth): 31 | print "Came:" 32 | rects=[] 33 | dlib.find_candidate_object_locations(img, rects, min_size=500) 34 | filter_positive_rects=[] 35 | filter_negative_rects=[] 36 | 37 | for rect in rects: 38 | iou = calc_2D_IOU(ground_truth,(rect.left(),rect.top(),rect.right(),rect.bottom())) 39 | 40 | if DEBUG_FLAG: 41 | debug_fp_csv.writerow([iou,ground_truth[0],ground_truth[1],ground_truth[2],ground_truth[3],rect.left(),rect.top(),rect.right(),rect.bottom()]) 42 | if iou > 0.5: 43 | 
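# Note that rect.top(), rect.left(), etc. are integers, so w and h must be
# passed in as floats; under Python 2 integer division an integer h or w
# would truncate these normalized coordinates to zero.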
filter_positive_rects.append([rect.top()/h,rect.left()/w,rect.bottom()/h,rect.right()/w]) 44 | elif iou < 0.35: 45 | filter_negative_rects.append([rect.top()/h,rect.left()/w,rect.bottom()/h,rect.right()/w]) 46 | 47 | return np.asarray(filter_positive_rects),np.asarray(filter_negative_rects) 48 | 49 | def split_(filename_queue, sess): 50 | brk() 51 | reader = tf.TFRecordReader() 52 | _, serialized_example = reader.read(filename_queue) 53 | 54 | features = tf.parse_single_example( 55 | serialized_example, 56 | features={ 57 | 'image_raw':tf.FixedLenFeature([], tf.string), 58 | 'width': tf.FixedLenFeature([], tf.int64), 59 | 'height': tf.FixedLenFeature([], tf.int64), 60 | 'loc_x': tf.FixedLenFeature([], tf.int64), 61 | 'loc_y': tf.FixedLenFeature([], tf.int64), 62 | 'loc_w': tf.FixedLenFeature([], tf.int64), 63 | 'loc_h': tf.FixedLenFeature([], tf.int64) 64 | }) 65 | 66 | image = tf.decode_raw(features['image_raw'], tf.uint8) 67 | 68 | height = tf.cast(features['height'], tf.int32) 69 | width = tf.cast(features['width'], tf.int32) 70 | loc_x = tf.cast(features['loc_x'], tf.int32) 71 | loc_y = tf.cast(features['loc_y'], tf.int32) 72 | loc_w = tf.cast(features['loc_w'], tf.int32) 73 | loc_h = tf.cast(features['loc_h'], tf.int32) 74 | 75 | image_shape = tf.pack([height, width, 3]) 76 | image = tf.reshape(image, image_shape) 77 | height,width,loc_x,loc_y,loc_h,loc_w = sess.run([height,width,loc_x,loc_y,loc_h,loc_w]) 78 | # boxes,box_ind = perform_selective_search(,tf.cast(width,tf.float32),tf.cast(height,tf.float32),(loc_x,loc_y,loc_x+loc_w,loc_y+loc_h)) 79 | boxes = np.asarray([[loc_y/float(height),loc_x/float(width),(loc_y+loc_h)/float(height),(loc_x+loc_w)/float(width)]]) 80 | resized_and_cropped_image = tf.image.crop_and_resize(image.astype(np.float32), boxes.astype(np.float32), [0]*1, crop_size=[227,227]) 81 | 82 | 83 | images = tf.train.shuffle_batch([resized_and_cropped_image],batch_size=10,num_threads=1,capacity=50,min_after_dequeue=10) 84 | 85 | return images 86 | 87 | filename_queue = tf.train.string_input_producer([tf_record_file], num_epochs=1) 88 | 89 | 90 | 91 | init_op = tf.group(tf.global_variables_initializer(),tf.local_variables_initializer()) 92 | 93 | print "model done" 94 | 95 | with tf.Session() as sess: 96 | 97 | sess.run(init_op) 98 | images = split_(filename_queue, sess) 99 | 100 | coord = tf.train.Coordinator() 101 | threads = tf.train.start_queue_runners(coord=coord) 102 | op_images = sess.run([images]) 103 | print np.asarray(op_images).shape 104 | 105 | coord.request_stop() 106 | coord.join(threads) -------------------------------------------------------------------------------- /test_results.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashanktyagi/HyperFace-TensorFlow-implementation/6ebc0a96fa952fbf2eb8c4eda56f92fa313fa3b9/test_results.npy -------------------------------------------------------------------------------- /truth.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashanktyagi/HyperFace-TensorFlow-implementation/6ebc0a96fa952fbf2eb8c4eda56f92fa313fa3b9/truth.npy -------------------------------------------------------------------------------- /version_0.0.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashanktyagi/HyperFace-TensorFlow-implementation/6ebc0a96fa952fbf2eb8c4eda56f92fa313fa3b9/version_0.0.txt 
-------------------------------------------------------------------------------- /with SPN/logs/events.out.tfevents.1494397553.shashanks-mbp.dynamic.ucsd.edu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashanktyagi/HyperFace-TensorFlow-implementation/6ebc0a96fa952fbf2eb8c4eda56f92fa313fa3b9/with SPN/logs/events.out.tfevents.1494397553.shashanks-mbp.dynamic.ucsd.edu -------------------------------------------------------------------------------- /with SPN/main.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os 3 | from model import * 4 | 5 | weights_path = '/Users/shashank/Tensorflow/SPN/weights/' 6 | imgs_path = '/Users/shashank/Tensorflow/CSE252C-Hyperface/git/truth_data.npy' 7 | tf_record_file_path = '../aflw_train.tfrecords' 8 | if not os.path.exists('./logs'): 9 | os.makedirs('./logs') 10 | 11 | map(os.unlink, (os.path.join( './logs',f) for f in os.listdir('./logs')) ) 12 | 13 | 14 | 15 | with tf.Session() as sess: 16 | print 'Building Graph...' 17 | model = Network(sess,tf_record_file_path) 18 | print 'Done!\nInitializing variables...' 19 | sess.run(tf.global_variables_initializer()) 20 | print 'Done!' 21 | model.train() -------------------------------------------------------------------------------- /with SPN/model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | import numpy as np 4 | from spatial_transformer import transformer 5 | from tqdm import tqdm 6 | from pdb import set_trace as brk 7 | import time 8 | 9 | class Network(object): 10 | 11 | def __init__(self, sess,tf_record_file_path=None): 12 | 13 | self.sess = sess 14 | self.batch_size = 2 15 | self.img_height = 500 16 | self.img_width = 500 17 | self.out_height = 227 18 | self.out_width = 227 19 | self.channel = 3 20 | 21 | self.num_epochs = 10 22 | 23 | # Hyperparameters 24 | self.weight_detect = 1 25 | self.weight_landmarks = 5 26 | self.weight_visibility = 0.5 27 | self.weight_pose = 5 28 | self.weight_gender = 2 29 | 30 | #tf_Record Paramters 31 | self.filename_queue = tf.train.string_input_producer([tf_record_file_path], num_epochs=self.num_epochs) 32 | self.build_network() 33 | 34 | 35 | def build_network(self): 36 | 37 | self.X = tf.placeholder(tf.float32, [self.batch_size, self.img_height, self.img_width, self.channel], name='images') 38 | self.detection = tf.placeholder(tf.float32, [self.batch_size,2], name='detection') 39 | self.landmarks = tf.placeholder(tf.float32, [self.batch_size, 42], name='landmarks') 40 | self.visibility = tf.placeholder(tf.float32, [self.batch_size,21], name='visibility') 41 | self.pose = tf.placeholder(tf.float32, [self.batch_size,3], name='pose') 42 | self.gender = tf.placeholder(tf.float32, [self.batch_size,2], name='gender') 43 | 44 | 45 | theta = self.localization_squeezenet(self.X) 46 | self.T_mat = tf.reshape(theta, [-1, 2,3]) 47 | self.cropped = transformer(self.X, self.T_mat, [self.out_height, self.out_width]) 48 | 49 | net_output = self.hyperface(self.cropped) # (out_detection, out_landmarks, out_visibility, out_pose, out_gender) 50 | 51 | 52 | loss_detection = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(net_output[0], self.detection)) 53 | 54 | visibility_mask = tf.reshape(tf.tile(tf.expand_dims(self.visibility, axis=2), [1,1,2]), [self.batch_size, -1]) 55 | loss_landmarks = 
tf.reduce_mean(tf.square(visibility_mask*(net_output[1] - self.landmarks))) 56 | 57 | loss_visibility = tf.reduce_mean(tf.square(net_output[2] - self.visibility)) 58 | loss_pose = tf.reduce_mean(tf.square(net_output[3] - self.pose)) 59 | loss_gender = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(net_output[4], self.gender)) 60 | 61 | self.loss = self.weight_detect*loss_detection + self.weight_landmarks*loss_landmarks \ 62 | + self.weight_visibility*loss_visibility + self.weight_pose*loss_pose \ 63 | + self.weight_gender*loss_gender 64 | 65 | 66 | 67 | def get_transformation_matrix(self, theta): 68 | with tf.name_scope('T_matrix'): 69 | theta = tf.expand_dims(theta, 2) 70 | mat = tf.constant(np.repeat(np.array([[[1,0,0],[0,0,0],[0,1,0],[0,0,0],[0,1,0],[0,0,1]]]), 71 | self.batch_size, axis=0), dtype=tf.float32) 72 | tr_matrix = tf.squeeze(tf.matmul(mat, theta)) 73 | 74 | return tr_matrix 75 | 76 | 77 | 78 | def train(self): 79 | 80 | optimizer = tf.train.AdamOptimizer().minimize(self.loss) 81 | 82 | writer = tf.summary.FileWriter('./logs', self.sess.graph) 83 | loss_summ = tf.summary.scalar('loss', self.loss) 84 | img_summ = tf.summary.image('cropped_image', self.cropped) 85 | 86 | 87 | tic = time.time() 88 | print self.sess.run(self.T_mat, feed_dict={self.X: np.random.randn(self.batch_size, self.img_height, self.img_width, self.channel)}) 89 | toc = time.time() 90 | print toc-tic 91 | images = self.load_from_tfRecord(self.filename_queue) 92 | 93 | coord = tf.train.Coordinator() 94 | threads = tf.train.start_queue_runners(sess = self.sess, coord = coord) 95 | 96 | for i in xrange(2): 97 | img_batch = self.sess.run(images) 98 | print img_batch.shape 99 | 100 | 101 | def hyperface(self,inputs, reuse = False): 102 | 103 | if reuse: 104 | tf.get_variable_scope().reuse_variables() 105 | 106 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 107 | activation_fn = tf.nn.relu, 108 | weights_initializer = tf.truncated_normal_initializer(0.0, 0.01) ): 109 | 110 | conv1 = slim.conv2d(inputs, 96, [11,11], 4, padding= 'VALID', scope='conv1') 111 | max1 = slim.max_pool2d(conv1, [3,3], 2, padding= 'VALID', scope='max1') 112 | 113 | conv1a = slim.conv2d(max1, 256, [4,4], 4, padding= 'VALID', scope='conv1a') 114 | 115 | conv2 = slim.conv2d(max1, 256, [5,5], 1, scope='conv2') 116 | max2 = slim.max_pool2d(conv2, [3,3], 2, padding= 'VALID', scope='max2') 117 | conv3 = slim.conv2d(max2, 384, [3,3], 1, scope='conv3') 118 | 119 | conv3a = slim.conv2d(conv3, 256, [2,2], 2, padding= 'VALID', scope='conv3a') 120 | 121 | conv4 = slim.conv2d(conv3, 384, [3,3], 1, scope='conv4') 122 | conv5 = slim.conv2d(conv4, 256, [3,3], 1, scope='conv5') 123 | pool5 = slim.max_pool2d(conv5, [3,3], 2, padding= 'VALID', scope='pool5') 124 | 125 | concat_feat = tf.concat(3, [conv1a, conv3a, pool5]) 126 | conv_all = slim.conv2d(concat_feat, 192, [1,1], 1, padding= 'VALID', scope='conv_all') 127 | 128 | shape = int(np.prod(conv_all.get_shape()[1:])) 129 | fc_full = slim.fully_connected(tf.reshape(conv_all, [-1, shape]), 3072, scope='fc_full') 130 | 131 | fc_detection = slim.fully_connected(fc_full, 512, scope='fc_detection1') 132 | fc_landmarks = slim.fully_connected(fc_full, 512, scope='fc_landmarks1') 133 | fc_visibility = slim.fully_connected(fc_full, 512, scope='fc_visibility1') 134 | fc_pose = slim.fully_connected(fc_full, 512, scope='fc_pose1') 135 | fc_gender = slim.fully_connected(fc_full, 512, scope='fc_gender1') 136 | 137 | out_detection = slim.fully_connected(fc_detection, 2, scope='fc_detection2', 
activation_fn = None) 138 | out_landmarks = slim.fully_connected(fc_landmarks, 42, scope='fc_landmarks2', activation_fn = None) 139 | out_visibility = slim.fully_connected(fc_visibility, 21, scope='fc_visibility2', activation_fn = None) 140 | out_pose = slim.fully_connected(fc_pose, 3, scope='fc_pose2', activation_fn = None) 141 | out_gender = slim.fully_connected(fc_gender, 2, scope='fc_gender2', activation_fn = None) 142 | 143 | return [tf.nn.softmax(out_detection), out_landmarks, out_visibility, out_pose, tf.nn.softmax(out_gender)] 144 | 145 | 146 | 147 | def localization_VGG16(self,inputs): 148 | 149 | with tf.variable_scope('localization_network'): 150 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 151 | activation_fn = tf.nn.relu, 152 | weights_initializer = tf.constant_initializer(0.0)): 153 | 154 | net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') 155 | net = slim.max_pool2d(net, [2, 2], scope='pool1') 156 | net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2') 157 | net = slim.max_pool2d(net, [2, 2], scope='pool2') 158 | net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3') 159 | net = slim.max_pool2d(net, [2, 2], scope='pool3') 160 | net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4') 161 | net = slim.max_pool2d(net, [2, 2], scope='pool4') 162 | net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5') 163 | net = slim.max_pool2d(net, [2, 2], scope='pool5') 164 | shape = int(np.prod(net.get_shape()[1:])) 165 | 166 | net = slim.fully_connected(tf.reshape(net, [-1, shape]), 4096, scope='fc6') 167 | net = slim.fully_connected(net, 1024, scope='fc7') 168 | identity = np.array([[1., 0., 0.], 169 | [0., 1., 0.]]) 170 | identity = identity.flatten() 171 | net = slim.fully_connected(net, 6, biases_initializer = tf.constant_initializer(identity) , scope='fc8') 172 | 173 | return net 174 | 175 | 176 | def localization_squeezenet(self, inputs): 177 | 178 | with tf.variable_scope('localization_network'): 179 | with slim.arg_scope([slim.conv2d], activation_fn = tf.nn.relu, 180 | padding = 'SAME', 181 | weights_initializer = tf.constant_initializer(0.0)): 182 | 183 | conv1 = slim.conv2d(inputs, 64, [3,3], 2, padding = 'VALID', scope='conv1') 184 | pool1 = slim.max_pool2d(conv1, [2,2], 2, scope='pool1') 185 | fire2 = self.fire_module(pool1, 16, 64, scope = 'fire2') 186 | fire3 = self.fire_module(fire2, 16, 64, scope = 'fire3', res_connection=True) 187 | fire4 = self.fire_module(fire3, 32, 128, scope = 'fire4') 188 | pool4 = slim.max_pool2d(fire4, [2,2], 2, scope='pool4') 189 | fire5 = self.fire_module(pool4, 32, 128, scope = 'fire5', res_connection=True) 190 | fire6 = self.fire_module(fire5, 48, 192, scope = 'fire6') 191 | fire7 = self.fire_module(fire6, 48, 192, scope = 'fire7', res_connection=True) 192 | fire8 = self.fire_module(fire7, 64, 256, scope = 'fire8') 193 | pool8 = slim.max_pool2d(fire8, [2,2], 2, scope='pool8') 194 | fire9 = self.fire_module(pool8, 64, 256, scope = 'fire9', res_connection=True) 195 | conv10 = slim.conv2d(fire9, 128, [1,1], 1, scope='conv10') 196 | shape = int(np.prod(conv10.get_shape()[1:])) 197 | identity = np.array([[1., 0., 0.], 198 | [0., 1., 0.]]) 199 | identity = identity.flatten() 200 | fc11 = slim.fully_connected(tf.reshape(conv10, [-1, shape]), 6, biases_initializer = tf.constant_initializer(identity), scope='fc11') 201 | return fc11 202 | 203 | 204 | def fire_module(self, inputs, s_channels, e_channels, scope, res_connection = False): 205 | with tf.variable_scope(scope): 206 
|		sq = self.squeeze(inputs, s_channels, 'squeeze')
207 | 		ex = self.expand(sq, e_channels, 'expand')
208 | 		if res_connection:
209 | 			ret = tf.nn.relu(tf.add(inputs,ex))
210 | 		else:
211 | 			ret = tf.nn.relu(ex)
212 | 		return ret
213 | 
214 | 
215 | 	def squeeze(self, inputs, channels, scope):
216 | 		with slim.arg_scope([slim.conv2d], activation_fn = None,
217 | 							padding = 'SAME',
218 | 							weights_initializer = tf.truncated_normal_initializer(0.0, 0.01)):
219 | 			sq = slim.conv2d(inputs, channels, [1,1], 1, scope = scope)
220 | 		return sq
221 | 
222 | 	def expand(self, inputs, channels, scope):
223 | 		with slim.arg_scope([slim.conv2d], activation_fn = None,
224 | 							padding = 'SAME',
225 | 							weights_initializer = tf.truncated_normal_initializer(0.0, 0.01)):
226 | 			with tf.variable_scope(scope):
227 | 				e1x1 = slim.conv2d(inputs, channels, [1,1], 1, scope='e1x1')
228 | 				e3x3 = slim.conv2d(inputs, channels, [3,3], 1, scope='e3x3')
229 | 				expand = tf.concat(3, [e1x1, e3x3])
230 | 
231 | 		return expand
232 | 
233 | 
234 | 
235 | 	def predict(self, imgs_path):
236 | 		print 'Running inference...'
237 | 		np.set_printoptions(suppress=True)
238 | 		imgs = (np.load(imgs_path) - 127.5)/128.0
239 | 		shape = imgs.shape
240 | 		self.X = tf.placeholder(tf.float32, [shape[0], self.img_height, self.img_width, self.channel], name='images')
241 | 		pred = self.hyperface(self.X, reuse = True)  # the network is built by hyperface(); this class has no network() method
242 | 
243 | 		net_preds = self.sess.run(pred, feed_dict={self.X: imgs})
244 | 
245 | 		print net_preds[-1]
246 | 		import matplotlib.pyplot as plt
247 | 		plt.imshow(imgs[-1]);plt.show()
248 | 
249 | 		brk()
250 | 
251 | 	def load_from_tfRecord(self, filename_queue, resize_size=(500, 500)):  # (width, height); default assumed to match the 500x500 input placeholder, since train() passes no size
252 | 
253 | 		reader = tf.TFRecordReader()
254 | 		_, serialized_example = reader.read(filename_queue)
255 | 
256 | 		features = tf.parse_single_example(
257 | 			serialized_example,
258 | 			features={
259 | 				'image_raw':tf.FixedLenFeature([], tf.string),
260 | 				'width': tf.FixedLenFeature([], tf.int64),
261 | 				'height': tf.FixedLenFeature([], tf.int64)
262 | 			})
263 | 
264 | 		image = tf.decode_raw(features['image_raw'], tf.uint8)  # the TFRecord stores raw uint8 bytes (see data_prep.py)
265 | 		orig_height = tf.cast(features['height'], tf.int32)
266 | 		orig_width = tf.cast(features['width'], tf.int32)
267 | 
268 | 		image_shape = tf.pack([orig_height,orig_width,3])
269 | 		image_tf = tf.reshape(image,image_shape)
270 | 		print image_shape
271 | 		resized_image = tf.image.resize_image_with_crop_or_pad(image_tf,target_height=resize_size[1],target_width=resize_size[0])
272 | 
273 | 		images = tf.train.shuffle_batch([resized_image],batch_size=self.batch_size,num_threads=1,capacity=50,min_after_dequeue=10)
274 | 
275 | 		return images
276 | 
277 | 
278 | 
279 | 	def load_weights(self, path):
280 | 		variables = slim.get_model_variables()
281 | 		print 'Loading weights...'
282 | 		for var in tqdm(variables):
283 | 			if ('conv' in var.name) and ('weights' in var.name):
284 | 				self.sess.run(var.assign(np.load(path+var.name.split('/')[0]+'/W.npy').transpose((2,3,1,0))))  # presumably Caffe-ordered (out_ch, in_ch, kH, kW) -> TF (kH, kW, in_ch, out_ch)
285 | 			elif ('fc' in var.name) and ('weights' in var.name):
286 | 				self.sess.run(var.assign(np.load(path+var.name.split('/')[0]+'/W.npy').T))  # FC weights stored as (out, in); TF expects (in, out)
287 | 			elif 'biases' in var.name:
288 | 				self.sess.run(var.assign(np.load(path+var.name.split('/')[0]+'/b.npy')))
289 | 		print 'Weights loaded!!' 
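	# How the two helpers above are presumably meant to be used (a sketch; nothing in
	# this file calls them): `with SPN/main.py` defines `weights_path` (a folder of
	# per-layer W.npy / b.npy files) and `imgs_path` (the truth_data.npy produced by
	# data_prep.py) but never uses them, so the intended wiring is likely
	#
	#     model.load_weights(weights_path)
	#     model.predict(imgs_path)
	#
	# run inside the session after the variables have been initialized.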
290 | 291 | def print_variables(self): 292 | variables = slim.get_model_variables() 293 | print 'Model Variables:\n' 294 | for var in variables: 295 | print var.name, ' ', var.get_shape() 296 | 297 | 298 | 299 | 300 | -------------------------------------------------------------------------------- /with SPN/model.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashanktyagi/HyperFace-TensorFlow-implementation/6ebc0a96fa952fbf2eb8c4eda56f92fa313fa3b9/with SPN/model.pyc -------------------------------------------------------------------------------- /with SPN/spatial_transformer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | import tensorflow as tf 16 | 17 | 18 | def transformer(U, theta, out_size, name='SpatialTransformer', **kwargs): 19 | """Spatial Transformer Layer 20 | 21 | Implements a spatial transformer layer as described in [1]_. 22 | Based on [2]_ and edited by David Dao for Tensorflow. 23 | 24 | Parameters 25 | ---------- 26 | U : float 27 | The output of a convolutional net should have the 28 | shape [num_batch, height, width, num_channels]. 29 | theta: float 30 | The output of the 31 | localisation network should be [num_batch, 6]. 32 | out_size: tuple of two ints 33 | The size of the output of the network (height, width) 34 | 35 | References 36 | ---------- 37 | .. [1] Spatial Transformer Networks 38 | Max Jaderberg, Karen Simonyan, Andrew Zisserman, Koray Kavukcuoglu 39 | Submitted on 5 Jun 2015 40 | .. 
[2] https://github.com/skaae/transformer_network/blob/master/transformerlayer.py 41 | 42 | Notes 43 | ----- 44 | To initialize the network to the identity transform init 45 | ``theta`` to : 46 | identity = np.array([[1., 0., 0.], 47 | [0., 1., 0.]]) 48 | identity = identity.flatten() 49 | theta = tf.Variable(initial_value=identity) 50 | 51 | """ 52 | 53 | def _repeat(x, n_repeats): 54 | with tf.variable_scope('_repeat'): 55 | rep = tf.transpose(tf.expand_dims(tf.ones(shape=tf.pack([n_repeats, ])), 1), [1, 0]) 56 | rep = tf.cast(rep, 'int32') 57 | x = tf.matmul(tf.reshape(x, (-1, 1)), rep) 58 | return tf.reshape(x, [-1]) 59 | 60 | def _interpolate(im, x, y, out_size): 61 | with tf.variable_scope('_interpolate'): 62 | # constants 63 | # num_batch = tf.shape(im)[0] 64 | # height = tf.shape(im)[1] 65 | # width = tf.shape(im)[2] 66 | # channels = tf.shape(im)[3] 67 | 68 | shape = im.get_shape() 69 | num_batch = shape[0] 70 | height = shape[1] 71 | width = shape[2] 72 | channels = shape[3] 73 | 74 | 75 | 76 | 77 | x = tf.cast(x, 'float32') 78 | y = tf.cast(y, 'float32') 79 | height_f = tf.cast(height, 'float32') 80 | width_f = tf.cast(width, 'float32') 81 | out_height = out_size[0] 82 | out_width = out_size[1] 83 | zero = tf.zeros([], dtype='int32') 84 | max_y = tf.cast(tf.shape(im)[1] - 1, 'int32') 85 | max_x = tf.cast(tf.shape(im)[2] - 1, 'int32') 86 | 87 | # scale indices from [-1, 1] to [0, width/height] 88 | x = (x + 1.0)*(width_f) / 2.0 89 | y = (y + 1.0)*(height_f) / 2.0 90 | 91 | # do sampling 92 | x0 = tf.cast(tf.floor(x), 'int32') 93 | x1 = x0 + 1 94 | y0 = tf.cast(tf.floor(y), 'int32') 95 | y1 = y0 + 1 96 | 97 | x0 = tf.clip_by_value(x0, zero, max_x) 98 | x1 = tf.clip_by_value(x1, zero, max_x) 99 | y0 = tf.clip_by_value(y0, zero, max_y) 100 | y1 = tf.clip_by_value(y1, zero, max_y) 101 | dim2 = width 102 | dim1 = width*height 103 | base = _repeat(tf.range(num_batch)*dim1, out_height*out_width) 104 | base_y0 = base + y0*dim2 105 | base_y1 = base + y1*dim2 106 | idx_a = base_y0 + x0 107 | idx_b = base_y1 + x0 108 | idx_c = base_y0 + x1 109 | idx_d = base_y1 + x1 110 | 111 | # use indices to lookup pixels in the flat image and restore 112 | # channels dim 113 | im_flat = tf.reshape(im, tf.pack([-1, channels])) 114 | im_flat = tf.cast(im_flat, 'float32') 115 | Ia = tf.gather(im_flat, idx_a) 116 | Ib = tf.gather(im_flat, idx_b) 117 | Ic = tf.gather(im_flat, idx_c) 118 | Id = tf.gather(im_flat, idx_d) 119 | 120 | # and finally calculate interpolated values 121 | x0_f = tf.cast(x0, 'float32') 122 | x1_f = tf.cast(x1, 'float32') 123 | y0_f = tf.cast(y0, 'float32') 124 | y1_f = tf.cast(y1, 'float32') 125 | wa = tf.expand_dims(((x1_f-x) * (y1_f-y)), 1) 126 | wb = tf.expand_dims(((x1_f-x) * (y-y0_f)), 1) 127 | wc = tf.expand_dims(((x-x0_f) * (y1_f-y)), 1) 128 | wd = tf.expand_dims(((x-x0_f) * (y-y0_f)), 1) 129 | output = tf.add_n([wa*Ia, wb*Ib, wc*Ic, wd*Id]) 130 | return output 131 | 132 | def _meshgrid(height, width): 133 | with tf.variable_scope('_meshgrid'): 134 | # This should be equivalent to: 135 | # x_t, y_t = np.meshgrid(np.linspace(-1, 1, width), 136 | # np.linspace(-1, 1, height)) 137 | # ones = np.ones(np.prod(x_t.shape)) 138 | # grid = np.vstack([x_t.flatten(), y_t.flatten(), ones]) 139 | x_t = tf.matmul(tf.ones(shape=tf.pack([height, 1])), 140 | tf.transpose(tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), [1, 0])) 141 | y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1), 142 | tf.ones(shape=tf.pack([1, width]))) 143 | 144 | x_t_flat = tf.reshape(x_t, (1, -1)) 145 | 
y_t_flat = tf.reshape(y_t, (1, -1)) 146 | 147 | ones = tf.ones_like(x_t_flat) 148 | grid = tf.concat(0, [x_t_flat, y_t_flat, ones]) 149 | return grid 150 | 151 | def _transform(theta, input_dim, out_size): 152 | with tf.variable_scope('_transform'): 153 | # num_batch = tf.shape(input_dim)[0] 154 | # height = tf.shape(input_dim)[1] 155 | # width = tf.shape(input_dim)[2] 156 | # num_channels = tf.shape(input_dim)[3] 157 | 158 | shape = input_dim.get_shape() 159 | num_batch = shape[0] 160 | height = shape[1] 161 | width = shape[2] 162 | num_channels = shape[3] 163 | 164 | 165 | 166 | 167 | theta = tf.reshape(theta, (-1, 2, 3)) 168 | theta = tf.cast(theta, 'float32') 169 | 170 | # grid of (x_t, y_t, 1), eq (1) in ref [1] 171 | height_f = tf.cast(height, 'float32') 172 | width_f = tf.cast(width, 'float32') 173 | out_height = out_size[0] 174 | out_width = out_size[1] 175 | grid = _meshgrid(out_height, out_width) 176 | grid = tf.expand_dims(grid, 0) 177 | grid = tf.reshape(grid, [-1]) 178 | grid = tf.tile(grid, tf.pack([num_batch])) 179 | grid = tf.reshape(grid, tf.pack([num_batch, 3, -1])) 180 | 181 | # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s) 182 | T_g = tf.batch_matmul(theta, grid) 183 | x_s = tf.slice(T_g, [0, 0, 0], [-1, 1, -1]) 184 | y_s = tf.slice(T_g, [0, 1, 0], [-1, 1, -1]) 185 | x_s_flat = tf.reshape(x_s, [-1]) 186 | y_s_flat = tf.reshape(y_s, [-1]) 187 | 188 | input_transformed = _interpolate( 189 | input_dim, x_s_flat, y_s_flat, 190 | out_size) 191 | 192 | output = tf.reshape( 193 | input_transformed, tf.pack([num_batch, out_height, out_width, num_channels])) 194 | return output 195 | 196 | with tf.variable_scope(name): 197 | output = _transform(theta, U, out_size) 198 | return output 199 | 200 | 201 | def batch_transformer(U, thetas, out_size, name='BatchSpatialTransformer'): 202 | """Batch Spatial Transformer Layer 203 | 204 | Parameters 205 | ---------- 206 | 207 | U : float 208 | tensor of inputs [num_batch,height,width,num_channels] 209 | thetas : float 210 | a set of transformations for each input [num_batch,num_transforms,6] 211 | out_size : int 212 | the size of the output [out_height,out_width] 213 | 214 | Returns: float 215 | Tensor of size [num_batch*num_transforms,out_height,out_width,num_channels] 216 | """ 217 | with tf.variable_scope(name): 218 | num_batch, num_transforms = map(int, thetas.get_shape().as_list()[:2]) 219 | indices = [[i]*num_transforms for i in xrange(num_batch)] 220 | input_repeated = tf.gather(U, tf.reshape(indices, [-1])) 221 | return transformer(input_repeated, thetas, out_size) 222 | -------------------------------------------------------------------------------- /with SPN/spatial_transformer.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shashanktyagi/HyperFace-TensorFlow-implementation/6ebc0a96fa952fbf2eb8c4eda56f92fa313fa3b9/with SPN/spatial_transformer.pyc --------------------------------------------------------------------------------
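For reference, a minimal sketch of how the transformer layer is driven. It mirrors the wiring in `with SPN/model.py`, where a localization network predicts the 6 affine parameters per image; here an identity theta is hard-coded purely for illustration, and the snippet assumes the same pre-1.0 TensorFlow / Python 2 environment as the rest of the repo:

import numpy as np
import tensorflow as tf
from spatial_transformer import transformer

batch_size = 2
images = tf.placeholder(tf.float32, [batch_size, 500, 500, 3], name='images')

# identity affine transform [[1, 0, 0], [0, 1, 0]] for every image in the batch;
# in model.py this comes from localization_squeezenet(images) instead
identity = np.array([[1., 0., 0.],
                     [0., 1., 0.]], dtype=np.float32).flatten()
theta = tf.constant(np.tile(identity[np.newaxis, :], (batch_size, 1)))

T_mat = tf.reshape(theta, [-1, 2, 3])             # [batch_size, 2, 3]
cropped = transformer(images, T_mat, [227, 227])  # [batch_size, 227, 227, 3]

with tf.Session() as sess:
    out = sess.run(cropped, feed_dict={images: np.random.randn(batch_size, 500, 500, 3)})
    print out.shape  # (2, 227, 227, 3); the identity theta just resamples the full image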