├── README.md
├── augment.py
├── libs
│   ├── batch_norm.py
│   ├── tfpipeline.py
│   └── utils.py
├── model_eval.py
└── model_train.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

# TF-FaceLandmarkDetection

Face landmark detection using TensorFlow.
A reproduction of the paper **Deep Convolutional Network Cascade for Facial Point Detection**.

## Requirements

- Python 3.4
- Tensorflow 0.10.0

## Usage

- `git clone https://github.com/mariolew/TF-FaceLandmarkDetection`
- Prepare data: you need a text file in which each line has the format:
  `image_path bbx_left bbx_right bbx_top bbx_bottom landmark1_x landmark1_y ... landmarki_x landmarki_y`
- Modify the text file path and the path to store augmented images in **augment.py**, then run *python3 augment.py*
- Modify the paths and params in **model_train.py**, then run *python3 model_train.py* to train a face alignment model
- Modify the paths and params in **model_eval.py**, then run *python3 model_eval.py* to evaluate the trained model
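For example, one line of the annotation file might look like this (path and values are purely illustrative):

```
images/face_0001.jpg 84 161 92 169 106.2 107.8 146.7 112.2 125.3 142.8 105.2 157.8 139.7 161.8
```

The augmentation and evaluation code assume five landmarks; the flipping and error-normalization logic expects landmarks 0 and 1 to be the eye centers and 3 and 4 the mouth corners.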
## Note

This repo is based on https://github.com/luoyetx/deep-landmark and https://github.com/pkmital/CADL and is still a work in progress.

## Achievements

Level1: Done

Level2: TODO

Level3: TODO

## References

**[1]** [Deep Convolutional Network Cascade for Facial Point Detection](http://mmlab.ie.cuhk.edu.hk/archive/CNN_FacePoint.htm)

**[2]** [deep-landmark](https://github.com/luoyetx/deep-landmark)

**[3]** [Creative Applications of Deep Learning w/ Tensorflow](https://github.com/pkmital/CADL)

--------------------------------------------------------------------------------
/augment.py:
--------------------------------------------------------------------------------

import numpy as np
import cv2
import matplotlib.pyplot as plt
import h5py

TXT = 'testImageList.txt'

def read_data_from_txt(TXT):
    with open(TXT, 'r') as fid:
        lines = fid.readlines()
    result = []
    for line in lines:
        components = line.strip().split(' ')
        imgName = components[0].replace('\\', '/')
        # list() so the results are indexable under Python 3.
        bbx = list(map(int, components[1:5]))
        landmarks = list(map(float, components[5:]))
        landmarks = np.asarray(landmarks).reshape([-1, 2])
        result.append([imgName, BBox(bbx), landmarks])
    return result

def flip(face, landmark):
    """Flip a face image horizontally and mirror its normalized landmarks."""
    face_flipped_by_x = cv2.flip(face, 1)
    landmark_ = np.asarray([(1 - x, y) for (x, y) in landmark])
    # Swap left/right points so the flipped landmarks keep their semantic
    # order: eye centers (0, 1) and mouth corners (3, 4) change sides.
    landmark_[[0, 1]] = landmark_[[1, 0]]
    landmark_[[3, 4]] = landmark_[[4, 3]]
    return (face_flipped_by_x, landmark_)

def rotate(img, bbox, landmark, alpha):
    """
    Given a face with bbox and landmarks, rotate the image by alpha degrees
    and return the rotated face crop with its landmarks (absolute positions).
    """
    center = (bbox.x + bbox.w / 2, bbox.y + bbox.h / 2)
    rot_mat = cv2.getRotationMatrix2D(center, alpha, 1)
    # cv2.warpAffine expects dsize as (width, height), not img.shape.
    img_rotated_by_alpha = cv2.warpAffine(img, rot_mat, (img.shape[1], img.shape[0]))
    landmark_ = np.asarray([(rot_mat[0][0]*x + rot_mat[0][1]*y + rot_mat[0][2],
                             rot_mat[1][0]*x + rot_mat[1][1]*y + rot_mat[1][2]) for (x, y) in landmark])
    face = img_rotated_by_alpha[bbox.y:bbox.y+bbox.h, bbox.x:bbox.x+bbox.w]
    return (face, landmark_)

def processImage(imgs):
    """
    Zero-mean, unit-variance normalization before feeding images to CNNs.
    imgs: N x H x W x 1
    """
    imgs = imgs.astype(np.float32)
    for i, img in enumerate(imgs):
        m = img.mean()
        s = img.std()
        imgs[i] = (img - m) / s
    return imgs

def generate_hdf5(data, output='shit.h5'):
    lines = []
    dst = 'tf_test/'
    imgs = []
    labels = []
    for (imgPath, bbx, landmarks) in data:
        im = cv2.imread(imgPath, cv2.IMREAD_GRAYSCALE)
        imgName = imgPath.split('/')[-1][:-4]

        bbx_sc = bbx.bbxScale(im.shape, scale=1.1)
        # print(bbx_sc.x, bbx_sc.y, bbx_sc.w, bbx_sc.h)
        im_sc = im[bbx_sc.y:bbx_sc.y+bbx_sc.h, bbx_sc.x:bbx_sc.x+bbx_sc.w]
        im_sc = cv2.resize(im_sc, (39, 39))
        imgs.append(im_sc.reshape(39, 39, 1))
        name = dst + imgName + 'sc.jpg'
        lm_sc = bbx_sc.normalizeLmToBbx(landmarks)
        labels.append(lm_sc.reshape(10))
        lines.append(name + ' ' + ' '.join(map(str, lm_sc.flatten())) + '\n')
    imgs, labels = np.asarray(imgs), np.asarray(labels)
    imgs = processImage(imgs)
    with h5py.File(output, 'w') as h5:  # honor the output argument
        h5['data'] = imgs.astype(np.float32)
        h5['landmark'] = labels.astype(np.float32)

def data_augmentation(data, output='tfboy.txt', is_training=False):
    lines = []
    dst = 'tfvae_test/'
    for (imgPath, bbx, landmarks) in data:
        im = cv2.imread(imgPath, cv2.IMREAD_GRAYSCALE)
        imgName = imgPath.split('/')[-1][:-4]

        # Scaled crop: enlarge the bbox by 10% and resize.
        bbx_sc = bbx.bbxScale(im.shape, scale=1.1)
        # print(bbx_sc.x, bbx_sc.y, bbx_sc.w, bbx_sc.h)
        im_sc = im[bbx_sc.y:bbx_sc.y+bbx_sc.h, bbx_sc.x:bbx_sc.x+bbx_sc.w]
        im_sc = cv2.resize(im_sc, (64, 64))
        name = dst + imgName + 'sc.png'
        cv2.imwrite(name, im_sc)
        lm_sc = bbx_sc.normalizeLmToBbx(landmarks)
        lines.append(name + ' ' + ' '.join(map(str, lm_sc.flatten())) + '\n')

        # The remaining augmentations are only generated for training data.
        if not is_training:
            continue

        origin = im[bbx.y:bbx.y+bbx.h, bbx.x:bbx.x+bbx.w]
        origin = cv2.resize(origin, (64, 64))
        name = dst + imgName + 'origin.png'
        cv2.imwrite(name, origin)
        lm_o = bbx.normalizeLmToBbx(landmarks)
        lines.append(name + ' ' + ' '.join(map(str, lm_o.flatten())) + '\n')

        bbx_sf = bbx_sc.bbxShift(im.shape)
        im_sf = im[bbx_sf.y:bbx_sf.y+bbx_sf.h, bbx_sf.x:bbx_sf.x+bbx_sf.w]
        im_sf = cv2.resize(im_sf, (64, 64))
        name = dst + imgName + 'sf.png'
        cv2.imwrite(name, im_sf)
        lm_sf = bbx_sf.normalizeLmToBbx(landmarks)
        lines.append(name + ' ' + ' '.join(map(str, lm_sf.flatten())) + '\n')

        im_rotate, lm_rotate = rotate(im, bbx_sc, landmarks, 5)
        im_rotate = cv2.resize(im_rotate, (64, 64))
        name = dst + imgName + 'rotate.png'
        cv2.imwrite(name, im_rotate)
        lm_rotate = bbx_sc.normalizeLmToBbx(lm_rotate)
        lines.append(name + ' ' + ' '.join(map(str, lm_rotate.flatten())) + '\n')

        # bbx_sf2 = bbx_sc.bbxShift(im.shape)
        # im_sf2 = im[bbx_sf2.y:bbx_sf2.y+bbx_sf2.h, bbx_sf2.x:bbx_sf2.x+bbx_sf2.w]
        # im_sf2 = cv2.resize(im_sf2, (39, 39))
        # name = dst+imgName+'sf2.png'
        # cv2.imwrite(name, im_sf2)
        # lm_sf2 = bbx_sf2.normalizeLmToBbx(landmarks)
        # lines.append(name + ' ' + ' '.join(map(str, lm_sf2.flatten())) + '\n')

        flipo, lm_flipo = flip(origin, lm_o)
        name = dst + imgName + 'flipo.png'
        cv2.imwrite(name, flipo)
        lines.append(name + ' ' + ' '.join(map(str, lm_flipo.flatten())) + '\n')

        flipsc, lm_flipsc = flip(im_sc, lm_sc)
        name = dst + imgName + 'flipsc.png'
        cv2.imwrite(name, flipsc)
        lines.append(name + ' ' + ' '.join(map(str, lm_flipsc.flatten())) + '\n')

        flipsf, lm_flipsf = flip(im_sf, lm_sf)
        name = dst + imgName + 'flipsf.png'
        cv2.imwrite(name, flipsf)
        lines.append(name + ' ' + ' '.join(map(str, lm_flipsf.flatten())) + '\n')

        # flipsf2, lm_flipsf2 = flip(im_sf2, lm_sf2)
        # name = dst+imgName+'flipsf2.png'
        # cv2.imwrite(name, flipsf2)
        # lines.append(name + ' ' + ' '.join(map(str, lm_flipsf2.flatten())) + '\n')

    with open(output, 'w') as fid:
        fid.writelines(lines)


class BBox(object):
    """Bounding box in [left, right, top, bottom] format."""

    def __init__(self, bbx):
        # Cast to int so the box can be used directly for array slicing.
        self.x = int(bbx[0])
        self.y = int(bbx[2])
        self.w = int(bbx[1] - bbx[0])
        self.h = int(bbx[3] - bbx[2])

    def bbxScale(self, im_size, scale=1.3):
        # The scale factor must be greater than 1.
        assert(scale > 1)
        x = np.around(max(1, self.x - (scale * self.w - self.w) / 2.0))
        y = np.around(max(1, self.y - (scale * self.h - self.h) / 2.0))
        w = np.around(min(scale * self.w, im_size[1] - x))
        h = np.around(min(scale * self.h, im_size[0] - y))
        return BBox([x, x+w, y, y+h])

    def bbxShift(self, im_size, shift=0.03):
        direction = np.random.randn(2)
        x = np.around(max(1, self.x - self.w * shift * direction[0]))
        y = np.around(max(1, self.y - self.h * shift * direction[1]))
        w = min(self.w, im_size[1] - x)
        h = min(self.h, im_size[0] - y)
        return BBox([x, x+w, y, y+h])

    def normalizeLmToBbx(self, landmarks):
        result = []
        # print(self.x, self.y, self.w, self.h)
        lmks = landmarks.copy()
        for lm in lmks:
            lm[0] = (lm[0] - self.x) / self.w
            lm[1] = (lm[1] - self.y) / self.h
            result.append(lm)
        result = np.asarray(result)
        return result


if __name__ == '__main__':
    data = read_data_from_txt(TXT)
    # generate_hdf5(data)
    data_augmentation(data, output='tftest_vae.txt', is_training=False)
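A quick sanity check of the helpers above (a minimal sketch, assuming `augment.py` is importable; the box and points are made up):

```python
import numpy as np
from augment import BBox, flip

bbx = BBox([100, 200, 80, 180])            # [left, right, top, bottom]
lms = np.array([[120., 100.], [180., 100.], [150., 130.],
                [125., 160.], [175., 160.]])

# Landmarks inside the box normalize into [0, 1] relative coordinates.
norm_lms = bbx.normalizeLmToBbx(lms)
assert ((norm_lms >= 0) & (norm_lms <= 1)).all()

# Flipping mirrors x and swaps the left/right landmark indices.
face = np.zeros((100, 100), dtype=np.uint8)
_, flipped = flip(face, norm_lms)
assert np.allclose(flipped[0], [1 - norm_lms[1][0], norm_lms[1][1]])
```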
--------------------------------------------------------------------------------
/libs/batch_norm.py:
--------------------------------------------------------------------------------

"""Batch Normalization for TensorFlow.
Parag K. Mital, Jan 2016.
"""

import tensorflow as tf
from tensorflow.python.ops import control_flow_ops


def batch_norm(x, phase_train, name='bn', decay=0.9, reuse=None,
               affine=True):
    """
    Batch normalization on convolutional maps.
    from: https://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow
    Only modified to infer shape from input tensor x.

    Parameters
    ----------
    x
        Tensor, 4D BHWD input maps
    phase_train
        boolean tf.Variable, true indicates training phase
    name
        string, variable scope name
    decay
        float, decay rate of the exponential moving averages
    reuse
        whether to reuse variables in the scope
    affine
        whether to affine-transform outputs

    Return
    ------
    normed
        batch-normalized maps
    """
    with tf.variable_scope(name, reuse=reuse):
        shape = x.get_shape().as_list()
        beta = tf.get_variable(name='beta', shape=[shape[-1]],
                               initializer=tf.constant_initializer(0.0),
                               trainable=True)
        gamma = tf.get_variable(name='gamma', shape=[shape[-1]],
                                initializer=tf.constant_initializer(1.0),
                                trainable=affine)
        if len(shape) == 4:
            batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
        else:
            batch_mean, batch_var = tf.nn.moments(x, [0], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=decay)
        ema_apply_op = ema.apply([batch_mean, batch_var])
        ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var)

        def mean_var_with_update():
            """Update the moving averages and return the batch statistics."""
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = control_flow_ops.cond(phase_train,
                                          mean_var_with_update,
                                          lambda: (ema_mean, ema_var))

        # tf.nn.batch_normalization
        normed = tf.nn.batch_norm_with_global_normalization(
            x, mean, var, beta, gamma, 1e-6, affine)
    return normed
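A minimal usage sketch (TF 0.x-era API, matching this repo's requirements; the shapes are illustrative):

```python
import tensorflow as tf
from libs.batch_norm import batch_norm

x = tf.placeholder(tf.float32, [None, 39, 39, 20])
phase_train = tf.placeholder(tf.bool, name='phase_train')

# Uses batch statistics when phase_train is True, moving averages otherwise.
h = tf.nn.relu(batch_norm(x, phase_train, name='bn1'))
```

Feed `phase_train: True` during training and `False` at eval time so the moving averages are used.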
--------------------------------------------------------------------------------
/libs/tfpipeline.py:
--------------------------------------------------------------------------------

import tensorflow as tf
import numpy as np
from functools import partial
import matplotlib.pyplot as plt

TXTs = ['tftest_vae.txt']


def read_my_file_format(record):
    """Parse one annotation line: an image path followed by 10 floats."""
    record_defaults = [[""]] + [[1.0]] * 10
    components = tf.decode_csv(record, record_defaults=record_defaults,
                               field_delim=" ")
    imgName = components[0]
    # Pack the 10 scalar columns into a single [10] landmark tensor.
    features = tf.pack(components[1:])
    img_contents = tf.read_file(imgName)
    # data_augmentation() writes PNG crops, so decode PNG here (not JPEG).
    img = tf.image.decode_png(img_contents, channels=1)
    return img, features

def processImage(img):
    """
    Zero-mean, unit-variance normalization before feeding an image to CNNs.
    img: W x H x 1
    """
    img = img.astype(np.float32)
    m = img.mean()
    s = img.std()
    img = (img - m) / s
    return img

def input_pipeline(TXTs, batch_size, shape, is_training=False):
    filename_queue = tf.train.string_input_producer(TXTs, shuffle=is_training)
    reader = tf.TextLineReader()
    _, value = reader.read(filename_queue)
    img, features = read_my_file_format(value)
    img.set_shape(shape)
    img_reshape = tf.cast(img, tf.float32)
    # float_image = tf.py_func(processImage, [img_reshape], [tf.float32])[0]
    # float_image.set_shape(shape)
    float_image = tf.image.per_image_whitening(img_reshape)
    # if is_training:
    #     float_image = distort_color(float_image)
    min_after_dequeue = 80000 // 100

    # The capacity should be larger than min_after_dequeue, and determines how
    # many examples are prefetched. TF docs recommend setting this value to:
    # min_after_dequeue + (num_threads + a small safety margin) * batch_size
    capacity = min_after_dequeue + (2 + 1) * batch_size

    # Randomize the order and output batches of batch_size.
    img_batch, label_batch = tf.train.shuffle_batch([float_image, features],
                                                    enqueue_many=False,
                                                    batch_size=batch_size,
                                                    capacity=capacity,
                                                    min_after_dequeue=min_after_dequeue,
                                                    num_threads=2)
    # img_batch, label_batch = tf.train.batch([float_image, features], batch_size=batch_size)
    return img_batch, label_batch

def distort_color(image, thread_id=0, stddev=0.1, scope=None):
    """Distort the color of the image.

    Each color distortion is non-commutative and thus ordering of the color
    ops matters. Ideally we would randomly permute the ordering of the color
    ops. Rather than adding that level of complication, we select a distinct
    ordering of color ops for each preprocessing thread.

    Args:
        image: Tensor containing single image.
        thread_id: preprocessing thread ID.
        stddev: standard deviation of the additive Gaussian noise.
        scope: Optional scope for op_scope.
    Returns:
        color-distorted image
    """
    with tf.op_scope([image], scope, 'distort_color'):
        color_ordering = thread_id % 2

        if color_ordering == 0:
            image = tf.image.random_brightness(image, max_delta=32. / 255.)
            image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
            image = tf.image.random_hue(image, max_delta=0.2)
            image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
        elif color_ordering == 1:
            image = tf.image.random_brightness(image, max_delta=32. / 255.)
            image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
            image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
            image = tf.image.random_hue(image, max_delta=0.2)

        image += tf.random_normal(
            tf.shape(image),
            stddev=stddev,
            dtype=tf.float32,
            seed=42,
            name='add_gaussian_noise')
        # The random_* ops do not necessarily clamp.
        image = tf.clip_by_value(image, 0.0, 1.0)
        return image

# shape = [64, 64, 1]
# im_batch, label_batch = input_pipeline(TXTs, 1, shape)
# with tf.Session() as sess:
#     sess.run(tf.initialize_all_variables())
#     coord = tf.train.Coordinator()
#     threads = tf.train.start_queue_runners(coord=coord)
#     im, feat = sess.run([im_batch, label_batch])
#     print(feat[0])
#     plt.imshow(im[0].reshape((64, 64)))
#     import pdb; pdb.set_trace()
#     coord.request_stop()
#     coord.join(threads)
--------------------------------------------------------------------------------
/libs/utils.py:
--------------------------------------------------------------------------------

"""Utilities used in the Kadenze Academy Course on Deep Learning w/ Tensorflow.

Creative Applications of Deep Learning w/ Tensorflow.
Kadenze, Inc.
Parag K. Mital

Copyright Parag K. Mital, June 2016.
"""
import matplotlib.pyplot as plt
import tensorflow as tf
import urllib
import numpy as np
import zipfile
import os
from scipy.io import wavfile


def download(path):
    """Use urllib to download a file.

    Parameters
    ----------
    path : str
        Url to download

    Returns
    -------
    path : str
        Location of downloaded file.
    """
    import os
    from six.moves import urllib

    fname = path.split('/')[-1]
    if os.path.exists(fname):
        return fname

    print('Downloading ' + path)

    def progress(count, block_size, total_size):
        if count % 20 == 0:
            print('Downloaded %02.02f/%02.02f MB' % (
                count * block_size / 1024.0 / 1024.0,
                total_size / 1024.0 / 1024.0), end='\r')

    filepath, _ = urllib.request.urlretrieve(
        path, filename=fname, reporthook=progress)
    return filepath


def download_and_extract_tar(path, dst):
    """Download and extract a tar file.

    Parameters
    ----------
    path : str
        Url to tar file to download.
    dst : str
        Location to save tar file contents.
    """
    import tarfile
    filepath = download(path)
    if not os.path.exists(dst):
        os.makedirs(dst)
    tarfile.open(filepath, 'r:gz').extractall(dst)


def download_and_extract_zip(path, dst):
    """Download and extract a zip file.

    Parameters
    ----------
    path : str
        Url to zip file to download.
    dst : str
        Location to save zip file contents.
    """
    import zipfile
    filepath = download(path)
    if not os.path.exists(dst):
        os.makedirs(dst)
    zf = zipfile.ZipFile(file=filepath)
    zf.extractall(dst)
def load_audio(filename, b_normalize=True):
    """Load the audio file at the provided filename using scipy.io.wavfile.

    Optionally normalizes the audio to the maximum value.

    Parameters
    ----------
    filename : str
        File to load.
    b_normalize : bool, optional
        Normalize to the maximum value.
    """
    sr, s = wavfile.read(filename)
    if b_normalize:
        s = s.astype(np.float32)
        s = (s / np.max(np.abs(s)))
        s -= np.mean(s)
    return s


def corrupt(x):
    """Take an input tensor and add uniform masking.

    Parameters
    ----------
    x : Tensor/Placeholder
        Input to corrupt.

    Returns
    -------
    x_corrupted : Tensor
        50 pct of values corrupted.
    """
    return tf.mul(x, tf.cast(tf.random_uniform(shape=tf.shape(x),
                                               minval=0,
                                               maxval=2,
                                               dtype=tf.int32), tf.float32))
def interp(l, r, n_samples):
    """Interpolate between the arrays l and r, n_samples times.

    Parameters
    ----------
    l : np.ndarray
        Left edge
    r : np.ndarray
        Right edge
    n_samples : int
        Number of samples

    Returns
    -------
    arr : np.ndarray
        Interpolated array
    """
    return np.array([
        l + step_i / (n_samples - 1) * (r - l)
        for step_i in range(n_samples)])


def make_latent_manifold(corners, n_samples):
    """Create a 2d manifold out of the provided corners: n_samples * n_samples.

    Parameters
    ----------
    corners : list of np.ndarray
        The four corners to interpolate.
    n_samples : int
        Number of samples to use in interpolation.

    Returns
    -------
    arr : np.ndarray
        Stacked array of all 2D interpolated samples
    """
    left = interp(corners[0], corners[1], n_samples)
    right = interp(corners[2], corners[3], n_samples)

    embedding = []
    for row_i in range(n_samples):
        embedding.append(interp(left[row_i], right[row_i], n_samples))
    return np.vstack(embedding)
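# Example (illustrative): interpolating a 3x3 grid between four 2-D corners.
#
#     corners = [np.array([0., 0.]), np.array([0., 1.]),
#                np.array([1., 0.]), np.array([1., 1.])]
#     grid = make_latent_manifold(corners, n_samples=3)
#     grid.shape  # (9, 2): 3 rows x 3 columns of interpolated codes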
def imcrop_tosquare(img):
    """Make any image a square image.

    Parameters
    ----------
    img : np.ndarray
        Input image to crop, assumed at least 2d.

    Returns
    -------
    crop : np.ndarray
        Cropped image.
    """
    size = np.min(img.shape[:2])
    extra = img.shape[:2] - size
    crop = img
    for i in np.flatnonzero(extra):
        crop = np.take(crop, extra[i] // 2 + np.r_[:size], axis=i)
    return crop


def slice_montage(montage, img_h, img_w, n_imgs):
    """Slice a montage image into n_img h x w images.

    Performs the opposite of the montage function. Takes a montage image and
    slices it back into a N x H x W x C image.

    Parameters
    ----------
    montage : np.ndarray
        Montage image to slice.
    img_h : int
        Height of sliced image
    img_w : int
        Width of sliced image
    n_imgs : int
        Number of images to slice

    Returns
    -------
    sliced : np.ndarray
        Sliced images as 4d array.
    """
    sliced_ds = []
    for i in range(int(np.sqrt(n_imgs))):
        for j in range(int(np.sqrt(n_imgs))):
            sliced_ds.append(montage[
                1 + i + i * img_h:1 + i + (i + 1) * img_h,
                1 + j + j * img_w:1 + j + (j + 1) * img_w])
    return np.array(sliced_ds)


def montage(images, saveto='montage.png'):
    """Draw all images as a montage separated by 1 pixel borders.

    Also saves the file to the destination specified by `saveto`.

    Parameters
    ----------
    images : numpy.ndarray
        Input array to create montage of. Array should be:
        batch x height x width x channels.
    saveto : str
        Location to save the resulting montage image.

    Returns
    -------
    m : numpy.ndarray
        Montage image.
    """
    if isinstance(images, list):
        images = np.array(images)
    img_h = images.shape[1]
    img_w = images.shape[2]
    n_plots = int(np.ceil(np.sqrt(images.shape[0])))
    if len(images.shape) == 4 and images.shape[3] == 3:
        m = np.ones(
            (images.shape[1] * n_plots + n_plots + 1,
             images.shape[2] * n_plots + n_plots + 1, 3)) * 0.5
    else:
        m = np.ones(
            (images.shape[1] * n_plots + n_plots + 1,
             images.shape[2] * n_plots + n_plots + 1)) * 0.5
    for i in range(n_plots):
        for j in range(n_plots):
            this_filter = i * n_plots + j
            if this_filter < images.shape[0]:
                this_img = images[this_filter]
                # print(this_img.shape, m.shape)
                m[1 + i + i * img_h:1 + i + (i + 1) * img_h,
                  1 + j + j * img_w:1 + j + (j + 1) * img_w] = this_img.squeeze()
    plt.imsave(arr=m, fname=saveto)
    return m
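# slice_montage() inverts montage(); a quick round trip (illustrative,
# writes 'demo_montage.png' as a side effect):
#
#     imgs = np.random.rand(4, 8, 8, 1)
#     m = montage(imgs, saveto='demo_montage.png')
#     slice_montage(m, img_h=8, img_w=8, n_imgs=4).shape  # (4, 8, 8)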
def montage_filters(W):
    """Draws all filters (n_input * n_output filters) as a
    montage image separated by 1 pixel borders.

    Parameters
    ----------
    W : Tensor
        Input tensor to create montage of.

    Returns
    -------
    m : numpy.ndarray
        Montage image.
    """
    W = np.reshape(W, [W.shape[0], W.shape[1], 1, W.shape[2] * W.shape[3]])
    n_plots = int(np.ceil(np.sqrt(W.shape[-1])))
    m = np.ones(
        (W.shape[0] * n_plots + n_plots + 1,
         W.shape[1] * n_plots + n_plots + 1)) * 0.5
    for i in range(n_plots):
        for j in range(n_plots):
            this_filter = i * n_plots + j
            if this_filter < W.shape[-1]:
                m[1 + i + i * W.shape[0]:1 + i + (i + 1) * W.shape[0],
                  1 + j + j * W.shape[1]:1 + j + (j + 1) * W.shape[1]] = (
                    np.squeeze(W[:, :, :, this_filter]))
    return m


def get_celeb_files(dst='img_align_celeba', max_images=100):
    """Download the first 100 images of the celeb dataset.

    Files will be placed in a directory 'img_align_celeba' if one
    doesn't exist.

    Returns
    -------
    files : list of strings
        Locations to the first 100 images of the celeb net dataset.
    """
    # Create a directory
    if not os.path.exists(dst):
        os.mkdir(dst)

    # Now perform the following 100 times:
    for img_i in range(1, max_images + 1):

        # create a string using the current loop counter
        f = '000%03d.jpg' % img_i

        if not os.path.exists(os.path.join(dst, f)):

            # and get the url with that string appended the end
            url = 'https://s3.amazonaws.com/cadl/celeb-align/' + f

            # We'll print this out to the console so we can see how far we've gone
            print(url, end='\r')

            # And now download the url to a location inside our new directory
            urllib.request.urlretrieve(url, os.path.join(dst, f))

    files = [os.path.join(dst, file_i)
             for file_i in os.listdir(dst)
             if '.jpg' in file_i][:max_images]
    return files


def get_celeb_imgs(max_images=100):
    """Load the first `max_images` images of the celeb dataset.

    Returns
    -------
    imgs : list of np.ndarray
        List of the first 100 images from the celeb dataset
    """
    return [plt.imread(f_i) for f_i in get_celeb_files(max_images=max_images)]
def gauss(mean, stddev, ksize):
    """Use Tensorflow to compute a Gaussian Kernel.

    Parameters
    ----------
    mean : float
        Mean of the Gaussian (e.g. 0.0).
    stddev : float
        Standard Deviation of the Gaussian (e.g. 1.0).
    ksize : int
        Size of kernel (e.g. 16).

    Returns
    -------
    kernel : np.ndarray
        Computed Gaussian Kernel using Tensorflow.
    """
    g = tf.Graph()
    with tf.Session(graph=g):
        x = tf.linspace(-3.0, 3.0, ksize)
        z = (tf.exp(tf.neg(tf.pow(x - mean, 2.0) /
                           (2.0 * tf.pow(stddev, 2.0)))) *
             (1.0 / (stddev * tf.sqrt(2.0 * 3.1415))))
        return z.eval()


def gauss2d(mean, stddev, ksize):
    """Use Tensorflow to compute a 2D Gaussian Kernel.

    Parameters
    ----------
    mean : float
        Mean of the Gaussian (e.g. 0.0).
    stddev : float
        Standard Deviation of the Gaussian (e.g. 1.0).
    ksize : int
        Size of kernel (e.g. 16).

    Returns
    -------
    kernel : np.ndarray
        Computed 2D Gaussian Kernel using Tensorflow.
    """
    z = gauss(mean, stddev, ksize)
    g = tf.Graph()
    with tf.Session(graph=g):
        z_2d = tf.matmul(tf.reshape(z, [ksize, 1]), tf.reshape(z, [1, ksize]))
        return z_2d.eval()


def convolve(img, kernel):
    """Use Tensorflow to convolve a 4D image with a 4D kernel.

    Parameters
    ----------
    img : np.ndarray
        4-dimensional image shaped N x H x W x C
    kernel : np.ndarray
        4-dimensional image shape K_H, K_W, C_I, C_O corresponding to the
        kernel's height and width, the number of input channels, and the
        number of output channels. Note that C_I should = C.

    Returns
    -------
    result : np.ndarray
        Convolved result.
    """
    g = tf.Graph()
    with tf.Session(graph=g):
        convolved = tf.nn.conv2d(img, kernel, strides=[1, 1, 1, 1], padding='SAME')
        res = convolved.eval()
    return res


def gabor(ksize=32):
    """Use Tensorflow to compute a 2D Gabor Kernel.

    Parameters
    ----------
    ksize : int, optional
        Size of kernel.

    Returns
    -------
    gabor : np.ndarray
        Gabor kernel with ksize x ksize dimensions.
    """
    g = tf.Graph()
    with tf.Session(graph=g):
        z_2d = gauss2d(0.0, 1.0, ksize)
        ones = tf.ones((1, ksize))
        ys = tf.sin(tf.linspace(-3.0, 3.0, ksize))
        ys = tf.reshape(ys, [ksize, 1])
        wave = tf.matmul(ys, ones)
        gabor = tf.mul(wave, z_2d)
        return gabor.eval()
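# Example (illustrative): blurring a random image with a Gaussian kernel.
#
#     img = np.random.rand(1, 32, 32, 1).astype(np.float32)
#     kernel = gauss2d(0.0, 1.0, ksize=8).reshape(8, 8, 1, 1)
#     convolve(img, kernel).shape  # (1, 32, 32, 1)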
def build_submission(filename, file_list, optional_file_list=()):
    """Helper utility to check homework assignment submissions and package them.

    Parameters
    ----------
    filename : str
        Output zip file name
    file_list : tuple
        Tuple of files to include
    """
    # check each file exists
    for part_i, file_i in enumerate(file_list):
        if not os.path.exists(file_i):
            print('\nYou are missing the file {}. '.format(file_i) +
                  'It does not look like you have completed Part {}.'.format(
                      part_i + 1))

    def zipdir(path, zf):
        for root, dirs, files in os.walk(path):
            for file in files:
                # make sure the files are part of the necessary file list
                if file.endswith(file_list) or file.endswith(optional_file_list):
                    zf.write(os.path.join(root, file))

    # create a zip file with the necessary files
    zipf = zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED)
    zipdir('.', zipf)
    zipf.close()
    print('Your assignment zip file has been created!')
    print('Now submit the file:\n{}\nto Kadenze for grading!'.format(
        os.path.abspath(filename)))


def normalize(a, s=0.1):
    '''Normalize the image range for visualization'''
    return np.uint8(np.clip(
        (a - a.mean()) / max(a.std(), 1e-4) * s + 0.5,
        0, 1) * 255)


# %%
def weight_variable(shape, **kwargs):
    '''Helper function to create a weight variable initialized with
    a normal distribution

    Parameters
    ----------
    shape : list
        Size of weight variable
    '''
    if isinstance(shape, list):
        initial = tf.random_normal(tf.pack(shape), mean=0.0, stddev=0.01)
        initial.set_shape(shape)
    else:
        initial = tf.random_normal(shape, mean=0.0, stddev=0.01)
    return tf.Variable(initial, **kwargs)


# %%
def bias_variable(shape, **kwargs):
    '''Helper function to create a bias variable initialized with
    a constant value.

    Parameters
    ----------
    shape : list
        Size of weight variable
    '''
    if isinstance(shape, list):
        initial = tf.random_normal(tf.pack(shape), mean=0.0, stddev=0.01)
        initial.set_shape(shape)
    else:
        initial = tf.random_normal(shape, mean=0.0, stddev=0.01)
    return tf.Variable(initial, **kwargs)


def binary_cross_entropy(z, x, name=None):
    """Binary Cross Entropy measures cross entropy of a binary variable.

    loss(x, z) = - sum_i (x[i] * log(z[i]) + (1 - x[i]) * log(1 - z[i]))

    Parameters
    ----------
    z : tf.Tensor
        A `Tensor` of the same type and shape as `x`.
    x : tf.Tensor
        A `Tensor` of type `float32` or `float64`.
    """
    with tf.variable_scope(name or 'bce'):
        eps = 1e-12
        return (-(x * tf.log(z + eps) +
                  (1. - x) * tf.log(1. - z + eps)))
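# Numeric check of the formula above (illustrative, TF 0.x API):
#
#     with tf.Session():
#         x = tf.constant([1.0, 0.0])   # targets
#         z = tf.constant([0.9, 0.1])   # predictions
#         binary_cross_entropy(z, x).eval()  # ~[0.105 0.105], i.e. -log(0.9)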
def conv2d(x, n_output,
           k_h=5, k_w=5, d_h=2, d_w=2,
           padding='VALID', name='conv2d', reuse=None):
    """Helper for creating a 2d convolution operation.

    Parameters
    ----------
    x : tf.Tensor
        Input tensor to convolve.
    n_output : int
        Number of filters.
    k_h : int, optional
        Kernel height
    k_w : int, optional
        Kernel width
    d_h : int, optional
        Height stride
    d_w : int, optional
        Width stride
    padding : str, optional
        Padding type: "SAME" or "VALID"
    name : str, optional
        Variable scope

    Returns
    -------
    h, W : tf.Tensor, tf.Tensor
        Output of convolution and the kernel weights
    """
    with tf.variable_scope(name or 'conv2d', reuse=reuse):
        W = tf.get_variable(
            name='W',
            shape=[k_h, k_w, x.get_shape()[-1], n_output],
            initializer=tf.contrib.layers.xavier_initializer_conv2d())

        conv = tf.nn.conv2d(
            name='conv',
            input=x,
            filter=W,
            strides=[1, d_h, d_w, 1],
            padding=padding)

        b = tf.get_variable(
            name='b',
            shape=[n_output],
            initializer=tf.constant_initializer(0.0))

        h = tf.nn.bias_add(
            name='h',
            value=conv,
            bias=b)

    return h, W


def deconv2d(x, n_output_h, n_output_w, n_output_ch, n_input_ch=None,
             k_h=5, k_w=5, d_h=2, d_w=2,
             padding='SAME', name='deconv2d', reuse=None):
    """Deconvolution helper.

    Parameters
    ----------
    x : tf.Tensor
        Input tensor to convolve.
    n_output_h : int
        Height of output
    n_output_w : int
        Width of output
    n_output_ch : int
        Number of filters.
    k_h : int, optional
        Kernel height
    k_w : int, optional
        Kernel width
    d_h : int, optional
        Height stride
    d_w : int, optional
        Width stride
    padding : str, optional
        Padding type: "SAME" or "VALID"
    name : str, optional
        Variable scope

    Returns
    -------
    h, W : tf.Tensor, tf.Tensor
        Output of deconvolution and the kernel weights
    """
    with tf.variable_scope(name or 'deconv2d', reuse=reuse):
        # Kernel shape is k_h x k_w (the original used k_h twice by mistake).
        W = tf.get_variable(
            name='W',
            shape=[k_h, k_w, n_output_ch, n_input_ch or x.get_shape()[-1]],
            initializer=tf.contrib.layers.xavier_initializer_conv2d())

        conv = tf.nn.conv2d_transpose(
            name='conv_t',
            value=x,
            filter=W,
            output_shape=tf.pack(
                [tf.shape(x)[0], n_output_h, n_output_w, n_output_ch]),
            strides=[1, d_h, d_w, 1],
            padding=padding)

        conv.set_shape([None, n_output_h, n_output_w, n_output_ch])

        b = tf.get_variable(
            name='b',
            shape=[n_output_ch],
            initializer=tf.constant_initializer(0.0))

        h = tf.nn.bias_add(name='h', value=conv, bias=b)

    return h, W


def lrelu(features, leak=0.2):
    """Leaky rectifier.

    Parameters
    ----------
    features : tf.Tensor
        Input to apply leaky rectifier to.
    leak : float, optional
        Percentage of leak.

    Returns
    -------
    op : tf.Tensor
        Resulting output of applying leaky rectifier activation.
    """
    f1 = 0.5 * (1 + leak)
    f2 = 0.5 * (1 - leak)
    return f1 * features + f2 * abs(features)


def linear(x, n_output, name=None, activation=None, reuse=None):
    """Fully connected layer.

    Parameters
    ----------
    x : tf.Tensor
        Input tensor to connect
    n_output : int
        Number of output neurons
    name : None, optional
        Scope to apply

    Returns
    -------
    h, W : tf.Tensor, tf.Tensor
        Output of fully connected layer and the weight matrix
    """
    if len(x.get_shape()) != 2:
        x = flatten(x, reuse=reuse)

    n_input = x.get_shape().as_list()[1]

    with tf.variable_scope(name or "fc", reuse=reuse):
        W = tf.get_variable(
            name='W',
            shape=[n_input, n_output],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer())

        b = tf.get_variable(
            name='b',
            shape=[n_output],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.0))

        h = tf.nn.bias_add(
            name='h',
            value=tf.matmul(x, W),
            bias=b)

        if activation:
            h = activation(h)

    return h, W


def flatten(x, name=None, reuse=None):
    """Flatten Tensor to 2-dimensions.

    Parameters
    ----------
    x : tf.Tensor
        Input tensor to flatten.
    name : None, optional
        Variable scope for flatten operations

    Returns
    -------
    flattened : tf.Tensor
        Flattened tensor.
    """
    with tf.variable_scope('flatten'):
        dims = x.get_shape().as_list()
        if len(dims) == 4:
            flattened = tf.reshape(
                x,
                shape=[-1, dims[1] * dims[2] * dims[3]])
        elif len(dims) == 2 or len(dims) == 1:
            flattened = x
        else:
            raise ValueError('Expected n dimensions of 1, 2 or 4. Found:',
                             len(dims))

        return flattened


def to_tensor(x):
    """Convert a 2-dim Tensor to a 4-dim Tensor ready for convolution.

    Performs the opposite of flatten(x). If the tensor is already 4-D, this
    returns the same as the input, leaving it unchanged.

    Parameters
    ----------
    x : tf.Tensor
        Input 2-D tensor. If 4-D already, left unchanged.

    Returns
    -------
    x : tf.Tensor
        4-D representation of the input.

    Raises
    ------
    ValueError
        If the tensor is not 2D or already 4D.
    """
    if len(x.get_shape()) == 2:
        n_input = x.get_shape().as_list()[1]
        x_dim = np.sqrt(n_input)
        if x_dim == int(x_dim):
            x_dim = int(x_dim)
            x_tensor = tf.reshape(
                x, [-1, x_dim, x_dim, 1], name='reshape')
        elif np.sqrt(n_input / 3) == int(np.sqrt(n_input / 3)):
            x_dim = int(np.sqrt(n_input / 3))
            x_tensor = tf.reshape(
                x, [-1, x_dim, x_dim, 3], name='reshape')
        else:
            x_tensor = tf.reshape(
                x, [-1, 1, 1, n_input], name='reshape')
    elif len(x.get_shape()) == 4:
        x_tensor = x
    else:
        raise ValueError('Unsupported input dimensions')
    return x_tensor
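# flatten() and to_tensor() are inverses for square grayscale inputs
# (illustrative shape check):
#
#     x = tf.placeholder(tf.float32, [None, 39, 39, 1])
#     flat = flatten(x)   # -> (?, 1521)
#     to_tensor(flat)     # -> (?, 39, 39, 1), since sqrt(1521) == 39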
def genLandmarkMap(landmarks, shape=[39, 39]):
    '''Generate a landmark map from normalized landmarks.

    Input params:
        landmarks: K x 2 float, coordinates normalized to [0, 1]
        shape: H x W

    Output:
        landmarkMap: H x W x K binary map. In each H x W map, only the
        location nearest to the landmark is set to 1; everything else is 0.'''
    landmarks = landmarks.reshape((-1, 2))
    landmarkMap = np.zeros(shape + [len(landmarks)])
    for (i, landmark) in enumerate(landmarks):
        # Clamp to the last pixel so a coordinate of exactly 1.0 stays in bounds.
        x = min(int(np.around(landmark[0] * shape[1])), shape[1] - 1)
        y = min(int(np.around(landmark[1] * shape[0])), shape[0] - 1)
        landmarkMap[y, x, i] = 1
    return landmarkMap
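# Example (illustrative, made-up normalized landmarks):
#
#     lms = np.array([[0.3, 0.3], [0.7, 0.3], [0.5, 0.5],
#                     [0.35, 0.8], [0.65, 0.8]])
#     lm_map = genLandmarkMap(lms, shape=[39, 39])
#     lm_map.shape             # (39, 39, 5)
#     lm_map.sum(axis=(0, 1))  # one hot pixel per landmark: [1. 1. 1. 1. 1.]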
--------------------------------------------------------------------------------
/model_eval.py:
--------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from datetime import datetime
from pathlib import Path

import math
import matplotlib
import numpy as np
import os.path
import tensorflow as tf
import time
from model_train import deepID
from libs.tfpipeline import input_pipeline


# Do not use a GUI toolkit for matplotlib.
matplotlib.use('Agg')

FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string('checkpoint_dir', 'models/',
                           """Directory where to read model checkpoints.""")

# Flags governing the frequency of the eval.
tf.app.flags.DEFINE_integer('eval_interval_secs', 60 * 5,
                            """How often to run the eval.""")

tf.app.flags.DEFINE_boolean('run_once', True,
                            """Whether to run eval only once.""")

# Flags governing the data used for the eval.
tf.app.flags.DEFINE_integer('num_examples', 3466,
                            """Number of examples to run.""")
tf.app.flags.DEFINE_integer('batch_size', 2,
                            """Number of examples per batch.""")
tf.app.flags.DEFINE_string('data_txt', 'tftest.txt',
                           """The text file containing test data paths and annotations.""")
tf.app.flags.DEFINE_string('device', '/cpu:0', 'the device to eval on.')


def normalized_rmse(pred, gt_truth):
    # TODO: assert shapes
    # Mean point-to-point error over the 5 landmarks, normalized by the
    # inter-ocular distance (landmarks 0 and 1 are the eye centers).
    norm = tf.sqrt(tf.reduce_sum(((gt_truth[:, 0, :] - gt_truth[:, 1, :])**2), 1))

    return tf.reduce_sum(tf.sqrt(tf.reduce_sum(tf.square(pred - gt_truth), 2)), 1) / (norm * 5)
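# Equivalent numpy computation (illustrative, not used by the code), for a
# batch of shapes pred, gt with shape (N, 5, 2):
#
#     iod = np.sqrt(((gt[:, 0, :] - gt[:, 1, :]) ** 2).sum(1))
#     err = np.sqrt(((pred - gt) ** 2).sum(2)).sum(1) / (iod * 5)
#
# i.e. the mean point-to-point distance over the 5 landmarks, divided by
# the inter-ocular distance.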
def _eval_once(saver, rmse_op, network):
    """Runs eval once.

    Args:
        saver: Saver used to restore the checkpoint.
        rmse_op: op computing the normalized RMSE per example.
        network: dict of model tensors, as returned by deepID().
    """
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print(ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)

            # Assuming model_checkpoint_path looks something like:
            #   /my-favorite-path/imagenet_train/model.ckpt-0,
            # extract global_step from it.
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            print('Successfully loaded model from %s at step=%s.' %
                  (ckpt.model_checkpoint_path, global_step))
        else:
            print('No checkpoint file found')
            return
        test_x, test_label = input_pipeline([FLAGS.data_txt], batch_size=FLAGS.batch_size,
                                            shape=[39, 39, 1], is_training=False)
        # Start the queue runners.
        coord = tf.train.Coordinator()
        try:
            threads = []
            for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                threads.extend(qr.create_threads(sess, coord=coord, daemon=True,
                                                 start=True))

            num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size))
            # Accumulates the per-example normalized errors.
            errors = []

            total_sample_count = num_iter * FLAGS.batch_size
            step = 0

            print('%s: starting evaluation on (%s).' % (datetime.now(), FLAGS.data_txt))
            start_time = time.time()
            while step < num_iter and not coord.should_stop():
                test_xs, label = sess.run([test_x, test_label])
                rmse = sess.run(rmse_op, feed_dict={network['x']: test_xs,
                                                    network['y']: label,
                                                    network['train']: False,
                                                    # no dropout at eval time
                                                    network['keep_prob']: 1.0})
                errors.append(rmse)
                step += 1
                if step % 20 == 0:
                    duration = time.time() - start_time
                    sec_per_batch = duration / 20.0
                    examples_per_sec = FLAGS.batch_size / sec_per_batch
                    print('%s: [%d batches out of %d] (%.1f examples/sec; %.3f '
                          'sec/batch)' % (datetime.now(), step, num_iter,
                                          examples_per_sec, sec_per_batch))
                    start_time = time.time()

            errors = np.vstack(errors).ravel()
            mean_rmse = errors.mean()
            auc_at_08 = (errors < .08).mean()
            auc_at_05 = (errors < .05).mean()

            print('Errors', errors.shape)
            print('%s: mean_rmse = %.4f, auc @ 0.05 = %.4f, auc @ 0.08 = %.4f [%d examples]' %
                  (datetime.now(), mean_rmse, auc_at_05, auc_at_08, total_sample_count))

        except Exception as e:  # pylint: disable=broad-except
            coord.request_stop(e)

        coord.request_stop()
        coord.join(threads, stop_grace_period_secs=10)


def evaluate(shape=[39, 39, 1]):
    """Evaluate model on the dataset for a number of steps."""
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        train_dir = Path(FLAGS.checkpoint_dir)

        images, landmarks = input_pipeline(
            [FLAGS.data_txt], batch_size=2,
            shape=shape, is_training=False)

        # mirrored_images, _, mirrored_inits, shapes = data_provider.batch_inputs(
        #     [dataset_path], reference_shape,
        #     batch_size=FLAGS.batch_size, is_training=False, mirror_image=True)

        print('Loading model...')
        # Build a Graph that computes the logits predictions from the
        # inference model.
        with tf.device(FLAGS.device):
            deepid = deepID(input_shape=[None, 39, 39, 1], n_filters=[20, 40, 60, 80],
                            filter_sizes=[4, 3, 3, 2], activation=tf.nn.relu, dropout=False)

        tf.get_variable_scope().reuse_variables()

        avg_pred = deepid['pred']
        gt_truth = deepid['y']
        gt_truth = tf.reshape(gt_truth, (-1, 5, 2))
        # Calculate predictions.
        norm_error = normalized_rmse(avg_pred, gt_truth)

        # Restore the moving average version of the learned variables for eval.
        # variable_averages = tf.train.ExponentialMovingAverage(0.9999)
        # variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver()

        while True:
            _eval_once(saver, norm_error, deepid)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)


if __name__ == '__main__':
    evaluate()
--------------------------------------------------------------------------------
/model_train.py:
--------------------------------------------------------------------------------

"""Convolutional neural network for face alignment.

Copyright Mario S. Lew, Oct 2016
"""
import tensorflow as tf
import numpy as np
import os
from libs.tfpipeline import input_pipeline
from libs.batch_norm import batch_norm
from libs import utils
from numpy.linalg import norm
import h5py
import matplotlib.pyplot as plt


def deepID(input_shape=[None, 39, 39, 1],
           n_filters=[20, 40, 60, 80],
           filter_sizes=[4, 3, 3, 2],
           activation=tf.nn.relu,
           dropout=False):
    """DeepID-style landmark regression network.

    Four convolutional layers (each of the first three followed by 2x2
    max-pooling); the flattened pool3 and conv4 features are concatenated
    before two fully connected layers that regress the 10 landmark
    coordinates.

    Parameters
    ----------
    input_shape : list, optional
        Shape of the input to the network, e.g. [None, 39, 39, 1].
    n_filters : list, optional
        Number of output filters for each convolutional layer.
    filter_sizes : list, optional
        ksize (height and width) of each convolutional layer.
    activation : function, optional
        Activation function to apply to each layer, e.g. tf.nn.relu
    dropout : bool, optional
        Whether or not to apply dropout to the first fully connected layer.
        If using dropout, you must feed a value for 'keep_prob', as returned
        in the dictionary. 1.0 means no dropout is used. 0.0 means every
        connection is dropped. Sensible values are between 0.5-0.8.

    Returns
    -------
    model : dict
        {
            'cost': Tensor to optimize.
            'Ws': All weights of the network.
            'x': Input Placeholder.
            'y': Ground-truth landmark Placeholder (N x 10).
            'pred': Predicted landmarks, reshaped to (N, 5, 2).
            'keep_prob': Amount to keep when using Dropout.
            'train': Set to True when training / applies to Batch Normalization.
        }
    """
    # network input / placeholders for train (bn) and dropout
    x = tf.placeholder(tf.float32, input_shape, 'x')
    y = tf.placeholder(tf.float32, [None, 10], 'y')
    phase_train = tf.placeholder(tf.bool, name='phase_train')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    # 2d -> 4d if convolution
    x_tensor = utils.to_tensor(x)
    current_input = x_tensor

    Ws = []
    shapes = []

    # Build the feature extractor: conv -> pool stacks.
    shapes.append(current_input.get_shape().as_list())
    conv1, W = utils.conv2d(x=x_tensor,
                            n_output=n_filters[0],
                            k_h=filter_sizes[0],
                            k_w=filter_sizes[0],
                            d_w=1,
                            d_h=1,
                            name='conv1')
    Ws.append(W)
    # conv1 = activation(batch_norm(conv1, phase_train, 'bn1'))
    conv1 = activation(conv1)

    pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool1')

    conv2, W = utils.conv2d(x=pool1,
                            n_output=n_filters[1],
                            k_h=filter_sizes[1],
                            k_w=filter_sizes[1],
                            d_w=1,
                            d_h=1,
                            name='conv2')
    Ws.append(W)
    # conv2 = activation(batch_norm(conv2, phase_train, 'bn2'))
    conv2 = activation(conv2)

    pool2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool2')

    conv3, W = utils.conv2d(x=pool2,
                            n_output=n_filters[2],
                            k_h=filter_sizes[2],
                            k_w=filter_sizes[2],
                            d_w=1,
                            d_h=1,
                            name='conv3')
    Ws.append(W)
    # conv3 = activation(batch_norm(conv3, phase_train, 'bn3'))
    conv3 = activation(conv3)

    pool3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool3')

    conv4, W = utils.conv2d(x=pool3,
                            n_output=n_filters[3],
                            k_h=filter_sizes[3],
                            k_w=filter_sizes[3],
                            d_w=1,
                            d_h=1,
                            name='conv4')
    Ws.append(W)
    # conv4 = activation(batch_norm(conv4, phase_train, 'bn4'))
    conv4 = activation(conv4)

    # Multi-scale features: concatenate flattened pool3 and conv4.
    pool3_flat = utils.flatten(pool3)
    conv4_flat = utils.flatten(conv4)
    concat = tf.concat(1, [pool3_flat, conv4_flat], name='concat')

    ip1, W = utils.linear(concat, 120, name='ip1')
    Ws.append(W)
    ip1 = activation(ip1)
    if dropout:
        ip1 = tf.nn.dropout(ip1, keep_prob)

    ip2, W = utils.linear(ip1, 10, name='ip2')
    Ws.append(W)
    # ip2 = activation(ip2)

    p_flat = utils.flatten(ip2)
    y_flat = utils.flatten(y)

    # L2 weight decay on the two fully connected layers.
    regularizers = 5e-4 * (tf.nn.l2_loss(Ws[-1]) + tf.nn.l2_loss(Ws[-2]))
    # Squared-error loss on the landmark coordinates.
    loss_x = tf.reduce_sum(tf.squared_difference(p_flat, y_flat), 1)
    cost = tf.reduce_mean(loss_x) + regularizers
    prediction = tf.reshape(p_flat, (-1, 5, 2))

    return {'cost': cost, 'Ws': Ws,
            'x': x, 'y': y, 'pred': prediction,
            'keep_prob': keep_prob,
            'train': phase_train}
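# As a quick check of deepID's architecture for the default 39x39 input
# (VALID convs with stride 1; SAME 2x2 max-pool after conv1-conv3), the
# spatial sizes can be derived as follows (illustrative, pure Python):
#
#     n = 39
#     for i, k in enumerate([4, 3, 3, 2]):
#         n = n - k + 1          # VALID conv, stride 1
#         if i < 3:
#             n = -(-n // 2)     # SAME max-pool, stride 2 (ceil division)
#
# which gives conv1 36 -> pool1 18 -> conv2 16 -> pool2 8 -> conv3 6 ->
# pool3 3 -> conv4 2, so the concatenated feature vector entering ip1 is
# 3*3*60 + 2*2*80 = 860 units wide.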
def normalized_rmse(pred, gt_truth):
    # TODO: assert shapes
    # Mean point-to-point error over the 5 landmarks, normalized by the
    # inter-ocular distance (landmarks 0 and 1 are the eye centers).
    norm = tf.sqrt(tf.reduce_sum(((gt_truth[:, 0, :] - gt_truth[:, 1, :])**2), 1))

    return tf.reduce_sum(tf.sqrt(tf.reduce_sum(tf.square(pred - gt_truth), 2)), 1) / (norm * 5)


def evaluateError(landmarkGt, landmarkP):
    e = np.zeros(5)
    ocular_dist = norm(landmarkGt[1] - landmarkGt[0])
    for i in range(5):
        e[i] = norm(landmarkGt[i] - landmarkP[i])
    e = e / ocular_dist
    return e


def evaluateBatchError(landmarkGt, landmarkP, batch_size):
    e = np.zeros([batch_size, 5])
    for i in range(batch_size):
        e[i] = evaluateError(landmarkGt[i], landmarkP[i])
    mean_err = e.mean(axis=0)
    return mean_err


def train_deepid(input_shape=[None, 39, 39, 1],
                 n_filters=[20, 40, 60, 80],
                 filter_sizes=[4, 3, 3, 2],
                 activation=tf.nn.relu,
                 dropout=False,
                 batch_size=64):
    batch_x, label_x = input_pipeline(['tftrain.txt'], batch_size=batch_size,
                                      shape=[39, 39, 1], is_training=True)
    # with h5py.File('../../train/1_F/train.h5', 'r') as hdf:
    #     data = hdf['data'][:]
    #     label = hdf['landmark'][:]
    # length = len(label)
    # test_x, test_label = input_pipeline(['tfboy.txt'], batch_size=batch_size, shape=input_shape[1:], is_training=False)

    deepid = deepID(input_shape=input_shape, n_filters=n_filters,
                    filter_sizes=filter_sizes, activation=activation,
                    dropout=dropout)

    batch = tf.Variable(0, dtype=tf.int32)
    # Decay the learning rate by 5% every 150000 examples (staircase).
    learning_rate = tf.train.exponential_decay(0.005, batch * batch_size, 150000, 0.95, staircase=True)
    optimizer = tf.train.AdamOptimizer(
        learning_rate).minimize(deepid['cost'], global_step=batch)
    save_step = 10000
    # Create the saver once, outside the session.
    saver = tf.train.Saver(max_to_keep=5)

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        # ckpt = tf.train.get_checkpoint_state('models')
        # if ckpt and ckpt.model_checkpoint_path:
        #     print("Continue training from the model {}".format(ckpt.model_checkpoint_path))
        #     saver.restore(sess, ckpt.model_checkpoint_path)
        coord = tf.train.Coordinator()

        # Ensure no more changes to graph
        tf.get_default_graph().finalize()

        # Start up the queues for handling the image pipeline
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        batch_i = 0

        # start_idx = 0
        # end_idx = 0
        for i in range(1000000):
            batch_i += 1
            batch_xs, batch_label = sess.run([batch_x, label_x])

            # end_idx = (start_idx + batch_size) % length
            # if start_idx + batch_size >= length:
            #     batch_xs = np.vstack((data[start_idx:], data[:end_idx]))
            #     batch_label = np.vstack((label[start_idx:], label[:end_idx]))
            # else:
            #     batch_xs = data[start_idx:end_idx]
            #     batch_label = label[start_idx:end_idx]
            # batch_xs = batch_xs.reshape((batch_size, 39, 39, 1))
            # start_idx = end_idx

            train_cost, pred = sess.run([deepid['cost'], deepid['pred'], optimizer], feed_dict={
                deepid['x']: batch_xs, deepid['y']: batch_label, deepid['train']: True,
                deepid['keep_prob']: 0.5})[:2]
            if batch_i % 100 == 0:
                print(batch_i, train_cost)
                lr = sess.run(learning_rate)
                print('lr: %.10f' % lr)
                # id = np.random.randint(10)
                id = 0
                batch_label = batch_label.reshape([-1, 5, 2])
                print('label: ' + np.array_str(batch_label[id]))
                print('pred: ' + np.array_str(pred[id]))

                err = evaluateBatchError(batch_label, pred, batch_size)
                print('Mean error:' + np.array_str(err))
                # print(batch_label[0])
                # plt.imshow(batch_xs[0].reshape((39, 39)))
                # import pdb; pdb.set_trace()

            if batch_i % save_step == 0:
                # Save the variables to disk.
                saver.save(sess, "./models/" + 'deepid.ckpt',
                           global_step=batch_i,
                           write_meta_graph=False)

        coord.request_stop()
        coord.join(threads)


if __name__ == '__main__':
    train_deepid()
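# For reference, the staircase schedule in train_deepid() decays the learning
# rate by 5% every 150000 examples (illustrative preview, pure Python):
#
#     batch_size = 64
#     for step in [0, 10000, 100000]:
#         0.005 * 0.95 ** ((step * batch_size) // 150000)
#     # -> 0.005, ~0.00407, ~0.00058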
--------------------------------------------------------------------------------