├── README.md
├── augment.py
├── libs
│   ├── batch_norm.py
│   ├── tfpipeline.py
│   └── utils.py
├── model_eval.py
└── model_train.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

# TF-FaceLandmarkDetection

Face landmark detection using TensorFlow.
A reproduction of the paper **Deep Convolutional Network Cascade for Facial Point Detection**.

## Requirements

- Python 3.4
- Tensorflow 0.10.0

## Usage

- `git clone https://github.com/mariolew/TF-FaceLandmarkDetection`
- Prepare data: you need a text file in which each line has the format:
  `image_path bbx_left bbx_right bbx_top bbx_bottom landmark1_x landmark1_y ... landmarki_x landmarki_y`
- Modify the text file path and the path to store augmented images in **augment.py**, then run *python3 augment.py*
- Modify the paths and params in **model_train.py**, then run *python3 model_train.py* to train a face alignment model
- Modify the paths and params in **model_eval.py**, then run *python3 model_eval.py* to evaluate the trained model
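For example, one line of the annotation file might look like this (path and values are purely illustrative):

```
images/face_0001.jpg 84 161 92 169 106.2 107.8 146.7 112.2 125.3 142.8 105.2 157.8 139.7 161.8
```

The augmentation and evaluation code assume five landmarks; the flipping and error-normalization logic expects landmarks 0 and 1 to be the eye centers and 3 and 4 the mouth corners.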
## Note

This repo is based on https://github.com/luoyetx/deep-landmark and https://github.com/pkmital/CADL and is still a work in progress.

## Achievements

Level1: Done

Level2: TODO

Level3: TODO

## References

**[1]** [Deep Convolutional Network Cascade for Facial Point Detection](http://mmlab.ie.cuhk.edu.hk/archive/CNN_FacePoint.htm)

**[2]** [deep-landmark](https://github.com/luoyetx/deep-landmark)

**[3]** [Creative Applications of Deep Learning w/ Tensorflow](https://github.com/pkmital/CADL)

--------------------------------------------------------------------------------
/augment.py:
--------------------------------------------------------------------------------

import numpy as np
import cv2
import matplotlib.pyplot as plt
import h5py

TXT = 'testImageList.txt'

def read_data_from_txt(TXT):
    with open(TXT, 'r') as fid:
        lines = fid.readlines()
    result = []
    for line in lines:
        components = line.strip().split(' ')
        imgName = components[0].replace('\\', '/')
        # list() so the results are indexable under Python 3.
        bbx = list(map(int, components[1:5]))
        landmarks = list(map(float, components[5:]))
        landmarks = np.asarray(landmarks).reshape([-1, 2])
        result.append([imgName, BBox(bbx), landmarks])
    return result

def flip(face, landmark):
    """Flip a face image horizontally and mirror its normalized landmarks."""
    face_flipped_by_x = cv2.flip(face, 1)
    landmark_ = np.asarray([(1 - x, y) for (x, y) in landmark])
    # Swap left/right points so the flipped landmarks keep their semantic
    # order: eye centers (0, 1) and mouth corners (3, 4) change sides.
    landmark_[[0, 1]] = landmark_[[1, 0]]
    landmark_[[3, 4]] = landmark_[[4, 3]]
    return (face_flipped_by_x, landmark_)

def rotate(img, bbox, landmark, alpha):
    """
    Given a face with bbox and landmarks, rotate the image by alpha degrees
    and return the rotated face crop with its landmarks (absolute positions).
    """
    center = (bbox.x + bbox.w / 2, bbox.y + bbox.h / 2)
    rot_mat = cv2.getRotationMatrix2D(center, alpha, 1)
    # cv2.warpAffine expects dsize as (width, height), not img.shape.
    img_rotated_by_alpha = cv2.warpAffine(img, rot_mat, (img.shape[1], img.shape[0]))
    landmark_ = np.asarray([(rot_mat[0][0]*x + rot_mat[0][1]*y + rot_mat[0][2],
                             rot_mat[1][0]*x + rot_mat[1][1]*y + rot_mat[1][2]) for (x, y) in landmark])
    face = img_rotated_by_alpha[bbox.y:bbox.y+bbox.h, bbox.x:bbox.x+bbox.w]
    return (face, landmark_)

def processImage(imgs):
    """
    Zero-mean, unit-variance normalization before feeding images to CNNs.
    imgs: N x H x W x 1
    """
    imgs = imgs.astype(np.float32)
    for i, img in enumerate(imgs):
        m = img.mean()
        s = img.std()
        imgs[i] = (img - m) / s
    return imgs

def generate_hdf5(data, output='shit.h5'):
    lines = []
    dst = 'tf_test/'
    imgs = []
    labels = []
    for (imgPath, bbx, landmarks) in data:
        im = cv2.imread(imgPath, cv2.IMREAD_GRAYSCALE)
        imgName = imgPath.split('/')[-1][:-4]

        bbx_sc = bbx.bbxScale(im.shape, scale=1.1)
        # print(bbx_sc.x, bbx_sc.y, bbx_sc.w, bbx_sc.h)
        im_sc = im[bbx_sc.y:bbx_sc.y+bbx_sc.h, bbx_sc.x:bbx_sc.x+bbx_sc.w]
        im_sc = cv2.resize(im_sc, (39, 39))
        imgs.append(im_sc.reshape(39, 39, 1))
        name = dst + imgName + 'sc.jpg'
        lm_sc = bbx_sc.normalizeLmToBbx(landmarks)
        labels.append(lm_sc.reshape(10))
        lines.append(name + ' ' + ' '.join(map(str, lm_sc.flatten())) + '\n')
    imgs, labels = np.asarray(imgs), np.asarray(labels)
    imgs = processImage(imgs)
    with h5py.File(output, 'w') as h5:  # honor the output argument
        h5['data'] = imgs.astype(np.float32)
        h5['landmark'] = labels.astype(np.float32)

def data_augmentation(data, output='tfboy.txt', is_training=False):
    lines = []
    dst = 'tfvae_test/'
    for (imgPath, bbx, landmarks) in data:
        im = cv2.imread(imgPath, cv2.IMREAD_GRAYSCALE)
        imgName = imgPath.split('/')[-1][:-4]

        # Scaled crop: enlarge the bbox by 10% and resize.
        bbx_sc = bbx.bbxScale(im.shape, scale=1.1)
        # print(bbx_sc.x, bbx_sc.y, bbx_sc.w, bbx_sc.h)
        im_sc = im[bbx_sc.y:bbx_sc.y+bbx_sc.h, bbx_sc.x:bbx_sc.x+bbx_sc.w]
        im_sc = cv2.resize(im_sc, (64, 64))
        name = dst + imgName + 'sc.png'
        cv2.imwrite(name, im_sc)
        lm_sc = bbx_sc.normalizeLmToBbx(landmarks)
        lines.append(name + ' ' + ' '.join(map(str, lm_sc.flatten())) + '\n')

        # The remaining augmentations are only generated for training data.
        if not is_training:
            continue

        origin = im[bbx.y:bbx.y+bbx.h, bbx.x:bbx.x+bbx.w]
        origin = cv2.resize(origin, (64, 64))
        name = dst + imgName + 'origin.png'
        cv2.imwrite(name, origin)
        lm_o = bbx.normalizeLmToBbx(landmarks)
        lines.append(name + ' ' + ' '.join(map(str, lm_o.flatten())) + '\n')

        bbx_sf = bbx_sc.bbxShift(im.shape)
        im_sf = im[bbx_sf.y:bbx_sf.y+bbx_sf.h, bbx_sf.x:bbx_sf.x+bbx_sf.w]
        im_sf = cv2.resize(im_sf, (64, 64))
        name = dst + imgName + 'sf.png'
        cv2.imwrite(name, im_sf)
        lm_sf = bbx_sf.normalizeLmToBbx(landmarks)
        lines.append(name + ' ' + ' '.join(map(str, lm_sf.flatten())) + '\n')

        im_rotate, lm_rotate = rotate(im, bbx_sc, landmarks, 5)
        im_rotate = cv2.resize(im_rotate, (64, 64))
        name = dst + imgName + 'rotate.png'
        cv2.imwrite(name, im_rotate)
        lm_rotate = bbx_sc.normalizeLmToBbx(lm_rotate)
        lines.append(name + ' ' + ' '.join(map(str, lm_rotate.flatten())) + '\n')

        # bbx_sf2 = bbx_sc.bbxShift(im.shape)
        # im_sf2 = im[bbx_sf2.y:bbx_sf2.y+bbx_sf2.h, bbx_sf2.x:bbx_sf2.x+bbx_sf2.w]
        # im_sf2 = cv2.resize(im_sf2, (39, 39))
        # name = dst+imgName+'sf2.png'
        # cv2.imwrite(name, im_sf2)
        # lm_sf2 = bbx_sf2.normalizeLmToBbx(landmarks)
        # lines.append(name + ' ' + ' '.join(map(str, lm_sf2.flatten())) + '\n')

        flipo, lm_flipo = flip(origin, lm_o)
        name = dst + imgName + 'flipo.png'
        cv2.imwrite(name, flipo)
        lines.append(name + ' ' + ' '.join(map(str, lm_flipo.flatten())) + '\n')

        flipsc, lm_flipsc = flip(im_sc, lm_sc)
        name = dst + imgName + 'flipsc.png'
        cv2.imwrite(name, flipsc)
        lines.append(name + ' ' + ' '.join(map(str, lm_flipsc.flatten())) + '\n')

        flipsf, lm_flipsf = flip(im_sf, lm_sf)
        name = dst + imgName + 'flipsf.png'
        cv2.imwrite(name, flipsf)
        lines.append(name + ' ' + ' '.join(map(str, lm_flipsf.flatten())) + '\n')

        # flipsf2, lm_flipsf2 = flip(im_sf2, lm_sf2)
        # name = dst+imgName+'flipsf2.png'
        # cv2.imwrite(name, flipsf2)
        # lines.append(name + ' ' + ' '.join(map(str, lm_flipsf2.flatten())) + '\n')

    with open(output, 'w') as fid:
        fid.writelines(lines)


class BBox(object):
    """Bounding box in [left, right, top, bottom] format."""

    def __init__(self, bbx):
        # Cast to int so the box can be used directly for array slicing.
        self.x = int(bbx[0])
        self.y = int(bbx[2])
        self.w = int(bbx[1] - bbx[0])
        self.h = int(bbx[3] - bbx[2])

    def bbxScale(self, im_size, scale=1.3):
        # The scale factor must be greater than 1.
        assert(scale > 1)
        x = np.around(max(1, self.x - (scale * self.w - self.w) / 2.0))
        y = np.around(max(1, self.y - (scale * self.h - self.h) / 2.0))
        w = np.around(min(scale * self.w, im_size[1] - x))
        h = np.around(min(scale * self.h, im_size[0] - y))
        return BBox([x, x+w, y, y+h])

    def bbxShift(self, im_size, shift=0.03):
        direction = np.random.randn(2)
        x = np.around(max(1, self.x - self.w * shift * direction[0]))
        y = np.around(max(1, self.y - self.h * shift * direction[1]))
        w = min(self.w, im_size[1] - x)
        h = min(self.h, im_size[0] - y)
        return BBox([x, x+w, y, y+h])

    def normalizeLmToBbx(self, landmarks):
        result = []
        # print(self.x, self.y, self.w, self.h)
        lmks = landmarks.copy()
        for lm in lmks:
            lm[0] = (lm[0] - self.x) / self.w
            lm[1] = (lm[1] - self.y) / self.h
            result.append(lm)
        result = np.asarray(result)
        return result


if __name__ == '__main__':
    data = read_data_from_txt(TXT)
    # generate_hdf5(data)
    data_augmentation(data, output='tftest_vae.txt', is_training=False)
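A quick sanity check of the helpers above (a minimal sketch, assuming `augment.py` is importable; the box and points are made up):

```python
import numpy as np
from augment import BBox, flip

bbx = BBox([100, 200, 80, 180])            # [left, right, top, bottom]
lms = np.array([[120., 100.], [180., 100.], [150., 130.],
                [125., 160.], [175., 160.]])

# Landmarks inside the box normalize into [0, 1] relative coordinates.
norm_lms = bbx.normalizeLmToBbx(lms)
assert ((norm_lms >= 0) & (norm_lms <= 1)).all()

# Flipping mirrors x and swaps the left/right landmark indices.
face = np.zeros((100, 100), dtype=np.uint8)
_, flipped = flip(face, norm_lms)
assert np.allclose(flipped[0], [1 - norm_lms[1][0], norm_lms[1][1]])
```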
--------------------------------------------------------------------------------
/libs/batch_norm.py:
--------------------------------------------------------------------------------

"""Batch Normalization for TensorFlow.
Parag K. Mital, Jan 2016.
"""

import tensorflow as tf
from tensorflow.python.ops import control_flow_ops


def batch_norm(x, phase_train, name='bn', decay=0.9, reuse=None,
               affine=True):
    """
    Batch normalization on convolutional maps.
    from: https://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow
    Only modified to infer shape from input tensor x.

    Parameters
    ----------
    x
        Tensor, 4D BHWD input maps
    phase_train
        boolean tf.Variable, true indicates training phase
    name
        string, variable scope name
    decay
        float, decay rate of the exponential moving averages
    reuse
        whether to reuse variables in the scope
    affine
        whether to affine-transform outputs

    Return
    ------
    normed
        batch-normalized maps
    """
    with tf.variable_scope(name, reuse=reuse):
        shape = x.get_shape().as_list()
        beta = tf.get_variable(name='beta', shape=[shape[-1]],
                               initializer=tf.constant_initializer(0.0),
                               trainable=True)
        gamma = tf.get_variable(name='gamma', shape=[shape[-1]],
                                initializer=tf.constant_initializer(1.0),
                                trainable=affine)
        if len(shape) == 4:
            batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
        else:
            batch_mean, batch_var = tf.nn.moments(x, [0], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=decay)
        ema_apply_op = ema.apply([batch_mean, batch_var])
        ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var)

        def mean_var_with_update():
            """Update the moving averages and return the batch statistics."""
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = control_flow_ops.cond(phase_train,
                                          mean_var_with_update,
                                          lambda: (ema_mean, ema_var))

        # tf.nn.batch_normalization
        normed = tf.nn.batch_norm_with_global_normalization(
            x, mean, var, beta, gamma, 1e-6, affine)
    return normed
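A minimal usage sketch (TF 0.x-era API, matching this repo's requirements; the shapes are illustrative):

```python
import tensorflow as tf
from libs.batch_norm import batch_norm

x = tf.placeholder(tf.float32, [None, 39, 39, 20])
phase_train = tf.placeholder(tf.bool, name='phase_train')

# Uses batch statistics when phase_train is True, moving averages otherwise.
h = tf.nn.relu(batch_norm(x, phase_train, name='bn1'))
```

Feed `phase_train: True` during training and `False` at eval time so the moving averages are used.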
--------------------------------------------------------------------------------
/libs/tfpipeline.py:
--------------------------------------------------------------------------------

import tensorflow as tf
import numpy as np
from functools import partial
import matplotlib.pyplot as plt

TXTs = ['tftest_vae.txt']


def read_my_file_format(record):
    """Parse one annotation line: an image path followed by 10 floats."""
    record_defaults = [[""]] + [[1.0]] * 10
    components = tf.decode_csv(record, record_defaults=record_defaults,
                               field_delim=" ")
    imgName = components[0]
    # Pack the 10 scalar columns into a single [10] landmark tensor.
    features = tf.pack(components[1:])
    img_contents = tf.read_file(imgName)
    # data_augmentation() writes PNG crops, so decode PNG here (not JPEG).
    img = tf.image.decode_png(img_contents, channels=1)
    return img, features

def processImage(img):
    """
    Zero-mean, unit-variance normalization before feeding an image to CNNs.
    img: W x H x 1
    """
    img = img.astype(np.float32)
    m = img.mean()
    s = img.std()
    img = (img - m) / s
    return img

def input_pipeline(TXTs, batch_size, shape, is_training=False):
    filename_queue = tf.train.string_input_producer(TXTs, shuffle=is_training)
    reader = tf.TextLineReader()
    _, value = reader.read(filename_queue)
    img, features = read_my_file_format(value)
    img.set_shape(shape)
    img_reshape = tf.cast(img, tf.float32)
    # float_image = tf.py_func(processImage, [img_reshape], [tf.float32])[0]
    # float_image.set_shape(shape)
    float_image = tf.image.per_image_whitening(img_reshape)
    # if is_training:
    #     float_image = distort_color(float_image)
    min_after_dequeue = 80000 // 100

    # The capacity should be larger than min_after_dequeue, and determines how
    # many examples are prefetched. TF docs recommend setting this value to:
    # min_after_dequeue + (num_threads + a small safety margin) * batch_size
    capacity = min_after_dequeue + (2 + 1) * batch_size

    # Randomize the order and output batches of batch_size.
    img_batch, label_batch = tf.train.shuffle_batch([float_image, features],
                                                    enqueue_many=False,
                                                    batch_size=batch_size,
                                                    capacity=capacity,
                                                    min_after_dequeue=min_after_dequeue,
                                                    num_threads=2)
    # img_batch, label_batch = tf.train.batch([float_image, features], batch_size=batch_size)
    return img_batch, label_batch

def distort_color(image, thread_id=0, stddev=0.1, scope=None):
    """Distort the color of the image.

    Each color distortion is non-commutative and thus ordering of the color
    ops matters. Ideally we would randomly permute the ordering of the color
    ops. Rather than adding that level of complication, we select a distinct
    ordering of color ops for each preprocessing thread.

    Args:
        image: Tensor containing single image.
        thread_id: preprocessing thread ID.
        stddev: standard deviation of the additive Gaussian noise.
        scope: Optional scope for op_scope.
    Returns:
        color-distorted image
    """
    with tf.op_scope([image], scope, 'distort_color'):
        color_ordering = thread_id % 2

        if color_ordering == 0:
            image = tf.image.random_brightness(image, max_delta=32. / 255.)
            image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
            image = tf.image.random_hue(image, max_delta=0.2)
            image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
        elif color_ordering == 1:
            image = tf.image.random_brightness(image, max_delta=32. / 255.)
            image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
            image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
            image = tf.image.random_hue(image, max_delta=0.2)

        image += tf.random_normal(
            tf.shape(image),
            stddev=stddev,
            dtype=tf.float32,
            seed=42,
            name='add_gaussian_noise')
        # The random_* ops do not necessarily clamp.
        image = tf.clip_by_value(image, 0.0, 1.0)
        return image

# shape = [64, 64, 1]
# im_batch, label_batch = input_pipeline(TXTs, 1, shape)
# with tf.Session() as sess:
#     sess.run(tf.initialize_all_variables())
#     coord = tf.train.Coordinator()
#     threads = tf.train.start_queue_runners(coord=coord)
#     im, feat = sess.run([im_batch, label_batch])
#     print(feat[0])
#     plt.imshow(im[0].reshape((64, 64)))
#     import pdb; pdb.set_trace()
#     coord.request_stop()
#     coord.join(threads)
--------------------------------------------------------------------------------
/libs/utils.py:
--------------------------------------------------------------------------------

"""Utilities used in the Kadenze Academy Course on Deep Learning w/ Tensorflow.

Creative Applications of Deep Learning w/ Tensorflow.
Kadenze, Inc.
Parag K. Mital

Copyright Parag K. Mital, June 2016.
"""
import matplotlib.pyplot as plt
import tensorflow as tf
import urllib
import numpy as np
import zipfile
import os
from scipy.io import wavfile


def download(path):
    """Use urllib to download a file.

    Parameters
    ----------
    path : str
        Url to download

    Returns
    -------
    path : str
        Location of downloaded file.
    """
    import os
    from six.moves import urllib

    fname = path.split('/')[-1]
    if os.path.exists(fname):
        return fname

    print('Downloading ' + path)

    def progress(count, block_size, total_size):
        if count % 20 == 0:
            print('Downloaded %02.02f/%02.02f MB' % (
                count * block_size / 1024.0 / 1024.0,
                total_size / 1024.0 / 1024.0), end='\r')

    filepath, _ = urllib.request.urlretrieve(
        path, filename=fname, reporthook=progress)
    return filepath


def download_and_extract_tar(path, dst):
    """Download and extract a tar file.

    Parameters
    ----------
    path : str
        Url to tar file to download.
    dst : str
        Location to save tar file contents.
    """
    import tarfile
    filepath = download(path)
    if not os.path.exists(dst):
        os.makedirs(dst)
    tarfile.open(filepath, 'r:gz').extractall(dst)


def download_and_extract_zip(path, dst):
    """Download and extract a zip file.

    Parameters
    ----------
    path : str
        Url to zip file to download.
    dst : str
        Location to save zip file contents.
    """
    import zipfile
    filepath = download(path)
    if not os.path.exists(dst):
        os.makedirs(dst)
    zf = zipfile.ZipFile(file=filepath)
    zf.extractall(dst)
def load_audio(filename, b_normalize=True):
    """Load the audio file at the provided filename using scipy.io.wavfile.

    Optionally normalizes the audio to the maximum value.

    Parameters
    ----------
    filename : str
        File to load.
    b_normalize : bool, optional
        Normalize to the maximum value.
    """
    sr, s = wavfile.read(filename)
    if b_normalize:
        s = s.astype(np.float32)
        s = (s / np.max(np.abs(s)))
        s -= np.mean(s)
    return s


def corrupt(x):
    """Take an input tensor and add uniform masking.

    Parameters
    ----------
    x : Tensor/Placeholder
        Input to corrupt.

    Returns
    -------
    x_corrupted : Tensor
        50 pct of values corrupted.
    """
    return tf.mul(x, tf.cast(tf.random_uniform(shape=tf.shape(x),
                                               minval=0,
                                               maxval=2,
                                               dtype=tf.int32), tf.float32))
def interp(l, r, n_samples):
    """Interpolate between the arrays l and r, n_samples times.

    Parameters
    ----------
    l : np.ndarray
        Left edge
    r : np.ndarray
        Right edge
    n_samples : int
        Number of samples

    Returns
    -------
    arr : np.ndarray
        Interpolated array
    """
    return np.array([
        l + step_i / (n_samples - 1) * (r - l)
        for step_i in range(n_samples)])


def make_latent_manifold(corners, n_samples):
    """Create a 2d manifold out of the provided corners: n_samples * n_samples.

    Parameters
    ----------
    corners : list of np.ndarray
        The four corners to interpolate.
    n_samples : int
        Number of samples to use in interpolation.

    Returns
    -------
    arr : np.ndarray
        Stacked array of all 2D interpolated samples
    """
    left = interp(corners[0], corners[1], n_samples)
    right = interp(corners[2], corners[3], n_samples)

    embedding = []
    for row_i in range(n_samples):
        embedding.append(interp(left[row_i], right[row_i], n_samples))
    return np.vstack(embedding)
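# Example (illustrative): interpolating a 3x3 grid between four 2-D corners.
#
#     corners = [np.array([0., 0.]), np.array([0., 1.]),
#                np.array([1., 0.]), np.array([1., 1.])]
#     grid = make_latent_manifold(corners, n_samples=3)
#     grid.shape  # (9, 2): 3 rows x 3 columns of interpolated codes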
def imcrop_tosquare(img):
    """Make any image a square image.

    Parameters
    ----------
    img : np.ndarray
        Input image to crop, assumed at least 2d.

    Returns
    -------
    crop : np.ndarray
        Cropped image.
    """
    size = np.min(img.shape[:2])
    extra = img.shape[:2] - size
    crop = img
    for i in np.flatnonzero(extra):
        crop = np.take(crop, extra[i] // 2 + np.r_[:size], axis=i)
    return crop


def slice_montage(montage, img_h, img_w, n_imgs):
    """Slice a montage image into n_img h x w images.

    Performs the opposite of the montage function. Takes a montage image and
    slices it back into a N x H x W x C image.

    Parameters
    ----------
    montage : np.ndarray
        Montage image to slice.
    img_h : int
        Height of sliced image
    img_w : int
        Width of sliced image
    n_imgs : int
        Number of images to slice

    Returns
    -------
    sliced : np.ndarray
        Sliced images as 4d array.
    """
    sliced_ds = []
    for i in range(int(np.sqrt(n_imgs))):
        for j in range(int(np.sqrt(n_imgs))):
            sliced_ds.append(montage[
                1 + i + i * img_h:1 + i + (i + 1) * img_h,
                1 + j + j * img_w:1 + j + (j + 1) * img_w])
    return np.array(sliced_ds)


def montage(images, saveto='montage.png'):
    """Draw all images as a montage separated by 1 pixel borders.

    Also saves the file to the destination specified by `saveto`.

    Parameters
    ----------
    images : numpy.ndarray
        Input array to create montage of. Array should be:
        batch x height x width x channels.
    saveto : str
        Location to save the resulting montage image.

    Returns
    -------
    m : numpy.ndarray
        Montage image.
    """
    if isinstance(images, list):
        images = np.array(images)
    img_h = images.shape[1]
    img_w = images.shape[2]
    n_plots = int(np.ceil(np.sqrt(images.shape[0])))
    if len(images.shape) == 4 and images.shape[3] == 3:
        m = np.ones(
            (images.shape[1] * n_plots + n_plots + 1,
             images.shape[2] * n_plots + n_plots + 1, 3)) * 0.5
    else:
        m = np.ones(
            (images.shape[1] * n_plots + n_plots + 1,
             images.shape[2] * n_plots + n_plots + 1)) * 0.5
    for i in range(n_plots):
        for j in range(n_plots):
            this_filter = i * n_plots + j
            if this_filter < images.shape[0]:
                this_img = images[this_filter]
                # print(this_img.shape, m.shape)
                m[1 + i + i * img_h:1 + i + (i + 1) * img_h,
                  1 + j + j * img_w:1 + j + (j + 1) * img_w] = this_img.squeeze()
    plt.imsave(arr=m, fname=saveto)
    return m
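# slice_montage() inverts montage(); a quick round trip (illustrative,
# writes 'demo_montage.png' as a side effect):
#
#     imgs = np.random.rand(4, 8, 8, 1)
#     m = montage(imgs, saveto='demo_montage.png')
#     slice_montage(m, img_h=8, img_w=8, n_imgs=4).shape  # (4, 8, 8)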
def montage_filters(W):
    """Draws all filters (n_input * n_output filters) as a
    montage image separated by 1 pixel borders.

    Parameters
    ----------
    W : Tensor
        Input tensor to create montage of.

    Returns
    -------
    m : numpy.ndarray
        Montage image.
    """
    W = np.reshape(W, [W.shape[0], W.shape[1], 1, W.shape[2] * W.shape[3]])
    n_plots = int(np.ceil(np.sqrt(W.shape[-1])))
    m = np.ones(
        (W.shape[0] * n_plots + n_plots + 1,
         W.shape[1] * n_plots + n_plots + 1)) * 0.5
    for i in range(n_plots):
        for j in range(n_plots):
            this_filter = i * n_plots + j
            if this_filter < W.shape[-1]:
                m[1 + i + i * W.shape[0]:1 + i + (i + 1) * W.shape[0],
                  1 + j + j * W.shape[1]:1 + j + (j + 1) * W.shape[1]] = (
                    np.squeeze(W[:, :, :, this_filter]))
    return m


def get_celeb_files(dst='img_align_celeba', max_images=100):
    """Download the first 100 images of the celeb dataset.

    Files will be placed in a directory 'img_align_celeba' if one
    doesn't exist.

    Returns
    -------
    files : list of strings
        Locations to the first 100 images of the celeb net dataset.
    """
    # Create a directory
    if not os.path.exists(dst):
        os.mkdir(dst)

    # Now perform the following 100 times:
    for img_i in range(1, max_images + 1):

        # create a string using the current loop counter
        f = '000%03d.jpg' % img_i

        if not os.path.exists(os.path.join(dst, f)):

            # and get the url with that string appended the end
            url = 'https://s3.amazonaws.com/cadl/celeb-align/' + f

            # We'll print this out to the console so we can see how far we've gone
            print(url, end='\r')

            # And now download the url to a location inside our new directory
            urllib.request.urlretrieve(url, os.path.join(dst, f))

    files = [os.path.join(dst, file_i)
             for file_i in os.listdir(dst)
             if '.jpg' in file_i][:max_images]
    return files


def get_celeb_imgs(max_images=100):
    """Load the first `max_images` images of the celeb dataset.

    Returns
    -------
    imgs : list of np.ndarray
        List of the first 100 images from the celeb dataset
    """
    return [plt.imread(f_i) for f_i in get_celeb_files(max_images=max_images)]
def gauss(mean, stddev, ksize):
    """Use Tensorflow to compute a Gaussian Kernel.

    Parameters
    ----------
    mean : float
        Mean of the Gaussian (e.g. 0.0).
    stddev : float
        Standard Deviation of the Gaussian (e.g. 1.0).
    ksize : int
        Size of kernel (e.g. 16).

    Returns
    -------
    kernel : np.ndarray
        Computed Gaussian Kernel using Tensorflow.
    """
    g = tf.Graph()
    with tf.Session(graph=g):
        x = tf.linspace(-3.0, 3.0, ksize)
        z = (tf.exp(tf.neg(tf.pow(x - mean, 2.0) /
                           (2.0 * tf.pow(stddev, 2.0)))) *
             (1.0 / (stddev * tf.sqrt(2.0 * 3.1415))))
        return z.eval()


def gauss2d(mean, stddev, ksize):
    """Use Tensorflow to compute a 2D Gaussian Kernel.

    Parameters
    ----------
    mean : float
        Mean of the Gaussian (e.g. 0.0).
    stddev : float
        Standard Deviation of the Gaussian (e.g. 1.0).
    ksize : int
        Size of kernel (e.g. 16).

    Returns
    -------
    kernel : np.ndarray
        Computed 2D Gaussian Kernel using Tensorflow.
    """
    z = gauss(mean, stddev, ksize)
    g = tf.Graph()
    with tf.Session(graph=g):
        z_2d = tf.matmul(tf.reshape(z, [ksize, 1]), tf.reshape(z, [1, ksize]))
        return z_2d.eval()


def convolve(img, kernel):
    """Use Tensorflow to convolve a 4D image with a 4D kernel.

    Parameters
    ----------
    img : np.ndarray
        4-dimensional image shaped N x H x W x C
    kernel : np.ndarray
        4-dimensional image shape K_H, K_W, C_I, C_O corresponding to the
        kernel's height and width, the number of input channels, and the
        number of output channels. Note that C_I should = C.

    Returns
    -------
    result : np.ndarray
        Convolved result.
    """
    g = tf.Graph()
    with tf.Session(graph=g):
        convolved = tf.nn.conv2d(img, kernel, strides=[1, 1, 1, 1], padding='SAME')
        res = convolved.eval()
    return res


def gabor(ksize=32):
    """Use Tensorflow to compute a 2D Gabor Kernel.

    Parameters
    ----------
    ksize : int, optional
        Size of kernel.

    Returns
    -------
    gabor : np.ndarray
        Gabor kernel with ksize x ksize dimensions.
    """
    g = tf.Graph()
    with tf.Session(graph=g):
        z_2d = gauss2d(0.0, 1.0, ksize)
        ones = tf.ones((1, ksize))
        ys = tf.sin(tf.linspace(-3.0, 3.0, ksize))
        ys = tf.reshape(ys, [ksize, 1])
        wave = tf.matmul(ys, ones)
        gabor = tf.mul(wave, z_2d)
        return gabor.eval()
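# Example (illustrative): blurring a random image with a Gaussian kernel.
#
#     img = np.random.rand(1, 32, 32, 1).astype(np.float32)
#     kernel = gauss2d(0.0, 1.0, ksize=8).reshape(8, 8, 1, 1)
#     convolve(img, kernel).shape  # (1, 32, 32, 1)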
def build_submission(filename, file_list, optional_file_list=()):
    """Helper utility to check homework assignment submissions and package them.

    Parameters
    ----------
    filename : str
        Output zip file name
    file_list : tuple
        Tuple of files to include
    """
    # check each file exists
    for part_i, file_i in enumerate(file_list):
        if not os.path.exists(file_i):
            print('\nYou are missing the file {}. '.format(file_i) +
                  'It does not look like you have completed Part {}.'.format(
                      part_i + 1))

    def zipdir(path, zf):
        for root, dirs, files in os.walk(path):
            for file in files:
                # make sure the files are part of the necessary file list
                if file.endswith(file_list) or file.endswith(optional_file_list):
                    zf.write(os.path.join(root, file))

    # create a zip file with the necessary files
    zipf = zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED)
    zipdir('.', zipf)
    zipf.close()
    print('Your assignment zip file has been created!')
    print('Now submit the file:\n{}\nto Kadenze for grading!'.format(
        os.path.abspath(filename)))


def normalize(a, s=0.1):
    '''Normalize the image range for visualization'''
    return np.uint8(np.clip(
        (a - a.mean()) / max(a.std(), 1e-4) * s + 0.5,
        0, 1) * 255)


# %%
def weight_variable(shape, **kwargs):
    '''Helper function to create a weight variable initialized with
    a normal distribution

    Parameters
    ----------
    shape : list
        Size of weight variable
    '''
    if isinstance(shape, list):
        initial = tf.random_normal(tf.pack(shape), mean=0.0, stddev=0.01)
        initial.set_shape(shape)
    else:
        initial = tf.random_normal(shape, mean=0.0, stddev=0.01)
    return tf.Variable(initial, **kwargs)


# %%
def bias_variable(shape, **kwargs):
    '''Helper function to create a bias variable initialized with
    a constant value.

    Parameters
    ----------
    shape : list
        Size of weight variable
    '''
    if isinstance(shape, list):
        initial = tf.random_normal(tf.pack(shape), mean=0.0, stddev=0.01)
        initial.set_shape(shape)
    else:
        initial = tf.random_normal(shape, mean=0.0, stddev=0.01)
    return tf.Variable(initial, **kwargs)


def binary_cross_entropy(z, x, name=None):
    """Binary Cross Entropy measures cross entropy of a binary variable.

    loss(x, z) = - sum_i (x[i] * log(z[i]) + (1 - x[i]) * log(1 - z[i]))

    Parameters
    ----------
    z : tf.Tensor
        A `Tensor` of the same type and shape as `x`.
    x : tf.Tensor
        A `Tensor` of type `float32` or `float64`.
    """
    with tf.variable_scope(name or 'bce'):
        eps = 1e-12
        return (-(x * tf.log(z + eps) +
                  (1. - x) * tf.log(1. - z + eps)))
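# Numeric check of the formula above (illustrative, TF 0.x API):
#
#     with tf.Session():
#         x = tf.constant([1.0, 0.0])   # targets
#         z = tf.constant([0.9, 0.1])   # predictions
#         binary_cross_entropy(z, x).eval()  # ~[0.105 0.105], i.e. -log(0.9)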
def conv2d(x, n_output,
           k_h=5, k_w=5, d_h=2, d_w=2,
           padding='VALID', name='conv2d', reuse=None):
    """Helper for creating a 2d convolution operation.

    Parameters
    ----------
    x : tf.Tensor
        Input tensor to convolve.
    n_output : int
        Number of filters.
    k_h : int, optional
        Kernel height
    k_w : int, optional
        Kernel width
    d_h : int, optional
        Height stride
    d_w : int, optional
        Width stride
    padding : str, optional
        Padding type: "SAME" or "VALID"
    name : str, optional
        Variable scope

    Returns
    -------
    h, W : tf.Tensor, tf.Tensor
        Output of convolution and the kernel weights
    """
    with tf.variable_scope(name or 'conv2d', reuse=reuse):
        W = tf.get_variable(
            name='W',
            shape=[k_h, k_w, x.get_shape()[-1], n_output],
            initializer=tf.contrib.layers.xavier_initializer_conv2d())

        conv = tf.nn.conv2d(
            name='conv',
            input=x,
            filter=W,
            strides=[1, d_h, d_w, 1],
            padding=padding)

        b = tf.get_variable(
            name='b',
            shape=[n_output],
            initializer=tf.constant_initializer(0.0))

        h = tf.nn.bias_add(
            name='h',
            value=conv,
            bias=b)

    return h, W


def deconv2d(x, n_output_h, n_output_w, n_output_ch, n_input_ch=None,
             k_h=5, k_w=5, d_h=2, d_w=2,
             padding='SAME', name='deconv2d', reuse=None):
    """Deconvolution helper.

    Parameters
    ----------
    x : tf.Tensor
        Input tensor to convolve.
    n_output_h : int
        Height of output
    n_output_w : int
        Width of output
    n_output_ch : int
        Number of filters.
    k_h : int, optional
        Kernel height
    k_w : int, optional
        Kernel width
    d_h : int, optional
        Height stride
    d_w : int, optional
        Width stride
    padding : str, optional
        Padding type: "SAME" or "VALID"
    name : str, optional
        Variable scope

    Returns
    -------
    h, W : tf.Tensor, tf.Tensor
        Output of deconvolution and the kernel weights
    """
    with tf.variable_scope(name or 'deconv2d', reuse=reuse):
        # Kernel shape is k_h x k_w (the original used k_h twice by mistake).
        W = tf.get_variable(
            name='W',
            shape=[k_h, k_w, n_output_ch, n_input_ch or x.get_shape()[-1]],
            initializer=tf.contrib.layers.xavier_initializer_conv2d())

        conv = tf.nn.conv2d_transpose(
            name='conv_t',
            value=x,
            filter=W,
            output_shape=tf.pack(
                [tf.shape(x)[0], n_output_h, n_output_w, n_output_ch]),
            strides=[1, d_h, d_w, 1],
            padding=padding)

        conv.set_shape([None, n_output_h, n_output_w, n_output_ch])

        b = tf.get_variable(
            name='b',
            shape=[n_output_ch],
            initializer=tf.constant_initializer(0.0))

        h = tf.nn.bias_add(name='h', value=conv, bias=b)

    return h, W


def lrelu(features, leak=0.2):
    """Leaky rectifier.

    Parameters
    ----------
    features : tf.Tensor
        Input to apply leaky rectifier to.
    leak : float, optional
        Percentage of leak.

    Returns
    -------
    op : tf.Tensor
        Resulting output of applying leaky rectifier activation.
    """
    f1 = 0.5 * (1 + leak)
    f2 = 0.5 * (1 - leak)
    return f1 * features + f2 * abs(features)


def linear(x, n_output, name=None, activation=None, reuse=None):
    """Fully connected layer.

    Parameters
    ----------
    x : tf.Tensor
        Input tensor to connect
    n_output : int
        Number of output neurons
    name : None, optional
        Scope to apply

    Returns
    -------
    h, W : tf.Tensor, tf.Tensor
        Output of fully connected layer and the weight matrix
    """
    if len(x.get_shape()) != 2:
        x = flatten(x, reuse=reuse)

    n_input = x.get_shape().as_list()[1]

    with tf.variable_scope(name or "fc", reuse=reuse):
        W = tf.get_variable(
            name='W',
            shape=[n_input, n_output],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer())

        b = tf.get_variable(
            name='b',
            shape=[n_output],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.0))

        h = tf.nn.bias_add(
            name='h',
            value=tf.matmul(x, W),
            bias=b)

        if activation:
            h = activation(h)

    return h, W


def flatten(x, name=None, reuse=None):
    """Flatten Tensor to 2-dimensions.

    Parameters
    ----------
    x : tf.Tensor
        Input tensor to flatten.
    name : None, optional
        Variable scope for flatten operations

    Returns
    -------
    flattened : tf.Tensor
        Flattened tensor.
    """
    with tf.variable_scope('flatten'):
        dims = x.get_shape().as_list()
        if len(dims) == 4:
            flattened = tf.reshape(
                x,
                shape=[-1, dims[1] * dims[2] * dims[3]])
        elif len(dims) == 2 or len(dims) == 1:
            flattened = x
        else:
            raise ValueError('Expected n dimensions of 1, 2 or 4. Found:',
                             len(dims))

        return flattened


def to_tensor(x):
    """Convert a 2-dim Tensor to a 4-dim Tensor ready for convolution.

    Performs the opposite of flatten(x). If the tensor is already 4-D, this
    returns the same as the input, leaving it unchanged.

    Parameters
    ----------
    x : tf.Tensor
        Input 2-D tensor. If 4-D already, left unchanged.

    Returns
    -------
    x : tf.Tensor
        4-D representation of the input.

    Raises
    ------
    ValueError
        If the tensor is not 2D or already 4D.
    """
    if len(x.get_shape()) == 2:
        n_input = x.get_shape().as_list()[1]
        x_dim = np.sqrt(n_input)
        if x_dim == int(x_dim):
            x_dim = int(x_dim)
            x_tensor = tf.reshape(
                x, [-1, x_dim, x_dim, 1], name='reshape')
        elif np.sqrt(n_input / 3) == int(np.sqrt(n_input / 3)):
            x_dim = int(np.sqrt(n_input / 3))
            x_tensor = tf.reshape(
                x, [-1, x_dim, x_dim, 3], name='reshape')
        else:
            x_tensor = tf.reshape(
                x, [-1, 1, 1, n_input], name='reshape')
    elif len(x.get_shape()) == 4:
        x_tensor = x
    else:
        raise ValueError('Unsupported input dimensions')
    return x_tensor
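# flatten() and to_tensor() are inverses for square grayscale inputs
# (illustrative shape check):
#
#     x = tf.placeholder(tf.float32, [None, 39, 39, 1])
#     flat = flatten(x)   # -> (?, 1521)
#     to_tensor(flat)     # -> (?, 39, 39, 1), since sqrt(1521) == 39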
def genLandmarkMap(landmarks, shape=[39, 39]):
    '''Generate a landmark map from normalized landmarks.

    Input params:
        landmarks: K x 2 float, coordinates normalized to [0, 1]
        shape: H x W

    Output:
        landmarkMap: H x W x K binary map. In each H x W map, only the
        location nearest to the landmark is set to 1; everything else is 0.'''
    landmarks = landmarks.reshape((-1, 2))
    landmarkMap = np.zeros(shape + [len(landmarks)])
    for (i, landmark) in enumerate(landmarks):
        # Clamp to the last pixel so a coordinate of exactly 1.0 stays in bounds.
        x = min(int(np.around(landmark[0] * shape[1])), shape[1] - 1)
        y = min(int(np.around(landmark[1] * shape[0])), shape[0] - 1)
        landmarkMap[y, x, i] = 1
    return landmarkMap
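# Example (illustrative, made-up normalized landmarks):
#
#     lms = np.array([[0.3, 0.3], [0.7, 0.3], [0.5, 0.5],
#                     [0.35, 0.8], [0.65, 0.8]])
#     lm_map = genLandmarkMap(lms, shape=[39, 39])
#     lm_map.shape             # (39, 39, 5)
#     lm_map.sum(axis=(0, 1))  # one hot pixel per landmark: [1. 1. 1. 1. 1.]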
--------------------------------------------------------------------------------
/model_eval.py:
--------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from datetime import datetime
from pathlib import Path

import math
import matplotlib
import numpy as np
import os.path
import tensorflow as tf
import time
from model_train import deepID
from libs.tfpipeline import input_pipeline


# Do not use a GUI toolkit for matplotlib.
matplotlib.use('Agg')

FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string('checkpoint_dir', 'models/',
                           """Directory where to read model checkpoints.""")

# Flags governing the frequency of the eval.
tf.app.flags.DEFINE_integer('eval_interval_secs', 60 * 5,
                            """How often to run the eval.""")

tf.app.flags.DEFINE_boolean('run_once', True,
                            """Whether to run eval only once.""")

# Flags governing the data used for the eval.
tf.app.flags.DEFINE_integer('num_examples', 3466,
                            """Number of examples to run.""")
tf.app.flags.DEFINE_integer('batch_size', 2,
                            """Number of examples per batch.""")
tf.app.flags.DEFINE_string('data_txt', 'tftest.txt',
                           """The text file containing test data paths and annotations.""")
tf.app.flags.DEFINE_string('device', '/cpu:0', 'the device to eval on.')


def normalized_rmse(pred, gt_truth):
    # TODO: assert shapes
    # Mean point-to-point error over the 5 landmarks, normalized by the
    # inter-ocular distance (landmarks 0 and 1 are the eye centers).
    norm = tf.sqrt(tf.reduce_sum(((gt_truth[:, 0, :] - gt_truth[:, 1, :])**2), 1))

    return tf.reduce_sum(tf.sqrt(tf.reduce_sum(tf.square(pred - gt_truth), 2)), 1) / (norm * 5)
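# Equivalent numpy computation (illustrative, not used by the code), for a
# batch of shapes pred, gt with shape (N, 5, 2):
#
#     iod = np.sqrt(((gt[:, 0, :] - gt[:, 1, :]) ** 2).sum(1))
#     err = np.sqrt(((pred - gt) ** 2).sum(2)).sum(1) / (iod * 5)
#
# i.e. the mean point-to-point distance over the 5 landmarks, divided by
# the inter-ocular distance.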
def _eval_once(saver, rmse_op, network):
    """Runs eval once.

    Args:
        saver: Saver used to restore the checkpoint.
        rmse_op: op computing the normalized RMSE per example.
        network: dict of model tensors, as returned by deepID().
    """
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print(ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)

            # Assuming model_checkpoint_path looks something like:
            #   /my-favorite-path/imagenet_train/model.ckpt-0,
            # extract global_step from it.
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            print('Successfully loaded model from %s at step=%s.' %
                  (ckpt.model_checkpoint_path, global_step))
        else:
            print('No checkpoint file found')
            return
        test_x, test_label = input_pipeline([FLAGS.data_txt], batch_size=FLAGS.batch_size,
                                            shape=[39, 39, 1], is_training=False)
        # Start the queue runners.
        coord = tf.train.Coordinator()
        try:
            threads = []
            for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                threads.extend(qr.create_threads(sess, coord=coord, daemon=True,
                                                 start=True))

            num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size))
            # Accumulates the per-example normalized errors.
            errors = []

            total_sample_count = num_iter * FLAGS.batch_size
            step = 0

            print('%s: starting evaluation on (%s).' % (datetime.now(), FLAGS.data_txt))
            start_time = time.time()
            while step < num_iter and not coord.should_stop():
                test_xs, label = sess.run([test_x, test_label])
                rmse = sess.run(rmse_op, feed_dict={network['x']: test_xs,
                                                    network['y']: label,
                                                    network['train']: False,
                                                    # no dropout at eval time
                                                    network['keep_prob']: 1.0})
                errors.append(rmse)
                step += 1
                if step % 20 == 0:
                    duration = time.time() - start_time
                    sec_per_batch = duration / 20.0
                    examples_per_sec = FLAGS.batch_size / sec_per_batch
                    print('%s: [%d batches out of %d] (%.1f examples/sec; %.3f '
                          'sec/batch)' % (datetime.now(), step, num_iter,
                                          examples_per_sec, sec_per_batch))
                    start_time = time.time()

            errors = np.vstack(errors).ravel()
            mean_rmse = errors.mean()
            auc_at_08 = (errors < .08).mean()
            auc_at_05 = (errors < .05).mean()

            print('Errors', errors.shape)
            print('%s: mean_rmse = %.4f, auc @ 0.05 = %.4f, auc @ 0.08 = %.4f [%d examples]' %
                  (datetime.now(), mean_rmse, auc_at_05, auc_at_08, total_sample_count))

        except Exception as e:  # pylint: disable=broad-except
            coord.request_stop(e)

        coord.request_stop()
        coord.join(threads, stop_grace_period_secs=10)


def evaluate(shape=[39, 39, 1]):
    """Evaluate model on the dataset for a number of steps."""
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        train_dir = Path(FLAGS.checkpoint_dir)

        images, landmarks = input_pipeline(
            [FLAGS.data_txt], batch_size=2,
            shape=shape, is_training=False)

        # mirrored_images, _, mirrored_inits, shapes = data_provider.batch_inputs(
        #     [dataset_path], reference_shape,
        #     batch_size=FLAGS.batch_size, is_training=False, mirror_image=True)

        print('Loading model...')
        # Build a Graph that computes the logits predictions from the
        # inference model.
        with tf.device(FLAGS.device):
            deepid = deepID(input_shape=[None, 39, 39, 1], n_filters=[20, 40, 60, 80],
                            filter_sizes=[4, 3, 3, 2], activation=tf.nn.relu, dropout=False)

        tf.get_variable_scope().reuse_variables()

        avg_pred = deepid['pred']
        gt_truth = deepid['y']
        gt_truth = tf.reshape(gt_truth, (-1, 5, 2))
        # Calculate predictions.
        norm_error = normalized_rmse(avg_pred, gt_truth)

        # Restore the moving average version of the learned variables for eval.
        # variable_averages = tf.train.ExponentialMovingAverage(0.9999)
        # variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver()

        while True:
            _eval_once(saver, norm_error, deepid)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)


if __name__ == '__main__':
    evaluate()
--------------------------------------------------------------------------------
/model_train.py:
--------------------------------------------------------------------------------

"""Convolutional neural network for face alignment.

Copyright Mario S. Lew, Oct 2016
"""
import tensorflow as tf
import numpy as np
import os
from libs.tfpipeline import input_pipeline
from libs.batch_norm import batch_norm
from libs import utils
from numpy.linalg import norm
import h5py
import matplotlib.pyplot as plt


def deepID(input_shape=[None, 39, 39, 1],
           n_filters=[20, 40, 60, 80],
           filter_sizes=[4, 3, 3, 2],
           activation=tf.nn.relu,
           dropout=False):
    """DeepID-style landmark regression network.

    Four convolutional layers (each of the first three followed by 2x2
    max-pooling); the flattened pool3 and conv4 features are concatenated
    before two fully connected layers that regress the 10 landmark
    coordinates.

    Parameters
    ----------
    input_shape : list, optional
        Shape of the input to the network, e.g. [None, 39, 39, 1].
    n_filters : list, optional
        Number of output filters for each convolutional layer.
    filter_sizes : list, optional
        ksize (height and width) of each convolutional layer.
    activation : function, optional
        Activation function to apply to each layer, e.g. tf.nn.relu
    dropout : bool, optional
        Whether or not to apply dropout to the first fully connected layer.
        If using dropout, you must feed a value for 'keep_prob', as returned
        in the dictionary. 1.0 means no dropout is used. 0.0 means every
        connection is dropped. Sensible values are between 0.5-0.8.

    Returns
    -------
    model : dict
        {
            'cost': Tensor to optimize.
            'Ws': All weights of the network.
            'x': Input Placeholder.
            'y': Ground-truth landmark Placeholder (N x 10).
            'pred': Predicted landmarks, reshaped to (N, 5, 2).
            'keep_prob': Amount to keep when using Dropout.
            'train': Set to True when training / applies to Batch Normalization.
        }
    """
    # network input / placeholders for train (bn) and dropout
    x = tf.placeholder(tf.float32, input_shape, 'x')
    y = tf.placeholder(tf.float32, [None, 10], 'y')
    phase_train = tf.placeholder(tf.bool, name='phase_train')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    # 2d -> 4d if convolution
    x_tensor = utils.to_tensor(x)
    current_input = x_tensor

    Ws = []
    shapes = []

    # Build the feature extractor: conv -> pool stacks.
    shapes.append(current_input.get_shape().as_list())
    conv1, W = utils.conv2d(x=x_tensor,
                            n_output=n_filters[0],
                            k_h=filter_sizes[0],
                            k_w=filter_sizes[0],
                            d_w=1,
                            d_h=1,
                            name='conv1')
    Ws.append(W)
    # conv1 = activation(batch_norm(conv1, phase_train, 'bn1'))
    conv1 = activation(conv1)

    pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool1')

    conv2, W = utils.conv2d(x=pool1,
                            n_output=n_filters[1],
                            k_h=filter_sizes[1],
                            k_w=filter_sizes[1],
                            d_w=1,
                            d_h=1,
                            name='conv2')
    Ws.append(W)
    # conv2 = activation(batch_norm(conv2, phase_train, 'bn2'))
    conv2 = activation(conv2)

    pool2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool2')

    conv3, W = utils.conv2d(x=pool2,
                            n_output=n_filters[2],
                            k_h=filter_sizes[2],
                            k_w=filter_sizes[2],
                            d_w=1,
                            d_h=1,
                            name='conv3')
    Ws.append(W)
    # conv3 = activation(batch_norm(conv3, phase_train, 'bn3'))
    conv3 = activation(conv3)

    pool3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool3')

    conv4, W = utils.conv2d(x=pool3,
                            n_output=n_filters[3],
                            k_h=filter_sizes[3],
                            k_w=filter_sizes[3],
                            d_w=1,
                            d_h=1,
                            name='conv4')
    Ws.append(W)
    # conv4 = activation(batch_norm(conv4, phase_train, 'bn4'))
    conv4 = activation(conv4)

    # Multi-scale features: concatenate flattened pool3 and conv4.
    pool3_flat = utils.flatten(pool3)
    conv4_flat = utils.flatten(conv4)
    concat = tf.concat(1, [pool3_flat, conv4_flat], name='concat')

    ip1, W = utils.linear(concat, 120, name='ip1')
    Ws.append(W)
    ip1 = activation(ip1)
    if dropout:
        ip1 = tf.nn.dropout(ip1, keep_prob)

    ip2, W = utils.linear(ip1, 10, name='ip2')
    Ws.append(W)
    # ip2 = activation(ip2)

    p_flat = utils.flatten(ip2)
    y_flat = utils.flatten(y)

    # L2 weight decay on the two fully connected layers.
    regularizers = 5e-4 * (tf.nn.l2_loss(Ws[-1]) + tf.nn.l2_loss(Ws[-2]))
    # Squared-error loss on the landmark coordinates.
    loss_x = tf.reduce_sum(tf.squared_difference(p_flat, y_flat), 1)
    cost = tf.reduce_mean(loss_x) + regularizers
    prediction = tf.reshape(p_flat, (-1, 5, 2))

    return {'cost': cost, 'Ws': Ws,
            'x': x, 'y': y, 'pred': prediction,
            'keep_prob': keep_prob,
            'train': phase_train}
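# As a quick check of deepID's architecture for the default 39x39 input
# (VALID convs with stride 1; SAME 2x2 max-pool after conv1-conv3), the
# spatial sizes can be derived as follows (illustrative, pure Python):
#
#     n = 39
#     for i, k in enumerate([4, 3, 3, 2]):
#         n = n - k + 1          # VALID conv, stride 1
#         if i < 3:
#             n = -(-n // 2)     # SAME max-pool, stride 2 (ceil division)
#
# which gives conv1 36 -> pool1 18 -> conv2 16 -> pool2 8 -> conv3 6 ->
# pool3 3 -> conv4 2, so the concatenated feature vector entering ip1 is
# 3*3*60 + 2*2*80 = 860 units wide.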
def normalized_rmse(pred, gt_truth):
    # TODO: assert shapes
    # Mean point-to-point error over the 5 landmarks, normalized by the
    # inter-ocular distance (landmarks 0 and 1 are the eye centers).
    norm = tf.sqrt(tf.reduce_sum(((gt_truth[:, 0, :] - gt_truth[:, 1, :])**2), 1))

    return tf.reduce_sum(tf.sqrt(tf.reduce_sum(tf.square(pred - gt_truth), 2)), 1) / (norm * 5)


def evaluateError(landmarkGt, landmarkP):
    e = np.zeros(5)
    ocular_dist = norm(landmarkGt[1] - landmarkGt[0])
    for i in range(5):
        e[i] = norm(landmarkGt[i] - landmarkP[i])
    e = e / ocular_dist
    return e


def evaluateBatchError(landmarkGt, landmarkP, batch_size):
    e = np.zeros([batch_size, 5])
    for i in range(batch_size):
        e[i] = evaluateError(landmarkGt[i], landmarkP[i])
    mean_err = e.mean(axis=0)
    return mean_err


def train_deepid(input_shape=[None, 39, 39, 1],
                 n_filters=[20, 40, 60, 80],
                 filter_sizes=[4, 3, 3, 2],
                 activation=tf.nn.relu,
                 dropout=False,
                 batch_size=64):
    batch_x, label_x = input_pipeline(['tftrain.txt'], batch_size=batch_size,
                                      shape=[39, 39, 1], is_training=True)
    # with h5py.File('../../train/1_F/train.h5', 'r') as hdf:
    #     data = hdf['data'][:]
    #     label = hdf['landmark'][:]
    # length = len(label)
    # test_x, test_label = input_pipeline(['tfboy.txt'], batch_size=batch_size, shape=input_shape[1:], is_training=False)

    deepid = deepID(input_shape=input_shape, n_filters=n_filters,
                    filter_sizes=filter_sizes, activation=activation,
                    dropout=dropout)

    batch = tf.Variable(0, dtype=tf.int32)
    # Decay the learning rate by 5% every 150000 examples (staircase).
    learning_rate = tf.train.exponential_decay(0.005, batch * batch_size, 150000, 0.95, staircase=True)
    optimizer = tf.train.AdamOptimizer(
        learning_rate).minimize(deepid['cost'], global_step=batch)
    save_step = 10000
    # Create the saver once, outside the session.
    saver = tf.train.Saver(max_to_keep=5)

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        # ckpt = tf.train.get_checkpoint_state('models')
        # if ckpt and ckpt.model_checkpoint_path:
        #     print("Continue training from the model {}".format(ckpt.model_checkpoint_path))
        #     saver.restore(sess, ckpt.model_checkpoint_path)
        coord = tf.train.Coordinator()

        # Ensure no more changes to graph
        tf.get_default_graph().finalize()

        # Start up the queues for handling the image pipeline
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        batch_i = 0

        # start_idx = 0
        # end_idx = 0
        for i in range(1000000):
            batch_i += 1
            batch_xs, batch_label = sess.run([batch_x, label_x])

            # end_idx = (start_idx + batch_size) % length
            # if start_idx + batch_size >= length:
            #     batch_xs = np.vstack((data[start_idx:], data[:end_idx]))
            #     batch_label = np.vstack((label[start_idx:], label[:end_idx]))
            # else:
            #     batch_xs = data[start_idx:end_idx]
            #     batch_label = label[start_idx:end_idx]
            # batch_xs = batch_xs.reshape((batch_size, 39, 39, 1))
            # start_idx = end_idx

            train_cost, pred = sess.run([deepid['cost'], deepid['pred'], optimizer], feed_dict={
                deepid['x']: batch_xs, deepid['y']: batch_label, deepid['train']: True,
                deepid['keep_prob']: 0.5})[:2]
            if batch_i % 100 == 0:
                print(batch_i, train_cost)
                lr = sess.run(learning_rate)
                print('lr: %.10f' % lr)
                # id = np.random.randint(10)
                id = 0
                batch_label = batch_label.reshape([-1, 5, 2])
                print('label: ' + np.array_str(batch_label[id]))
                print('pred: ' + np.array_str(pred[id]))

                err = evaluateBatchError(batch_label, pred, batch_size)
                print('Mean error:' + np.array_str(err))
                # print(batch_label[0])
                # plt.imshow(batch_xs[0].reshape((39, 39)))
                # import pdb; pdb.set_trace()

            if batch_i % save_step == 0:
                # Save the variables to disk.
                saver.save(sess, "./models/" + 'deepid.ckpt',
                           global_step=batch_i,
                           write_meta_graph=False)

        coord.request_stop()
        coord.join(threads)


if __name__ == '__main__':
    train_deepid()
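# For reference, the staircase schedule in train_deepid() decays the learning
# rate by 5% every 150000 examples (illustrative preview, pure Python):
#
#     batch_size = 64
#     for step in [0, 10000, 100000]:
#         0.005 * 0.95 ** ((step * batch_size) // 150000)
#     # -> 0.005, ~0.00407, ~0.00058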
--------------------------------------------------------------------------------