├── README.md
└── Zero-shot Classification by Deep Learning
    ├── CADL_Project_DLZSL_Final.ipynb
    ├── Zero-shot Classification by Deep Learning_kadenze.gif
    ├── Zero-shot Classification by Deep Learning_kadenze.pptx
    ├── Zero-shot Classification by Deep Learning_kadenze.wmv
    ├── images_project
    │   ├── ae_graph.png
    │   ├── ae_result.png
    │   ├── concept_zsl.jpg
    │   ├── cub.jpg
    │   ├── deep_regression_graph.png
    │   ├── deep_regression_result.png
    │   ├── fea_example.png
    │   └── regression_result.png
    ├── libs
    │   ├── __pycache__
    │   │   ├── __init__.cpython-35.pyc
    │   │   ├── batch_norm.cpython-35.pyc
    │   │   ├── celeb_vaegan.cpython-35.pyc
    │   │   ├── charrnn.cpython-35.pyc
    │   │   ├── dataset_utils.cpython-35.pyc
    │   │   ├── datasets.cpython-35.pyc
    │   │   ├── dft.cpython-35.pyc
    │   │   ├── gif.cpython-35.pyc
    │   │   ├── i2v.cpython-35.pyc
    │   │   ├── inception.cpython-35.pyc
    │   │   ├── nb_utils.cpython-35.pyc
    │   │   ├── utils.cpython-35.pyc
    │   │   └── vgg16.cpython-35.pyc
    │   ├── batch_norm.py
    │   ├── celeb_vaegan.py
    │   ├── charrnn.py
    │   ├── dataset_utils.py
    │   ├── datasets.py
    │   ├── deepdream.py
    │   ├── dft.py
    │   ├── gan.py
    │   ├── gif.py
    │   ├── i2v.py
    │   ├── inception.py
    │   ├── nb_utils.py
    │   ├── stylenet.py
    │   ├── utils.py
    │   ├── vae.py
    │   ├── vaegan.py
    │   └── vgg16.py
    ├── slim_birds_final.ipynb
    └── testclasses_akata.txt
/README.md:
--------------------------------------------------------------------------------
1 | # zsl-deep-learning
2 | This repository contains zero-shot learning experiments using deep learning.
--------------------------------------------------------------------------------
/Zero-shot Classification by Deep Learning/Zero-shot Classification by Deep Learning_kadenze.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/Zero-shot Classification by Deep Learning_kadenze.gif
--------------------------------------------------------------------------------
/Zero-shot Classification by Deep Learning/Zero-shot Classification by Deep Learning_kadenze.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/Zero-shot Classification by Deep Learning_kadenze.pptx
--------------------------------------------------------------------------------
/Zero-shot Classification by Deep Learning/Zero-shot Classification by Deep Learning_kadenze.wmv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/Zero-shot Classification by Deep Learning_kadenze.wmv
--------------------------------------------------------------------------------
/Zero-shot Classification by Deep Learning/images_project/ae_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/images_project/ae_graph.png
--------------------------------------------------------------------------------
/Zero-shot Classification by Deep Learning/images_project/ae_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/images_project/ae_result.png
-------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/images_project/concept_zsl.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/images_project/concept_zsl.jpg -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/images_project/cub.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/images_project/cub.jpg -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/images_project/deep_regression_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/images_project/deep_regression_graph.png -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/images_project/deep_regression_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/images_project/deep_regression_result.png -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/images_project/fea_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/images_project/fea_example.png -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/images_project/regression_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/images_project/regression_result.png -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/batch_norm.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/batch_norm.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/celeb_vaegan.cpython-35.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/celeb_vaegan.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/charrnn.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/charrnn.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/dataset_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/dataset_utils.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/datasets.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/datasets.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/dft.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/dft.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/gif.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/gif.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/i2v.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/i2v.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/inception.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/inception.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/nb_utils.cpython-35.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/nb_utils.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/utils.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/vgg16.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/vgg16.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/batch_norm.py: -------------------------------------------------------------------------------- 1 | """Batch Normalization for TensorFlow. 2 | Parag K. Mital, Jan 2016. 3 | """ 4 | 5 | import tensorflow as tf 6 | from tensorflow.python.ops import control_flow_ops 7 | 8 | 9 | def batch_norm(x, phase_train, name='bn', decay=0.9, reuse=None, 10 | affine=True): 11 | """ 12 | Batch normalization on convolutional maps. 13 | from: https://stackoverflow.com/questions/33949786/how-could-i- 14 | use-batch-normalization-in-tensorflow 15 | Only modified to infer shape from input tensor x. 16 | Parameters 17 | ---------- 18 | x 19 | Tensor, 4D BHWD input maps 20 | phase_train 21 | boolean tf.Variable, true indicates training phase 22 | name 23 | string, variable name 24 | affine 25 | whether to affine-transform outputs 26 | Return 27 | ------ 28 | normed 29 | batch-normalized maps 30 | """ 31 | with tf.variable_scope(name, reuse=reuse): 32 | shape = x.get_shape().as_list() 33 | beta = tf.get_variable(name='beta', shape=[shape[-1]], 34 | initializer=tf.constant_initializer(0.0), 35 | trainable=True) 36 | gamma = tf.get_variable(name='gamma', shape=[shape[-1]], 37 | initializer=tf.constant_initializer(1.0), 38 | trainable=affine) 39 | if len(shape) == 4: 40 | batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments') 41 | else: 42 | batch_mean, batch_var = tf.nn.moments(x, [0], name='moments') 43 | ema = tf.train.ExponentialMovingAverage(decay=decay) 44 | ema_apply_op = ema.apply([batch_mean, batch_var]) 45 | ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var) 46 | 47 | def mean_var_with_update(): 48 | """Summary 49 | Returns 50 | ------- 51 | name : TYPE 52 | Description 53 | """ 54 | with tf.control_dependencies([ema_apply_op]): 55 | return tf.identity(batch_mean), tf.identity(batch_var) 56 | mean, var = control_flow_ops.cond(phase_train, 57 | mean_var_with_update, 58 | lambda: (ema_mean, ema_var)) 59 | 60 | # tf.nn.batch_normalization 61 | normed = tf.nn.batch_norm_with_global_normalization( 62 | x, mean, var, beta, gamma, 1e-6, affine) 63 | return normed 64 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/celeb_vaegan.py: -------------------------------------------------------------------------------- 1 | """ 2 | Creative Applications of Deep Learning w/ 
Tensorflow. 3 | Kadenze, Inc. 4 | Copyright Parag K. Mital, June 2016. 5 | """ 6 | import numpy as np 7 | import tensorflow as tf 8 | from tensorflow.python.platform import gfile 9 | from .utils import download 10 | from skimage.transform import resize as imresize 11 | 12 | 13 | def celeb_vaegan_download(): 14 | """Download a pretrained celeb vae/gan network.""" 15 | 16 | # Load the model and labels 17 | model = download('https://s3.amazonaws.com/cadl/models/celeb.vaegan.tfmodel') 18 | labels = download('https://s3.amazonaws.com/cadl/celeb-align/list_attr_celeba.txt') 19 | return model, labels 20 | 21 | 22 | def get_celeb_vaegan_model(): 23 | """Get a pretrained model. 24 | 25 | Returns 26 | ------- 27 | net : dict 28 | { 29 | 'graph_def': tf.GraphDef 30 | The graph definition 31 | 'labels': list 32 | List of different possible attributes from celeb 33 | 'attributes': np.ndarray 34 | One hot encoding of the attributes per image 35 | [n_els x n_labels] 36 | 'preprocess': function 37 | Preprocess function 38 | } 39 | """ 40 | # Download the trained net 41 | model, labels = celeb_vaegan_download() 42 | 43 | # Parse the ids and synsets 44 | txt = open(labels).readlines() 45 | n_els = int(txt[0].strip()) 46 | labels = txt[1].strip().split() 47 | n_labels = len(labels) 48 | attributes = np.zeros((n_els, n_labels), dtype=bool) 49 | for i, txt_i in enumerate(txt[2:]): 50 | attributes[i] = (np.array(txt_i.strip().split()[1:]).astype(int) > 0) 51 | 52 | # Load the saved graph 53 | with gfile.GFile(model, 'rb') as f: 54 | graph_def = tf.GraphDef() 55 | try: 56 | graph_def.ParseFromString(f.read()) 57 | except: 58 | print('try adding PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python' + 59 | 'to environment. e.g.:\n' + 60 | 'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python ipython\n' + 61 | 'See here for info: ' + 62 | 'https://github.com/tensorflow/tensorflow/issues/582') 63 | net = { 64 | 'graph_def': graph_def, 65 | 'labels': labels, 66 | 'attributes': attributes, 67 | 'preprocess': preprocess, 68 | } 69 | return net 70 | 71 | 72 | def preprocess(img, crop_factor=0.8): 73 | """Replicate the preprocessing we did on the VAE/GAN. 74 | 75 | This model used a crop_factor of 0.8 and crop size of [100, 100, 3]. 76 | """ 77 | crop = np.min(img.shape[:2]) 78 | r = (img.shape[0] - crop) // 2 79 | c = (img.shape[1] - crop) // 2 80 | cropped = img[r: r + crop, c: c + crop] 81 | r, c, *d = cropped.shape 82 | if crop_factor < 1.0: 83 | amt = (1 - crop_factor) / 2 84 | h, w = int(c * amt), int(r * amt) 85 | cropped = cropped[h:-h, w:-w] 86 | rsz = imresize(cropped, (100, 100), preserve_range=False) 87 | return rsz 88 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/charrnn.py: -------------------------------------------------------------------------------- 1 | """Creative Applications of Deep Learning w/ Tensorflow. 2 | Kadenze, Inc. 3 | Copyright Parag K. Mital, June 2016. 
4 | 5 | TODO: 6 | argparse 7 | better sound example/model 8 | prime with text input 9 | """ 10 | 11 | import tensorflow as tf 12 | import numpy as np 13 | import os 14 | import sys 15 | from six.moves import urllib 16 | import collections 17 | 18 | 19 | def build_model(txt, 20 | batch_size=1, 21 | sequence_length=1, 22 | n_layers=2, 23 | n_cells=100, 24 | gradient_clip=10.0, 25 | learning_rate=0.001): 26 | 27 | vocab = list(set(txt)) 28 | vocab.sort() 29 | n_chars = len(vocab) 30 | encoder = collections.OrderedDict(zip(vocab, range(n_chars))) 31 | decoder = collections.OrderedDict(zip(range(n_chars), vocab)) 32 | 33 | X = tf.placeholder(tf.int32, [None, sequence_length], name='X') 34 | Y = tf.placeholder(tf.int32, [None, sequence_length], name='Y') 35 | keep_prob = tf.placeholder(tf.float32, name='keep_prob') 36 | 37 | with tf.variable_scope('embedding'): 38 | embedding = tf.get_variable("embedding", [n_chars, n_cells]) 39 | # Each sequence element will be connected to n_cells 40 | Xs = tf.nn.embedding_lookup(embedding, X) 41 | # Then slice each sequence element 42 | Xs = tf.split(1, sequence_length, Xs) 43 | # Get rid of singleton sequence element dimension 44 | Xs = [tf.squeeze(X_i, [1]) for X_i in Xs] 45 | 46 | with tf.variable_scope('rnn'): 47 | cells = tf.nn.rnn_cell.BasicLSTMCell( 48 | num_units=n_cells, forget_bias=0.0, state_is_tuple=True) 49 | initial_state = cells.zero_state(tf.shape(X)[0], tf.float32) 50 | if n_layers > 1: 51 | cells = tf.nn.rnn_cell.MultiRNNCell( 52 | [cells] * n_layers, state_is_tuple=True) 53 | initial_state = cells.zero_state(tf.shape(X)[0], tf.float32) 54 | cells = tf.nn.rnn_cell.DropoutWrapper( 55 | cells, output_keep_prob=keep_prob) 56 | outputs, final_state = tf.nn.rnn( 57 | cells, Xs, initial_state=initial_state) 58 | outputs_flat = tf.reshape(tf.concat(1, outputs), [-1, n_cells]) 59 | 60 | with tf.variable_scope('prediction'): 61 | W = tf.get_variable( 62 | "W", 63 | shape=[n_cells, n_chars], 64 | initializer=tf.contrib.layers.xavier_initializer()) 65 | b = tf.get_variable( 66 | "b", 67 | shape=[n_chars], 68 | initializer=tf.constant_initializer()) 69 | logits = tf.matmul(outputs_flat, W) + b 70 | probs = tf.nn.softmax(logits) 71 | Y_pred = tf.argmax(probs, 1) 72 | 73 | with tf.variable_scope('loss'): 74 | loss = tf.nn.seq2seq.sequence_loss_by_example( 75 | [logits], 76 | [tf.reshape(tf.concat(1, Y), [-1])], 77 | [tf.ones([batch_size * sequence_length])]) 78 | cost = tf.reduce_sum(loss) / batch_size 79 | 80 | with tf.name_scope('optimizer'): 81 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 82 | gradients = [] 83 | clip = tf.constant(gradient_clip, name="clip") 84 | for grad, var in optimizer.compute_gradients(cost): 85 | gradients.append((tf.clip_by_value(grad, -clip, clip), var)) 86 | updates = optimizer.apply_gradients(gradients) 87 | 88 | model = {'X': X, 'Y': Y, 'logits': logits, 'probs': probs, 89 | 'Y_pred': Y_pred, 'keep_prob': keep_prob, 90 | 'cost': cost, 'updates': updates, 'initial_state': initial_state, 91 | 'final_state': final_state, 'decoder': decoder, 'encoder': encoder, 92 | 'vocab_size': n_chars} 93 | return model 94 | 95 | 96 | def train(txt, batch_size=100, sequence_length=150, n_cells=100, n_layers=3, 97 | learning_rate=0.00001, max_iter=50000, gradient_clip=5.0, 98 | ckpt_name="model.ckpt", keep_prob=1.0): 99 | 100 | g = tf.Graph() 101 | with tf.Session(graph=g) as sess: 102 | model = build_model(txt=txt, 103 | batch_size=batch_size, 104 | sequence_length=sequence_length, 105 | n_layers=n_layers, 106 | 
n_cells=n_cells, 107 | gradient_clip=gradient_clip, 108 | learning_rate=learning_rate) 109 | 110 | init_op = tf.initialize_all_variables() 111 | saver = tf.train.Saver() 112 | sess.run(init_op) 113 | if os.path.exists(ckpt_name): 114 | saver.restore(sess, ckpt_name) 115 | print("Model restored.") 116 | 117 | cursor = 0 118 | it_i = 0 119 | print_step = 100 120 | avg_cost = 0 121 | while it_i < max_iter: 122 | Xs, Ys = [], [] 123 | for batch_i in range(batch_size): 124 | Xs.append([model['encoder'][ch] 125 | for ch in txt[cursor:cursor + sequence_length]]) 126 | Ys.append([model['encoder'][ch] 127 | for ch in txt[cursor + 1: 128 | cursor + sequence_length + 1]]) 129 | cursor += sequence_length 130 | if (cursor + 1) >= len(txt) - sequence_length - 1: 131 | cursor = np.random.randint(0, high=sequence_length) 132 | 133 | feed_dict = {model['X']: Xs, model['Y']: Ys, model['keep_prob']: keep_prob} 134 | out = sess.run([model['cost'], model['updates']], feed_dict=feed_dict) 135 | avg_cost += out[0] 136 | 137 | if (it_i + 1) % print_step == 0: 138 | p = sess.run(model['probs'], feed_dict={ 139 | model['X']: np.array(Xs[-1])[np.newaxis], model['keep_prob']: 1.0}) 140 | print(p.shape, 'min:', np.min(p), 'max:', np.max(p), 141 | 'mean:', np.mean(p), 'std:', np.std(p)) 142 | if isinstance(txt[0], str): 143 | # Print original string 144 | print('original:', "".join( 145 | [model['decoder'][ch] for ch in Xs[-1]])) 146 | 147 | # Print max guess 148 | amax = [] 149 | for p_i in p: 150 | amax.append(model['decoder'][np.argmax(p_i)]) 151 | print('synth(amax):', "".join(amax)) 152 | 153 | # Print w/ sampling 154 | samp = [] 155 | for p_i in p: 156 | p_i = p_i.astype(np.float64) 157 | p_i = p_i / p_i.sum() 158 | idx = np.argmax(np.random.multinomial(1, p_i.ravel())) 159 | samp.append(model['decoder'][idx]) 160 | print('synth(samp):', "".join(samp)) 161 | 162 | print(it_i, avg_cost / print_step) 163 | avg_cost = 0 164 | 165 | save_path = saver.save(sess, "./" + ckpt_name, global_step=it_i) 166 | print("Model saved in file: %s" % save_path) 167 | 168 | print(it_i, out[0], end='\r') 169 | it_i += 1 170 | 171 | return model 172 | 173 | 174 | def infer(txt, ckpt_name, n_iterations, n_cells=512, n_layers=3, 175 | learning_rate=0.001, max_iter=5000, gradient_clip=10.0, 176 | init_value=[0], keep_prob=1.0, sampling='prob', temperature=1.0): 177 | 178 | g = tf.Graph() 179 | with tf.Session(graph=g) as sess: 180 | sequence_length = len(init_value) 181 | model = build_model(txt=txt, 182 | batch_size=1, 183 | sequence_length=sequence_length, 184 | n_layers=n_layers, 185 | n_cells=n_cells, 186 | gradient_clip=gradient_clip, 187 | learning_rate=learning_rate) 188 | 189 | init_op = tf.initialize_all_variables() 190 | saver = tf.train.Saver() 191 | sess.run(init_op) 192 | if os.path.exists(ckpt_name): 193 | saver.restore(sess, ckpt_name) 194 | print("Model restored.") 195 | 196 | state = [] 197 | synth = [init_value] 198 | for s_i in model['final_state']: 199 | state += sess.run([s_i.c, s_i.h], feed_dict={ 200 | model['X']: [synth[-1]], model['keep_prob']: keep_prob}) 201 | 202 | for i in range(n_iterations): 203 | # print('iteration: {}/{}'.format(i, n_iterations), end='\r') 204 | feed_dict = {model['X']: [synth[-1]], 205 | model['keep_prob']: keep_prob} 206 | state_updates = [] 207 | for state_i in range(n_layers): 208 | feed_dict[model['initial_state'][state_i].c] = state[state_i * 2] 209 | feed_dict[model['initial_state'][state_i].h] = state[state_i * 2 + 1] 210 | state_updates.append(model['final_state'][state_i].c) 211 
| state_updates.append(model['final_state'][state_i].h) 212 | p = sess.run(model['probs'], feed_dict=feed_dict)[0] 213 | if sampling == 'max': 214 | p = np.argmax(p) 215 | else: 216 | p = p.astype(np.float64) 217 | p = np.log(p) / temperature 218 | p = np.exp(p) / np.sum(np.exp(p)) 219 | p = np.random.multinomial(1, p.ravel()) 220 | p = np.argmax(p) 221 | # Get the current state 222 | state = [sess.run(s_i, feed_dict=feed_dict) 223 | for s_i in state_updates] 224 | synth.append([p]) 225 | print(model['decoder'][p], end='') 226 | sys.stdout.flush() 227 | if model['decoder'][p] in ['.', '?', '!']: 228 | print('\n') 229 | print(np.concatenate(synth).shape) 230 | print("".join([model['decoder'][ch] for ch in np.concatenate(synth)])) 231 | return [model['decoder'][ch] for ch in np.concatenate(synth)] 232 | 233 | 234 | def test_alice(): 235 | f, _ = urllib.request.urlretrieve( 236 | 'https://www.gutenberg.org/cache/epub/11/pg11.txt', 'alice.txt') 237 | with open(f, 'r') as fp: 238 | txt = fp.read() 239 | train(txt, max_iter=50000) 240 | 241 | 242 | def test_trump(): 243 | with open('trump.txt', 'r') as fp: 244 | txt = fp.read() 245 | # train(txt, max_iter=50000) 246 | print(infer(txt, 'trump.ckpt', 50000)) 247 | 248 | 249 | def test_wtc(): 250 | from scipy.io.wavfile import write, read 251 | rate, aud = read('wtc.wav') 252 | txt = np.int8(np.round(aud / 16384.0 * 128.0)) 253 | txt = np.squeeze(txt).tolist() 254 | train(txt, sequence_length=250, n_layers=3, n_cells=512, max_iter=100000) 255 | synthesis = infer(txt, 'model.ckpt', 8000 * 30, n_layers=3, 256 | n_cells=150, keep_prob=1.0, sampling='prob') 257 | snd = np.int16(np.array(synthesis) / 128.0 * 16384.0) 258 | write('wtc-synth.wav', 8000, snd) 259 | 260 | 261 | if __name__ == '__main__': 262 | test_alice() 263 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/dataset_utils.py: -------------------------------------------------------------------------------- 1 | """Utils for dataset creation. 2 | 3 | Creative Applications of Deep Learning w/ Tensorflow. 4 | Kadenze, Inc. 5 | Copyright Parag K. Mital, June 2016. 6 | """ 7 | 8 | import os 9 | import pickle 10 | import numpy as np 11 | import tensorflow as tf 12 | from . import dft 13 | from .utils import download_and_extract_tar 14 | 15 | 16 | def create_input_pipeline(files, batch_size, n_epochs, shape, crop_shape=None, 17 | crop_factor=1.0, n_threads=2): 18 | """Creates a pipefile from a list of image files. 19 | Includes batch generator/central crop/resizing options. 20 | The resulting generator will dequeue the images batch_size at a time until 21 | it throws tf.errors.OutOfRangeError when there are no more images left in 22 | the queue. 23 | 24 | Parameters 25 | ---------- 26 | files : list 27 | List of paths to image files. 28 | batch_size : int 29 | Number of image files to load at a time. 30 | n_epochs : int 31 | Number of epochs to run before raising tf.errors.OutOfRangeError 32 | shape : list 33 | [height, width, channels] 34 | crop_shape : list 35 | [height, width] to crop image to. 36 | crop_factor : float 37 | Percentage of image to take starting from center. 38 | n_threads : int, optional 39 | Number of threads to use for batch shuffling 40 | """ 41 | 42 | # We first create a "producer" queue. It creates a production line which 43 | # will queue up the file names and allow another queue to deque the file 44 | # names all using a tf queue runner. 
45 | # Put simply, this is the entry point of the computational graph. 46 | # It will generate the list of file names. 47 | # We also specify it's capacity beforehand. 48 | producer = tf.train.string_input_producer( 49 | files, capacity=len(files)) 50 | 51 | # We need something which can open the files and read its contents. 52 | reader = tf.WholeFileReader() 53 | 54 | # We pass the filenames to this object which can read the file's contents. 55 | # This will create another queue running which dequeues the previous queue. 56 | keys, vals = reader.read(producer) 57 | 58 | # And then have to decode its contents as we know it is a jpeg image 59 | imgs = tf.image.decode_jpeg( 60 | vals, 61 | channels=3 if len(shape) > 2 and shape[2] == 3 else 0) 62 | 63 | # We have to explicitly define the shape of the tensor. 64 | # This is because the decode_jpeg operation is still a node in the graph 65 | # and doesn't yet know the shape of the image. Future operations however 66 | # need explicit knowledge of the image's shape in order to be created. 67 | imgs.set_shape(shape) 68 | 69 | # Next we'll centrally crop the image to the size of 100x100. 70 | # This operation required explicit knowledge of the image's shape. 71 | if shape[0] > shape[1]: 72 | rsz_shape = [int(shape[0] / shape[1] * crop_shape[0] / crop_factor), 73 | int(crop_shape[1] / crop_factor)] 74 | else: 75 | rsz_shape = [int(crop_shape[0] / crop_factor), 76 | int(shape[1] / shape[0] * crop_shape[1] / crop_factor)] 77 | rszs = tf.image.resize_images(imgs, rsz_shape) 78 | crops = (tf.image.resize_image_with_crop_or_pad( 79 | rszs, crop_shape[0], crop_shape[1]) 80 | if crop_shape is not None 81 | else imgs) 82 | 83 | # Now we'll create a batch generator that will also shuffle our examples. 84 | # We tell it how many it should have in its buffer when it randomly 85 | # permutes the order. 86 | min_after_dequeue = len(files) // 100 87 | 88 | # The capacity should be larger than min_after_dequeue, and determines how 89 | # many examples are prefetched. TF docs recommend setting this value to: 90 | # min_after_dequeue + (num_threads + a small safety margin) * batch_size 91 | capacity = min_after_dequeue + (n_threads + 1) * batch_size 92 | 93 | # Randomize the order and output batches of batch_size. 94 | batch = tf.train.shuffle_batch([crops], 95 | enqueue_many=False, 96 | batch_size=batch_size, 97 | capacity=capacity, 98 | min_after_dequeue=min_after_dequeue, 99 | num_threads=n_threads) 100 | 101 | # alternatively, we could use shuffle_batch_join to use multiple reader 102 | # instances, or set shuffle_batch's n_threads to higher than 1. 103 | 104 | return batch 105 | 106 | 107 | def gtzan_music_speech_download(dst='gtzan_music_speech'): 108 | """Download the GTZAN music and speech dataset. 109 | 110 | Parameters 111 | ---------- 112 | dst : str, optional 113 | Location to put the GTZAN music and speech datset. 114 | """ 115 | path = 'http://opihi.cs.uvic.ca/sound/music_speech.tar.gz' 116 | download_and_extract_tar(path, dst) 117 | 118 | 119 | def gtzan_music_speech_load(dst='gtzan_music_speech'): 120 | """Load the GTZAN Music and Speech dataset. 121 | 122 | Downloads the dataset if it does not exist into the dst directory. 123 | 124 | Parameters 125 | ---------- 126 | dst : str, optional 127 | Location of GTZAN Music and Speech dataset. 
128 | 129 | Returns 130 | ------- 131 | Xs, ys : np.ndarray, np.ndarray 132 | Array of data, Array of labels 133 | """ 134 | from scipy.io import wavfile 135 | 136 | if not os.path.exists(dst): 137 | gtzan_music_speech_download(dst) 138 | music_dir = os.path.join(os.path.join(dst, 'music_speech'), 'music_wav') 139 | music = [os.path.join(music_dir, file_i) 140 | for file_i in os.listdir(music_dir) 141 | if file_i.endswith('.wav')] 142 | speech_dir = os.path.join(os.path.join(dst, 'music_speech'), 'speech_wav') 143 | speech = [os.path.join(speech_dir, file_i) 144 | for file_i in os.listdir(speech_dir) 145 | if file_i.endswith('.wav')] 146 | Xs = [] 147 | ys = [] 148 | for i in music: 149 | sr, s = wavfile.read(i) 150 | s = s / 16384.0 - 1.0 151 | re, im = dft.dft_np(s) 152 | mag, phs = dft.ztoc(re, im) 153 | Xs.append((mag, phs)) 154 | ys.append(0) 155 | for i in speech: 156 | sr, s = wavfile.read(i) 157 | s = s / 16384.0 - 1.0 158 | re, im = dft.dft_np(s) 159 | mag, phs = dft.ztoc(re, im) 160 | Xs.append((mag, phs)) 161 | ys.append(1) 162 | Xs = np.array(Xs) 163 | Xs = np.transpose(Xs, [0, 2, 3, 1]) 164 | ys = np.array(ys) 165 | return Xs, ys 166 | 167 | 168 | def cifar10_download(dst='cifar10'): 169 | """Download the CIFAR10 dataset. 170 | 171 | Parameters 172 | ---------- 173 | dst : str, optional 174 | Directory to download into. 175 | """ 176 | path = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' 177 | download_and_extract_tar(path, dst) 178 | 179 | 180 | def cifar10_load(dst='cifar10'): 181 | """Load the CIFAR10 dataset. 182 | 183 | Downloads the dataset if it does not exist into the dst directory. 184 | 185 | Parameters 186 | ---------- 187 | dst : str, optional 188 | Location of CIFAR10 dataset. 189 | 190 | Returns 191 | ------- 192 | Xs, ys : np.ndarray, np.ndarray 193 | Array of data, Array of labels 194 | """ 195 | if not os.path.exists(dst): 196 | cifar10_download(dst) 197 | Xs = None 198 | ys = None 199 | for f in range(1, 6): 200 | cf = pickle.load(open( 201 | '%s/cifar-10-batches-py/data_batch_%d' % (dst, f), 'rb'), 202 | encoding='LATIN') 203 | if Xs is not None: 204 | Xs = np.r_[Xs, cf['data']] 205 | ys = np.r_[ys, np.array(cf['labels'])] 206 | else: 207 | Xs = cf['data'] 208 | ys = cf['labels'] 209 | Xs = np.swapaxes(np.swapaxes(Xs.reshape(-1, 3, 32, 32), 1, 3), 1, 2) 210 | return Xs, ys 211 | 212 | 213 | def dense_to_one_hot(labels, n_classes=2): 214 | """Convert class labels from scalars to one-hot vectors. 215 | 216 | Parameters 217 | ---------- 218 | labels : array 219 | Input labels to convert to one-hot representation. 220 | n_classes : int, optional 221 | Number of possible one-hot. 222 | 223 | Returns 224 | ------- 225 | one_hot : array 226 | One hot representation of input. 227 | """ 228 | return np.eye(n_classes).astype(np.float32)[labels] 229 | 230 | 231 | class DatasetSplit(object): 232 | """Utility class for batching data and handling multiple splits. 233 | 234 | Attributes 235 | ---------- 236 | current_batch_idx : int 237 | Description 238 | images : np.ndarray 239 | Xs of the dataset. Not necessarily images. 240 | labels : np.ndarray 241 | ys of the dataset. 242 | n_labels : int 243 | Number of possible labels 244 | num_examples : int 245 | Number of total observations 246 | """ 247 | 248 | def __init__(self, images, labels): 249 | """Initialize a DatasetSplit object. 
250 | 251 | Parameters 252 | ---------- 253 | images : np.ndarray 254 | Xs/inputs 255 | labels : np.ndarray 256 | ys/outputs 257 | """ 258 | self.images = np.array(images).astype(np.float32) 259 | if labels is not None: 260 | self.labels = np.array(labels).astype(np.int32) 261 | self.n_labels = len(np.unique(labels)) 262 | else: 263 | self.labels = None 264 | self.num_examples = len(self.images) 265 | 266 | def next_batch(self, batch_size=100): 267 | """Batch generator with randomization. 268 | 269 | Parameters 270 | ---------- 271 | batch_size : int, optional 272 | Size of each minibatch. 273 | 274 | Returns 275 | ------- 276 | Xs, ys : np.ndarray, np.ndarray 277 | Next batch of inputs and labels (if no labels, then None). 278 | """ 279 | # Shuffle each epoch 280 | current_permutation = np.random.permutation(range(len(self.images))) 281 | epoch_images = self.images[current_permutation, ...] 282 | if self.labels is not None: 283 | epoch_labels = self.labels[current_permutation, ...] 284 | 285 | # Then iterate over the epoch 286 | self.current_batch_idx = 0 287 | while self.current_batch_idx < len(self.images): 288 | end_idx = min( 289 | self.current_batch_idx + batch_size, len(self.images)) 290 | this_batch = { 291 | 'images': epoch_images[self.current_batch_idx:end_idx], 292 | 'labels': epoch_labels[self.current_batch_idx:end_idx] 293 | if self.labels is not None else None 294 | } 295 | self.current_batch_idx += batch_size 296 | yield this_batch['images'], this_batch['labels'] 297 | 298 | 299 | class Dataset(object): 300 | """Create a dataset from data and their labels. 301 | 302 | Allows easy use of train/valid/test splits; Batch generator. 303 | 304 | Attributes 305 | ---------- 306 | all_idxs : list 307 | All indexes across all splits. 308 | all_inputs : list 309 | All inputs across all splits. 310 | all_labels : list 311 | All labels across all splits. 312 | n_labels : int 313 | Number of labels. 314 | split : list 315 | Percentage split of train, valid, test sets. 316 | test_idxs : list 317 | Indexes of the test split. 318 | train_idxs : list 319 | Indexes of the train split. 320 | valid_idxs : list 321 | Indexes of the valid split. 322 | """ 323 | 324 | def __init__(self, Xs, ys=None, split=[1.0, 0.0, 0.0], one_hot=False): 325 | """Initialize a Dataset object. 326 | 327 | Parameters 328 | ---------- 329 | Xs : np.ndarray 330 | Images/inputs to a network 331 | ys : np.ndarray 332 | Labels/outputs to a network 333 | split : list, optional 334 | Percentage of train, valid, and test sets. 335 | one_hot : bool, optional 336 | Whether or not to use one-hot encoding of labels (ys). 337 | """ 338 | self.all_idxs = [] 339 | self.all_labels = [] 340 | self.all_inputs = [] 341 | self.train_idxs = [] 342 | self.valid_idxs = [] 343 | self.test_idxs = [] 344 | self.n_labels = 0 345 | self.split = split 346 | 347 | # Now mix all the labels that are currently stored as blocks 348 | self.all_inputs = Xs 349 | n_idxs = len(self.all_inputs) 350 | idxs = range(n_idxs) 351 | rand_idxs = np.random.permutation(idxs) 352 | self.all_inputs = self.all_inputs[rand_idxs, ...] 353 | if ys is not None: 354 | self.all_labels = ys if not one_hot else dense_to_one_hot(ys) 355 | self.all_labels = self.all_labels[rand_idxs, ...] 
356 | else: 357 | self.all_labels = None 358 | 359 | # Get splits 360 | self.train_idxs = idxs[:round(split[0] * n_idxs)] 361 | self.valid_idxs = idxs[len(self.train_idxs): 362 | len(self.train_idxs) + round(split[1] * n_idxs)] 363 | self.test_idxs = idxs[ 364 | (len(self.valid_idxs) + len(self.train_idxs)): 365 | (len(self.valid_idxs) + len(self.train_idxs)) + 366 | round(split[2] * n_idxs)] 367 | 368 | @property 369 | def X(self): 370 | """Inputs/Xs/Images. 371 | 372 | Returns 373 | ------- 374 | all_inputs : np.ndarray 375 | Original Inputs/Xs. 376 | """ 377 | return self.all_inputs 378 | 379 | @property 380 | def Y(self): 381 | """Outputs/ys/Labels. 382 | 383 | Returns 384 | ------- 385 | all_labels : np.ndarray 386 | Original Outputs/ys. 387 | """ 388 | return self.all_labels 389 | 390 | @property 391 | def train(self): 392 | """Train split. 393 | 394 | Returns 395 | ------- 396 | split : DatasetSplit 397 | Split of the train dataset. 398 | """ 399 | if len(self.train_idxs): 400 | inputs = self.all_inputs[self.train_idxs, ...] 401 | if self.all_labels is not None: 402 | labels = self.all_labels[self.train_idxs, ...] 403 | else: 404 | labels = None 405 | else: 406 | inputs, labels = [], [] 407 | return DatasetSplit(inputs, labels) 408 | 409 | @property 410 | def valid(self): 411 | """Validation split. 412 | 413 | Returns 414 | ------- 415 | split : DatasetSplit 416 | Split of the validation dataset. 417 | """ 418 | if len(self.valid_idxs): 419 | inputs = self.all_inputs[self.valid_idxs, ...] 420 | if self.all_labels is not None: 421 | labels = self.all_labels[self.valid_idxs, ...] 422 | else: 423 | labels = None 424 | else: 425 | inputs, labels = [], [] 426 | return DatasetSplit(inputs, labels) 427 | 428 | @property 429 | def test(self): 430 | """Test split. 431 | 432 | Returns 433 | ------- 434 | split : DatasetSplit 435 | Split of the test dataset. 436 | """ 437 | if len(self.test_idxs): 438 | inputs = self.all_inputs[self.test_idxs, ...] 439 | if self.all_labels is not None: 440 | labels = self.all_labels[self.test_idxs, ...] 441 | else: 442 | labels = None 443 | else: 444 | inputs, labels = [], [] 445 | return DatasetSplit(inputs, labels) 446 | 447 | def mean(self): 448 | """Mean of the inputs/Xs. 449 | 450 | Returns 451 | ------- 452 | mean : np.ndarray 453 | Calculates mean across 0th (batch) dimension. 454 | """ 455 | return np.mean(self.all_inputs, axis=0) 456 | 457 | def std(self): 458 | """Standard deviation of the inputs/Xs. 459 | 460 | Returns 461 | ------- 462 | std : np.ndarray 463 | Calculates std across 0th (batch) dimension. 464 | """ 465 | return np.std(self.all_inputs, axis=0) 466 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/datasets.py: -------------------------------------------------------------------------------- 1 | """Creative Applications of Deep Learning w/ Tensorflow. 2 | Kadenze, Inc. 3 | Copyright Parag K. Mital, June 2016. 4 | """ 5 | import tensorflow.examples.tutorials.mnist.input_data as input_data 6 | from .dataset_utils import * 7 | 8 | 9 | def MNIST(one_hot=True, split=[1.0, 0.0, 0.0]): 10 | """Returns the MNIST dataset. 11 | 12 | Returns 13 | ------- 14 | mnist : DataSet 15 | DataSet object w/ convenienve props for accessing 16 | train/validation/test sets and batches. 
17 | """ 18 | ds = input_data.read_data_sets('MNIST_data/', one_hot=one_hot) 19 | return Dataset(np.r_[ds.train.images, 20 | ds.validation.images, 21 | ds.test.images], 22 | np.r_[ds.train.labels, 23 | ds.validation.labels, 24 | ds.test.labels], 25 | split=split) 26 | 27 | 28 | def CIFAR10(flatten=True, split=[1.0, 0.0, 0.0]): 29 | """Returns the CIFAR10 dataset. 30 | 31 | Parameters 32 | ---------- 33 | flatten : bool, optional 34 | Convert the 3 x 32 x 32 pixels to a single vector 35 | 36 | Returns 37 | ------- 38 | cifar : Dataset 39 | Description 40 | """ 41 | # plt.imshow(np.transpose(np.reshape( 42 | # cifar.train.images[10], (3, 32, 32)), [1, 2, 0])) 43 | Xs, ys = cifar10_load() 44 | if flatten: 45 | Xs = Xs.reshape((Xs.shape[0], -1)) 46 | return Dataset(Xs, ys, split=split) 47 | 48 | 49 | def CELEB(path='./img_align_celeba/'): 50 | """Attempt to load the files of the CELEB dataset. 51 | 52 | Requires the files already be downloaded and placed in the `dst` directory. 53 | 54 | http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html 55 | 56 | Parameters 57 | ---------- 58 | path : str, optional 59 | Directory where the aligned/cropped celeb dataset can be found. 60 | 61 | Returns 62 | ------- 63 | files : list 64 | List of file paths to the dataset. 65 | """ 66 | if not os.path.exists(path): 67 | print('Could not find celeb dataset under {}.'.format(path)) 68 | print('Try downloading the dataset from the "Aligned and Cropped" ' + 69 | 'link located here (imgs/img_align_celeba.zip [1.34 GB]): ' + 70 | 'http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html') 71 | return None 72 | else: 73 | fs = [os.path.join(path, f) 74 | for f in os.listdir(path) if f.endswith('.jpg')] 75 | if len(fs) < 202598: 76 | print('It does not look like you have downloaded the entire ' + 77 | 'Celeb Dataset.\n' + 78 | 'Try downloading the dataset from the "Aligned and Cropped" ' + 79 | 'link located here (imgs/img_align_celeba.zip [1.34 GB]): ' + 80 | 'http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html') 81 | return fs 82 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/deepdream.py: -------------------------------------------------------------------------------- 1 | """Deep Dream using the Inception v5 network. 2 | 3 | Creative Applications of Deep Learning w/ Tensorflow. 4 | Kadenze, Inc. 5 | Copyright Parag K. Mital, June 2016. 6 | """ 7 | import os 8 | import numpy as np 9 | import tensorflow as tf 10 | from scipy.ndimage.filters import gaussian_filter 11 | from skimage.transform import resize 12 | from scipy.misc import imsave 13 | from . import inception, vgg16, i2v 14 | from . import gif 15 | 16 | 17 | def get_labels(model='inception'): 18 | """Return labels corresponding to the `neuron_i` parameter of deep dream. 19 | 20 | Parameters 21 | ---------- 22 | model : str, optional 23 | Which model to load. Must be one of: ['inception'], 'i2v_tag', 'i2v', 24 | 'vgg16', or 'vgg_face'. 25 | 26 | Raises 27 | ------ 28 | ValueError 29 | Unknown model. Must be one of: ['inception'], 'i2v_tag', 'i2v', 30 | 'vgg16', or 'vgg_face'. 
31 | """ 32 | if model == 'inception': 33 | net = inception.get_inception_model() 34 | return net['labels'] 35 | elif model == 'i2v_tag': 36 | net = i2v.get_i2v_tag_model() 37 | return net['labels'] 38 | elif model == 'vgg16': 39 | net = vgg16.get_vgg_model() 40 | return net['labels'] 41 | elif model == 'vgg_face': 42 | net = vgg16.get_vgg_face_model() 43 | return net['labels'] 44 | else: 45 | raise ValueError("Unknown model or this model does not have labels!") 46 | 47 | 48 | def get_layer_names(model='inception'): 49 | """Retun every layer's index and name in the given model. 50 | 51 | Parameters 52 | ---------- 53 | model : str, optional 54 | Which model to load. Must be one of: ['inception'], 'i2v_tag', 'i2v', 55 | 'vgg16', or 'vgg_face'. 56 | 57 | Returns 58 | ------- 59 | names : list of tuples 60 | The index and layer's name for every layer in the given model. 61 | """ 62 | g = tf.Graph() 63 | with tf.Session(graph=g): 64 | if model == 'inception': 65 | net = inception.get_inception_model() 66 | elif model == 'vgg_face': 67 | net = vgg16.get_vgg_face_model() 68 | elif model == 'vgg16': 69 | net = vgg16.get_vgg_model() 70 | elif model == 'i2v': 71 | net = i2v.get_i2v_model() 72 | elif model == 'i2v-tag': 73 | net = i2v.get_i2v_tag_model() 74 | 75 | tf.import_graph_def(net['graph_def'], name='net') 76 | names = [(i, op.name) for i, op in enumerate(g.get_operations())] 77 | return names 78 | 79 | 80 | def _setup(input_img, model, downsize): 81 | """Internal use only. Load the given model's graph and preprocess an image. 82 | 83 | Parameters 84 | ---------- 85 | input_img : np.ndarray 86 | Image to process with the model's normalizaiton process. 87 | model : str 88 | Which model to load. Must be one of: ['inception'], 'i2v_tag', 'i2v', 89 | 'vgg16', or 'vgg_face'. 90 | downsize : bool 91 | Optionally crop/resize the input image to the standard shape. Only 92 | applies to inception network which is all convolutional. 93 | 94 | Returns 95 | ------- 96 | net, img, preprocess, deprocess : dict, np.ndarray, function, function 97 | net : The networks graph_def and labels 98 | img : The preprocessed input image 99 | preprocess: Function for preprocessing an image 100 | deprocess: Function for deprocessing an image 101 | 102 | Raises 103 | ------ 104 | ValueError 105 | If model is unknown. 106 | """ 107 | if model == 'inception': 108 | net = inception.get_inception_model() 109 | img = inception.preprocess(input_img, resize=downsize, crop=downsize)[np.newaxis] 110 | deprocess, preprocess = inception.deprocess, inception.preprocess 111 | elif model == 'vgg_face': 112 | net = vgg16.get_vgg_face_model() 113 | img = vgg16.preprocess(input_img)[np.newaxis] 114 | deprocess, preprocess = vgg16.deprocess, vgg16.preprocess 115 | elif model == 'vgg16': 116 | net = vgg16.get_vgg_model() 117 | img = vgg16.preprocess(input_img)[np.newaxis] 118 | deprocess, preprocess = vgg16.deprocess, vgg16.preprocess 119 | elif model == 'i2v': 120 | net = i2v.get_i2v_model() 121 | img = i2v.preprocess(input_img)[np.newaxis] 122 | deprocess, preprocess = i2v.deprocess, i2v.preprocess 123 | elif model == 'i2v_tag': 124 | net = i2v.get_i2v_tag_model() 125 | img = i2v.preprocess(input_img)[np.newaxis] 126 | deprocess, preprocess = i2v.deprocess, i2v.preprocess 127 | else: 128 | raise ValueError( 129 | "Unknown model name! 
Supported: " + 130 | "['inception', 'vgg_face', 'vgg16', 'i2v', 'i2v_tag']") 131 | 132 | return net, img, preprocess, deprocess 133 | 134 | 135 | def _apply(img, 136 | gradient, 137 | it_i, 138 | decay=0.998, 139 | sigma=1.5, 140 | blur_step=10, 141 | step=1.0, 142 | crop=0, 143 | crop_step=1, 144 | pth=0): 145 | """Interal use only. Apply the gradient to an image with the given params. 146 | 147 | Parameters 148 | ---------- 149 | img : np.ndarray 150 | Tensor to apply gradient ascent to. 151 | gradient : np.ndarray 152 | Gradient to ascend to. 153 | it_i : int 154 | Current iteration (used for step modulos) 155 | decay : float, optional 156 | Amount to decay. 157 | sigma : float, optional 158 | Sigma for Gaussian Kernel. 159 | blur_step : int, optional 160 | How often to blur. 161 | step : float, optional 162 | Step for gradient ascent. 163 | crop : int, optional 164 | Amount to crop from each border. 165 | crop_step : int, optional 166 | How often to crop. 167 | pth : int, optional 168 | Percentile to mask out. 169 | 170 | Returns 171 | ------- 172 | img : np.ndarray 173 | Ascended image. 174 | """ 175 | gradient /= (np.std(gradient) + 1e-10) 176 | img += gradient * step 177 | img *= decay 178 | 179 | if pth: 180 | mask = (np.abs(img) < np.percentile(np.abs(img), pth)) 181 | img = img - img * mask 182 | 183 | if blur_step and it_i % blur_step == 0: 184 | for ch_i in range(3): 185 | img[..., ch_i] = gaussian_filter(img[..., ch_i], sigma) 186 | 187 | if crop and it_i % crop_step == 0: 188 | height, width, *ch = img[0].shape 189 | 190 | # Crop a 1 pixel border from height and width 191 | img = img[:, crop:-crop, crop:-crop, :] 192 | 193 | # Resize 194 | img = resize(img[0], (height, width), order=3, 195 | clip=False, preserve_range=True 196 | )[np.newaxis].astype(np.float32) 197 | 198 | 199 | def deep_dream(input_img, 200 | downsize=False, 201 | model='inception', 202 | layer_i=-1, 203 | neuron_i=-1, 204 | n_iterations=100, 205 | save_gif=None, 206 | save_images='imgs', 207 | device='/cpu:0', 208 | **kwargs): 209 | """Deep Dream with the given parameters. 210 | 211 | Parameters 212 | ---------- 213 | input_img : np.ndarray 214 | Image to apply deep dream to. Should be 3-dimenionsal H x W x C 215 | RGB uint8 or float32. 216 | downsize : bool, optional 217 | Whether or not to downsize the image. Only applies to 218 | model=='inception'. 219 | model : str, optional 220 | Which model to load. Must be one of: ['inception'], 'i2v_tag', 'i2v', 221 | 'vgg16', or 'vgg_face'. 222 | layer_i : int, optional 223 | Which layer to use for finding the gradient. E.g. the softmax layer 224 | for inception is -1, for vgg networks it is -2. Use the function 225 | "get_layer_names" to find the layer number that you need. 226 | neuron_i : int, optional 227 | Which neuron to use. -1 for the entire layer. 228 | n_iterations : int, optional 229 | Number of iterations to dream. 230 | save_gif : bool, optional 231 | Save a GIF. 232 | save_images : str, optional 233 | Folder to save images to. 234 | device : str, optional 235 | Which device to use, e.g. ['/cpu:0'] or '/gpu:0'. 236 | **kwargs : dict 237 | See "_apply" for additional parameters. 
238 | 239 | Returns 240 | ------- 241 | imgs : list of np.array 242 | Images of every iteration 243 | """ 244 | net, img, preprocess, deprocess = _setup(input_img, model, downsize) 245 | batch, height, width, *ch = img.shape 246 | 247 | g = tf.Graph() 248 | with tf.Session(graph=g) as sess, g.device(device): 249 | 250 | tf.import_graph_def(net['graph_def'], name='net') 251 | names = [op.name for op in g.get_operations()] 252 | input_name = names[0] + ':0' 253 | x = g.get_tensor_by_name(input_name) 254 | 255 | layer = g.get_tensor_by_name(names[layer_i] + ':0') 256 | layer_shape = sess.run(tf.shape(layer), feed_dict={x: img}) 257 | layer_vec = np.ones(layer_shape) / layer_shape[-1] 258 | layer_vec[..., neuron_i] = 1.0 - (1.0 / layer_shape[-1]) 259 | 260 | ascent = tf.gradients(layer, x) 261 | 262 | imgs = [] 263 | for it_i in range(n_iterations): 264 | print(it_i, np.min(img), np.max(img)) 265 | if neuron_i == -1: 266 | this_res = sess.run( 267 | ascent, feed_dict={x: img})[0] 268 | else: 269 | this_res = sess.run( 270 | ascent, feed_dict={x: img, layer: layer_vec})[0] 271 | 272 | _apply(img, this_res, it_i, **kwargs) 273 | imgs.append(deprocess(img[0])) 274 | 275 | if save_images is not None: 276 | imsave(os.path.join(save_images, 277 | 'frame{}.png'.format(it_i)), imgs[-1]) 278 | 279 | if save_gif is not None: 280 | gif.build_gif(imgs, saveto=save_gif) 281 | 282 | return imgs 283 | 284 | 285 | def guided_dream(input_img, 286 | guide_img=None, 287 | downsize=False, 288 | layers=[162, 183, 184, 247], 289 | label_i=962, 290 | layer_i=-1, 291 | feature_loss_weight=1.0, 292 | tv_loss_weight=1.0, 293 | l2_loss_weight=1.0, 294 | softmax_loss_weight=1.0, 295 | model='inception', 296 | neuron_i=920, 297 | n_iterations=100, 298 | save_gif=None, 299 | save_images='imgs', 300 | device='/cpu:0', 301 | **kwargs): 302 | """Deep Dream v2. Use an optional guide image and other techniques. 303 | 304 | Parameters 305 | ---------- 306 | input_img : np.ndarray 307 | Image to apply deep dream to. Should be 3-dimenionsal H x W x C 308 | RGB uint8 or float32. 309 | guide_img : np.ndarray, optional 310 | Optional image to find features at different layers for. Must pass in 311 | a list of layers that you want to find features for. Then the guided 312 | dream will try to match this images features at those layers. 313 | downsize : bool, optional 314 | Whether or not to downsize the image. Only applies to 315 | model=='inception'. 316 | layers : list, optional 317 | A list of layers to find features for in the "guide_img". 318 | label_i : int, optional 319 | Which label to use for the softmax layer. Use the "get_labels" function 320 | to find the index corresponding the object of interest. If None, not 321 | used. 322 | layer_i : int, optional 323 | Which layer to use for finding the gradient. E.g. the softmax layer 324 | for inception is -1, for vgg networks it is -2. Use the function 325 | "get_layer_names" to find the layer number that you need. 326 | feature_loss_weight : float, optional 327 | Weighting for the feature loss from the guide_img. 328 | tv_loss_weight : float, optional 329 | Total variational loss weighting. Enforces smoothness. 330 | l2_loss_weight : float, optional 331 | L2 loss weighting. Enforces smaller values and reduces saturation. 332 | softmax_loss_weight : float, optional 333 | Softmax loss weighting. Must set label_i. 334 | model : str, optional 335 | Which model to load. Must be one of: ['inception'], 'i2v_tag', 'i2v', 336 | 'vgg16', or 'vgg_face'. 
337 | neuron_i : int, optional 338 | Which neuron to use. -1 for the entire layer. 339 | n_iterations : int, optional 340 | Number of iterations to dream. 341 | save_gif : bool, optional 342 | Save a GIF. 343 | save_images : str, optional 344 | Folder to save images to. 345 | device : str, optional 346 | Which device to use, e.g. ['/cpu:0'] or '/gpu:0'. 347 | **kwargs : dict 348 | See "_apply" for additional parameters. 349 | 350 | Returns 351 | ------- 352 | imgs : list of np.ndarray 353 | Images of the dream. 354 | """ 355 | net, img, preprocess, deprocess = _setup(input_img, model, downsize) 356 | print(img.shape, input_img.shape) 357 | print(img.min(), img.max()) 358 | 359 | if guide_img is not None: 360 | guide_img = preprocess(guide_img.copy(), model)[np.newaxis] 361 | assert(guide_img.shape == img.shape) 362 | batch, height, width, *ch = img.shape 363 | 364 | g = tf.Graph() 365 | with tf.Session(graph=g) as sess, g.device(device): 366 | tf.import_graph_def(net['graph_def'], name='net') 367 | names = [op.name for op in g.get_operations()] 368 | input_name = names[0] + ':0' 369 | x = g.get_tensor_by_name(input_name) 370 | 371 | features = [names[layer_i] + ':0' for layer_i in layers] 372 | feature_loss = tf.Variable(0.0) 373 | for feature_i in features: 374 | layer = g.get_tensor_by_name(feature_i) 375 | if guide_img is None: 376 | feature_loss += tf.reduce_mean(layer) 377 | else: 378 | # Reshape it to 2D vector 379 | layer = tf.reshape(layer, [-1, 1]) 380 | # Do the same for our guide image 381 | guide_layer = sess.run(layer, feed_dict={x: guide_img}) 382 | guide_layer = guide_layer.reshape(-1, 1) 383 | # Now calculate their dot product 384 | correlation = tf.matmul(guide_layer.T, layer) 385 | feature_loss += feature_loss_weight * tf.reduce_mean(correlation) 386 | softmax_loss = tf.Variable(0.0) 387 | if label_i is not None: 388 | layer = g.get_tensor_by_name(names[layer_i] + ':0') 389 | layer_shape = sess.run(tf.shape(layer), feed_dict={x: img}) 390 | layer_vec = np.ones(layer_shape) / layer_shape[-1] 391 | layer_vec[..., neuron_i] = 1.0 - 1.0 / layer_shape[1] 392 | softmax_loss += softmax_loss_weight * tf.reduce_mean(tf.nn.l2_loss(layer - layer_vec)) 393 | 394 | dx = tf.square(x[:, :height - 1, :width - 1, :] - x[:, :height - 1, 1:, :]) 395 | dy = tf.square(x[:, :height - 1, :width - 1, :] - x[:, 1:, :width - 1, :]) 396 | tv_loss = tv_loss_weight * tf.reduce_mean(tf.pow(dx + dy, 1.2)) 397 | l2_loss = l2_loss_weight * tf.reduce_mean(tf.nn.l2_loss(x)) 398 | 399 | ascent = tf.gradients(feature_loss + softmax_loss + tv_loss + l2_loss, x)[0] 400 | sess.run(tf.initialize_all_variables()) 401 | imgs = [] 402 | for it_i in range(n_iterations): 403 | this_res, this_feature_loss, this_softmax_loss, this_tv_loss, this_l2_loss = sess.run( 404 | [ascent, feature_loss, softmax_loss, tv_loss, l2_loss], feed_dict={x: img}) 405 | print('feature:', this_feature_loss, 406 | 'softmax:', this_softmax_loss, 407 | 'tv', this_tv_loss, 408 | 'l2', this_l2_loss) 409 | 410 | _apply(img, -this_res, it_i, **kwargs) 411 | imgs.append(deprocess(img[0])) 412 | 413 | if save_images is not None: 414 | imsave(os.path.join(save_images, 415 | 'frame{}.png'.format(it_i)), imgs[-1]) 416 | 417 | if save_gif is not None: 418 | gif.build_gif(imgs, saveto=save_gif) 419 | 420 | return imgs 421 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/dft.py: -------------------------------------------------------------------------------- 1 | """Summary. 
2 | 3 | #CADL 4 | Copyright Parag K. Mital 2016 5 | """ 6 | import numpy as np 7 | from scipy.signal import hann 8 | 9 | 10 | def ztoc(re, im): 11 | return np.sqrt(re**2 + im**2), np.angle(re + im * 1j) 12 | 13 | 14 | def ctoz(mag, phs): 15 | return mag * np.cos(phs), mag * np.sin(phs) 16 | 17 | 18 | def dft_np(signal, hop_size=256, fft_size=512): 19 | n_hops = len(signal) // hop_size 20 | s = [] 21 | hann_win = hann(fft_size) 22 | for hop_i in range(n_hops): 23 | frame = signal[(hop_i * hop_size):(hop_i * hop_size + fft_size)] 24 | frame = np.pad(frame, (0, fft_size - len(frame)), 'constant') 25 | frame *= hann_win 26 | s.append(frame) 27 | s = np.array(s) 28 | N = s.shape[-1] 29 | k = np.reshape(np.linspace(0.0, 2 * np.pi / N * (N // 2), N // 2), [1, N // 2]) 30 | x = np.reshape(np.linspace(0.0, N - 1, N), [N, 1]) 31 | freqs = np.dot(x, k) 32 | reals = np.dot(s, np.cos(freqs)) * (2.0 / N) 33 | imags = np.dot(s, np.sin(freqs)) * (2.0 / N) 34 | return reals, imags 35 | 36 | 37 | def idft_np(re, im, hop_size=256, fft_size=512): 38 | N = re.shape[1] * 2 39 | k = np.reshape(np.linspace(0.0, 2 * np.pi / N * (N // 2), N // 2), [N // 2, 1]) 40 | x = np.reshape(np.linspace(0.0, N - 1, N), [1, N]) 41 | freqs = np.dot(k, x) 42 | signal = np.zeros((re.shape[0] * hop_size + fft_size,)) 43 | recon = np.dot(re, np.cos(freqs)) + np.dot(im, np.sin(freqs)) 44 | for hop_i, frame in enumerate(recon): 45 | signal[(hop_i * hop_size): (hop_i * hop_size + fft_size)] += frame 46 | return signal 47 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/gan.py: -------------------------------------------------------------------------------- 1 | """Generative Adversarial Network. 2 | 3 | Creative Applications of Deep Learning w/ Tensorflow. 4 | Kadenze, Inc. 5 | Copyright Parag K. Mital, June 2016. 6 | """ 7 | import tensorflow as tf 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | import os 11 | import libs.batch_norm as bn 12 | from libs.utils import * 13 | 14 | 15 | def encoder(x, phase_train, dimensions=[], filter_sizes=[], 16 | convolutional=False, activation=tf.nn.relu, 17 | output_activation=tf.nn.sigmoid, reuse=False): 18 | """Encoder network codes input `x` to layers defined by dimensions. 19 | 20 | Parameters 21 | ---------- 22 | x : tf.Tensor 23 | Input to the encoder network, e.g. tf.Placeholder or tf.Variable 24 | phase_train : tf.Placeholder 25 | Placeholder defining whether the network is in train mode or not. 26 | Used for changing the behavior of batch normalization which updates 27 | its statistics during train mode. 28 | dimensions : list, optional 29 | List of the number of neurons in each layer (convolutional=False) -or- 30 | List of the number of filters in each layer (convolutional=True), e.g. 31 | [100, 100, 100, 100] for a 4-layer deep network with 100 in each layer. 32 | filter_sizes : list, optional 33 | List of the size of the kernel in each layer, e.g.: 34 | [3, 3, 3, 3] is a 4-layer deep network w/ 3 x 3 kernels in every layer. 35 | convolutional : bool, optional 36 | Whether or not to use convolutional layers. 37 | activation : fn, optional 38 | Function for applying an activation, e.g. tf.nn.relu 39 | output_activation : fn, optional 40 | Function for applying an activation on the last layer, e.g. tf.nn.relu 41 | reuse : bool, optional 42 | For each layer's variable scope, whether to reuse existing variables. 
43 | 44 | Returns 45 | ------- 46 | h : tf.Tensor 47 | Output tensor of the encoder 48 | """ 49 | # %% 50 | # ensure 2-d is converted to square tensor. 51 | if convolutional: 52 | x_tensor = to_tensor(x) 53 | else: 54 | x_tensor = tf.reshape( 55 | tensor=x, 56 | shape=[-1, dimensions[0]]) 57 | dimensions = dimensions[1:] 58 | current_input = x_tensor 59 | 60 | for layer_i, n_output in enumerate(dimensions): 61 | with tf.variable_scope(str(layer_i), reuse=reuse): 62 | if convolutional: 63 | h, W = conv2d( 64 | x=current_input, 65 | n_output=n_output, 66 | k_h=filter_sizes[layer_i], 67 | k_w=filter_sizes[layer_i], 68 | padding='SAME', 69 | reuse=reuse) 70 | else: 71 | h, W = linear( 72 | x=current_input, 73 | n_output=n_output, 74 | reuse=reuse) 75 | norm = bn.batch_norm( 76 | x=h, 77 | phase_train=phase_train, 78 | name='bn', 79 | reuse=reuse) 80 | output = activation(norm) 81 | 82 | current_input = output 83 | 84 | flattened = flatten(current_input, name='flatten', reuse=reuse) 85 | 86 | if output_activation is None: 87 | return flattened 88 | else: 89 | return output_activation(flattened) 90 | 91 | 92 | def decoder(z, 93 | phase_train, 94 | dimensions=[], 95 | channels=[], 96 | filter_sizes=[], 97 | convolutional=False, 98 | activation=tf.nn.relu, 99 | output_activation=tf.nn.tanh, 100 | reuse=None): 101 | """Decoder network codes input `x` to layers defined by dimensions. 102 | 103 | In contrast with `encoder`, this requires information on the number of 104 | output channels in each layer for convolution. Otherwise, it is mostly 105 | the same. 106 | 107 | Parameters 108 | ---------- 109 | z : tf.Tensor 110 | Input to the decoder network, e.g. tf.Placeholder or tf.Variable 111 | phase_train : tf.Placeholder 112 | Placeholder defining whether the network is in train mode or not. 113 | Used for changing the behavior of batch normalization which updates 114 | its statistics during train mode. 115 | dimensions : list, optional 116 | List of the number of neurons in each layer (convolutional=False) -or- 117 | List of the number of filters in each layer (convolutional=True), e.g. 118 | [100, 100, 100, 100] for a 4-layer deep network with 100 in each layer. 119 | channels : list, optional 120 | For decoding when convolutional=True, require the number of output 121 | channels in each layer. 122 | filter_sizes : list, optional 123 | List of the size of the kernel in each layer, e.g.: 124 | [3, 3, 3, 3] is a 4-layer deep network w/ 3 x 3 kernels in every layer. 125 | convolutional : bool, optional 126 | Whether or not to use convolutional layers. 127 | activation : fn, optional 128 | Function for applying an activation, e.g. tf.nn.relu 129 | output_activation : fn, optional 130 | Function for applying an activation on the last layer, e.g. tf.nn.relu 131 | reuse : bool, optional 132 | For each layer's variable scope, whether to reuse existing variables. 
133 | 134 | Returns 135 | ------- 136 | h : tf.Tensor 137 | Output tensor of the decoder 138 | """ 139 | 140 | if convolutional: 141 | with tf.variable_scope('fc', reuse=reuse): 142 | z1, W = linear( 143 | x=z, 144 | n_output=channels[0] * dimensions[0][0] * dimensions[0][1], 145 | reuse=reuse) 146 | rsz = tf.reshape( 147 | z1, [-1, dimensions[0][0], dimensions[0][1], channels[0]]) 148 | current_input = activation( 149 | features=bn.batch_norm( 150 | name='bn', 151 | x=rsz, 152 | phase_train=phase_train, 153 | reuse=reuse)) 154 | 155 | dimensions = dimensions[1:] 156 | channels = channels[1:] 157 | filter_sizes = filter_sizes[1:] 158 | else: 159 | current_input = z 160 | 161 | for layer_i, n_output in enumerate(dimensions): 162 | with tf.variable_scope(str(layer_i), reuse=reuse): 163 | 164 | if convolutional: 165 | h, W = deconv2d( 166 | x=current_input, 167 | n_output_h=n_output[0], 168 | n_output_w=n_output[1], 169 | n_output_ch=channels[layer_i], 170 | k_h=filter_sizes[layer_i], 171 | k_w=filter_sizes[layer_i], 172 | padding='SAME', 173 | reuse=reuse) 174 | else: 175 | h, W = linear( 176 | x=current_input, 177 | n_output=n_output, 178 | reuse=reuse) 179 | 180 | if layer_i < len(dimensions) - 1: 181 | norm = bn.batch_norm( 182 | x=h, 183 | phase_train=phase_train, 184 | name='bn', reuse=reuse) 185 | output = activation(norm) 186 | else: 187 | output = h 188 | current_input = output 189 | 190 | if output_activation is None: 191 | return current_input 192 | else: 193 | return output_activation(current_input) 194 | 195 | 196 | def generator(z, phase_train, output_h, output_w, convolutional=True, 197 | n_features=32, rgb=False, reuse=None): 198 | """Simple interface to build a decoder network given the input parameters. 199 | 200 | Parameters 201 | ---------- 202 | z : tf.Tensor 203 | Input to the generator, i.e. tf.Placeholder of tf.Variable 204 | phase_train : tf.Placeholder of type bool 205 | Whether or not the network should be trained (used for Batch Norm). 206 | output_h : int 207 | Final generated height 208 | output_w : int 209 | Final generated width 210 | convolutional : bool, optional 211 | Whether or not to build a convolutional generative network. 212 | n_features : int, optional 213 | Number of channels to use in the last hidden layer. 214 | rgb : bool, optional 215 | Whether or not the final generated image is RGB or not. 216 | reuse : None, optional 217 | Whether or not to reuse the variables if they are already created. 218 | 219 | Returns 220 | ------- 221 | x_tilde : tf.Tensor 222 | Output of the generator network. 
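    Examples
    --------
    A minimal sketch of wiring the generator into a graph; the latent size and
    output resolution below are illustrative choices, not requirements:

    >>> z = tf.placeholder(tf.float32, [None, 100], 'z')
    >>> phase_train = tf.placeholder(tf.bool, name='phase_train')
    >>> G = generator(z, phase_train, output_h=64, output_w=64, rgb=True)
    >>> # G has shape [None, 64, 64, 3] with a tanh range of (-1, 1)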
223 | """ 224 | n_channels = 3 if rgb else 1 225 | with tf.variable_scope('generator', reuse=reuse): 226 | return decoder(z=z, 227 | phase_train=phase_train, 228 | convolutional=convolutional, 229 | filter_sizes=[5, 5, 5, 5, 5], 230 | channels=[n_features * 8, n_features * 4, 231 | n_features * 2, n_features, n_channels], 232 | dimensions=[ 233 | [output_h // 16, output_w // 16], 234 | [output_h // 8, output_w // 8], 235 | [output_h // 4, output_w // 4], 236 | [output_h // 2, output_w // 2], 237 | [output_h, output_w]] 238 | if convolutional else [384, 512, n_features], 239 | activation=tf.nn.relu6, 240 | output_activation=tf.nn.tanh, 241 | reuse=reuse) 242 | 243 | 244 | def discriminator(x, phase_train, convolutional=True, 245 | n_features=32, rgb=False, reuse=False): 246 | """Summary 247 | 248 | Parameters 249 | ---------- 250 | x : TYPE 251 | Description 252 | phase_train : TYPE 253 | Description 254 | convolutional : bool, optional 255 | Description 256 | n_features : int, optional 257 | Description 258 | rgb : bool, optional 259 | Description 260 | reuse : bool, optional 261 | Description 262 | 263 | Returns 264 | ------- 265 | name : TYPE 266 | Description 267 | """ 268 | n_channels = 3 if rgb else 1 269 | with tf.variable_scope('discriminator', reuse=reuse): 270 | return encoder(x=x, 271 | phase_train=phase_train, 272 | convolutional=convolutional, 273 | filter_sizes=[5, 5, 5, 5], 274 | dimensions=[n_features, n_features * 2, 275 | n_features * 4, n_features * 8] 276 | if convolutional 277 | else [n_features, 128, 256], 278 | activation=tf.nn.relu6, 279 | output_activation=None, 280 | reuse=reuse) 281 | 282 | 283 | def GAN(input_shape, n_latent, n_features, rgb, debug=True): 284 | """Summary 285 | 286 | Parameters 287 | ---------- 288 | input_shape : TYPE 289 | Description 290 | n_latent : TYPE 291 | Description 292 | n_features : TYPE 293 | Description 294 | rgb : TYPE 295 | Description 296 | debug : bool, optional 297 | Description 298 | 299 | Returns 300 | ------- 301 | name : TYPE 302 | Description 303 | """ 304 | # Real input samples 305 | # n_features is either the image dimension or flattened number of features 306 | x = tf.placeholder(tf.float32, input_shape, 'x') 307 | x = (x / 127.5) - 1.0 308 | sum_x = tf.image_summary("x", x) 309 | phase_train = tf.placeholder(tf.bool, name='phase_train') 310 | 311 | # Discriminator for real input samples 312 | D_real_logits = discriminator( 313 | x, phase_train, n_features=n_features, rgb=rgb) 314 | D_real = tf.nn.sigmoid(D_real_logits) 315 | sum_D_real = tf.histogram_summary("D_real", D_real) 316 | 317 | # Generator tries to recreate input samples using latent feature vector 318 | z = tf.placeholder(tf.float32, [None, n_latent], 'z') 319 | sum_z = tf.histogram_summary("z", z) 320 | G = generator( 321 | z, phase_train, 322 | output_h=input_shape[1], output_w=input_shape[2], 323 | n_features=n_features, rgb=rgb) 324 | sum_G = tf.image_summary("G", G) 325 | 326 | # Discriminator for generated samples 327 | D_fake_logits = discriminator( 328 | G, phase_train, n_features=n_features, rgb=rgb, reuse=True) 329 | D_fake = tf.nn.sigmoid(D_fake_logits) 330 | sum_D_fake = tf.histogram_summary("D_fake", D_fake) 331 | 332 | with tf.variable_scope('loss'): 333 | # Loss functions 334 | loss_D_real = binary_cross_entropy( 335 | D_real, tf.ones_like(D_real), name='loss_D_real') 336 | loss_D_fake = binary_cross_entropy( 337 | D_fake, tf.zeros_like(D_fake), name='loss_D_fake') 338 | loss_D = tf.reduce_mean((loss_D_real + loss_D_fake) / 2) 339 | loss_G = 
tf.reduce_mean(binary_cross_entropy( 340 | D_fake, tf.ones_like(D_fake), name='loss_G')) 341 | 342 | # Summaries 343 | sum_loss_D_real = tf.histogram_summary("loss_D_real", loss_D_real) 344 | sum_loss_D_fake = tf.histogram_summary("loss_D_fake", loss_D_fake) 345 | sum_loss_D = tf.scalar_summary("loss_D", loss_D) 346 | sum_loss_G = tf.scalar_summary("loss_G", loss_G) 347 | sum_D_real = tf.histogram_summary("D_real", D_real) 348 | sum_D_fake = tf.histogram_summary("D_fake", D_fake) 349 | 350 | return { 351 | 'loss_D': loss_D, 352 | 'loss_G': loss_G, 353 | 'x': x, 354 | 'G': G, 355 | 'z': z, 356 | 'train': phase_train, 357 | 'sums': { 358 | 'G': sum_G, 359 | 'D_real': sum_D_real, 360 | 'D_fake': sum_D_fake, 361 | 'loss_G': sum_loss_G, 362 | 'loss_D': sum_loss_D, 363 | 'loss_D_real': sum_loss_D_real, 364 | 'loss_D_fake': sum_loss_D_fake, 365 | 'z': sum_z, 366 | 'x': sum_x 367 | } 368 | } 369 | 370 | 371 | def train_ds(): 372 | """Summary 373 | 374 | Returns 375 | ------- 376 | name : TYPE 377 | Description 378 | """ 379 | init_lr_g = 1e-4 380 | init_lr_d = 1e-4 381 | n_latent = 100 382 | n_epochs = 1000000 383 | batch_size = 200 384 | n_samples = 15 385 | input_shape = [218, 178, 3] 386 | crop_shape = [64, 64, 3] 387 | crop_factor = 0.8 388 | 389 | from libs.dataset_utils import create_input_pipeline 390 | from libs.datasets import CELEB 391 | 392 | files = CELEB() 393 | batch = create_input_pipeline( 394 | files=files, 395 | batch_size=batch_size, 396 | n_epochs=n_epochs, 397 | crop_shape=crop_shape, 398 | crop_factor=crop_factor, 399 | shape=input_shape) 400 | 401 | gan = GAN(input_shape=[None] + crop_shape, n_features=10, 402 | n_latent=n_latent, rgb=True, debug=False) 403 | 404 | vars_d = [v for v in tf.trainable_variables() 405 | if v.name.startswith('discriminator')] 406 | print('Training discriminator variables:') 407 | [print(v.name) for v in tf.trainable_variables() 408 | if v.name.startswith('discriminator')] 409 | 410 | vars_g = [v for v in tf.trainable_variables() 411 | if v.name.startswith('generator')] 412 | print('Training generator variables:') 413 | [print(v.name) for v in tf.trainable_variables() 414 | if v.name.startswith('generator')] 415 | zs = np.random.uniform( 416 | -1.0, 1.0, [4, n_latent]).astype(np.float32) 417 | zs = make_latent_manifold(zs, n_samples) 418 | 419 | lr_g = tf.placeholder(tf.float32, shape=[], name='learning_rate_g') 420 | lr_d = tf.placeholder(tf.float32, shape=[], name='learning_rate_d') 421 | 422 | try: 423 | from tf.contrib.layers import apply_regularization 424 | d_reg = apply_regularization( 425 | tf.contrib.layers.l2_regularizer(1e-6), vars_d) 426 | g_reg = apply_regularization( 427 | tf.contrib.layers.l2_regularizer(1e-6), vars_g) 428 | except: 429 | d_reg, g_reg = 0, 0 430 | 431 | opt_g = tf.train.AdamOptimizer(lr_g, name='Adam_g').minimize( 432 | gan['loss_G'] + g_reg, var_list=vars_g) 433 | opt_d = tf.train.AdamOptimizer(lr_d, name='Adam_d').minimize( 434 | gan['loss_D'] + d_reg, var_list=vars_d) 435 | 436 | # %% 437 | # We create a session to use the graph 438 | sess = tf.Session() 439 | init_op = tf.initialize_all_variables() 440 | 441 | saver = tf.train.Saver() 442 | sums = gan['sums'] 443 | G_sum_op = tf.merge_summary([ 444 | sums['G'], sums['loss_G'], sums['z'], 445 | sums['loss_D_fake'], sums['D_fake']]) 446 | D_sum_op = tf.merge_summary([ 447 | sums['loss_D'], sums['loss_D_real'], sums['loss_D_fake'], 448 | sums['z'], sums['x'], sums['D_real'], sums['D_fake']]) 449 | writer = tf.train.SummaryWriter("./logs", sess.graph_def) 450 | 451 
| coord = tf.train.Coordinator() 452 | threads = tf.train.start_queue_runners(sess=sess, coord=coord) 453 | sess.run(init_op) 454 | # g = tf.get_default_graph() 455 | # [print(op.name) for op in g.get_operations()] 456 | 457 | if os.path.exists("gan.ckpt"): 458 | saver.restore(sess, "gan.ckpt") 459 | print("GAN model restored.") 460 | 461 | fig, ax = plt.subplots(1, 1, figsize=(10, 10)) 462 | step_i, t_i = 0, 0 463 | loss_d = 1 464 | loss_g = 1 465 | n_loss_d, total_loss_d = 1, 1 466 | n_loss_g, total_loss_g = 1, 1 467 | try: 468 | while not coord.should_stop(): 469 | batch_xs = sess.run(batch) 470 | step_i += 1 471 | batch_zs = np.random.uniform( 472 | -1.0, 1.0, [batch_size, n_latent]).astype(np.float32) 473 | 474 | this_lr_g = min(1e-2, max(1e-6, init_lr_g * (loss_g / loss_d)**2)) 475 | this_lr_d = min(1e-2, max(1e-6, init_lr_d * (loss_d / loss_g)**2)) 476 | # this_lr_d *= ((1.0 - (step_i / 100000)) ** 2) 477 | # this_lr_g *= ((1.0 - (step_i / 100000)) ** 2) 478 | 479 | # if np.random.random() > (loss_g / (loss_d + loss_g)): 480 | if step_i % 3 == 1: 481 | loss_d, _, sum_d = sess.run([gan['loss_D'], opt_d, D_sum_op], 482 | feed_dict={gan['x']: batch_xs, 483 | gan['z']: batch_zs, 484 | gan['train']: True, 485 | lr_d: this_lr_d}) 486 | total_loss_d += loss_d 487 | n_loss_d += 1 488 | writer.add_summary(sum_d, step_i) 489 | print('%04d d* = lr: %0.08f, loss: %08.06f, \t' % 490 | (step_i, this_lr_d, loss_d) + 491 | 'g = lr: %0.08f, loss: %08.06f' % (this_lr_g, loss_g)) 492 | else: 493 | loss_g, _, sum_g = sess.run([gan['loss_G'], opt_g, G_sum_op], 494 | feed_dict={gan['z']: batch_zs, 495 | gan['train']: True, 496 | lr_g: this_lr_g}) 497 | total_loss_g += loss_g 498 | n_loss_g += 1 499 | writer.add_summary(sum_g, step_i) 500 | print('%04d d = lr: %0.08f, loss: %08.06f, \t' % 501 | (step_i, this_lr_d, loss_d) + 502 | 'g* = lr: %0.08f, loss: %08.06f' % (this_lr_g, loss_g)) 503 | 504 | if step_i % 100 == 0: 505 | samples = sess.run(gan['G'], feed_dict={ 506 | gan['z']: zs, 507 | gan['train']: False}) 508 | montage(np.clip((samples + 1) * 127.5, 0, 255).astype(np.uint8), 509 | 'imgs/gan_%08d.png' % t_i) 510 | t_i += 1 511 | 512 | print('generator loss:', total_loss_g / n_loss_g) 513 | print('discriminator loss:', total_loss_d / n_loss_d) 514 | 515 | # Save the variables to disk. 516 | save_path = saver.save(sess, "./gan.ckpt", 517 | global_step=step_i, 518 | write_meta_graph=False) 519 | print("Model saved in file: %s" % save_path) 520 | except tf.errors.OutOfRangeError: 521 | print('Done training -- epoch limit reached') 522 | finally: 523 | # One of the threads has issued an exception. So let's tell all the 524 | # threads to shutdown. 525 | coord.request_stop() 526 | 527 | # Wait until all threads have finished. 528 | coord.join(threads) 529 | 530 | # Clean up the session. 531 | sess.close() 532 | 533 | 534 | if __name__ == '__main__': 535 | train_ds() 536 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/gif.py: -------------------------------------------------------------------------------- 1 | """Utility for creating a GIF. 2 | 3 | Creative Applications of Deep Learning w/ Tensorflow. 4 | Kadenze, Inc. 5 | Copyright Parag K. Mital, June 2016. 
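Note: saving the animation uses matplotlib's 'imagemagick' writer, so ImageMagick
must be installed for `save_gif=True` to work.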
6 | """ 7 | import numpy as np 8 | import matplotlib.pyplot as plt 9 | import matplotlib.animation as animation 10 | 11 | 12 | def build_gif(imgs, interval=0.1, dpi=72, 13 | save_gif=True, saveto='animation.gif', 14 | show_gif=False, cmap=None): 15 | """Take an array or list of images and create a GIF. 16 | 17 | Parameters 18 | ---------- 19 | imgs : np.ndarray or list 20 | List of images to create a GIF of 21 | interval : float, optional 22 | Spacing in seconds between successive images. 23 | dpi : int, optional 24 | Dots per inch. 25 | save_gif : bool, optional 26 | Whether or not to save the GIF. 27 | saveto : str, optional 28 | Filename of GIF to save. 29 | show_gif : bool, optional 30 | Whether or not to render the GIF using plt. 31 | cmap : None, optional 32 | Optional colormap to apply to the images. 33 | 34 | Returns 35 | ------- 36 | ani : matplotlib.animation.ArtistAnimation 37 | The artist animation from matplotlib. Likely not useful. 38 | """ 39 | imgs = np.asarray(imgs) 40 | h, w, *c = imgs[0].shape 41 | fig, ax = plt.subplots(figsize=(np.round(w / dpi), np.round(h / dpi))) 42 | fig.subplots_adjust(bottom=0) 43 | fig.subplots_adjust(top=1) 44 | fig.subplots_adjust(right=1) 45 | fig.subplots_adjust(left=0) 46 | ax.set_axis_off() 47 | 48 | if cmap is not None: 49 | axs = list(map(lambda x: [ 50 | ax.imshow(x, cmap=cmap)], imgs)) 51 | else: 52 | axs = list(map(lambda x: [ 53 | ax.imshow(x)], imgs)) 54 | 55 | ani = animation.ArtistAnimation( 56 | fig, axs, interval=interval*1000, repeat_delay=0, blit=True) 57 | 58 | if save_gif: 59 | ani.save(saveto, writer='imagemagick', dpi=dpi) 60 | 61 | if show_gif: 62 | plt.show() 63 | 64 | return ani 65 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/i2v.py: -------------------------------------------------------------------------------- 1 | """ 2 | Creative Applications of Deep Learning w/ Tensorflow. 3 | Kadenze, Inc. 4 | Copyright Parag K. Mital, June 2016. 5 | """ 6 | import json 7 | import numpy as np 8 | from tensorflow.python.platform import gfile 9 | import tensorflow as tf 10 | import matplotlib.pyplot as plt 11 | from skimage.transform import resize as imresize 12 | from .utils import download 13 | 14 | 15 | def i2v_download(): 16 | """Download a pretrained i2v network.""" 17 | model = download('https://s3.amazonaws.com/cadl/models/illust2vec.tfmodel') 18 | return model 19 | 20 | 21 | def i2v_tag_download(): 22 | """Download a pretrained i2v network.""" 23 | model = download('https://s3.amazonaws.com/cadl/models/illust2vec_tag.tfmodel') 24 | tags = download('https://s3.amazonaws.com/cadl/models/tag_list.json') 25 | return model, tags 26 | 27 | 28 | def get_i2v_model(): 29 | """Get a pretrained i2v network. 30 | 31 | Returns 32 | ------- 33 | net : dict 34 | {'graph_def': graph_def, 'labels': synsets} 35 | where the graph_def is a tf.GraphDef and the synsets 36 | map an integer label from 0-1000 to a list of names 37 | """ 38 | # Download the trained net 39 | model = i2v_download() 40 | 41 | # Load the saved graph 42 | with gfile.GFile(model, 'rb') as f: 43 | graph_def = tf.GraphDef() 44 | try: 45 | graph_def.ParseFromString(f.read()) 46 | except: 47 | print('try adding PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python' + 48 | 'to environment. 
e.g.:\n' + 49 | 'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python ipython\n' + 50 | 'See here for info: ' + 51 | 'https://github.com/tensorflow/tensorflow/issues/582') 52 | 53 | return {'graph_def': graph_def} 54 | 55 | 56 | def get_i2v_tag_model(): 57 | """Get a pretrained i2v tag network. 58 | 59 | Returns 60 | ------- 61 | net : dict 62 | {'graph_def': graph_def, 'labels': synsets} 63 | where the graph_def is a tf.GraphDef and the synsets 64 | map an integer label from 0-1000 to a list of names 65 | """ 66 | # Download the trained net 67 | model, tags = i2v_tag_download() 68 | tags = json.load(open(tags, 'r')) 69 | 70 | # Load the saved graph 71 | with gfile.GFile(model, 'rb') as f: 72 | graph_def = tf.GraphDef() 73 | try: 74 | graph_def.ParseFromString(f.read()) 75 | except: 76 | print('try adding PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python' + 77 | 'to environment. e.g.:\n' + 78 | 'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python ipython\n' + 79 | 'See here for info: ' + 80 | 'https://github.com/tensorflow/tensorflow/issues/582') 81 | 82 | return { 83 | 'graph_def': graph_def, 84 | 'labels': tags, 85 | 'preprocess': preprocess, 86 | 'deprocess': deprocess 87 | } 88 | 89 | 90 | def preprocess(img, crop=True, resize=True, dsize=(224, 224)): 91 | mean_img = np.array([164.76139251, 167.47864617, 181.13838569]) 92 | if img.dtype == np.uint8: 93 | img = (img[..., ::-1] - mean_img).astype(np.float32) 94 | else: 95 | img = img[..., ::-1] * 255.0 - mean_img 96 | 97 | if crop: 98 | short_edge = min(img.shape[:2]) 99 | yy = int((img.shape[0] - short_edge) / 2) 100 | xx = int((img.shape[1] - short_edge) / 2) 101 | crop_img = img[yy: yy + short_edge, xx: xx + short_edge] 102 | else: 103 | crop_img = img 104 | if resize: 105 | norm_img = imresize(crop_img, dsize, preserve_range=True) 106 | else: 107 | norm_img = crop_img 108 | return (norm_img).astype(np.float32) 109 | 110 | 111 | def deprocess(img): 112 | mean_img = np.array([164.76139251, 167.47864617, 181.13838569]) 113 | processed = (img + mean_img)[..., ::-1] 114 | return np.clip(processed, 0, 255).astype(np.uint8) 115 | # return ((img / np.max(np.abs(img))) * 127.5 + 116 | # 127.5).astype(np.uint8) 117 | 118 | 119 | def test_i2v(): 120 | """Loads the i2v network and applies it to a test image. 121 | """ 122 | with tf.Session() as sess: 123 | net = get_i2v_model() 124 | tf.import_graph_def(net['graph_def'], name='i2v') 125 | g = tf.get_default_graph() 126 | names = [op.name for op in g.get_operations()] 127 | x = g.get_tensor_by_name(names[0] + ':0') 128 | softmax = g.get_tensor_by_name(names[-3] + ':0') 129 | 130 | from skimage import data 131 | img = preprocess(data.coffee())[np.newaxis] 132 | res = np.squeeze(softmax.eval(feed_dict={x: img})) 133 | print([(res[idx], net['labels'][idx]) 134 | for idx in res.argsort()[-5:][::-1]]) 135 | 136 | """Let's visualize the network's gradient activation 137 | when backpropagated to the original input image. 
This 138 | is effectively telling us which pixels contribute to the 139 | predicted class or given neuron""" 140 | pools = [name for name in names if 'pool' in name.split('/')[-1]] 141 | fig, axs = plt.subplots(1, len(pools)) 142 | for pool_i, poolname in enumerate(pools): 143 | pool = g.get_tensor_by_name(poolname + ':0') 144 | pool.get_shape() 145 | neuron = tf.reduce_max(pool, 1) 146 | saliency = tf.gradients(neuron, x) 147 | neuron_idx = tf.arg_max(pool, 1) 148 | this_res = sess.run([saliency[0], neuron_idx], 149 | feed_dict={x: img}) 150 | 151 | grad = this_res[0][0] / np.max(np.abs(this_res[0])) 152 | axs[pool_i].imshow((grad * 128 + 128).astype(np.uint8)) 153 | axs[pool_i].set_title(poolname) 154 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/inception.py: -------------------------------------------------------------------------------- 1 | """ 2 | Creative Applications of Deep Learning w/ Tensorflow. 3 | Kadenze, Inc. 4 | Copyright Parag K. Mital, June 2016. 5 | """ 6 | import os 7 | import numpy as np 8 | from tensorflow.python.platform import gfile 9 | import tensorflow as tf 10 | import matplotlib.pyplot as plt 11 | from skimage.transform import resize as imresize 12 | from .utils import download_and_extract_tar, download_and_extract_zip 13 | 14 | 15 | def inception_download(data_dir='inception', version='v5'): 16 | """Download a pretrained inception network. 17 | 18 | Parameters 19 | ---------- 20 | data_dir : str, optional 21 | Location of the pretrained inception network download. 22 | version : str, optional 23 | Version of the model: ['v3'] or 'v5'. 24 | """ 25 | if version == 'v3': 26 | download_and_extract_tar( 27 | 'https://s3.amazonaws.com/cadl/models/inception-2015-12-05.tgz', 28 | data_dir) 29 | return (os.path.join(data_dir, 'classify_image_graph_def.pb'), 30 | os.path.join(data_dir, 'imagenet_synset_to_human_label_map.txt')) 31 | else: 32 | download_and_extract_zip( 33 | 'https://s3.amazonaws.com/cadl/models/inception5h.zip', data_dir) 34 | return (os.path.join(data_dir, 'tensorflow_inception_graph.pb'), 35 | os.path.join(data_dir, 'imagenet_comp_graph_label_strings.txt')) 36 | 37 | 38 | def get_inception_model(data_dir='inception', version='v5'): 39 | """Get a pretrained inception network. 40 | 41 | Parameters 42 | ---------- 43 | data_dir : str, optional 44 | Location of the pretrained inception network download. 45 | version : str, optional 46 | Version of the model: ['v3'] or 'v5'. 47 | 48 | Returns 49 | ------- 50 | net : dict 51 | {'graph_def': graph_def, 'labels': synsets} 52 | where the graph_def is a tf.GraphDef and the synsets 53 | map an integer label from 0-1000 to a list of names 54 | """ 55 | # Download the trained net 56 | model, labels = inception_download(data_dir, version) 57 | 58 | # Parse the ids and synsets 59 | txt = open(labels).readlines() 60 | synsets = [(key, val.strip()) for key, val in enumerate(txt)] 61 | 62 | # Load the saved graph 63 | with gfile.GFile(model, 'rb') as f: 64 | graph_def = tf.GraphDef() 65 | try: 66 | graph_def.ParseFromString(f.read()) 67 | except: 68 | print('try adding PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python' + 69 | 'to environment. 
e.g.:\n' + 70 | 'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python ipython\n' + 71 | 'See here for info: ' + 72 | 'https://github.com/tensorflow/tensorflow/issues/582') 73 | return { 74 | 'graph_def': graph_def, 75 | 'labels': synsets, 76 | 'preprocess': preprocess, 77 | 'deprocess': deprocess 78 | } 79 | 80 | 81 | def preprocess(img, crop=True, resize=True, dsize=(299, 299)): 82 | if img.dtype != np.uint8: 83 | img *= 255.0 84 | 85 | if crop: 86 | crop = np.min(img.shape[:2]) 87 | r = (img.shape[0] - crop) // 2 88 | c = (img.shape[1] - crop) // 2 89 | cropped = img[r: r + crop, c: c + crop] 90 | else: 91 | cropped = img 92 | 93 | if resize: 94 | rsz = imresize(cropped, dsize, preserve_range=True) 95 | else: 96 | rsz = cropped 97 | 98 | if rsz.ndim == 2: 99 | rsz = rsz[..., np.newaxis] 100 | 101 | rsz = rsz.astype(np.float32) 102 | # subtract imagenet mean 103 | return (rsz - 117) 104 | 105 | 106 | def deprocess(img): 107 | return np.clip(img + 117, 0, 255).astype(np.uint8) 108 | 109 | 110 | def test_inception(): 111 | """Loads the inception network and applies it to a test image. 112 | """ 113 | with tf.Session() as sess: 114 | net = get_inception_model() 115 | tf.import_graph_def(net['graph_def'], name='inception') 116 | g = tf.get_default_graph() 117 | names = [op.name for op in g.get_operations()] 118 | x = g.get_tensor_by_name(names[0] + ':0') 119 | softmax = g.get_tensor_by_name(names[-3] + ':0') 120 | 121 | from skimage import data 122 | img = preprocess(data.coffee())[np.newaxis] 123 | res = np.squeeze(softmax.eval(feed_dict={x: img})) 124 | print([(res[idx], net['labels'][idx]) 125 | for idx in res.argsort()[-5:][::-1]]) 126 | 127 | """Let's visualize the network's gradient activation 128 | when backpropagated to the original input image. This 129 | is effectively telling us which pixels contribute to the 130 | predicted class or given neuron""" 131 | pools = [name for name in names if 'pool' in name.split('/')[-1]] 132 | fig, axs = plt.subplots(1, len(pools)) 133 | for pool_i, poolname in enumerate(pools): 134 | pool = g.get_tensor_by_name(poolname + ':0') 135 | pool.get_shape() 136 | neuron = tf.reduce_max(pool, 1) 137 | saliency = tf.gradients(neuron, x) 138 | neuron_idx = tf.arg_max(pool, 1) 139 | this_res = sess.run([saliency[0], neuron_idx], 140 | feed_dict={x: img}) 141 | 142 | grad = this_res[0][0] / np.max(np.abs(this_res[0])) 143 | axs[pool_i].imshow((grad * 128 + 128).astype(np.uint8)) 144 | axs[pool_i].set_title(poolname) 145 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/nb_utils.py: -------------------------------------------------------------------------------- 1 | """Utility for displaying Tensorflow graphs from: 2 | https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/deepdream/deepdream.ipynb 3 | """ 4 | import tensorflow as tf 5 | import numpy as np 6 | from IPython.display import display, HTML 7 | 8 | 9 | def show_graph(graph_def): 10 | # Helper functions for TF Graph visualization 11 | def _strip_consts(graph_def, max_const_size=32): 12 | """Strip large constant values from graph_def.""" 13 | strip_def = tf.GraphDef() 14 | for n0 in graph_def.node: 15 | n = strip_def.node.add() 16 | n.MergeFrom(n0) 17 | if n.op == 'Const': 18 | tensor = n.attr['value'].tensor 19 | size = len(tensor.tensor_content) 20 | if size > max_const_size: 21 | tensor.tensor_content = "".format(size).encode() 22 | return strip_def 23 | 24 | def _rename_nodes(graph_def, 
rename_func): 25 | res_def = tf.GraphDef() 26 | for n0 in graph_def.node: 27 | n = res_def.node.add() 28 | n.MergeFrom(n0) 29 | n.name = rename_func(n.name) 30 | for i, s in enumerate(n.input): 31 | n.input[i] = rename_func(s) if s[0] != '^' else '^' + rename_func(s[1:]) 32 | return res_def 33 | 34 | def _show_entire_graph(graph_def, max_const_size=32): 35 | """Visualize TensorFlow graph.""" 36 | if hasattr(graph_def, 'as_graph_def'): 37 | graph_def = graph_def.as_graph_def() 38 | strip_def = _strip_consts(graph_def, max_const_size=max_const_size) 39 | code = """ 40 | 45 | 46 |
<script>
  function load() {{
    document.getElementById("{id}").pbtxt = {data};
  }}
</script>
<link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
<div style="height:600px">
  <tf-graph-basic id="{id}"></tf-graph-basic>
</div>
49 | """.format(data=repr(str(strip_def)), id='graph' + str(np.random.rand())) 50 | 51 | iframe = """ 52 | 53 | """.format(code.replace('"', '"')) 54 | display(HTML(iframe)) 55 | # Visualizing the network graph. Be sure expand the "mixed" nodes to see their 56 | # internal structure. We are going to visualize "Conv2D" nodes. 57 | tmp_def = _rename_nodes(graph_def, lambda s: "/".join(s.split('_', 1))) 58 | _show_entire_graph(tmp_def) 59 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/stylenet.py: -------------------------------------------------------------------------------- 1 | """Style Net w/ tests for Video Style Net. 2 | 3 | Video Style Net requires OpenCV 3.0.0+ w/ Contrib for Python to be installed. 4 | 5 | Creative Applications of Deep Learning w/ Tensorflow. 6 | Kadenze, Inc. 7 | Copyright Parag K. Mital, June 2016. 8 | """ 9 | import tensorflow as tf 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | import os 13 | from . import vgg16 14 | from . import gif 15 | 16 | 17 | def make_4d(img): 18 | """Create a 4-dimensional N x H x W x C image. 19 | 20 | Parameters 21 | ---------- 22 | img : np.ndarray 23 | Given image as H x W x C or H x W. 24 | 25 | Returns 26 | ------- 27 | img : np.ndarray 28 | N x H x W x C image. 29 | 30 | Raises 31 | ------ 32 | ValueError 33 | Unexpected number of dimensions. 34 | """ 35 | if img.ndim == 2: 36 | img = np.expand_dims(img[np.newaxis], 3) 37 | elif img.ndim == 3: 38 | img = img[np.newaxis] 39 | elif img.ndim == 4: 40 | return img 41 | else: 42 | raise ValueError('Incorrect dimensions for image!') 43 | return img 44 | 45 | 46 | def stylize(content_img, style_img, base_img=None, saveto=None, gif_step=5, 47 | n_iterations=100, style_weight=1.0, content_weight=1.0): 48 | """Stylization w/ the given content and style images. 49 | 50 | Follows the approach in Leon Gatys et al. 51 | 52 | Parameters 53 | ---------- 54 | content_img : np.ndarray 55 | Image to use for finding the content features. 56 | style_img : TYPE 57 | Image to use for finding the style features. 58 | base_img : None, optional 59 | Image to use for the base content. Can be noise or an existing image. 60 | If None, the content image will be used. 61 | saveto : str, optional 62 | Name of GIF image to write to, e.g. "stylization.gif" 63 | gif_step : int, optional 64 | Modulo of iterations to save the current stylization. 65 | n_iterations : int, optional 66 | Number of iterations to run for. 67 | style_weight : float, optional 68 | Weighting on the style features. 69 | content_weight : float, optional 70 | Weighting on the content features. 71 | 72 | Returns 73 | ------- 74 | stylization : np.ndarray 75 | Final iteration of the stylization. 
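    Examples
    --------
    A minimal sketch; 'content.png' and 'style.png' are placeholder filenames,
    not files shipped with this repository:

    >>> content = plt.imread('content.png')
    >>> style = plt.imread('style.png')
    >>> result = stylize(content, style, n_iterations=100, saveto='stylization.gif')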
76 | """ 77 | # Preprocess both content and style images 78 | content_img = make_4d(content_img) 79 | style_img = make_4d(style_img) 80 | if base_img is None: 81 | base_img = content_img 82 | else: 83 | base_img = make_4d(base_img) 84 | 85 | # Get Content and Style features 86 | net = vgg16.get_vgg_model() 87 | g = tf.Graph() 88 | with tf.Session(graph=g) as sess: 89 | tf.import_graph_def(net['graph_def'], name='vgg') 90 | names = [op.name for op in g.get_operations()] 91 | x = g.get_tensor_by_name(names[0] + ':0') 92 | content_layer = 'vgg/conv3_2/conv3_2:0' 93 | content_features = g.get_tensor_by_name( 94 | content_layer).eval(feed_dict={ 95 | x: content_img, 96 | 'vgg/dropout_1/random_uniform:0': [[1.0]], 97 | 'vgg/dropout/random_uniform:0': [[1.0]]}) 98 | style_layers = ['vgg/conv1_1/conv1_1:0', 99 | 'vgg/conv2_1/conv2_1:0', 100 | 'vgg/conv3_1/conv3_1:0', 101 | 'vgg/conv4_1/conv4_1:0', 102 | 'vgg/conv5_1/conv5_1:0'] 103 | style_activations = [] 104 | for style_i in style_layers: 105 | style_activation_i = g.get_tensor_by_name(style_i).eval( 106 | feed_dict={ 107 | x: style_img, 108 | 'vgg/dropout_1/random_uniform:0': [[1.0]], 109 | 'vgg/dropout/random_uniform:0': [[1.0]]}) 110 | style_activations.append(style_activation_i) 111 | style_features = [] 112 | for style_activation_i in style_activations: 113 | s_i = np.reshape(style_activation_i, 114 | [-1, style_activation_i.shape[-1]]) 115 | gram_matrix = np.matmul(s_i.T, s_i) / s_i.size 116 | style_features.append(gram_matrix.astype(np.float32)) 117 | 118 | # Optimize both 119 | g = tf.Graph() 120 | with tf.Session(graph=g) as sess: 121 | net_input = tf.Variable(base_img) 122 | tf.import_graph_def( 123 | net['graph_def'], 124 | name='vgg', 125 | input_map={'images:0': net_input}) 126 | 127 | content_loss = tf.nn.l2_loss((g.get_tensor_by_name(content_layer) - 128 | content_features) / 129 | content_features.size) 130 | style_loss = np.float32(0.0) 131 | for style_layer_i, style_gram_i in zip(style_layers, style_features): 132 | layer_i = g.get_tensor_by_name(style_layer_i) 133 | layer_shape = layer_i.get_shape().as_list() 134 | layer_size = layer_shape[1] * layer_shape[2] * layer_shape[3] 135 | layer_flat = tf.reshape(layer_i, [-1, layer_shape[3]]) 136 | gram_matrix = tf.matmul( 137 | tf.transpose(layer_flat), layer_flat) / layer_size 138 | style_loss = tf.add( 139 | style_loss, tf.nn.l2_loss( 140 | (gram_matrix - style_gram_i) / 141 | np.float32(style_gram_i.size))) 142 | loss = content_weight * content_loss + style_weight * style_loss 143 | optimizer = tf.train.AdamOptimizer(0.01).minimize(loss) 144 | 145 | sess.run(tf.initialize_all_variables()) 146 | imgs = [] 147 | for it_i in range(n_iterations): 148 | _, this_loss, synth = sess.run( 149 | [optimizer, loss, net_input], 150 | feed_dict={ 151 | 'vgg/dropout_1/random_uniform:0': np.ones( 152 | g.get_tensor_by_name( 153 | 'vgg/dropout_1/random_uniform:0' 154 | ).get_shape().as_list()), 155 | 'vgg/dropout/random_uniform:0': np.ones( 156 | g.get_tensor_by_name( 157 | 'vgg/dropout/random_uniform:0' 158 | ).get_shape().as_list()) 159 | }) 160 | print("iteration %d, loss: %f, range: (%f - %f)" % 161 | (it_i, this_loss, np.min(synth), np.max(synth)), end='\r') 162 | if it_i % gif_step == 0: 163 | imgs.append(np.clip(synth[0], 0, 1)) 164 | if saveto is not None: 165 | gif.build_gif(imgs, saveto=saveto) 166 | return np.clip(synth[0], 0, 1) 167 | 168 | 169 | def warp_img(img, dx, dy): 170 | """Apply the motion vectors to the given image. 
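    Each pixel (row, col) of `img` is copied forward to the location offset by
    (dy[row, col], dx[row, col]), clipped to the image bounds.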
171 | 172 | Parameters 173 | ---------- 174 | img : np.ndarray 175 | Input image to apply motion to. 176 | dx : np.ndarray 177 | H x W matrix defining the magnitude of the X vector 178 | dy : np.ndarray 179 | H x W matrix defining the magnitude of the Y vector 180 | 181 | Returns 182 | ------- 183 | img : np.ndarray 184 | Image with pixels warped according to dx, dy. 185 | """ 186 | warped = img.copy() 187 | for row_i in range(img.shape[0]): 188 | for col_i in range(img.shape[1]): 189 | dx_i = int(np.round(dx[row_i, col_i])) 190 | dy_i = int(np.round(dy[row_i, col_i])) 191 | sample_dx = np.clip(dx_i + col_i, 0, img.shape[0] - 1) 192 | sample_dy = np.clip(dy_i + row_i, 0, img.shape[1] - 1) 193 | warped[sample_dy, sample_dx, :] = img[row_i, col_i, :] 194 | return warped 195 | 196 | 197 | def test_video(style_img='arles.jpg', videodir='kurosawa'): 198 | r"""Test for artistic stylization using video. 199 | 200 | This requires the python installation of OpenCV for the Deep Flow algorithm. 201 | If cv2 is not found, then there will be reduced "temporal coherence". 202 | 203 | Unfortunately, installing opencv for python3 is not the easiest thing to do. 204 | OSX users can install this using: 205 | 206 | $ brew install opencv --with-python3 --with-contrib 207 | 208 | then will have to symlink the libraries. I think you can do this w/: 209 | 210 | $ brew link --force opencv3 211 | 212 | But the problems start to arise depending on which python you have 213 | installed, and it is always a mess w/ homebrew. Sorry! 214 | 215 | Your best bet is installing from source. Something along 216 | these lines should get you there: 217 | 218 | $ cd ~ 219 | $ git clone https://github.com/Itseez/opencv.git 220 | $ cd opencv 221 | $ git checkout 3.1.0 222 | $ cd ~ 223 | $ git clone https://github.com/Itseez/opencv_contrib.git 224 | $ cd opencv_contrib 225 | $ git checkout 3.1.0 226 | $ cd ~/opencv 227 | $ mkdir build 228 | $ cd build 229 | $ cmake -D CMAKE_BUILD_TYPE=RELEASE \ 230 | -D CMAKE_INSTALL_PREFIX=/usr/local \ 231 | -D INSTALL_C_EXAMPLES=OFF \ 232 | -D INSTALL_PYTHON_EXAMPLES=OFF \ 233 | -D OPENCV_EXTRA_MODULES_PATH=~/opencv_contrib/modules \ 234 | -D BUILD_EXAMPLES=OFF .. 235 | 236 | Parameters 237 | ---------- 238 | style_img : str, optional 239 | Location to style image 240 | videodir : str, optional 241 | Location to directory containing images of each frame to stylize. 242 | 243 | Returns 244 | ------- 245 | imgs : list of np.ndarray 246 | Stylized images for each frame. 
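    Examples
    --------
    A minimal sketch using the default arguments; 'arles.jpg' and a 'kurosawa'
    directory of '.png' frames must already exist on disk:

    >>> frames = test_video(style_img='arles.jpg', videodir='kurosawa')
    >>> gif.build_gif(frames, saveto='video_stylized.gif')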
247 | """ 248 | has_cv2 = True 249 | try: 250 | import cv2 251 | has_cv2 = True 252 | optflow = cv2.optflow.createOptFlow_DeepFlow() 253 | except ImportError: 254 | has_cv2 = False 255 | 256 | style_img = plt.imread(style_img) 257 | content_files = [os.path.join(videodir, f) 258 | for f in os.listdir(videodir) if f.endswith('.png')] 259 | content_img = plt.imread(content_files[0]) 260 | from scipy.misc import imresize 261 | style_img = imresize(style_img, (448, 448)).astype(np.float32) / 255.0 262 | content_img = imresize(content_img, (448, 448)).astype(np.float32) / 255.0 263 | if has_cv2: 264 | prev_lum = cv2.cvtColor(content_img, cv2.COLOR_RGB2HSV)[:, :, 2] 265 | else: 266 | prev_lum = (content_img[..., 0] * 0.3 + 267 | content_img[..., 1] * 0.59 + 268 | content_img[..., 2] * 0.11) 269 | imgs = [] 270 | stylized = stylize(content_img, style_img, content_weight=5.0, 271 | style_weight=0.5, n_iterations=50) 272 | plt.imsave(fname=content_files[0] + 'stylized.png', arr=stylized) 273 | imgs.append(stylized) 274 | for f in content_files[1:]: 275 | content_img = plt.imread(f) 276 | content_img = imresize(content_img, (448, 448)).astype(np.float32) / 255.0 277 | if has_cv2: 278 | lum = cv2.cvtColor(content_img, cv2.COLOR_RGB2HSV)[:, :, 2] 279 | flow = optflow.calc(prev_lum, lum, None) 280 | warped = warp_img(stylized, flow[..., 0], flow[..., 1]) 281 | stylized = stylize(content_img, style_img, content_weight=5.0, 282 | style_weight=0.5, base_img=warped, n_iterations=50) 283 | else: 284 | lum = (content_img[..., 0] * 0.3 + 285 | content_img[..., 1] * 0.59 + 286 | content_img[..., 2] * 0.11) 287 | stylized = stylize(content_img, style_img, content_weight=5.0, 288 | style_weight=0.5, base_img=None, n_iterations=50) 289 | imgs.append(stylized) 290 | plt.imsave(fname=f + 'stylized.png', arr=stylized) 291 | prev_lum = lum 292 | return imgs 293 | 294 | 295 | def test(): 296 | """Test for artistic stylization.""" 297 | from six.moves import urllib 298 | f = ('https://upload.wikimedia.org/wikipedia/commons/thumb/5/54/' + 299 | 'Claude_Monet%2C_Impression%2C_soleil_levant.jpg/617px-Claude_Monet' + 300 | '%2C_Impression%2C_soleil_levant.jpg?download') 301 | filepath, _ = urllib.request.urlretrieve(f, f.split('/')[-1], None) 302 | style = plt.imread(filepath) 303 | 304 | f = ('https://upload.wikimedia.org/wikipedia/commons/thumb/a/ae/' + 305 | 'El_jard%C3%ADn_de_las_Delicias%2C_de_El_Bosco.jpg/640px-El_jard' + 306 | '%C3%ADn_de_las_Delicias%2C_de_El_Bosco.jpg') 307 | filepath, _ = urllib.request.urlretrieve(f, f.split('/')[-1], None) 308 | content = plt.imread(filepath) 309 | 310 | stylize(content, style) 311 | 312 | 313 | if __name__ == '__main__': 314 | test_video() 315 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/utils.py: -------------------------------------------------------------------------------- 1 | """Utilities used in the Kadenze Academy Course on Deep Learning w/ Tensorflow. 2 | 3 | Creative Applications of Deep Learning w/ Tensorflow. 4 | Kadenze, Inc. 5 | Parag K. Mital 6 | 7 | Copyright Parag K. Mital, June 2016. 8 | """ 9 | import matplotlib.pyplot as plt 10 | import tensorflow as tf 11 | import urllib 12 | import numpy as np 13 | import zipfile 14 | import os 15 | from scipy.io import wavfile 16 | 17 | 18 | def download(path): 19 | """Use urllib to download a file. 
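    If a file with the same name already exists in the working directory, the
    download is skipped and that filename is returned instead.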
20 | 21 | Parameters 22 | ---------- 23 | path : str 24 | Url to download 25 | 26 | Returns 27 | ------- 28 | path : str 29 | Location of downloaded file. 30 | """ 31 | import os 32 | from six.moves import urllib 33 | 34 | fname = path.split('/')[-1] 35 | if os.path.exists(fname): 36 | return fname 37 | 38 | print('Downloading ' + path) 39 | 40 | def progress(count, block_size, total_size): 41 | if count % 20 == 0: 42 | print('Downloaded %02.02f/%02.02f MB' % ( 43 | count * block_size / 1024.0 / 1024.0, 44 | total_size / 1024.0 / 1024.0), end='\r') 45 | 46 | filepath, _ = urllib.request.urlretrieve( 47 | path, filename=fname, reporthook=progress) 48 | return filepath 49 | 50 | 51 | def download_and_extract_tar(path, dst): 52 | """Download and extract a tar file. 53 | 54 | Parameters 55 | ---------- 56 | path : str 57 | Url to tar file to download. 58 | dst : str 59 | Location to save tar file contents. 60 | """ 61 | import tarfile 62 | filepath = download(path) 63 | if not os.path.exists(dst): 64 | os.makedirs(dst) 65 | tarfile.open(filepath, 'r:gz').extractall(dst) 66 | 67 | 68 | def download_and_extract_zip(path, dst): 69 | """Download and extract a zip file. 70 | 71 | Parameters 72 | ---------- 73 | path : str 74 | Url to zip file to download. 75 | dst : str 76 | Location to save zip file contents. 77 | """ 78 | import zipfile 79 | filepath = download(path) 80 | if not os.path.exists(dst): 81 | os.makedirs(dst) 82 | zf = zipfile.ZipFile(file=filepath) 83 | zf.extractall(dst) 84 | 85 | 86 | def load_audio(filename, b_normalize=True): 87 | """Load the audiofile at the provided filename using scipy.io.wavfile. 88 | 89 | Optionally normalizes the audio to the maximum value. 90 | 91 | Parameters 92 | ---------- 93 | filename : str 94 | File to load. 95 | b_normalize : bool, optional 96 | Normalize to the maximum value. 97 | """ 98 | sr, s = wavfile.read(filename) 99 | if b_normalize: 100 | s = s.astype(np.float32) 101 | s = (s / np.max(np.abs(s))) 102 | s -= np.mean(s) 103 | return s 104 | 105 | 106 | def corrupt(x): 107 | """Take an input tensor and add uniform masking. 108 | 109 | Parameters 110 | ---------- 111 | x : Tensor/Placeholder 112 | Input to corrupt. 113 | Returns 114 | ------- 115 | x_corrupted : Tensor 116 | 50 pct of values corrupted. 117 | """ 118 | return tf.mul(x, tf.cast(tf.random_uniform(shape=tf.shape(x), 119 | minval=0, 120 | maxval=2, 121 | dtype=tf.int32), tf.float32)) 122 | 123 | 124 | def interp(l, r, n_samples): 125 | """Intepolate between the arrays l and r, n_samples times. 126 | 127 | Parameters 128 | ---------- 129 | l : np.ndarray 130 | Left edge 131 | r : np.ndarray 132 | Right edge 133 | n_samples : int 134 | Number of samples 135 | 136 | Returns 137 | ------- 138 | arr : np.ndarray 139 | Inteporalted array 140 | """ 141 | return np.array([ 142 | l + step_i / (n_samples - 1) * (r - l) 143 | for step_i in range(n_samples)]) 144 | 145 | 146 | def make_latent_manifold(corners, n_samples): 147 | """Create a 2d manifold out of the provided corners: n_samples * n_samples. 148 | 149 | Parameters 150 | ---------- 151 | corners : list of np.ndarray 152 | The four corners to intepolate. 153 | n_samples : int 154 | Number of samples to use in interpolation. 
155 | 156 | Returns 157 | ------- 158 | arr : np.ndarray 159 | Stacked array of all 2D interpolated samples 160 | """ 161 | left = interp(corners[0], corners[1], n_samples) 162 | right = interp(corners[2], corners[3], n_samples) 163 | 164 | embedding = [] 165 | for row_i in range(n_samples): 166 | embedding.append(interp(left[row_i], right[row_i], n_samples)) 167 | return np.vstack(embedding) 168 | 169 | 170 | def imcrop_tosquare(img): 171 | """Make any image a square image. 172 | 173 | Parameters 174 | ---------- 175 | img : np.ndarray 176 | Input image to crop, assumed at least 2d. 177 | 178 | Returns 179 | ------- 180 | crop : np.ndarray 181 | Cropped image. 182 | """ 183 | size = np.min(img.shape[:2]) 184 | extra = img.shape[:2] - size 185 | crop = img 186 | for i in np.flatnonzero(extra): 187 | crop = np.take(crop, extra[i] // 2 + np.r_[:size], axis=i) 188 | return crop 189 | 190 | 191 | def slice_montage(montage, img_h, img_w, n_imgs): 192 | """Slice a montage image into n_img h x w images. 193 | 194 | Performs the opposite of the montage function. Takes a montage image and 195 | slices it back into a N x H x W x C image. 196 | 197 | Parameters 198 | ---------- 199 | montage : np.ndarray 200 | Montage image to slice. 201 | img_h : int 202 | Height of sliced image 203 | img_w : int 204 | Width of sliced image 205 | n_imgs : int 206 | Number of images to slice 207 | 208 | Returns 209 | ------- 210 | sliced : np.ndarray 211 | Sliced images as 4d array. 212 | """ 213 | sliced_ds = [] 214 | for i in range(int(np.sqrt(n_imgs))): 215 | for j in range(int(np.sqrt(n_imgs))): 216 | sliced_ds.append(montage[ 217 | 1 + i + i * img_h:1 + i + (i + 1) * img_h, 218 | 1 + j + j * img_w:1 + j + (j + 1) * img_w]) 219 | return np.array(sliced_ds) 220 | 221 | 222 | def montage(images, saveto='montage.png'): 223 | """Draw all images as a montage separated by 1 pixel borders. 224 | 225 | Also saves the file to the destination specified by `saveto`. 226 | 227 | Parameters 228 | ---------- 229 | images : numpy.ndarray 230 | Input array to create montage of. Array should be: 231 | batch x height x width x channels. 232 | saveto : str 233 | Location to save the resulting montage image. 234 | 235 | Returns 236 | ------- 237 | m : numpy.ndarray 238 | Montage image. 239 | """ 240 | if isinstance(images, list): 241 | images = np.array(images) 242 | img_h = images.shape[1] 243 | img_w = images.shape[2] 244 | n_plots = int(np.ceil(np.sqrt(images.shape[0]))) 245 | if len(images.shape) == 4 and images.shape[3] == 3: 246 | m = np.ones( 247 | (images.shape[1] * n_plots + n_plots + 1, 248 | images.shape[2] * n_plots + n_plots + 1, 3)) * 0.5 249 | else: 250 | m = np.ones( 251 | (images.shape[1] * n_plots + n_plots + 1, 252 | images.shape[2] * n_plots + n_plots + 1)) * 0.5 253 | for i in range(n_plots): 254 | for j in range(n_plots): 255 | this_filter = i * n_plots + j 256 | if this_filter < images.shape[0]: 257 | this_img = images[this_filter] 258 | m[1 + i + i * img_h:1 + i + (i + 1) * img_h, 259 | 1 + j + j * img_w:1 + j + (j + 1) * img_w] = this_img 260 | plt.imsave(arr=m, fname=saveto) 261 | return m 262 | 263 | 264 | def montage_filters(W): 265 | """Draws all filters (n_input * n_output filters) as a 266 | montage image separated by 1 pixel borders. 267 | 268 | Parameters 269 | ---------- 270 | W : Tensor 271 | Input tensor to create montage of. 272 | 273 | Returns 274 | ------- 275 | m : numpy.ndarray 276 | Montage image. 
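    Examples
    --------
    A minimal sketch, assuming `W` has already been evaluated to a numpy array
    of shape [k_h, k_w, n_input, n_output] (random values used for illustration):

    >>> W = np.random.randn(5, 5, 3, 16)
    >>> m = montage_filters(W)
    >>> plt.imshow(m, cmap='gray')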
277 | """ 278 | W = np.reshape(W, [W.shape[0], W.shape[1], 1, W.shape[2] * W.shape[3]]) 279 | n_plots = int(np.ceil(np.sqrt(W.shape[-1]))) 280 | m = np.ones( 281 | (W.shape[0] * n_plots + n_plots + 1, 282 | W.shape[1] * n_plots + n_plots + 1)) * 0.5 283 | for i in range(n_plots): 284 | for j in range(n_plots): 285 | this_filter = i * n_plots + j 286 | if this_filter < W.shape[-1]: 287 | m[1 + i + i * W.shape[0]:1 + i + (i + 1) * W.shape[0], 288 | 1 + j + j * W.shape[1]:1 + j + (j + 1) * W.shape[1]] = ( 289 | np.squeeze(W[:, :, :, this_filter])) 290 | return m 291 | 292 | 293 | def get_celeb_files(dst='img_align_celeba', max_images=100): 294 | """Download the first 100 images of the celeb dataset. 295 | 296 | Files will be placed in a directory 'img_align_celeba' if one 297 | doesn't exist. 298 | 299 | Returns 300 | ------- 301 | files : list of strings 302 | Locations to the first 100 images of the celeb net dataset. 303 | """ 304 | # Create a directory 305 | if not os.path.exists(dst): 306 | os.mkdir(dst) 307 | 308 | # Now perform the following 100 times: 309 | for img_i in range(1, max_images + 1): 310 | 311 | # create a string using the current loop counter 312 | f = '000%03d.jpg' % img_i 313 | 314 | if not os.path.exists(os.path.join(dst, f)): 315 | 316 | # and get the url with that string appended the end 317 | url = 'https://s3.amazonaws.com/cadl/celeb-align/' + f 318 | 319 | # We'll print this out to the console so we can see how far we've gone 320 | print(url, end='\r') 321 | 322 | # And now download the url to a location inside our new directory 323 | urllib.request.urlretrieve(url, os.path.join(dst, f)) 324 | 325 | files = [os.path.join(dst, file_i) 326 | for file_i in os.listdir(dst) 327 | if '.jpg' in file_i][:max_images] 328 | return files 329 | 330 | 331 | def get_celeb_imgs(max_images=100): 332 | """Load the first `max_images` images of the celeb dataset. 333 | 334 | Returns 335 | ------- 336 | imgs : list of np.ndarray 337 | List of the first 100 images from the celeb dataset 338 | """ 339 | return [plt.imread(f_i) for f_i in get_celeb_files(max_images=max_images)] 340 | 341 | 342 | def gauss(mean, stddev, ksize): 343 | """Use Tensorflow to compute a Gaussian Kernel. 344 | 345 | Parameters 346 | ---------- 347 | mean : float 348 | Mean of the Gaussian (e.g. 0.0). 349 | stddev : float 350 | Standard Deviation of the Gaussian (e.g. 1.0). 351 | ksize : int 352 | Size of kernel (e.g. 16). 353 | 354 | Returns 355 | ------- 356 | kernel : np.ndarray 357 | Computed Gaussian Kernel using Tensorflow. 358 | """ 359 | g = tf.Graph() 360 | with tf.Session(graph=g): 361 | x = tf.linspace(-3.0, 3.0, ksize) 362 | z = (tf.exp(tf.neg(tf.pow(x - mean, 2.0) / 363 | (2.0 * tf.pow(stddev, 2.0)))) * 364 | (1.0 / (stddev * tf.sqrt(2.0 * 3.1415)))) 365 | return z.eval() 366 | 367 | 368 | def gauss2d(mean, stddev, ksize): 369 | """Use Tensorflow to compute a 2D Gaussian Kernel. 370 | 371 | Parameters 372 | ---------- 373 | mean : float 374 | Mean of the Gaussian (e.g. 0.0). 375 | stddev : float 376 | Standard Deviation of the Gaussian (e.g. 1.0). 377 | ksize : int 378 | Size of kernel (e.g. 16). 379 | 380 | Returns 381 | ------- 382 | kernel : np.ndarray 383 | Computed 2D Gaussian Kernel using Tensorflow. 
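    Examples
    --------
    A minimal usage sketch:

    >>> kernel = gauss2d(mean=0.0, stddev=1.0, ksize=16)
    >>> kernel.shape
    (16, 16)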
384 | """ 385 | z = gauss(mean, stddev, ksize) 386 | g = tf.Graph() 387 | with tf.Session(graph=g): 388 | z_2d = tf.matmul(tf.reshape(z, [ksize, 1]), tf.reshape(z, [1, ksize])) 389 | return z_2d.eval() 390 | 391 | 392 | def convolve(img, kernel): 393 | """Use Tensorflow to convolve a 4D image with a 4D kernel. 394 | 395 | Parameters 396 | ---------- 397 | img : np.ndarray 398 | 4-dimensional image shaped N x H x W x C 399 | kernel : np.ndarray 400 | 4-dimensional image shape K_H, K_W, C_I, C_O corresponding to the 401 | kernel's height and width, the number of input channels, and the 402 | number of output channels. Note that C_I should = C. 403 | 404 | Returns 405 | ------- 406 | result : np.ndarray 407 | Convolved result. 408 | """ 409 | g = tf.Graph() 410 | with tf.Session(graph=g): 411 | convolved = tf.nn.conv2d(img, kernel, strides=[1, 1, 1, 1], padding='SAME') 412 | res = convolved.eval() 413 | return res 414 | 415 | 416 | def gabor(ksize=32): 417 | """Use Tensorflow to compute a 2D Gabor Kernel. 418 | 419 | Parameters 420 | ---------- 421 | ksize : int, optional 422 | Size of kernel. 423 | 424 | Returns 425 | ------- 426 | gabor : np.ndarray 427 | Gabor kernel with ksize x ksize dimensions. 428 | """ 429 | g = tf.Graph() 430 | with tf.Session(graph=g): 431 | z_2d = gauss2d(0.0, 1.0, ksize) 432 | ones = tf.ones((1, ksize)) 433 | ys = tf.sin(tf.linspace(-3.0, 3.0, ksize)) 434 | ys = tf.reshape(ys, [ksize, 1]) 435 | wave = tf.matmul(ys, ones) 436 | gabor = tf.mul(wave, z_2d) 437 | return gabor.eval() 438 | 439 | 440 | def build_submission(filename, file_list, optional_file_list=()): 441 | """Helper utility to check homework assignment submissions and package them. 442 | 443 | Parameters 444 | ---------- 445 | filename : str 446 | Output zip file name 447 | file_list : tuple 448 | Tuple of files to include 449 | """ 450 | # check each file exists 451 | for part_i, file_i in enumerate(file_list): 452 | if not os.path.exists(file_i): 453 | print('\nYou are missing the file {}. 
'.format(file_i) + 454 | 'It does not look like you have completed Part {}.'.format( 455 | part_i + 1)) 456 | 457 | def zipdir(path, zf): 458 | for root, dirs, files in os.walk(path): 459 | for file in files: 460 | # make sure the files are part of the necessary file list 461 | if file.endswith(file_list) or file.endswith(optional_file_list): 462 | zf.write(os.path.join(root, file)) 463 | 464 | # create a zip file with the necessary files 465 | zipf = zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED) 466 | zipdir('.', zipf) 467 | zipf.close() 468 | print('Your assignment zip file has been created!') 469 | print('Now submit the file:\n{}\nto Kadenze for grading!'.format( 470 | os.path.abspath(filename))) 471 | 472 | 473 | def normalize(a, s=0.1): 474 | '''Normalize the image range for visualization''' 475 | return np.uint8(np.clip( 476 | (a - a.mean()) / max(a.std(), 1e-4) * s + 0.5, 477 | 0, 1) * 255) 478 | 479 | 480 | # %% 481 | def weight_variable(shape, **kwargs): 482 | '''Helper function to create a weight variable initialized with 483 | a normal distribution 484 | Parameters 485 | ---------- 486 | shape : list 487 | Size of weight variable 488 | ''' 489 | if isinstance(shape, list): 490 | initial = tf.random_normal(tf.pack(shape), mean=0.0, stddev=0.01) 491 | initial.set_shape(shape) 492 | else: 493 | initial = tf.random_normal(shape, mean=0.0, stddev=0.01) 494 | return tf.Variable(initial, **kwargs) 495 | 496 | 497 | # %% 498 | def bias_variable(shape, **kwargs): 499 | '''Helper function to create a bias variable initialized with 500 | a constant value. 501 | Parameters 502 | ---------- 503 | shape : list 504 | Size of weight variable 505 | ''' 506 | if isinstance(shape, list): 507 | initial = tf.random_normal(tf.pack(shape), mean=0.0, stddev=0.01) 508 | initial.set_shape(shape) 509 | else: 510 | initial = tf.random_normal(shape, mean=0.0, stddev=0.01) 511 | return tf.Variable(initial, **kwargs) 512 | 513 | 514 | def binary_cross_entropy(z, x, name=None): 515 | """Binary Cross Entropy measures cross entropy of a binary variable. 516 | 517 | loss(x, z) = - sum_i (x[i] * log(z[i]) + (1 - x[i]) * log(1 - z[i])) 518 | 519 | Parameters 520 | ---------- 521 | z : tf.Tensor 522 | A `Tensor` of the same type and shape as `x`. 523 | x : tf.Tensor 524 | A `Tensor` of type `float32` or `float64`. 525 | """ 526 | with tf.variable_scope(name or 'bce'): 527 | eps = 1e-12 528 | return (-(x * tf.log(z + eps) + 529 | (1. - x) * tf.log(1. - z + eps))) 530 | 531 | 532 | def conv2d(x, n_output, 533 | k_h=5, k_w=5, d_h=2, d_w=2, 534 | padding='SAME', name='conv2d', reuse=None): 535 | """Helper for creating a 2d convolution operation. 536 | 537 | Parameters 538 | ---------- 539 | x : tf.Tensor 540 | Input tensor to convolve. 541 | n_output : int 542 | Number of filters. 
543 | k_h : int, optional 544 | Kernel height 545 | k_w : int, optional 546 | Kernel width 547 | d_h : int, optional 548 | Height stride 549 | d_w : int, optional 550 | Width stride 551 | padding : str, optional 552 | Padding type: "SAME" or "VALID" 553 | name : str, optional 554 | Variable scope 555 | 556 | Returns 557 | ------- 558 | op : tf.Tensor 559 | Output of convolution 560 | """ 561 | with tf.variable_scope(name or 'conv2d', reuse=reuse): 562 | W = tf.get_variable( 563 | name='W', 564 | shape=[k_h, k_w, x.get_shape()[-1], n_output], 565 | initializer=tf.contrib.layers.xavier_initializer_conv2d()) 566 | 567 | conv = tf.nn.conv2d( 568 | name='conv', 569 | input=x, 570 | filter=W, 571 | strides=[1, d_h, d_w, 1], 572 | padding=padding) 573 | 574 | b = tf.get_variable( 575 | name='b', 576 | shape=[n_output], 577 | initializer=tf.constant_initializer(0.0)) 578 | 579 | h = tf.nn.bias_add( 580 | name='h', 581 | value=conv, 582 | bias=b) 583 | 584 | return h, W 585 | 586 | 587 | def deconv2d(x, n_output_h, n_output_w, n_output_ch, n_input_ch=None, 588 | k_h=5, k_w=5, d_h=2, d_w=2, 589 | padding='SAME', name='deconv2d', reuse=None): 590 | """Deconvolution helper. 591 | 592 | Parameters 593 | ---------- 594 | x : tf.Tensor 595 | Input tensor to convolve. 596 | n_output_h : int 597 | Height of output 598 | n_output_w : int 599 | Width of output 600 | n_output_ch : int 601 | Number of filters. 602 | k_h : int, optional 603 | Kernel height 604 | k_w : int, optional 605 | Kernel width 606 | d_h : int, optional 607 | Height stride 608 | d_w : int, optional 609 | Width stride 610 | padding : str, optional 611 | Padding type: "SAME" or "VALID" 612 | name : str, optional 613 | Variable scope 614 | 615 | Returns 616 | ------- 617 | op : tf.Tensor 618 | Output of deconvolution 619 | """ 620 | with tf.variable_scope(name or 'deconv2d', reuse=reuse): 621 | W = tf.get_variable( 622 | name='W', 623 | shape=[k_h, k_h, n_output_ch, n_input_ch or x.get_shape()[-1]], 624 | initializer=tf.contrib.layers.xavier_initializer_conv2d()) 625 | 626 | conv = tf.nn.conv2d_transpose( 627 | name='conv_t', 628 | value=x, 629 | filter=W, 630 | output_shape=tf.pack( 631 | [tf.shape(x)[0], n_output_h, n_output_w, n_output_ch]), 632 | strides=[1, d_h, d_w, 1], 633 | padding=padding) 634 | 635 | conv.set_shape([None, n_output_h, n_output_w, n_output_ch]) 636 | 637 | b = tf.get_variable( 638 | name='b', 639 | shape=[n_output_ch], 640 | initializer=tf.constant_initializer(0.0)) 641 | 642 | h = tf.nn.bias_add(name='h', value=conv, bias=b) 643 | 644 | return h, W 645 | 646 | 647 | def lrelu(features, leak=0.2): 648 | """Leaky rectifier. 649 | 650 | Parameters 651 | ---------- 652 | features : tf.Tensor 653 | Input to apply leaky rectifier to. 654 | leak : float, optional 655 | Percentage of leak. 656 | 657 | Returns 658 | ------- 659 | op : tf.Tensor 660 | Resulting output of applying leaky rectifier activation. 661 | """ 662 | f1 = 0.5 * (1 + leak) 663 | f2 = 0.5 * (1 - leak) 664 | return f1 * features + f2 * abs(features) 665 | 666 | 667 | def linear(x, n_output, name=None, activation=None, reuse=None): 668 | """Fully connected layer. 
669 | 670 | Parameters 671 | ---------- 672 | x : tf.Tensor 673 | Input tensor to connect 674 | n_output : int 675 | Number of output neurons 676 | name : None, optional 677 | Scope to apply 678 | 679 | Returns 680 | ------- 681 | h, W : tf.Tensor, tf.Tensor 682 | Output of fully connected layer and the weight matrix 683 | """ 684 | if len(x.get_shape()) != 2: 685 | x = flatten(x, reuse=reuse) 686 | 687 | n_input = x.get_shape().as_list()[1] 688 | 689 | with tf.variable_scope(name or "fc", reuse=reuse): 690 | W = tf.get_variable( 691 | name='W', 692 | shape=[n_input, n_output], 693 | dtype=tf.float32, 694 | initializer=tf.contrib.layers.xavier_initializer()) 695 | 696 | b = tf.get_variable( 697 | name='b', 698 | shape=[n_output], 699 | dtype=tf.float32, 700 | initializer=tf.constant_initializer(0.0)) 701 | 702 | h = tf.nn.bias_add( 703 | name='h', 704 | value=tf.matmul(x, W), 705 | bias=b) 706 | 707 | if activation: 708 | h = activation(h) 709 | 710 | return h, W 711 | 712 | 713 | def flatten(x, name=None, reuse=None): 714 | """Flatten Tensor to 2-dimensions. 715 | 716 | Parameters 717 | ---------- 718 | x : tf.Tensor 719 | Input tensor to flatten. 720 | name : None, optional 721 | Variable scope for flatten operations 722 | 723 | Returns 724 | ------- 725 | flattened : tf.Tensor 726 | Flattened tensor. 727 | """ 728 | with tf.variable_scope('flatten'): 729 | dims = x.get_shape().as_list() 730 | if len(dims) == 4: 731 | flattened = tf.reshape( 732 | x, 733 | shape=[-1, dims[1] * dims[2] * dims[3]]) 734 | elif len(dims) == 2 or len(dims) == 1: 735 | flattened = x 736 | else: 737 | raise ValueError('Expected n dimensions of 1, 2 or 4. Found:', 738 | len(dims)) 739 | 740 | return flattened 741 | 742 | 743 | def to_tensor(x): 744 | """Convert 2 dim Tensor to a 4 dim Tensor ready for convolution. 745 | 746 | Performs the opposite of flatten(x). If the tensor is already 4-D, this 747 | returns the same as the input, leaving it unchanged. 748 | 749 | Parameters 750 | ---------- 751 | x : tf.Tesnor 752 | Input 2-D tensor. If 4-D already, left unchanged. 753 | 754 | Returns 755 | ------- 756 | x : tf.Tensor 757 | 4-D representation of the input. 758 | 759 | Raises 760 | ------ 761 | ValueError 762 | If the tensor is not 2D or already 4D. 763 | """ 764 | if len(x.get_shape()) == 2: 765 | n_input = x.get_shape().as_list()[1] 766 | x_dim = np.sqrt(n_input) 767 | if x_dim == int(x_dim): 768 | x_dim = int(x_dim) 769 | x_tensor = tf.reshape( 770 | x, [-1, x_dim, x_dim, 1], name='reshape') 771 | elif np.sqrt(n_input / 3) == int(np.sqrt(n_input / 3)): 772 | x_dim = int(np.sqrt(n_input / 3)) 773 | x_tensor = tf.reshape( 774 | x, [-1, x_dim, x_dim, 3], name='reshape') 775 | else: 776 | x_tensor = tf.reshape( 777 | x, [-1, 1, 1, n_input], name='reshape') 778 | elif len(x.get_shape()) == 4: 779 | x_tensor = x 780 | else: 781 | raise ValueError('Unsupported input dimensions') 782 | return x_tensor 783 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/vae.py: -------------------------------------------------------------------------------- 1 | """Convolutional/Variational autoencoder, including demonstration of 2 | training such a network on MNIST, CelebNet and the film, "Sita Sings The Blues" 3 | using an image pipeline. 4 | 5 | Copyright Parag K. 
Mital, January 2016 6 | """ 7 | import tensorflow as tf 8 | import numpy as np 9 | import os 10 | from libs.dataset_utils import create_input_pipeline 11 | from libs.datasets import CELEB, MNIST 12 | from libs.batch_norm import batch_norm 13 | from libs import utils 14 | 15 | 16 | def VAE(input_shape=[None, 784], 17 | n_filters=[64, 64, 64], 18 | filter_sizes=[4, 4, 4], 19 | n_hidden=32, 20 | n_code=2, 21 | activation=tf.nn.tanh, 22 | dropout=False, 23 | denoising=False, 24 | convolutional=False, 25 | variational=False): 26 | """(Variational) (Convolutional) (Denoising) Autoencoder. 27 | 28 | Uses tied weights. 29 | 30 | Parameters 31 | ---------- 32 | input_shape : list, optional 33 | Shape of the input to the network. e.g. for MNIST: [None, 784]. 34 | n_filters : list, optional 35 | Number of filters for each layer. 36 | If convolutional=True, this refers to the total number of output 37 | filters to create for each layer, with each layer's number of output 38 | filters as a list. 39 | If convolutional=False, then this refers to the total number of neurons 40 | for each layer in a fully connected network. 41 | filter_sizes : list, optional 42 | Only applied when convolutional=True. This refers to the ksize (height 43 | and width) of each convolutional layer. 44 | n_hidden : int, optional 45 | Only applied when variational=True. This refers to the first fully 46 | connected layer prior to the variational embedding, directly after 47 | the encoding. After the variational embedding, another fully connected 48 | layer is created with the same size prior to decoding. Set to 0 to 49 | not use an additional hidden layer. 50 | n_code : int, optional 51 | Only applied when variational=True. This refers to the number of 52 | latent Gaussians to sample for creating the inner most encoding. 53 | activation : function, optional 54 | Activation function to apply to each layer, e.g. tf.nn.relu 55 | dropout : bool, optional 56 | Whether or not to apply dropout. If using dropout, you must feed a 57 | value for 'keep_prob', as returned in the dictionary. 1.0 means no 58 | dropout is used. 0.0 means every connection is dropped. Sensible 59 | values are between 0.5-0.8. 60 | denoising : bool, optional 61 | Whether or not to apply denoising. If using denoising, you must feed a 62 | value for 'corrupt_prob', as returned in the dictionary. 1.0 means no 63 | corruption is used. 0.0 means every feature is corrupted. Sensible 64 | values are between 0.5-0.8. 65 | convolutional : bool, optional 66 | Whether or not to use a convolutional network or else a fully connected 67 | network will be created. This effects the n_filters parameter's 68 | meaning. 69 | variational : bool, optional 70 | Whether or not to create a variational embedding layer. This will 71 | create a fully connected layer after the encoding, if `n_hidden` is 72 | greater than 0, then will create a multivariate gaussian sampling 73 | layer, then another fully connected layer. The size of the fully 74 | connected layers are determined by `n_hidden`, and the size of the 75 | sampling layer is determined by `n_code`. 76 | 77 | Returns 78 | ------- 79 | model : dict 80 | { 81 | 'cost': Tensor to optimize. 82 | 'Ws': All weights of the encoder. 83 | 'x': Input Placeholder 84 | 'z': Inner most encoding Tensor (latent features) 85 | 'y': Reconstruction of the Decoder 86 | 'keep_prob': Amount to keep when using Dropout 87 | 'corrupt_prob': Amount to corrupt when using Denoising 88 | 'train': Set to True when training/Applies to Batch Normalization. 
89 | } 90 | """ 91 | # network input / placeholders for train (bn) and dropout 92 | x = tf.placeholder(tf.float32, input_shape, 'x') 93 | phase_train = tf.placeholder(tf.bool, name='phase_train') 94 | keep_prob = tf.placeholder(tf.float32, name='keep_prob') 95 | corrupt_prob = tf.placeholder(tf.float32, [1]) 96 | 97 | if denoising: 98 | current_input = utils.corrupt(x) * corrupt_prob + x * (1 - corrupt_prob) 99 | 100 | # 2d -> 4d if convolution 101 | x_tensor = utils.to_tensor(x) if convolutional else x 102 | current_input = x_tensor 103 | 104 | Ws = [] 105 | shapes = [] 106 | 107 | # Build the encoder 108 | for layer_i, n_output in enumerate(n_filters): 109 | with tf.variable_scope('encoder/{}'.format(layer_i)): 110 | shapes.append(current_input.get_shape().as_list()) 111 | if convolutional: 112 | h, W = utils.conv2d(x=current_input, 113 | n_output=n_output, 114 | k_h=filter_sizes[layer_i], 115 | k_w=filter_sizes[layer_i]) 116 | else: 117 | h, W = utils.linear(x=current_input, 118 | n_output=n_output) 119 | h = activation(batch_norm(h, phase_train, 'bn' + str(layer_i))) 120 | if dropout: 121 | h = tf.nn.dropout(h, keep_prob) 122 | Ws.append(W) 123 | current_input = h 124 | 125 | shapes.append(current_input.get_shape().as_list()) 126 | 127 | with tf.variable_scope('variational'): 128 | if variational: 129 | dims = current_input.get_shape().as_list() 130 | flattened = utils.flatten(current_input) 131 | 132 | if n_hidden: 133 | h = utils.linear(flattened, n_hidden, name='W_fc')[0] 134 | h = activation(batch_norm(h, phase_train, 'fc/bn')) 135 | if dropout: 136 | h = tf.nn.dropout(h, keep_prob) 137 | else: 138 | h = flattened 139 | 140 | z_mu = utils.linear(h, n_code, name='mu')[0] 141 | z_log_sigma = 0.5 * utils.linear(h, n_code, name='log_sigma')[0] 142 | 143 | # Sample from noise distribution p(eps) ~ N(0, 1) 144 | epsilon = tf.random_normal( 145 | tf.pack([tf.shape(x)[0], n_code])) 146 | 147 | # Sample from posterior 148 | z = z_mu + tf.mul(epsilon, tf.exp(z_log_sigma)) 149 | 150 | if n_hidden: 151 | h = utils.linear(z, n_hidden, name='fc_t')[0] 152 | h = activation(batch_norm(h, phase_train, 'fc_t/bn')) 153 | if dropout: 154 | h = tf.nn.dropout(h, keep_prob) 155 | else: 156 | h = z 157 | 158 | size = dims[1] * dims[2] * dims[3] if convolutional else dims[1] 159 | h = utils.linear(h, size, name='fc_t2')[0] 160 | current_input = activation(batch_norm(h, phase_train, 'fc_t2/bn')) 161 | if dropout: 162 | current_input = tf.nn.dropout(current_input, keep_prob) 163 | 164 | if convolutional: 165 | current_input = tf.reshape( 166 | current_input, tf.pack([ 167 | tf.shape(current_input)[0], 168 | dims[1], 169 | dims[2], 170 | dims[3]])) 171 | else: 172 | z = current_input 173 | 174 | shapes.reverse() 175 | n_filters.reverse() 176 | Ws.reverse() 177 | 178 | n_filters += [input_shape[-1]] 179 | 180 | # %% 181 | # Decoding layers 182 | for layer_i, n_output in enumerate(n_filters[1:]): 183 | with tf.variable_scope('decoder/{}'.format(layer_i)): 184 | shape = shapes[layer_i + 1] 185 | if convolutional: 186 | h, W = utils.deconv2d(x=current_input, 187 | n_output_h=shape[1], 188 | n_output_w=shape[2], 189 | n_output_ch=shape[3], 190 | n_input_ch=shapes[layer_i][3], 191 | k_h=filter_sizes[layer_i], 192 | k_w=filter_sizes[layer_i]) 193 | else: 194 | h, W = utils.linear(x=current_input, 195 | n_output=n_output) 196 | h = activation(batch_norm(h, phase_train, 'dec/bn' + str(layer_i))) 197 | if dropout: 198 | h = tf.nn.dropout(h, keep_prob) 199 | current_input = h 200 | 201 | y = current_input 202 | x_flat = 
utils.flatten(x) 203 | y_flat = utils.flatten(y) 204 | 205 | # l2 loss 206 | loss_x = tf.reduce_sum(tf.squared_difference(x_flat, y_flat), 1) 207 | 208 | if variational: 209 | # variational lower bound, kl-divergence 210 | loss_z = -0.5 * tf.reduce_sum( 211 | 1.0 + 2.0 * z_log_sigma - 212 | tf.square(z_mu) - tf.exp(2.0 * z_log_sigma), 1) 213 | 214 | # add l2 loss 215 | cost = tf.reduce_mean(loss_x + loss_z) 216 | else: 217 | # just optimize l2 loss 218 | cost = tf.reduce_mean(loss_x) 219 | 220 | return {'cost': cost, 'Ws': Ws, 221 | 'x': x, 'z': z, 'y': y, 222 | 'keep_prob': keep_prob, 223 | 'corrupt_prob': corrupt_prob, 224 | 'train': phase_train} 225 | 226 | 227 | def train_vae(files, 228 | input_shape, 229 | learning_rate=0.0001, 230 | batch_size=100, 231 | n_epochs=50, 232 | n_examples=10, 233 | crop_shape=[64, 64, 3], 234 | crop_factor=0.8, 235 | n_filters=[100, 100, 100, 100], 236 | n_hidden=256, 237 | n_code=50, 238 | convolutional=True, 239 | variational=True, 240 | filter_sizes=[3, 3, 3, 3], 241 | dropout=True, 242 | keep_prob=0.8, 243 | activation=tf.nn.relu, 244 | img_step=100, 245 | save_step=100, 246 | ckpt_name="vae.ckpt"): 247 | """General purpose training of a (Variational) (Convolutional) Autoencoder. 248 | 249 | Supply a list of file paths to images, and this will do everything else. 250 | 251 | Parameters 252 | ---------- 253 | files : list of strings 254 | List of paths to images. 255 | input_shape : list 256 | Must define what the input image's shape is. 257 | learning_rate : float, optional 258 | Learning rate. 259 | batch_size : int, optional 260 | Batch size. 261 | n_epochs : int, optional 262 | Number of epochs. 263 | n_examples : int, optional 264 | Number of example to use while demonstrating the current training 265 | iteration's reconstruction. Creates a square montage, so make 266 | sure int(sqrt(n_examples))**2 = n_examples, e.g. 16, 25, 36, ... 100. 267 | crop_shape : list, optional 268 | Size to centrally crop the image to. 269 | crop_factor : float, optional 270 | Resize factor to apply before cropping. 271 | n_filters : list, optional 272 | Same as VAE's n_filters. 273 | n_hidden : int, optional 274 | Same as VAE's n_hidden. 275 | n_code : int, optional 276 | Same as VAE's n_code. 277 | convolutional : bool, optional 278 | Use convolution or not. 279 | variational : bool, optional 280 | Use variational layer or not. 281 | filter_sizes : list, optional 282 | Same as VAE's filter_sizes. 283 | dropout : bool, optional 284 | Use dropout or not 285 | keep_prob : float, optional 286 | Percent of keep for dropout. 287 | activation : function, optional 288 | Which activation function to use. 289 | img_step : int, optional 290 | How often to save training images showing the manifold and 291 | reconstruction. 292 | save_step : int, optional 293 | How often to save checkpoints. 294 | ckpt_name : str, optional 295 | Checkpoints will be named as this, e.g. 'model.ckpt' 296 | """ 297 | batch = create_input_pipeline( 298 | files=files, 299 | batch_size=batch_size, 300 | n_epochs=n_epochs, 301 | crop_shape=crop_shape, 302 | crop_factor=crop_factor, 303 | shape=input_shape) 304 | 305 | ae = VAE(input_shape=[None] + crop_shape, 306 | convolutional=convolutional, 307 | variational=variational, 308 | n_filters=n_filters, 309 | n_hidden=n_hidden, 310 | n_code=n_code, 311 | dropout=dropout, 312 | filter_sizes=filter_sizes, 313 | activation=activation) 314 | 315 | # Create a manifold of our inner most layer to show 316 | # example reconstructions. 
This is one way to see 317 | # what the "embedding" or "latent space" of the encoder 318 | # is capable of encoding, though note that this is just 319 | # a random hyperplane within the latent space, and does not 320 | # encompass all possible embeddings. 321 | zs = np.random.uniform( 322 | -1.0, 1.0, [4, n_code]).astype(np.float32) 323 | zs = utils.make_latent_manifold(zs, n_examples) 324 | 325 | optimizer = tf.train.AdamOptimizer( 326 | learning_rate=learning_rate).minimize(ae['cost']) 327 | 328 | # We create a session to use the graph 329 | sess = tf.Session() 330 | saver = tf.train.Saver() 331 | sess.run(tf.initialize_all_variables()) 332 | 333 | # This will handle our threaded image pipeline 334 | coord = tf.train.Coordinator() 335 | 336 | # Ensure no more changes to graph 337 | tf.get_default_graph().finalize() 338 | 339 | # Start up the queues for handling the image pipeline 340 | threads = tf.train.start_queue_runners(sess=sess, coord=coord) 341 | 342 | if os.path.exists(ckpt_name): 343 | saver.restore(sess, ckpt_name) 344 | 345 | # Fit all training data 346 | t_i = 0 347 | batch_i = 0 348 | epoch_i = 0 349 | cost = 0 350 | n_files = len(files) 351 | test_xs = sess.run(batch) / 255.0 352 | utils.montage(test_xs, 'test_xs.png') 353 | try: 354 | while not coord.should_stop() and epoch_i < n_epochs: 355 | batch_i += 1 356 | batch_xs = sess.run(batch) / 255.0 357 | train_cost = sess.run([ae['cost'], optimizer], feed_dict={ 358 | ae['x']: batch_xs, ae['train']: True, 359 | ae['keep_prob']: keep_prob})[0] 360 | print(batch_i, train_cost) 361 | cost += train_cost 362 | if batch_i % n_files == 0: 363 | print('epoch:', epoch_i) 364 | print('average cost:', cost / batch_i) 365 | cost = 0 366 | batch_i = 0 367 | epoch_i += 1 368 | 369 | if batch_i % img_step == 0: 370 | # Plot example reconstructions from latent layer 371 | recon = sess.run( 372 | ae['y'], feed_dict={ 373 | ae['z']: zs, 374 | ae['train']: False, 375 | ae['keep_prob']: 1.0}) 376 | utils.montage(recon.reshape([-1] + crop_shape), 377 | 'manifold_%08d.png' % t_i) 378 | 379 | # Plot example reconstructions 380 | recon = sess.run( 381 | ae['y'], feed_dict={ae['x']: test_xs, 382 | ae['train']: False, 383 | ae['keep_prob']: 1.0}) 384 | print('reconstruction (min, max, mean):', 385 | recon.min(), recon.max(), recon.mean()) 386 | utils.montage(recon.reshape([-1] + crop_shape), 387 | 'reconstruction_%08d.png' % t_i) 388 | t_i += 1 389 | 390 | if batch_i % save_step == 0: 391 | # Save the variables to disk. 392 | saver.save(sess, "./" + ckpt_name, 393 | global_step=batch_i, 394 | write_meta_graph=False) 395 | except tf.errors.OutOfRangeError: 396 | print('Done.') 397 | finally: 398 | # One of the threads has issued an exception. So let's tell all the 399 | # threads to shutdown. 400 | coord.request_stop() 401 | 402 | # Wait until all threads have finished. 403 | coord.join(threads) 404 | 405 | # Clean up the session. 406 | sess.close() 407 | 408 | 409 | # %% 410 | def test_mnist(): 411 | """Train an autoencoder on MNIST. 412 | 413 | This function will train an autoencoder on MNIST and also 414 | save many image files during the training process, demonstrating 415 | the latent space of the inner most dimension of the encoder, 416 | as well as reconstructions of the decoder. 
417 | """ 418 | 419 | # load MNIST 420 | n_code = 2 421 | mnist = MNIST(split=[0.8, 0.1, 0.1]) 422 | ae = VAE(input_shape=[None, 784], n_filters=[512, 256], 423 | n_hidden=64, n_code=n_code, activation=tf.nn.sigmoid, 424 | convolutional=False, variational=True) 425 | 426 | n_examples = 100 427 | zs = np.random.uniform( 428 | -1.0, 1.0, [4, n_code]).astype(np.float32) 429 | zs = utils.make_latent_manifold(zs, n_examples) 430 | 431 | learning_rate = 0.02 432 | optimizer = tf.train.AdamOptimizer( 433 | learning_rate=learning_rate).minimize(ae['cost']) 434 | 435 | # We create a session to use the graph 436 | sess = tf.Session() 437 | sess.run(tf.initialize_all_variables()) 438 | 439 | # Fit all training data 440 | t_i = 0 441 | batch_i = 0 442 | batch_size = 200 443 | n_epochs = 10 444 | test_xs = mnist.test.images[:n_examples] 445 | utils.montage(test_xs.reshape((-1, 28, 28)), 'test_xs.png') 446 | for epoch_i in range(n_epochs): 447 | train_i = 0 448 | train_cost = 0 449 | for batch_xs, _ in mnist.train.next_batch(batch_size): 450 | train_cost += sess.run([ae['cost'], optimizer], feed_dict={ 451 | ae['x']: batch_xs, ae['train']: True, ae['keep_prob']: 1.0})[0] 452 | train_i += 1 453 | if batch_i % 10 == 0: 454 | # Plot example reconstructions from latent layer 455 | recon = sess.run( 456 | ae['y'], feed_dict={ 457 | ae['z']: zs, 458 | ae['train']: False, 459 | ae['keep_prob']: 1.0}) 460 | m = utils.montage(recon.reshape((-1, 28, 28)), 461 | 'manifold_%08d.png' % t_i) 462 | # Plot example reconstructions 463 | recon = sess.run( 464 | ae['y'], feed_dict={ae['x']: test_xs, 465 | ae['train']: False, 466 | ae['keep_prob']: 1.0}) 467 | m = utils.montage(recon.reshape( 468 | (-1, 28, 28)), 'reconstruction_%08d.png' % t_i) 469 | t_i += 1 470 | batch_i += 1 471 | 472 | valid_i = 0 473 | valid_cost = 0 474 | for batch_xs, _ in mnist.valid.next_batch(batch_size): 475 | valid_cost += sess.run([ae['cost']], feed_dict={ 476 | ae['x']: batch_xs, ae['train']: False, ae['keep_prob']: 1.0})[0] 477 | valid_i += 1 478 | print('train:', train_cost / train_i, 'valid:', valid_cost / valid_i) 479 | 480 | 481 | def test_celeb(): 482 | """Train an autoencoder on Celeb Net. 483 | """ 484 | files = CELEB() 485 | train_vae( 486 | files=files, 487 | input_shape=[218, 178, 3], 488 | batch_size=100, 489 | n_epochs=50, 490 | crop_shape=[64, 64, 3], 491 | crop_factor=0.8, 492 | convolutional=True, 493 | variational=True, 494 | n_filters=[100, 100, 100], 495 | n_hidden=250, 496 | n_code=100, 497 | dropout=True, 498 | filter_sizes=[3, 3, 3], 499 | activation=tf.nn.sigmoid, 500 | ckpt_name='celeb.ckpt') 501 | 502 | 503 | def test_sita(): 504 | """Train an autoencoder on Sita Sings The Blues. 
505 | """ 506 | if not os.path.exists('sita'): 507 | os.system('wget http://ossguy.com/sita/Sita_Sings_the_Blues_640x360_XviD.avi') 508 | os.mkdir('sita') 509 | os.system('ffmpeg -i Sita_Sings_the_Blues_640x360_XviD.avi -r 60 -f' + 510 | ' image2 -s 160x90 sita/sita-%08d.jpg') 511 | files = [os.path.join('sita', f) for f in os.listdir('sita')] 512 | 513 | train_vae( 514 | files=files, 515 | input_shape=[90, 160, 3], 516 | batch_size=100, 517 | n_epochs=50, 518 | crop_shape=[90, 160, 3], 519 | crop_factor=1.0, 520 | convolutional=True, 521 | variational=True, 522 | n_filters=[100, 100, 100], 523 | n_hidden=250, 524 | n_code=100, 525 | dropout=True, 526 | filter_sizes=[3, 3, 3], 527 | activation=tf.nn.sigmoid, 528 | ckpt_name='sita.ckpt') 529 | 530 | 531 | if __name__ == '__main__': 532 | test_celeb() 533 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/vaegan.py: -------------------------------------------------------------------------------- 1 | """Convolutional/Variational autoencoder, including demonstration of 2 | training such a network on MNIST, CelebNet and the film, "Sita Sings The Blues" 3 | using an image pipeline. 4 | 5 | Parag K. Mital, Jan 2016 6 | """ 7 | import tensorflow as tf 8 | import numpy as np 9 | import os 10 | from libs.dataset_utils import create_input_pipeline 11 | from libs.datasets import CELEB 12 | from libs.utils import * 13 | 14 | 15 | def encoder(x, n_hidden=None, dimensions=[], filter_sizes=[], 16 | convolutional=False, activation=tf.nn.relu, 17 | output_activation=tf.nn.sigmoid): 18 | """Summary 19 | 20 | Parameters 21 | ---------- 22 | x : TYPE 23 | Description 24 | n_hidden : None, optional 25 | Description 26 | dimensions : list, optional 27 | Description 28 | filter_sizes : list, optional 29 | Description 30 | convolutional : bool, optional 31 | Description 32 | activation : TYPE, optional 33 | Description 34 | output_activation : TYPE, optional 35 | Description 36 | 37 | Returns 38 | ------- 39 | name : TYPE 40 | Description 41 | """ 42 | if convolutional: 43 | x_tensor = to_tensor(x) 44 | else: 45 | x_tensor = tf.reshape( 46 | tensor=x, 47 | shape=[-1, dimensions[0]]) 48 | dimensions = dimensions[1:] 49 | current_input = x_tensor 50 | 51 | Ws = [] 52 | hs = [] 53 | shapes = [] 54 | for layer_i, n_output in enumerate(dimensions): 55 | with tf.variable_scope(str(layer_i)): 56 | shapes.append(current_input.get_shape().as_list()) 57 | if convolutional: 58 | h, W = conv2d( 59 | x=current_input, 60 | n_output=n_output, 61 | k_h=filter_sizes[layer_i], 62 | k_w=filter_sizes[layer_i], 63 | padding='SAME') 64 | else: 65 | h, W = linear( 66 | x=current_input, 67 | n_output=n_output) 68 | h = activation(h) 69 | Ws.append(W) 70 | hs.append(h) 71 | 72 | current_input = h 73 | 74 | shapes.append(h.get_shape().as_list()) 75 | 76 | with tf.variable_scope('flatten'): 77 | flattened = flatten(current_input) 78 | 79 | with tf.variable_scope('hidden'): 80 | if n_hidden: 81 | h, W = linear(flattened, n_hidden, name='linear') 82 | h = activation(h) 83 | else: 84 | h = flattened 85 | 86 | return {'z': h, 'Ws': Ws, 'hs': hs, 'shapes': shapes} 87 | 88 | 89 | def decoder(z, shapes, n_hidden=None, 90 | dimensions=[], filter_sizes=[], 91 | convolutional=False, activation=tf.nn.relu, 92 | output_activation=tf.nn.relu): 93 | """Summary 94 | 95 | Parameters 96 | ---------- 97 | z : TYPE 98 | Description 99 | shapes : TYPE 100 | Description 101 | n_hidden : None, optional 102 | Description 103 | 
dimensions : list, optional 104 | Description 105 | filter_sizes : list, optional 106 | Description 107 | convolutional : bool, optional 108 | Description 109 | activation : TYPE, optional 110 | Description 111 | output_activation : TYPE, optional 112 | Description 113 | 114 | Returns 115 | ------- 116 | name : TYPE 117 | Description 118 | """ 119 | with tf.variable_scope('hidden/1'): 120 | if n_hidden: 121 | h = linear(z, n_hidden, name='linear')[0] 122 | h = activation(h) 123 | else: 124 | h = z 125 | 126 | with tf.variable_scope('hidden/2'): 127 | dims = shapes[0] 128 | size = dims[1] * dims[2] * dims[3] if convolutional else dims[1] 129 | h = linear(h, size, name='linear')[0] 130 | current_input = activation(h) 131 | if convolutional: 132 | current_input = tf.reshape( 133 | current_input, 134 | tf.pack([tf.shape(current_input)[0], dims[1], dims[2], dims[3]])) 135 | 136 | Ws = [] 137 | hs = [] 138 | for layer_i, n_output in enumerate(dimensions[1:]): 139 | with tf.variable_scope('decoder/{}'.format(layer_i)): 140 | if convolutional: 141 | shape = shapes[layer_i + 1] 142 | h, W = deconv2d(x=current_input, 143 | n_output_h=shape[1], 144 | n_output_w=shape[2], 145 | n_output_ch=shape[3], 146 | n_input_ch=shapes[layer_i][3], 147 | k_h=filter_sizes[layer_i], 148 | k_w=filter_sizes[layer_i]) 149 | else: 150 | h, W = linear(x=current_input, 151 | n_output=n_output) 152 | if (layer_i + 1) < len(dimensions): 153 | h = activation(h) 154 | else: 155 | h = output_activation(h) 156 | Ws.append(W) 157 | hs.append(h) 158 | current_input = h 159 | 160 | z = tf.identity(current_input, name="x_tilde") 161 | return {'x_tilde': current_input, 'Ws': Ws, 'hs': hs} 162 | 163 | 164 | def variational_bayes(h, n_code): 165 | """Summary 166 | 167 | Parameters 168 | ---------- 169 | h : TYPE 170 | Description 171 | n_code : TYPE 172 | Description 173 | 174 | Returns 175 | ------- 176 | name : TYPE 177 | Description 178 | """ 179 | z_mu = tf.nn.tanh(linear(h, n_code, name='mu')[0]) 180 | z_log_sigma = 0.5 * tf.nn.tanh(linear(h, n_code, name='log_sigma')[0]) 181 | 182 | # Sample from noise distribution p(eps) ~ N(0, 1) 183 | epsilon = tf.random_normal(tf.pack([tf.shape(h)[0], n_code])) 184 | 185 | # Sample from posterior 186 | z = tf.add(z_mu, tf.mul(epsilon, tf.exp(z_log_sigma)), name='z') 187 | # -log(p(z)/q(z|x)), bits by coding. 
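    # For a diagonal Gaussian q(z|x) = N(mu, sigma^2) with prior p(z) = N(0, I),
    # the KL divergence has the closed form computed below:
    #   D_KL(q||p) = -0.5 * sum(1 + 2*log(sigma) - mu^2 - sigma^2)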
188 | # variational bound coding costs kl(p(z|x)||q(z|x)) 189 | # d_kl(q(z|x)||p(z)) 190 | loss_z = -0.5 * tf.reduce_sum( 191 | 1.0 + 2.0 * z_log_sigma - tf.square(z_mu) - tf.exp(2.0 * z_log_sigma), 192 | 1) 193 | return z, z_mu, z_log_sigma, loss_z 194 | 195 | 196 | def discriminator(x, convolutional=True, 197 | filter_sizes=[5, 5, 5, 5], 198 | activation=tf.nn.relu, 199 | n_filters=[100, 100, 100, 100]): 200 | """Summary 201 | 202 | Parameters 203 | ---------- 204 | x : TYPE 205 | Description 206 | convolutional : bool, optional 207 | Description 208 | filter_sizes : list, optional 209 | Description 210 | n_filters : list, optional 211 | Description 212 | 213 | Returns 214 | ------- 215 | name : TYPE 216 | Description 217 | """ 218 | encoding = encoder(x=x, 219 | convolutional=convolutional, 220 | dimensions=n_filters, 221 | filter_sizes=filter_sizes, 222 | activation=activation) 223 | 224 | # flatten, then linear to 1 value 225 | res = flatten(encoding['z'], name='flatten') 226 | if res.get_shape().as_list()[-1] > 1: 227 | res = linear(res, 1)[0] 228 | 229 | return {'logits': res, 'probs': tf.nn.sigmoid(res), 230 | 'Ws': encoding['Ws'], 'hs': encoding['hs']} 231 | 232 | 233 | def VAE(input_shape=[None, 784], 234 | n_filters=[64, 64, 64], 235 | filter_sizes=[4, 4, 4], 236 | n_hidden=32, 237 | n_code=2, 238 | activation=tf.nn.tanh, 239 | convolutional=False, 240 | variational=False): 241 | """Summary 242 | 243 | Parameters 244 | ---------- 245 | input_shape : list, optional 246 | Description 247 | n_filters : list, optional 248 | Description 249 | filter_sizes : list, optional 250 | Description 251 | n_hidden : int, optional 252 | Description 253 | n_code : int, optional 254 | Description 255 | activation : TYPE, optional 256 | Description 257 | convolutional : bool, optional 258 | Description 259 | variational : bool, optional 260 | Description 261 | 262 | Returns 263 | ------- 264 | name : TYPE 265 | Description 266 | """ 267 | # network input / placeholders for train (bn) 268 | x = tf.placeholder(tf.float32, input_shape, 'x') 269 | 270 | with tf.variable_scope('encoder'): 271 | encoding = encoder(x=x, 272 | n_hidden=n_hidden, 273 | convolutional=convolutional, 274 | dimensions=n_filters, 275 | filter_sizes=filter_sizes, 276 | activation=activation) 277 | 278 | if variational: 279 | with tf.variable_scope('variational'): 280 | z, z_mu, z_log_sigma, loss_z = variational_bayes( 281 | h=encoding['z'], n_code=n_code) 282 | else: 283 | z = encoding['z'] 284 | loss_z = None 285 | 286 | shapes = encoding['shapes'].copy() 287 | shapes.reverse() 288 | n_filters = n_filters.copy() 289 | n_filters.reverse() 290 | n_filters += [input_shape[-1]] 291 | 292 | with tf.variable_scope('generator'): 293 | decoding = decoder(z=z, 294 | shapes=shapes, 295 | n_hidden=n_hidden, 296 | dimensions=n_filters, 297 | filter_sizes=filter_sizes, 298 | convolutional=convolutional, 299 | activation=activation) 300 | 301 | x_tilde = decoding['x_tilde'] 302 | x_flat = flatten(x) 303 | x_tilde_flat = flatten(x_tilde) 304 | 305 | # -log(p(x|z)) 306 | loss_x = tf.reduce_sum(tf.squared_difference(x_flat, x_tilde_flat), 1) 307 | return {'loss_x': loss_x, 'loss_z': loss_z, 'x': x, 'z': z, 308 | 'Ws': encoding['Ws'], 'hs': decoding['hs'], 309 | 'x_tilde': x_tilde} 310 | 311 | 312 | def VAEGAN(input_shape=[None, 784], 313 | n_filters=[64, 64, 64], 314 | filter_sizes=[4, 4, 4], 315 | n_hidden=32, 316 | n_code=2, 317 | activation=tf.nn.tanh, 318 | convolutional=False, 319 | variational=False): 320 | """Summary 321 | 322 | 
Parameters 323 | ---------- 324 | input_shape : list, optional 325 | Description 326 | n_filters : list, optional 327 | Description 328 | filter_sizes : list, optional 329 | Description 330 | n_hidden : int, optional 331 | Description 332 | n_code : int, optional 333 | Description 334 | activation : TYPE, optional 335 | Description 336 | convolutional : bool, optional 337 | Description 338 | variational : bool, optional 339 | Description 340 | 341 | Returns 342 | ------- 343 | name : TYPE 344 | Description 345 | """ 346 | # network input / placeholders for train (bn) 347 | x = tf.placeholder(tf.float32, input_shape, 'x') 348 | z_samp = tf.placeholder(tf.float32, [None, n_code], 'z_samp') 349 | 350 | with tf.variable_scope('encoder'): 351 | encoding = encoder(x=x, 352 | n_hidden=n_hidden, 353 | convolutional=convolutional, 354 | dimensions=n_filters, 355 | filter_sizes=filter_sizes, 356 | activation=activation) 357 | 358 | with tf.variable_scope('variational'): 359 | z, z_mu, z_log_sigma, loss_z = variational_bayes( 360 | h=encoding['z'], n_code=n_code) 361 | 362 | shapes = encoding['shapes'].copy() 363 | shapes.reverse() 364 | n_filters_decoder = n_filters.copy() 365 | n_filters_decoder.reverse() 366 | n_filters_decoder += [input_shape[-1]] 367 | 368 | with tf.variable_scope('generator'): 369 | decoding_actual = decoder(z=z, 370 | shapes=shapes, 371 | n_hidden=n_hidden, 372 | convolutional=convolutional, 373 | dimensions=n_filters_decoder, 374 | filter_sizes=filter_sizes, 375 | activation=activation) 376 | 377 | with tf.variable_scope('generator', reuse=True): 378 | decoding_sampled = decoder(z=z_samp, 379 | shapes=shapes, 380 | n_hidden=n_hidden, 381 | convolutional=convolutional, 382 | dimensions=n_filters_decoder, 383 | filter_sizes=filter_sizes, 384 | activation=activation) 385 | 386 | with tf.variable_scope('discriminator'): 387 | D_real = discriminator(x, 388 | filter_sizes=filter_sizes, 389 | n_filters=n_filters, 390 | activation=activation) 391 | 392 | with tf.variable_scope('discriminator', reuse=True): 393 | D_fake = discriminator(decoding_actual['x_tilde'], 394 | filter_sizes=filter_sizes, 395 | n_filters=n_filters, 396 | activation=activation) 397 | 398 | with tf.variable_scope('discriminator', reuse=True): 399 | D_samp = discriminator(decoding_sampled['x_tilde'], 400 | filter_sizes=filter_sizes, 401 | n_filters=n_filters, 402 | activation=activation) 403 | 404 | with tf.variable_scope('loss'): 405 | # Weights influence of content/style of decoder 406 | gamma = tf.placeholder(tf.float32, name='gamma') 407 | 408 | # Discriminator_l Log Likelihood Loss 409 | loss_D_llike = 0 410 | for h_fake, h_real in zip(D_fake['hs'][3:], D_real['hs'][3:]): 411 | loss_D_llike += tf.reduce_sum( 412 | 0.5 * tf.squared_difference( 413 | flatten(h_fake), flatten(h_real)), 1) 414 | 415 | # GAN Loss 416 | eps = 1e-12 417 | loss_real = tf.reduce_sum(tf.log(D_real['probs'] + eps), 1) 418 | loss_fake = tf.reduce_sum(tf.log(1 - D_fake['probs'] + eps), 1) 419 | loss_samp = tf.reduce_sum(tf.log(1 - D_samp['probs'] + eps), 1) 420 | 421 | loss_GAN = (loss_real + loss_fake + loss_samp) / 3.0 422 | 423 | loss_enc = tf.reduce_mean(loss_z + loss_D_llike) 424 | loss_gen = tf.reduce_mean(gamma * loss_D_llike - loss_GAN) 425 | loss_dis = -tf.reduce_mean(loss_GAN) 426 | 427 | return {'x': x, 'z': z, 'x_tilde': decoding_actual['x_tilde'], 428 | 'z_samp': z_samp, 'x_tilde_samp': decoding_sampled['x_tilde'], 429 | 'loss_real': loss_real, 'loss_fake': loss_fake, 'loss_samp': loss_samp, 430 | 'loss_GAN': loss_GAN, 
'loss_D_llike': loss_D_llike, 431 | 'loss_enc': loss_enc, 'loss_gen': loss_gen, 'loss_dis': loss_dis, 432 | 'gamma': gamma} 433 | 434 | 435 | def train_vaegan(files, 436 | learning_rate=0.00001, 437 | batch_size=64, 438 | n_epochs=250, 439 | n_examples=10, 440 | input_shape=[218, 178, 3], 441 | crop_shape=[64, 64, 3], 442 | crop_factor=0.8, 443 | n_filters=[100, 100, 100, 100], 444 | n_hidden=None, 445 | n_code=128, 446 | convolutional=True, 447 | variational=True, 448 | filter_sizes=[3, 3, 3, 3], 449 | activation=tf.nn.elu, 450 | ckpt_name="vaegan.ckpt"): 451 | """Summary 452 | 453 | Parameters 454 | ---------- 455 | files : TYPE 456 | Description 457 | learning_rate : float, optional 458 | Description 459 | batch_size : int, optional 460 | Description 461 | n_epochs : int, optional 462 | Description 463 | n_examples : int, optional 464 | Description 465 | input_shape : list, optional 466 | Description 467 | crop_shape : list, optional 468 | Description 469 | crop_factor : float, optional 470 | Description 471 | n_filters : list, optional 472 | Description 473 | n_hidden : int, optional 474 | Description 475 | n_code : int, optional 476 | Description 477 | convolutional : bool, optional 478 | Description 479 | variational : bool, optional 480 | Description 481 | filter_sizes : list, optional 482 | Description 483 | activation : TYPE, optional 484 | Description 485 | ckpt_name : str, optional 486 | Description 487 | 488 | Returns 489 | ------- 490 | name : TYPE 491 | Description 492 | """ 493 | 494 | ae = VAEGAN(input_shape=[None] + crop_shape, 495 | convolutional=convolutional, 496 | variational=variational, 497 | n_filters=n_filters, 498 | n_hidden=n_hidden, 499 | n_code=n_code, 500 | filter_sizes=filter_sizes, 501 | activation=activation) 502 | 503 | batch = create_input_pipeline( 504 | files=files, 505 | batch_size=batch_size, 506 | n_epochs=n_epochs, 507 | crop_shape=crop_shape, 508 | crop_factor=crop_factor, 509 | shape=input_shape) 510 | 511 | zs = np.random.randn(4, n_code).astype(np.float32) 512 | zs = make_latent_manifold(zs, n_examples) 513 | 514 | opt_enc = tf.train.AdamOptimizer( 515 | learning_rate=learning_rate).minimize( 516 | ae['loss_enc'], 517 | var_list=[var_i for var_i in tf.trainable_variables() 518 | if var_i.name.startswith('encoder')]) 519 | 520 | opt_gen = tf.train.AdamOptimizer( 521 | learning_rate=learning_rate).minimize( 522 | ae['loss_gen'], 523 | var_list=[var_i for var_i in tf.trainable_variables() 524 | if var_i.name.startswith('generator')]) 525 | 526 | opt_dis = tf.train.AdamOptimizer( 527 | learning_rate=learning_rate).minimize( 528 | ae['loss_dis'], 529 | var_list=[var_i for var_i in tf.trainable_variables() 530 | if var_i.name.startswith('discriminator')]) 531 | 532 | sess = tf.Session() 533 | saver = tf.train.Saver() 534 | sess.run(tf.initialize_all_variables()) 535 | coord = tf.train.Coordinator() 536 | tf.get_default_graph().finalize() 537 | threads = tf.train.start_queue_runners(sess=sess, coord=coord) 538 | 539 | if os.path.exists(ckpt_name): 540 | saver.restore(sess, ckpt_name) 541 | print("VAE model restored.") 542 | 543 | t_i = 0 544 | batch_i = 0 545 | epoch_i = 0 546 | 547 | equilibrium = 0.693 548 | margin = 0.4 549 | 550 | n_files = len(files) 551 | test_xs = sess.run(batch) / 255.0 552 | montage(test_xs, 'test_xs.png') 553 | try: 554 | while not coord.should_stop() or epoch_i < n_epochs: 555 | if batch_i % (n_files // batch_size) == 0: 556 | batch_i = 0 557 | epoch_i += 1 558 | print('---------- EPOCH:', epoch_i) 559 | 560 | batch_i += 1 
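            # Balancing heuristic for the VAE/GAN updates below: run the encoder
            # update while measuring the discriminator's real/fake costs, then
            # compare them to the equilibrium value ln(2) ~= 0.693 (the
            # discriminator's cost when it outputs 0.5). If either cost drifts
            # more than `margin` above equilibrium, skip the generator update;
            # if either drifts more than `margin` below it, skip the
            # discriminator update.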
561 | batch_xs = sess.run(batch) / 255.0 562 | batch_zs = np.random.randn(batch_size, n_code).astype(np.float32) 563 | real_cost, fake_cost, _ = sess.run([ 564 | ae['loss_real'], ae['loss_fake'], opt_enc], 565 | feed_dict={ 566 | ae['x']: batch_xs, 567 | ae['gamma']: 0.5}) 568 | real_cost = -np.mean(real_cost) 569 | fake_cost = -np.mean(fake_cost) 570 | print('real:', real_cost, '/ fake:', fake_cost) 571 | 572 | gen_update = True 573 | dis_update = True 574 | 575 | if real_cost > (equilibrium + margin) or \ 576 | fake_cost > (equilibrium + margin): 577 | gen_update = False 578 | 579 | if real_cost < (equilibrium - margin) or \ 580 | fake_cost < (equilibrium - margin): 581 | dis_update = False 582 | 583 | if not (gen_update or dis_update): 584 | gen_update = True 585 | dis_update = True 586 | 587 | if gen_update: 588 | sess.run(opt_gen, feed_dict={ 589 | ae['x']: batch_xs, 590 | ae['z_samp']: batch_zs, 591 | ae['gamma']: 0.5}) 592 | if dis_update: 593 | sess.run(opt_dis, feed_dict={ 594 | ae['x']: batch_xs, 595 | ae['z_samp']: batch_zs, 596 | ae['gamma']: 0.5}) 597 | 598 | if batch_i % 50 == 0: 599 | 600 | # Plot example reconstructions from latent layer 601 | recon = sess.run( 602 | ae['x_tilde'], feed_dict={ 603 | ae['z']: zs}) 604 | print('recon:', recon.min(), recon.max()) 605 | recon = np.clip(recon / recon.max(), 0, 1) 606 | montage(recon.reshape([-1] + crop_shape), 607 | 'imgs/manifold_%08d.png' % t_i) 608 | 609 | # Plot example reconstructions 610 | recon = sess.run( 611 | ae['x_tilde'], feed_dict={ 612 | ae['x']: test_xs}) 613 | print('recon:', recon.min(), recon.max()) 614 | recon = np.clip(recon / recon.max(), 0, 1) 615 | montage(recon.reshape([-1] + crop_shape), 616 | 'imgs/reconstruction_%08d.png' % t_i) 617 | t_i += 1 618 | 619 | if batch_i % 100 == 0: 620 | # Save the variables to disk. 621 | save_path = saver.save(sess, "./" + ckpt_name, 622 | global_step=batch_i, 623 | write_meta_graph=False) 624 | print("Model saved in file: %s" % save_path) 625 | except tf.errors.OutOfRangeError: 626 | print('Done training -- epoch limit reached') 627 | finally: 628 | # One of the threads has issued an exception. So let's tell all the 629 | # threads to shutdown. 630 | coord.request_stop() 631 | 632 | # Wait until all threads have finished. 633 | coord.join(threads) 634 | 635 | # Clean up the session. 
636 | sess.close() 637 | 638 | 639 | def test_celeb(): 640 | """Summary 641 | 642 | Returns 643 | ------- 644 | name : TYPE 645 | Description 646 | """ 647 | files = CELEB() 648 | train_vaegan( 649 | files=files, 650 | batch_size=64, 651 | n_epochs=100, 652 | crop_shape=[100, 100, 3], 653 | crop_factor=0.8, 654 | input_shape=[218, 178, 3], 655 | convolutional=True, 656 | variational=True, 657 | n_filters=[256, 384, 512, 1024, 2048], 658 | n_hidden=None, 659 | n_code=512, 660 | filter_sizes=[3, 3, 3, 3, 3], 661 | activation=tf.nn.elu, 662 | ckpt_name='celeb.ckpt') 663 | 664 | 665 | def test_sita(): 666 | """Summary 667 | 668 | Returns 669 | ------- 670 | name : TYPE 671 | Description 672 | """ 673 | if not os.path.exists('sita'): 674 | os.system('wget http://ossguy.com/sita/Sita_Sings_the_Blues_640x360_XviD.avi') 675 | os.mkdir('sita') 676 | os.system('ffmpeg -i Sita_Sings_the_Blues_640x360_XviD.avi -r 60 -f' + 677 | ' image2 -s 160x90 sita/sita-%08d.jpg') 678 | files = [os.path.join('sita', f) for f in os.listdir('sita')] 679 | 680 | train_vaegan( 681 | files=files, 682 | batch_size=64, 683 | n_epochs=50, 684 | crop_shape=[90, 160, 3], 685 | crop_factor=1.0, 686 | input_shape=[218, 178, 3], 687 | convolutional=True, 688 | variational=True, 689 | n_filters=[100, 100, 100, 100, 100], 690 | n_hidden=250, 691 | n_code=100, 692 | filter_sizes=[3, 3, 3, 3, 2], 693 | activation=tf.nn.elu, 694 | ckpt_name='sita.ckpt') 695 | 696 | 697 | if __name__ == '__main__': 698 | test_celeb() 699 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/vgg16.py: -------------------------------------------------------------------------------- 1 | """ 2 | Creative Applications of Deep Learning w/ Tensorflow. 3 | Kadenze, Inc. 4 | Copyright Parag K. Mital, June 2016. 5 | """ 6 | import tensorflow as tf 7 | import os 8 | import json 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | from skimage.transform import resize as imresize 12 | from .utils import download 13 | 14 | 15 | def get_vgg_face_model(): 16 | download('https://s3.amazonaws.com/cadl/models/vgg_face.tfmodel') 17 | with open("vgg_face.tfmodel", mode='rb') as f: 18 | graph_def = tf.GraphDef() 19 | try: 20 | graph_def.ParseFromString(f.read()) 21 | except: 22 | print('try adding PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python ' + 23 | 'to environment. e.g.:\n' + 24 | 'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python ipython\n' + 25 | 'See here for info: ' + 26 | 'https://github.com/tensorflow/tensorflow/issues/582') 27 | 28 | download('https://s3.amazonaws.com/cadl/models/vgg_face.json') 29 | labels = json.load(open('vgg_face.json')) 30 | 31 | return { 32 | 'graph_def': graph_def, 33 | 'labels': labels, 34 | 'preprocess': preprocess, 35 | 'deprocess': deprocess 36 | } 37 | 38 | 39 | def get_vgg_model(): 40 | download('https://s3.amazonaws.com/cadl/models/vgg16.tfmodel') 41 | with open("vgg16.tfmodel", mode='rb') as f: 42 | graph_def = tf.GraphDef() 43 | try: 44 | graph_def.ParseFromString(f.read()) 45 | except: 46 | print('try adding PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python ' + 47 | 'to environment. 
e.g.:\n' + 48 | 'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python ipython\n' + 49 | 'See here for info: ' + 50 | 'https://github.com/tensorflow/tensorflow/issues/582') 51 | 52 | download('https://s3.amazonaws.com/cadl/models/synset.txt') 53 | with open('synset.txt') as f: 54 | labels = [(idx, l.strip()) for idx, l in enumerate(f.readlines())] 55 | 56 | return { 57 | 'graph_def': graph_def, 58 | 'labels': labels, 59 | 'preprocess': preprocess, 60 | 'deprocess': deprocess 61 | } 62 | 63 | 64 | def preprocess(img, crop=True, resize=True, dsize=(224, 224)): 65 | if img.dtype == np.uint8: 66 | img = img / 255.0 67 | 68 | if crop: 69 | short_edge = min(img.shape[:2]) 70 | yy = int((img.shape[0] - short_edge) / 2) 71 | xx = int((img.shape[1] - short_edge) / 2) 72 | crop_img = img[yy: yy + short_edge, xx: xx + short_edge] 73 | else: 74 | crop_img = img 75 | 76 | if resize: 77 | norm_img = imresize(crop_img, dsize, preserve_range=True) 78 | else: 79 | norm_img = crop_img 80 | 81 | return (norm_img).astype(np.float32) 82 | 83 | 84 | def deprocess(img): 85 | return np.clip(img * 255, 0, 255).astype(np.uint8) 86 | # return ((img / np.max(np.abs(img))) * 127.5 + 87 | # 127.5).astype(np.uint8) 88 | 89 | 90 | def test_vgg(): 91 | """Loads the VGG network and applies it to a test image. 92 | """ 93 | with tf.Session() as sess: 94 | net = get_vgg_model() 95 | tf.import_graph_def(net['graph_def'], name='vgg') 96 | g = tf.get_default_graph() 97 | names = [op.name for op in g.get_operations()] 98 | input_name = names[0] + ':0' 99 | x = g.get_tensor_by_name(input_name) 100 | softmax = g.get_tensor_by_name(names[-2] + ':0') 101 | 102 | og = plt.imread('bosch.png') 103 | img = preprocess(og)[np.newaxis, ...] 104 | res = np.squeeze(softmax.eval(feed_dict={ 105 | x: img, 106 | 'vgg/dropout_1/random_uniform:0': [[1.0]], 107 | 'vgg/dropout/random_uniform:0': [[1.0]]})) 108 | print([(res[idx], net['labels'][idx]) 109 | for idx in res.argsort()[-5:][::-1]]) 110 | 111 | """Let's visualize the network's gradient activation 112 | when backpropagated to the original input image. This 113 | is effectively telling us which pixels contribute to the 114 | predicted class or given neuron""" 115 | features = [name for name in names if 'BiasAdd' in name.split()[-1]] 116 | from math import sqrt, ceil 117 | n_plots = ceil(sqrt(len(features) + 1)) 118 | fig, axs = plt.subplots(n_plots, n_plots) 119 | plot_i = 0 120 | axs[0][0].imshow(img[0]) 121 | for feature_i, featurename in enumerate(features): 122 | plot_i += 1 123 | feature = g.get_tensor_by_name(featurename + ':0') 124 | neuron = tf.reduce_max(feature, 1) 125 | saliency = tf.gradients(tf.reduce_sum(neuron), x) 126 | neuron_idx = tf.arg_max(feature, 1) 127 | this_res = sess.run([saliency[0], neuron_idx], feed_dict={ 128 | x: img, 129 | 'vgg/dropout_1/random_uniform:0': [[1.0]], 130 | 'vgg/dropout/random_uniform:0': [[1.0]]}) 131 | 132 | grad = this_res[0][0] / np.max(np.abs(this_res[0])) 133 | ax = axs[plot_i // n_plots][plot_i % n_plots] 134 | ax.imshow((grad * 127.5 + 127.5).astype(np.uint8)) 135 | ax.set_title(featurename) 136 | 137 | """Deep Dreaming takes the backpropagated gradient activations 138 | and simply adds it to the image, running the same process again 139 | and again in a loop. 
There are many tricks one can add to this 140 | idea, such as infinitely zooming into the image by cropping and 141 | scaling, adding jitter by randomly moving the image around, or 142 | adding constraints on the total activations.""" 143 | og = plt.imread('street.png') 144 | crop = 2 145 | img = preprocess(og)[np.newaxis, ...] 146 | layer = g.get_tensor_by_name(features[3] + ':0') 147 | n_els = layer.get_shape().as_list()[1] 148 | neuron_i = np.random.randint(1000) 149 | layer_vec = np.zeros((1, n_els)) 150 | layer_vec[0, neuron_i] = 1 151 | neuron = tf.reduce_max(layer, 1) 152 | saliency = tf.gradients(tf.reduce_sum(neuron), x) 153 | for it_i in range(3): 154 | print(it_i) 155 | this_res = sess.run(saliency[0], feed_dict={ 156 | x: img, 157 | layer: layer_vec, 158 | 'vgg/dropout_1/random_uniform:0': [[1.0]], 159 | 'vgg/dropout/random_uniform:0': [[1.0]]}) 160 | grad = this_res[0] / np.mean(np.abs(grad)) 161 | img = img[:, crop:-crop - 1, crop:-crop - 1, :] 162 | img = imresize(img[0], (224, 224))[np.newaxis] 163 | img += grad 164 | plt.imshow(deprocess(img[0])) 165 | 166 | 167 | def test_vgg_face(): 168 | """Loads the VGG network and applies it to a test image. 169 | """ 170 | with tf.Session() as sess: 171 | net = get_vgg_face_model() 172 | x = tf.placeholder(tf.float32, [1, 224, 224, 3], name='x') 173 | tf.import_graph_def(net['graph_def'], name='vgg', 174 | input_map={'Placeholder:0': x}) 175 | g = tf.get_default_graph() 176 | names = [op.name for op in g.get_operations()] 177 | 178 | og = plt.imread('bricks.png')[..., :3] 179 | img = preprocess(og)[np.newaxis, ...] 180 | plt.imshow(img[0]) 181 | plt.show() 182 | 183 | """Let's visualize the network's gradient activation 184 | when backpropagated to the original input image. This 185 | is effectively telling us which pixels contribute to the 186 | predicted class or given neuron""" 187 | features = [name for name in names if 'BiasAdd' in name.split()[-1]] 188 | from math import sqrt, ceil 189 | n_plots = ceil(sqrt(len(features) + 1)) 190 | fig, axs = plt.subplots(n_plots, n_plots) 191 | plot_i = 0 192 | axs[0][0].imshow(img[0]) 193 | for feature_i, featurename in enumerate(features): 194 | plot_i += 1 195 | feature = g.get_tensor_by_name(featurename + ':0') 196 | neuron = tf.reduce_max(feature, 1) 197 | saliency = tf.gradients(tf.reduce_sum(neuron), x) 198 | neuron_idx = tf.arg_max(feature, 1) 199 | this_res = sess.run([saliency[0], neuron_idx], feed_dict={x: img}) 200 | 201 | grad = this_res[0][0] / np.max(np.abs(this_res[0])) 202 | ax = axs[plot_i // n_plots][plot_i % n_plots] 203 | ax.imshow((grad * 127.5 + 127.5).astype(np.uint8)) 204 | ax.set_title(featurename) 205 | plt.waitforbuttonpress() 206 | 207 | """Deep Dreaming takes the backpropagated gradient activations 208 | and simply adds it to the image, running the same process again 209 | and again in a loop. There are many tricks one can add to this 210 | idea, such as infinitely zooming into the image by cropping and 211 | scaling, adding jitter by randomly moving the image around, or 212 | adding constraints on the total activations.""" 213 | og = plt.imread('street.png') 214 | crop = 2 215 | img = preprocess(og)[np.newaxis, ...] 
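        # The loop below performs a few "deep dreaming" steps: pick a layer and a
        # random neuron, compute the saliency gradient of that neuron with respect
        # to the input, crop and resize the image slightly each step (a zoom
        # effect), and add the normalized gradient back into the image.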
216 | layer = g.get_tensor_by_name(features[3] + ':0') 217 | n_els = layer.get_shape().as_list()[1] 218 | neuron_i = np.random.randint(1000) 219 | layer_vec = np.zeros((1, n_els)) 220 | layer_vec[0, neuron_i] = 1 221 | neuron = tf.reduce_max(layer, 1) 222 | saliency = tf.gradients(tf.reduce_sum(neuron), x) 223 | for it_i in range(3): 224 | print(it_i) 225 | this_res = sess.run(saliency[0], feed_dict={ 226 | x: img, 227 | layer: layer_vec, 228 | 'vgg/dropout_1/random_uniform:0': [[1.0]], 229 | 'vgg/dropout/random_uniform:0': [[1.0]]}) 230 | grad = this_res[0] / np.mean(np.abs(grad)) 231 | img = img[:, crop:-crop - 1, crop:-crop - 1, :] 232 | img = imresize(img[0], (224, 224))[np.newaxis] 233 | img += grad 234 | plt.imshow(deprocess(img[0])) 235 | 236 | if __name__ == '__main__': 237 | test_vgg_face() 238 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/testclasses_akata.txt: -------------------------------------------------------------------------------- 1 | 001.Black_footed_Albatross 2 | 004.Groove_billed_Ani 3 | 006.Least_Auklet 4 | 008.Rhinoceros_Auklet 5 | 009.Brewer_Blackbird 6 | 014.Indigo_Bunting 7 | 023.Brandt_Cormorant 8 | 029.American_Crow 9 | 031.Black_billed_Cuckoo 10 | 033.Yellow_billed_Cuckoo 11 | 034.Gray_crowned_Rosy_Finch 12 | 035.Purple_Finch 13 | 036.Northern_Flicker 14 | 037.Acadian_Flycatcher 15 | 038.Great_Crested_Flycatcher 16 | 043.Yellow_bellied_Flycatcher 17 | 049.Boat_tailed_Grackle 18 | 051.Horned_Grebe 19 | 053.Western_Grebe 20 | 066.Western_Gull 21 | 072.Pomarine_Jaeger 22 | 079.Belted_Kingfisher 23 | 083.White_breasted_Kingfisher 24 | 084.Red_legged_Kittiwake 25 | 086.Pacific_Loon 26 | 091.Mockingbird 27 | 095.Baltimore_Oriole 28 | 096.Hooded_Oriole 29 | 098.Scott_Oriole 30 | 101.White_Pelican 31 | 102.Western_Wood_Pewee 32 | 103.Sayornis 33 | 112.Great_Grey_Shrike 34 | 114.Black_throated_Sparrow 35 | 119.Field_Sparrow 36 | 121.Grasshopper_Sparrow 37 | 130.Tree_Sparrow 38 | 135.Bank_Swallow 39 | 138.Tree_Swallow 40 | 147.Least_Tern 41 | 156.White_eyed_Vireo 42 | 163.Cape_May_Warbler 43 | 165.Chestnut_sided_Warbler 44 | 166.Golden_winged_Warbler 45 | 180.Wilson_Warbler 46 | 183.Northern_Waterthrush 47 | 185.Bohemian_Waxwing 48 | 186.Cedar_Waxwing 49 | 187.American_Three_toed_Woodpecker 50 | 197.Marsh_Wren 51 | --------------------------------------------------------------------------------
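A minimal sketch of building and stepping the `VAE` graph from libs/vae.py on random data, assuming the TensorFlow 0.x-era API these files target (tf.pack, tf.initialize_all_variables, etc.) and that the snippet is run from the project root so that `libs` is importable; the layer sizes, learning rate, and the random batch are illustrative only, not the settings used in the project notebooks.

    import numpy as np
    import tensorflow as tf
    from libs.vae import VAE

    # Build a small fully connected, variational autoencoder on 784-d inputs.
    ae = VAE(input_shape=[None, 784],
             n_filters=[256, 64],      # layer sizes of the fully connected encoder
             n_hidden=32,
             n_code=2,
             activation=tf.nn.sigmoid,
             convolutional=False,
             variational=True)

    optimizer = tf.train.AdamOptimizer(0.001).minimize(ae['cost'])

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        # One gradient step on a random batch, just to confirm the graph runs.
        batch_xs = np.random.rand(64, 784).astype(np.float32)
        cost, _ = sess.run([ae['cost'], optimizer],
                           feed_dict={ae['x']: batch_xs,
                                      ae['train']: True,
                                      ae['keep_prob']: 1.0})
        print('cost after one step:', cost)

Setting convolutional=True and feeding image-shaped inputs (as train_vae does) follows the same pattern; only input_shape and the batch preparation change.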