├── README.md
└── Zero-shot Classification by Deep Learning
    ├── CADL_Project_DLZSL_Final.ipynb
    ├── Zero-shot Classification by Deep Learning_kadenze.gif
    ├── Zero-shot Classification by Deep Learning_kadenze.pptx
    ├── Zero-shot Classification by Deep Learning_kadenze.wmv
    ├── images_project
    │   ├── ae_graph.png
    │   ├── ae_result.png
    │   ├── concept_zsl.jpg
    │   ├── cub.jpg
    │   ├── deep_regression_graph.png
    │   ├── deep_regression_result.png
    │   ├── fea_example.png
    │   └── regression_result.png
    ├── libs
    │   ├── __pycache__
    │   │   ├── __init__.cpython-35.pyc
    │   │   ├── batch_norm.cpython-35.pyc
    │   │   ├── celeb_vaegan.cpython-35.pyc
    │   │   ├── charrnn.cpython-35.pyc
    │   │   ├── dataset_utils.cpython-35.pyc
    │   │   ├── datasets.cpython-35.pyc
    │   │   ├── dft.cpython-35.pyc
    │   │   ├── gif.cpython-35.pyc
    │   │   ├── i2v.cpython-35.pyc
    │   │   ├── inception.cpython-35.pyc
    │   │   ├── nb_utils.cpython-35.pyc
    │   │   ├── utils.cpython-35.pyc
    │   │   └── vgg16.cpython-35.pyc
    │   ├── batch_norm.py
    │   ├── celeb_vaegan.py
    │   ├── charrnn.py
    │   ├── dataset_utils.py
    │   ├── datasets.py
    │   ├── deepdream.py
    │   ├── dft.py
    │   ├── gan.py
    │   ├── gif.py
    │   ├── i2v.py
    │   ├── inception.py
    │   ├── nb_utils.py
    │   ├── stylenet.py
    │   ├── utils.py
    │   ├── vae.py
    │   ├── vaegan.py
    │   └── vgg16.py
    ├── slim_birds_final.ipynb
    └── testclasses_akata.txt
/README.md:
--------------------------------------------------------------------------------
1 | # zsl-deep-learning
2 | This repository contains zero-shot learning experiments using deep learning.
--------------------------------------------------------------------------------
/Zero-shot Classification by Deep Learning/Zero-shot Classification by Deep Learning_kadenze.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/Zero-shot Classification by Deep Learning_kadenze.gif
--------------------------------------------------------------------------------
/Zero-shot Classification by Deep Learning/Zero-shot Classification by Deep Learning_kadenze.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/Zero-shot Classification by Deep Learning_kadenze.pptx
--------------------------------------------------------------------------------
/Zero-shot Classification by Deep Learning/Zero-shot Classification by Deep Learning_kadenze.wmv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/Zero-shot Classification by Deep Learning_kadenze.wmv
--------------------------------------------------------------------------------
/Zero-shot Classification by Deep Learning/images_project/ae_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/images_project/ae_graph.png
--------------------------------------------------------------------------------
/Zero-shot Classification by Deep Learning/images_project/ae_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/images_project/ae_result.png
-------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/images_project/concept_zsl.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/images_project/concept_zsl.jpg -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/images_project/cub.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/images_project/cub.jpg -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/images_project/deep_regression_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/images_project/deep_regression_graph.png -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/images_project/deep_regression_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/images_project/deep_regression_result.png -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/images_project/fea_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/images_project/fea_example.png -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/images_project/regression_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/images_project/regression_result.png -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/batch_norm.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/batch_norm.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/celeb_vaegan.cpython-35.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/celeb_vaegan.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/charrnn.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/charrnn.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/dataset_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/dataset_utils.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/datasets.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/datasets.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/dft.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/dft.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/gif.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/gif.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/i2v.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/i2v.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/inception.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/inception.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/nb_utils.cpython-35.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/nb_utils.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/utils.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/__pycache__/vgg16.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Elyorcv/zsl-deep-learning/f4ec02ea723837992a15f1a94b4e62da7a68bee8/Zero-shot Classification by Deep Learning/libs/__pycache__/vgg16.cpython-35.pyc -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/batch_norm.py: -------------------------------------------------------------------------------- 1 | """Batch Normalization for TensorFlow. 2 | Parag K. Mital, Jan 2016. 3 | """ 4 | 5 | import tensorflow as tf 6 | from tensorflow.python.ops import control_flow_ops 7 | 8 | 9 | def batch_norm(x, phase_train, name='bn', decay=0.9, reuse=None, 10 | affine=True): 11 | """ 12 | Batch normalization on convolutional maps. 13 | from: https://stackoverflow.com/questions/33949786/how-could-i- 14 | use-batch-normalization-in-tensorflow 15 | Only modified to infer shape from input tensor x. 16 | Parameters 17 | ---------- 18 | x 19 | Tensor, 4D BHWD input maps 20 | phase_train 21 | boolean tf.Variable, true indicates training phase 22 | name 23 | string, variable name 24 | affine 25 | whether to affine-transform outputs 26 | Return 27 | ------ 28 | normed 29 | batch-normalized maps 30 | """ 31 | with tf.variable_scope(name, reuse=reuse): 32 | shape = x.get_shape().as_list() 33 | beta = tf.get_variable(name='beta', shape=[shape[-1]], 34 | initializer=tf.constant_initializer(0.0), 35 | trainable=True) 36 | gamma = tf.get_variable(name='gamma', shape=[shape[-1]], 37 | initializer=tf.constant_initializer(1.0), 38 | trainable=affine) 39 | if len(shape) == 4: 40 | batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments') 41 | else: 42 | batch_mean, batch_var = tf.nn.moments(x, [0], name='moments') 43 | ema = tf.train.ExponentialMovingAverage(decay=decay) 44 | ema_apply_op = ema.apply([batch_mean, batch_var]) 45 | ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var) 46 | 47 | def mean_var_with_update(): 48 | """Summary 49 | Returns 50 | ------- 51 | name : TYPE 52 | Description 53 | """ 54 | with tf.control_dependencies([ema_apply_op]): 55 | return tf.identity(batch_mean), tf.identity(batch_var) 56 | mean, var = control_flow_ops.cond(phase_train, 57 | mean_var_with_update, 58 | lambda: (ema_mean, ema_var)) 59 | 60 | # tf.nn.batch_normalization 61 | normed = tf.nn.batch_norm_with_global_normalization( 62 | x, mean, var, beta, gamma, 1e-6, affine) 63 | return normed 64 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/celeb_vaegan.py: -------------------------------------------------------------------------------- 1 | """ 2 | Creative Applications of Deep Learning w/ 
Tensorflow. 3 | Kadenze, Inc. 4 | Copyright Parag K. Mital, June 2016. 5 | """ 6 | import numpy as np 7 | import tensorflow as tf 8 | from tensorflow.python.platform import gfile 9 | from .utils import download 10 | from skimage.transform import resize as imresize 11 | 12 | 13 | def celeb_vaegan_download(): 14 | """Download a pretrained celeb vae/gan network.""" 15 | 16 | # Load the model and labels 17 | model = download('https://s3.amazonaws.com/cadl/models/celeb.vaegan.tfmodel') 18 | labels = download('https://s3.amazonaws.com/cadl/celeb-align/list_attr_celeba.txt') 19 | return model, labels 20 | 21 | 22 | def get_celeb_vaegan_model(): 23 | """Get a pretrained model. 24 | 25 | Returns 26 | ------- 27 | net : dict 28 | { 29 | 'graph_def': tf.GraphDef 30 | The graph definition 31 | 'labels': list 32 | List of different possible attributes from celeb 33 | 'attributes': np.ndarray 34 | One hot encoding of the attributes per image 35 | [n_els x n_labels] 36 | 'preprocess': function 37 | Preprocess function 38 | } 39 | """ 40 | # Download the trained net 41 | model, labels = celeb_vaegan_download() 42 | 43 | # Parse the ids and synsets 44 | txt = open(labels).readlines() 45 | n_els = int(txt[0].strip()) 46 | labels = txt[1].strip().split() 47 | n_labels = len(labels) 48 | attributes = np.zeros((n_els, n_labels), dtype=bool) 49 | for i, txt_i in enumerate(txt[2:]): 50 | attributes[i] = (np.array(txt_i.strip().split()[1:]).astype(int) > 0) 51 | 52 | # Load the saved graph 53 | with gfile.GFile(model, 'rb') as f: 54 | graph_def = tf.GraphDef() 55 | try: 56 | graph_def.ParseFromString(f.read()) 57 | except: 58 | print('try adding PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python' + 59 | 'to environment. e.g.:\n' + 60 | 'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python ipython\n' + 61 | 'See here for info: ' + 62 | 'https://github.com/tensorflow/tensorflow/issues/582') 63 | net = { 64 | 'graph_def': graph_def, 65 | 'labels': labels, 66 | 'attributes': attributes, 67 | 'preprocess': preprocess, 68 | } 69 | return net 70 | 71 | 72 | def preprocess(img, crop_factor=0.8): 73 | """Replicate the preprocessing we did on the VAE/GAN. 74 | 75 | This model used a crop_factor of 0.8 and crop size of [100, 100, 3]. 76 | """ 77 | crop = np.min(img.shape[:2]) 78 | r = (img.shape[0] - crop) // 2 79 | c = (img.shape[1] - crop) // 2 80 | cropped = img[r: r + crop, c: c + crop] 81 | r, c, *d = cropped.shape 82 | if crop_factor < 1.0: 83 | amt = (1 - crop_factor) / 2 84 | h, w = int(c * amt), int(r * amt) 85 | cropped = cropped[h:-h, w:-w] 86 | rsz = imresize(cropped, (100, 100), preserve_range=False) 87 | return rsz 88 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/charrnn.py: -------------------------------------------------------------------------------- 1 | """Creative Applications of Deep Learning w/ Tensorflow. 2 | Kadenze, Inc. 3 | Copyright Parag K. Mital, June 2016. 
4 | 5 | TODO: 6 | argparse 7 | better sound example/model 8 | prime with text input 9 | """ 10 | 11 | import tensorflow as tf 12 | import numpy as np 13 | import os 14 | import sys 15 | from six.moves import urllib 16 | import collections 17 | 18 | 19 | def build_model(txt, 20 | batch_size=1, 21 | sequence_length=1, 22 | n_layers=2, 23 | n_cells=100, 24 | gradient_clip=10.0, 25 | learning_rate=0.001): 26 | 27 | vocab = list(set(txt)) 28 | vocab.sort() 29 | n_chars = len(vocab) 30 | encoder = collections.OrderedDict(zip(vocab, range(n_chars))) 31 | decoder = collections.OrderedDict(zip(range(n_chars), vocab)) 32 | 33 | X = tf.placeholder(tf.int32, [None, sequence_length], name='X') 34 | Y = tf.placeholder(tf.int32, [None, sequence_length], name='Y') 35 | keep_prob = tf.placeholder(tf.float32, name='keep_prob') 36 | 37 | with tf.variable_scope('embedding'): 38 | embedding = tf.get_variable("embedding", [n_chars, n_cells]) 39 | # Each sequence element will be connected to n_cells 40 | Xs = tf.nn.embedding_lookup(embedding, X) 41 | # Then slice each sequence element 42 | Xs = tf.split(1, sequence_length, Xs) 43 | # Get rid of singleton sequence element dimension 44 | Xs = [tf.squeeze(X_i, [1]) for X_i in Xs] 45 | 46 | with tf.variable_scope('rnn'): 47 | cells = tf.nn.rnn_cell.BasicLSTMCell( 48 | num_units=n_cells, forget_bias=0.0, state_is_tuple=True) 49 | initial_state = cells.zero_state(tf.shape(X)[0], tf.float32) 50 | if n_layers > 1: 51 | cells = tf.nn.rnn_cell.MultiRNNCell( 52 | [cells] * n_layers, state_is_tuple=True) 53 | initial_state = cells.zero_state(tf.shape(X)[0], tf.float32) 54 | cells = tf.nn.rnn_cell.DropoutWrapper( 55 | cells, output_keep_prob=keep_prob) 56 | outputs, final_state = tf.nn.rnn( 57 | cells, Xs, initial_state=initial_state) 58 | outputs_flat = tf.reshape(tf.concat(1, outputs), [-1, n_cells]) 59 | 60 | with tf.variable_scope('prediction'): 61 | W = tf.get_variable( 62 | "W", 63 | shape=[n_cells, n_chars], 64 | initializer=tf.contrib.layers.xavier_initializer()) 65 | b = tf.get_variable( 66 | "b", 67 | shape=[n_chars], 68 | initializer=tf.constant_initializer()) 69 | logits = tf.matmul(outputs_flat, W) + b 70 | probs = tf.nn.softmax(logits) 71 | Y_pred = tf.argmax(probs, 1) 72 | 73 | with tf.variable_scope('loss'): 74 | loss = tf.nn.seq2seq.sequence_loss_by_example( 75 | [logits], 76 | [tf.reshape(tf.concat(1, Y), [-1])], 77 | [tf.ones([batch_size * sequence_length])]) 78 | cost = tf.reduce_sum(loss) / batch_size 79 | 80 | with tf.name_scope('optimizer'): 81 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 82 | gradients = [] 83 | clip = tf.constant(gradient_clip, name="clip") 84 | for grad, var in optimizer.compute_gradients(cost): 85 | gradients.append((tf.clip_by_value(grad, -clip, clip), var)) 86 | updates = optimizer.apply_gradients(gradients) 87 | 88 | model = {'X': X, 'Y': Y, 'logits': logits, 'probs': probs, 89 | 'Y_pred': Y_pred, 'keep_prob': keep_prob, 90 | 'cost': cost, 'updates': updates, 'initial_state': initial_state, 91 | 'final_state': final_state, 'decoder': decoder, 'encoder': encoder, 92 | 'vocab_size': n_chars} 93 | return model 94 | 95 | 96 | def train(txt, batch_size=100, sequence_length=150, n_cells=100, n_layers=3, 97 | learning_rate=0.00001, max_iter=50000, gradient_clip=5.0, 98 | ckpt_name="model.ckpt", keep_prob=1.0): 99 | 100 | g = tf.Graph() 101 | with tf.Session(graph=g) as sess: 102 | model = build_model(txt=txt, 103 | batch_size=batch_size, 104 | sequence_length=sequence_length, 105 | n_layers=n_layers, 106 | 
n_cells=n_cells, 107 | gradient_clip=gradient_clip, 108 | learning_rate=learning_rate) 109 | 110 | init_op = tf.initialize_all_variables() 111 | saver = tf.train.Saver() 112 | sess.run(init_op) 113 | if os.path.exists(ckpt_name): 114 | saver.restore(sess, ckpt_name) 115 | print("Model restored.") 116 | 117 | cursor = 0 118 | it_i = 0 119 | print_step = 100 120 | avg_cost = 0 121 | while it_i < max_iter: 122 | Xs, Ys = [], [] 123 | for batch_i in range(batch_size): 124 | Xs.append([model['encoder'][ch] 125 | for ch in txt[cursor:cursor + sequence_length]]) 126 | Ys.append([model['encoder'][ch] 127 | for ch in txt[cursor + 1: 128 | cursor + sequence_length + 1]]) 129 | cursor += sequence_length 130 | if (cursor + 1) >= len(txt) - sequence_length - 1: 131 | cursor = np.random.randint(0, high=sequence_length) 132 | 133 | feed_dict = {model['X']: Xs, model['Y']: Ys, model['keep_prob']: keep_prob} 134 | out = sess.run([model['cost'], model['updates']], feed_dict=feed_dict) 135 | avg_cost += out[0] 136 | 137 | if (it_i + 1) % print_step == 0: 138 | p = sess.run(model['probs'], feed_dict={ 139 | model['X']: np.array(Xs[-1])[np.newaxis], model['keep_prob']: 1.0}) 140 | print(p.shape, 'min:', np.min(p), 'max:', np.max(p), 141 | 'mean:', np.mean(p), 'std:', np.std(p)) 142 | if isinstance(txt[0], str): 143 | # Print original string 144 | print('original:', "".join( 145 | [model['decoder'][ch] for ch in Xs[-1]])) 146 | 147 | # Print max guess 148 | amax = [] 149 | for p_i in p: 150 | amax.append(model['decoder'][np.argmax(p_i)]) 151 | print('synth(amax):', "".join(amax)) 152 | 153 | # Print w/ sampling 154 | samp = [] 155 | for p_i in p: 156 | p_i = p_i.astype(np.float64) 157 | p_i = p_i / p_i.sum() 158 | idx = np.argmax(np.random.multinomial(1, p_i.ravel())) 159 | samp.append(model['decoder'][idx]) 160 | print('synth(samp):', "".join(samp)) 161 | 162 | print(it_i, avg_cost / print_step) 163 | avg_cost = 0 164 | 165 | save_path = saver.save(sess, "./" + ckpt_name, global_step=it_i) 166 | print("Model saved in file: %s" % save_path) 167 | 168 | print(it_i, out[0], end='\r') 169 | it_i += 1 170 | 171 | return model 172 | 173 | 174 | def infer(txt, ckpt_name, n_iterations, n_cells=512, n_layers=3, 175 | learning_rate=0.001, max_iter=5000, gradient_clip=10.0, 176 | init_value=[0], keep_prob=1.0, sampling='prob', temperature=1.0): 177 | 178 | g = tf.Graph() 179 | with tf.Session(graph=g) as sess: 180 | sequence_length = len(init_value) 181 | model = build_model(txt=txt, 182 | batch_size=1, 183 | sequence_length=sequence_length, 184 | n_layers=n_layers, 185 | n_cells=n_cells, 186 | gradient_clip=gradient_clip, 187 | learning_rate=learning_rate) 188 | 189 | init_op = tf.initialize_all_variables() 190 | saver = tf.train.Saver() 191 | sess.run(init_op) 192 | if os.path.exists(ckpt_name): 193 | saver.restore(sess, ckpt_name) 194 | print("Model restored.") 195 | 196 | state = [] 197 | synth = [init_value] 198 | for s_i in model['final_state']: 199 | state += sess.run([s_i.c, s_i.h], feed_dict={ 200 | model['X']: [synth[-1]], model['keep_prob']: keep_prob}) 201 | 202 | for i in range(n_iterations): 203 | # print('iteration: {}/{}'.format(i, n_iterations), end='\r') 204 | feed_dict = {model['X']: [synth[-1]], 205 | model['keep_prob']: keep_prob} 206 | state_updates = [] 207 | for state_i in range(n_layers): 208 | feed_dict[model['initial_state'][state_i].c] = state[state_i * 2] 209 | feed_dict[model['initial_state'][state_i].h] = state[state_i * 2 + 1] 210 | state_updates.append(model['final_state'][state_i].c) 211 
| state_updates.append(model['final_state'][state_i].h) 212 | p = sess.run(model['probs'], feed_dict=feed_dict)[0] 213 | if sampling == 'max': 214 | p = np.argmax(p) 215 | else: 216 | p = p.astype(np.float64) 217 | p = np.log(p) / temperature 218 | p = np.exp(p) / np.sum(np.exp(p)) 219 | p = np.random.multinomial(1, p.ravel()) 220 | p = np.argmax(p) 221 | # Get the current state 222 | state = [sess.run(s_i, feed_dict=feed_dict) 223 | for s_i in state_updates] 224 | synth.append([p]) 225 | print(model['decoder'][p], end='') 226 | sys.stdout.flush() 227 | if model['decoder'][p] in ['.', '?', '!']: 228 | print('\n') 229 | print(np.concatenate(synth).shape) 230 | print("".join([model['decoder'][ch] for ch in np.concatenate(synth)])) 231 | return [model['decoder'][ch] for ch in np.concatenate(synth)] 232 | 233 | 234 | def test_alice(): 235 | f, _ = urllib.request.urlretrieve( 236 | 'https://www.gutenberg.org/cache/epub/11/pg11.txt', 'alice.txt') 237 | with open(f, 'r') as fp: 238 | txt = fp.read() 239 | train(txt, max_iter=50000) 240 | 241 | 242 | def test_trump(): 243 | with open('trump.txt', 'r') as fp: 244 | txt = fp.read() 245 | # train(txt, max_iter=50000) 246 | print(infer(txt, 'trump.ckpt', 50000)) 247 | 248 | 249 | def test_wtc(): 250 | from scipy.io.wavfile import write, read 251 | rate, aud = read('wtc.wav') 252 | txt = np.int8(np.round(aud / 16384.0 * 128.0)) 253 | txt = np.squeeze(txt).tolist() 254 | train(txt, sequence_length=250, n_layers=3, n_cells=512, max_iter=100000) 255 | synthesis = infer(txt, 'model.ckpt', 8000 * 30, n_layers=3, 256 | n_cells=150, keep_prob=1.0, sampling='prob') 257 | snd = np.int16(np.array(synthesis) / 128.0 * 16384.0) 258 | write('wtc-synth.wav', 8000, snd) 259 | 260 | 261 | if __name__ == '__main__': 262 | test_alice() 263 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/dataset_utils.py: -------------------------------------------------------------------------------- 1 | """Utils for dataset creation. 2 | 3 | Creative Applications of Deep Learning w/ Tensorflow. 4 | Kadenze, Inc. 5 | Copyright Parag K. Mital, June 2016. 6 | """ 7 | 8 | import os 9 | import pickle 10 | import numpy as np 11 | import tensorflow as tf 12 | from . import dft 13 | from .utils import download_and_extract_tar 14 | 15 | 16 | def create_input_pipeline(files, batch_size, n_epochs, shape, crop_shape=None, 17 | crop_factor=1.0, n_threads=2): 18 | """Creates a pipefile from a list of image files. 19 | Includes batch generator/central crop/resizing options. 20 | The resulting generator will dequeue the images batch_size at a time until 21 | it throws tf.errors.OutOfRangeError when there are no more images left in 22 | the queue. 23 | 24 | Parameters 25 | ---------- 26 | files : list 27 | List of paths to image files. 28 | batch_size : int 29 | Number of image files to load at a time. 30 | n_epochs : int 31 | Number of epochs to run before raising tf.errors.OutOfRangeError 32 | shape : list 33 | [height, width, channels] 34 | crop_shape : list 35 | [height, width] to crop image to. 36 | crop_factor : float 37 | Percentage of image to take starting from center. 38 | n_threads : int, optional 39 | Number of threads to use for batch shuffling 40 | """ 41 | 42 | # We first create a "producer" queue. It creates a production line which 43 | # will queue up the file names and allow another queue to deque the file 44 | # names all using a tf queue runner. 
45 | # Put simply, this is the entry point of the computational graph. 46 | # It will generate the list of file names. 47 | # We also specify it's capacity beforehand. 48 | producer = tf.train.string_input_producer( 49 | files, capacity=len(files)) 50 | 51 | # We need something which can open the files and read its contents. 52 | reader = tf.WholeFileReader() 53 | 54 | # We pass the filenames to this object which can read the file's contents. 55 | # This will create another queue running which dequeues the previous queue. 56 | keys, vals = reader.read(producer) 57 | 58 | # And then have to decode its contents as we know it is a jpeg image 59 | imgs = tf.image.decode_jpeg( 60 | vals, 61 | channels=3 if len(shape) > 2 and shape[2] == 3 else 0) 62 | 63 | # We have to explicitly define the shape of the tensor. 64 | # This is because the decode_jpeg operation is still a node in the graph 65 | # and doesn't yet know the shape of the image. Future operations however 66 | # need explicit knowledge of the image's shape in order to be created. 67 | imgs.set_shape(shape) 68 | 69 | # Next we'll centrally crop the image to the size of 100x100. 70 | # This operation required explicit knowledge of the image's shape. 71 | if shape[0] > shape[1]: 72 | rsz_shape = [int(shape[0] / shape[1] * crop_shape[0] / crop_factor), 73 | int(crop_shape[1] / crop_factor)] 74 | else: 75 | rsz_shape = [int(crop_shape[0] / crop_factor), 76 | int(shape[1] / shape[0] * crop_shape[1] / crop_factor)] 77 | rszs = tf.image.resize_images(imgs, rsz_shape) 78 | crops = (tf.image.resize_image_with_crop_or_pad( 79 | rszs, crop_shape[0], crop_shape[1]) 80 | if crop_shape is not None 81 | else imgs) 82 | 83 | # Now we'll create a batch generator that will also shuffle our examples. 84 | # We tell it how many it should have in its buffer when it randomly 85 | # permutes the order. 86 | min_after_dequeue = len(files) // 100 87 | 88 | # The capacity should be larger than min_after_dequeue, and determines how 89 | # many examples are prefetched. TF docs recommend setting this value to: 90 | # min_after_dequeue + (num_threads + a small safety margin) * batch_size 91 | capacity = min_after_dequeue + (n_threads + 1) * batch_size 92 | 93 | # Randomize the order and output batches of batch_size. 94 | batch = tf.train.shuffle_batch([crops], 95 | enqueue_many=False, 96 | batch_size=batch_size, 97 | capacity=capacity, 98 | min_after_dequeue=min_after_dequeue, 99 | num_threads=n_threads) 100 | 101 | # alternatively, we could use shuffle_batch_join to use multiple reader 102 | # instances, or set shuffle_batch's n_threads to higher than 1. 103 | 104 | return batch 105 | 106 | 107 | def gtzan_music_speech_download(dst='gtzan_music_speech'): 108 | """Download the GTZAN music and speech dataset. 109 | 110 | Parameters 111 | ---------- 112 | dst : str, optional 113 | Location to put the GTZAN music and speech datset. 114 | """ 115 | path = 'http://opihi.cs.uvic.ca/sound/music_speech.tar.gz' 116 | download_and_extract_tar(path, dst) 117 | 118 | 119 | def gtzan_music_speech_load(dst='gtzan_music_speech'): 120 | """Load the GTZAN Music and Speech dataset. 121 | 122 | Downloads the dataset if it does not exist into the dst directory. 123 | 124 | Parameters 125 | ---------- 126 | dst : str, optional 127 | Location of GTZAN Music and Speech dataset. 
128 | 129 | Returns 130 | ------- 131 | Xs, ys : np.ndarray, np.ndarray 132 | Array of data, Array of labels 133 | """ 134 | from scipy.io import wavfile 135 | 136 | if not os.path.exists(dst): 137 | gtzan_music_speech_download(dst) 138 | music_dir = os.path.join(os.path.join(dst, 'music_speech'), 'music_wav') 139 | music = [os.path.join(music_dir, file_i) 140 | for file_i in os.listdir(music_dir) 141 | if file_i.endswith('.wav')] 142 | speech_dir = os.path.join(os.path.join(dst, 'music_speech'), 'speech_wav') 143 | speech = [os.path.join(speech_dir, file_i) 144 | for file_i in os.listdir(speech_dir) 145 | if file_i.endswith('.wav')] 146 | Xs = [] 147 | ys = [] 148 | for i in music: 149 | sr, s = wavfile.read(i) 150 | s = s / 16384.0 - 1.0 151 | re, im = dft.dft_np(s) 152 | mag, phs = dft.ztoc(re, im) 153 | Xs.append((mag, phs)) 154 | ys.append(0) 155 | for i in speech: 156 | sr, s = wavfile.read(i) 157 | s = s / 16384.0 - 1.0 158 | re, im = dft.dft_np(s) 159 | mag, phs = dft.ztoc(re, im) 160 | Xs.append((mag, phs)) 161 | ys.append(1) 162 | Xs = np.array(Xs) 163 | Xs = np.transpose(Xs, [0, 2, 3, 1]) 164 | ys = np.array(ys) 165 | return Xs, ys 166 | 167 | 168 | def cifar10_download(dst='cifar10'): 169 | """Download the CIFAR10 dataset. 170 | 171 | Parameters 172 | ---------- 173 | dst : str, optional 174 | Directory to download into. 175 | """ 176 | path = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' 177 | download_and_extract_tar(path, dst) 178 | 179 | 180 | def cifar10_load(dst='cifar10'): 181 | """Load the CIFAR10 dataset. 182 | 183 | Downloads the dataset if it does not exist into the dst directory. 184 | 185 | Parameters 186 | ---------- 187 | dst : str, optional 188 | Location of CIFAR10 dataset. 189 | 190 | Returns 191 | ------- 192 | Xs, ys : np.ndarray, np.ndarray 193 | Array of data, Array of labels 194 | """ 195 | if not os.path.exists(dst): 196 | cifar10_download(dst) 197 | Xs = None 198 | ys = None 199 | for f in range(1, 6): 200 | cf = pickle.load(open( 201 | '%s/cifar-10-batches-py/data_batch_%d' % (dst, f), 'rb'), 202 | encoding='LATIN') 203 | if Xs is not None: 204 | Xs = np.r_[Xs, cf['data']] 205 | ys = np.r_[ys, np.array(cf['labels'])] 206 | else: 207 | Xs = cf['data'] 208 | ys = cf['labels'] 209 | Xs = np.swapaxes(np.swapaxes(Xs.reshape(-1, 3, 32, 32), 1, 3), 1, 2) 210 | return Xs, ys 211 | 212 | 213 | def dense_to_one_hot(labels, n_classes=2): 214 | """Convert class labels from scalars to one-hot vectors. 215 | 216 | Parameters 217 | ---------- 218 | labels : array 219 | Input labels to convert to one-hot representation. 220 | n_classes : int, optional 221 | Number of possible one-hot. 222 | 223 | Returns 224 | ------- 225 | one_hot : array 226 | One hot representation of input. 227 | """ 228 | return np.eye(n_classes).astype(np.float32)[labels] 229 | 230 | 231 | class DatasetSplit(object): 232 | """Utility class for batching data and handling multiple splits. 233 | 234 | Attributes 235 | ---------- 236 | current_batch_idx : int 237 | Description 238 | images : np.ndarray 239 | Xs of the dataset. Not necessarily images. 240 | labels : np.ndarray 241 | ys of the dataset. 242 | n_labels : int 243 | Number of possible labels 244 | num_examples : int 245 | Number of total observations 246 | """ 247 | 248 | def __init__(self, images, labels): 249 | """Initialize a DatasetSplit object. 
250 | 251 | Parameters 252 | ---------- 253 | images : np.ndarray 254 | Xs/inputs 255 | labels : np.ndarray 256 | ys/outputs 257 | """ 258 | self.images = np.array(images).astype(np.float32) 259 | if labels is not None: 260 | self.labels = np.array(labels).astype(np.int32) 261 | self.n_labels = len(np.unique(labels)) 262 | else: 263 | self.labels = None 264 | self.num_examples = len(self.images) 265 | 266 | def next_batch(self, batch_size=100): 267 | """Batch generator with randomization. 268 | 269 | Parameters 270 | ---------- 271 | batch_size : int, optional 272 | Size of each minibatch. 273 | 274 | Returns 275 | ------- 276 | Xs, ys : np.ndarray, np.ndarray 277 | Next batch of inputs and labels (if no labels, then None). 278 | """ 279 | # Shuffle each epoch 280 | current_permutation = np.random.permutation(range(len(self.images))) 281 | epoch_images = self.images[current_permutation, ...] 282 | if self.labels is not None: 283 | epoch_labels = self.labels[current_permutation, ...] 284 | 285 | # Then iterate over the epoch 286 | self.current_batch_idx = 0 287 | while self.current_batch_idx < len(self.images): 288 | end_idx = min( 289 | self.current_batch_idx + batch_size, len(self.images)) 290 | this_batch = { 291 | 'images': epoch_images[self.current_batch_idx:end_idx], 292 | 'labels': epoch_labels[self.current_batch_idx:end_idx] 293 | if self.labels is not None else None 294 | } 295 | self.current_batch_idx += batch_size 296 | yield this_batch['images'], this_batch['labels'] 297 | 298 | 299 | class Dataset(object): 300 | """Create a dataset from data and their labels. 301 | 302 | Allows easy use of train/valid/test splits; Batch generator. 303 | 304 | Attributes 305 | ---------- 306 | all_idxs : list 307 | All indexes across all splits. 308 | all_inputs : list 309 | All inputs across all splits. 310 | all_labels : list 311 | All labels across all splits. 312 | n_labels : int 313 | Number of labels. 314 | split : list 315 | Percentage split of train, valid, test sets. 316 | test_idxs : list 317 | Indexes of the test split. 318 | train_idxs : list 319 | Indexes of the train split. 320 | valid_idxs : list 321 | Indexes of the valid split. 322 | """ 323 | 324 | def __init__(self, Xs, ys=None, split=[1.0, 0.0, 0.0], one_hot=False): 325 | """Initialize a Dataset object. 326 | 327 | Parameters 328 | ---------- 329 | Xs : np.ndarray 330 | Images/inputs to a network 331 | ys : np.ndarray 332 | Labels/outputs to a network 333 | split : list, optional 334 | Percentage of train, valid, and test sets. 335 | one_hot : bool, optional 336 | Whether or not to use one-hot encoding of labels (ys). 337 | """ 338 | self.all_idxs = [] 339 | self.all_labels = [] 340 | self.all_inputs = [] 341 | self.train_idxs = [] 342 | self.valid_idxs = [] 343 | self.test_idxs = [] 344 | self.n_labels = 0 345 | self.split = split 346 | 347 | # Now mix all the labels that are currently stored as blocks 348 | self.all_inputs = Xs 349 | n_idxs = len(self.all_inputs) 350 | idxs = range(n_idxs) 351 | rand_idxs = np.random.permutation(idxs) 352 | self.all_inputs = self.all_inputs[rand_idxs, ...] 353 | if ys is not None: 354 | self.all_labels = ys if not one_hot else dense_to_one_hot(ys) 355 | self.all_labels = self.all_labels[rand_idxs, ...] 
356 | else: 357 | self.all_labels = None 358 | 359 | # Get splits 360 | self.train_idxs = idxs[:round(split[0] * n_idxs)] 361 | self.valid_idxs = idxs[len(self.train_idxs): 362 | len(self.train_idxs) + round(split[1] * n_idxs)] 363 | self.test_idxs = idxs[ 364 | (len(self.valid_idxs) + len(self.train_idxs)): 365 | (len(self.valid_idxs) + len(self.train_idxs)) + 366 | round(split[2] * n_idxs)] 367 | 368 | @property 369 | def X(self): 370 | """Inputs/Xs/Images. 371 | 372 | Returns 373 | ------- 374 | all_inputs : np.ndarray 375 | Original Inputs/Xs. 376 | """ 377 | return self.all_inputs 378 | 379 | @property 380 | def Y(self): 381 | """Outputs/ys/Labels. 382 | 383 | Returns 384 | ------- 385 | all_labels : np.ndarray 386 | Original Outputs/ys. 387 | """ 388 | return self.all_labels 389 | 390 | @property 391 | def train(self): 392 | """Train split. 393 | 394 | Returns 395 | ------- 396 | split : DatasetSplit 397 | Split of the train dataset. 398 | """ 399 | if len(self.train_idxs): 400 | inputs = self.all_inputs[self.train_idxs, ...] 401 | if self.all_labels is not None: 402 | labels = self.all_labels[self.train_idxs, ...] 403 | else: 404 | labels = None 405 | else: 406 | inputs, labels = [], [] 407 | return DatasetSplit(inputs, labels) 408 | 409 | @property 410 | def valid(self): 411 | """Validation split. 412 | 413 | Returns 414 | ------- 415 | split : DatasetSplit 416 | Split of the validation dataset. 417 | """ 418 | if len(self.valid_idxs): 419 | inputs = self.all_inputs[self.valid_idxs, ...] 420 | if self.all_labels is not None: 421 | labels = self.all_labels[self.valid_idxs, ...] 422 | else: 423 | labels = None 424 | else: 425 | inputs, labels = [], [] 426 | return DatasetSplit(inputs, labels) 427 | 428 | @property 429 | def test(self): 430 | """Test split. 431 | 432 | Returns 433 | ------- 434 | split : DatasetSplit 435 | Split of the test dataset. 436 | """ 437 | if len(self.test_idxs): 438 | inputs = self.all_inputs[self.test_idxs, ...] 439 | if self.all_labels is not None: 440 | labels = self.all_labels[self.test_idxs, ...] 441 | else: 442 | labels = None 443 | else: 444 | inputs, labels = [], [] 445 | return DatasetSplit(inputs, labels) 446 | 447 | def mean(self): 448 | """Mean of the inputs/Xs. 449 | 450 | Returns 451 | ------- 452 | mean : np.ndarray 453 | Calculates mean across 0th (batch) dimension. 454 | """ 455 | return np.mean(self.all_inputs, axis=0) 456 | 457 | def std(self): 458 | """Standard deviation of the inputs/Xs. 459 | 460 | Returns 461 | ------- 462 | std : np.ndarray 463 | Calculates std across 0th (batch) dimension. 464 | """ 465 | return np.std(self.all_inputs, axis=0) 466 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/datasets.py: -------------------------------------------------------------------------------- 1 | """Creative Applications of Deep Learning w/ Tensorflow. 2 | Kadenze, Inc. 3 | Copyright Parag K. Mital, June 2016. 4 | """ 5 | import tensorflow.examples.tutorials.mnist.input_data as input_data 6 | from .dataset_utils import * 7 | 8 | 9 | def MNIST(one_hot=True, split=[1.0, 0.0, 0.0]): 10 | """Returns the MNIST dataset. 11 | 12 | Returns 13 | ------- 14 | mnist : DataSet 15 | DataSet object w/ convenienve props for accessing 16 | train/validation/test sets and batches. 
17 | """ 18 | ds = input_data.read_data_sets('MNIST_data/', one_hot=one_hot) 19 | return Dataset(np.r_[ds.train.images, 20 | ds.validation.images, 21 | ds.test.images], 22 | np.r_[ds.train.labels, 23 | ds.validation.labels, 24 | ds.test.labels], 25 | split=split) 26 | 27 | 28 | def CIFAR10(flatten=True, split=[1.0, 0.0, 0.0]): 29 | """Returns the CIFAR10 dataset. 30 | 31 | Parameters 32 | ---------- 33 | flatten : bool, optional 34 | Convert the 3 x 32 x 32 pixels to a single vector 35 | 36 | Returns 37 | ------- 38 | cifar : Dataset 39 | Description 40 | """ 41 | # plt.imshow(np.transpose(np.reshape( 42 | # cifar.train.images[10], (3, 32, 32)), [1, 2, 0])) 43 | Xs, ys = cifar10_load() 44 | if flatten: 45 | Xs = Xs.reshape((Xs.shape[0], -1)) 46 | return Dataset(Xs, ys, split=split) 47 | 48 | 49 | def CELEB(path='./img_align_celeba/'): 50 | """Attempt to load the files of the CELEB dataset. 51 | 52 | Requires the files already be downloaded and placed in the `dst` directory. 53 | 54 | http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html 55 | 56 | Parameters 57 | ---------- 58 | path : str, optional 59 | Directory where the aligned/cropped celeb dataset can be found. 60 | 61 | Returns 62 | ------- 63 | files : list 64 | List of file paths to the dataset. 65 | """ 66 | if not os.path.exists(path): 67 | print('Could not find celeb dataset under {}.'.format(path)) 68 | print('Try downloading the dataset from the "Aligned and Cropped" ' + 69 | 'link located here (imgs/img_align_celeba.zip [1.34 GB]): ' + 70 | 'http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html') 71 | return None 72 | else: 73 | fs = [os.path.join(path, f) 74 | for f in os.listdir(path) if f.endswith('.jpg')] 75 | if len(fs) < 202598: 76 | print('It does not look like you have downloaded the entire ' + 77 | 'Celeb Dataset.\n' + 78 | 'Try downloading the dataset from the "Aligned and Cropped" ' + 79 | 'link located here (imgs/img_align_celeba.zip [1.34 GB]): ' + 80 | 'http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html') 81 | return fs 82 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/deepdream.py: -------------------------------------------------------------------------------- 1 | """Deep Dream using the Inception v5 network. 2 | 3 | Creative Applications of Deep Learning w/ Tensorflow. 4 | Kadenze, Inc. 5 | Copyright Parag K. Mital, June 2016. 6 | """ 7 | import os 8 | import numpy as np 9 | import tensorflow as tf 10 | from scipy.ndimage.filters import gaussian_filter 11 | from skimage.transform import resize 12 | from scipy.misc import imsave 13 | from . import inception, vgg16, i2v 14 | from . import gif 15 | 16 | 17 | def get_labels(model='inception'): 18 | """Return labels corresponding to the `neuron_i` parameter of deep dream. 19 | 20 | Parameters 21 | ---------- 22 | model : str, optional 23 | Which model to load. Must be one of: ['inception'], 'i2v_tag', 'i2v', 24 | 'vgg16', or 'vgg_face'. 25 | 26 | Raises 27 | ------ 28 | ValueError 29 | Unknown model. Must be one of: ['inception'], 'i2v_tag', 'i2v', 30 | 'vgg16', or 'vgg_face'. 
31 | """ 32 | if model == 'inception': 33 | net = inception.get_inception_model() 34 | return net['labels'] 35 | elif model == 'i2v_tag': 36 | net = i2v.get_i2v_tag_model() 37 | return net['labels'] 38 | elif model == 'vgg16': 39 | net = vgg16.get_vgg_model() 40 | return net['labels'] 41 | elif model == 'vgg_face': 42 | net = vgg16.get_vgg_face_model() 43 | return net['labels'] 44 | else: 45 | raise ValueError("Unknown model or this model does not have labels!") 46 | 47 | 48 | def get_layer_names(model='inception'): 49 | """Retun every layer's index and name in the given model. 50 | 51 | Parameters 52 | ---------- 53 | model : str, optional 54 | Which model to load. Must be one of: ['inception'], 'i2v_tag', 'i2v', 55 | 'vgg16', or 'vgg_face'. 56 | 57 | Returns 58 | ------- 59 | names : list of tuples 60 | The index and layer's name for every layer in the given model. 61 | """ 62 | g = tf.Graph() 63 | with tf.Session(graph=g): 64 | if model == 'inception': 65 | net = inception.get_inception_model() 66 | elif model == 'vgg_face': 67 | net = vgg16.get_vgg_face_model() 68 | elif model == 'vgg16': 69 | net = vgg16.get_vgg_model() 70 | elif model == 'i2v': 71 | net = i2v.get_i2v_model() 72 | elif model == 'i2v-tag': 73 | net = i2v.get_i2v_tag_model() 74 | 75 | tf.import_graph_def(net['graph_def'], name='net') 76 | names = [(i, op.name) for i, op in enumerate(g.get_operations())] 77 | return names 78 | 79 | 80 | def _setup(input_img, model, downsize): 81 | """Internal use only. Load the given model's graph and preprocess an image. 82 | 83 | Parameters 84 | ---------- 85 | input_img : np.ndarray 86 | Image to process with the model's normalizaiton process. 87 | model : str 88 | Which model to load. Must be one of: ['inception'], 'i2v_tag', 'i2v', 89 | 'vgg16', or 'vgg_face'. 90 | downsize : bool 91 | Optionally crop/resize the input image to the standard shape. Only 92 | applies to inception network which is all convolutional. 93 | 94 | Returns 95 | ------- 96 | net, img, preprocess, deprocess : dict, np.ndarray, function, function 97 | net : The networks graph_def and labels 98 | img : The preprocessed input image 99 | preprocess: Function for preprocessing an image 100 | deprocess: Function for deprocessing an image 101 | 102 | Raises 103 | ------ 104 | ValueError 105 | If model is unknown. 106 | """ 107 | if model == 'inception': 108 | net = inception.get_inception_model() 109 | img = inception.preprocess(input_img, resize=downsize, crop=downsize)[np.newaxis] 110 | deprocess, preprocess = inception.deprocess, inception.preprocess 111 | elif model == 'vgg_face': 112 | net = vgg16.get_vgg_face_model() 113 | img = vgg16.preprocess(input_img)[np.newaxis] 114 | deprocess, preprocess = vgg16.deprocess, vgg16.preprocess 115 | elif model == 'vgg16': 116 | net = vgg16.get_vgg_model() 117 | img = vgg16.preprocess(input_img)[np.newaxis] 118 | deprocess, preprocess = vgg16.deprocess, vgg16.preprocess 119 | elif model == 'i2v': 120 | net = i2v.get_i2v_model() 121 | img = i2v.preprocess(input_img)[np.newaxis] 122 | deprocess, preprocess = i2v.deprocess, i2v.preprocess 123 | elif model == 'i2v_tag': 124 | net = i2v.get_i2v_tag_model() 125 | img = i2v.preprocess(input_img)[np.newaxis] 126 | deprocess, preprocess = i2v.deprocess, i2v.preprocess 127 | else: 128 | raise ValueError( 129 | "Unknown model name! 
Supported: " + 130 | "['inception', 'vgg_face', 'vgg16', 'i2v', 'i2v_tag']") 131 | 132 | return net, img, preprocess, deprocess 133 | 134 | 135 | def _apply(img, 136 | gradient, 137 | it_i, 138 | decay=0.998, 139 | sigma=1.5, 140 | blur_step=10, 141 | step=1.0, 142 | crop=0, 143 | crop_step=1, 144 | pth=0): 145 | """Interal use only. Apply the gradient to an image with the given params. 146 | 147 | Parameters 148 | ---------- 149 | img : np.ndarray 150 | Tensor to apply gradient ascent to. 151 | gradient : np.ndarray 152 | Gradient to ascend to. 153 | it_i : int 154 | Current iteration (used for step modulos) 155 | decay : float, optional 156 | Amount to decay. 157 | sigma : float, optional 158 | Sigma for Gaussian Kernel. 159 | blur_step : int, optional 160 | How often to blur. 161 | step : float, optional 162 | Step for gradient ascent. 163 | crop : int, optional 164 | Amount to crop from each border. 165 | crop_step : int, optional 166 | How often to crop. 167 | pth : int, optional 168 | Percentile to mask out. 169 | 170 | Returns 171 | ------- 172 | img : np.ndarray 173 | Ascended image. 174 | """ 175 | gradient /= (np.std(gradient) + 1e-10) 176 | img += gradient * step 177 | img *= decay 178 | 179 | if pth: 180 | mask = (np.abs(img) < np.percentile(np.abs(img), pth)) 181 | img = img - img * mask 182 | 183 | if blur_step and it_i % blur_step == 0: 184 | for ch_i in range(3): 185 | img[..., ch_i] = gaussian_filter(img[..., ch_i], sigma) 186 | 187 | if crop and it_i % crop_step == 0: 188 | height, width, *ch = img[0].shape 189 | 190 | # Crop a 1 pixel border from height and width 191 | img = img[:, crop:-crop, crop:-crop, :] 192 | 193 | # Resize 194 | img = resize(img[0], (height, width), order=3, 195 | clip=False, preserve_range=True 196 | )[np.newaxis].astype(np.float32) 197 | 198 | 199 | def deep_dream(input_img, 200 | downsize=False, 201 | model='inception', 202 | layer_i=-1, 203 | neuron_i=-1, 204 | n_iterations=100, 205 | save_gif=None, 206 | save_images='imgs', 207 | device='/cpu:0', 208 | **kwargs): 209 | """Deep Dream with the given parameters. 210 | 211 | Parameters 212 | ---------- 213 | input_img : np.ndarray 214 | Image to apply deep dream to. Should be 3-dimenionsal H x W x C 215 | RGB uint8 or float32. 216 | downsize : bool, optional 217 | Whether or not to downsize the image. Only applies to 218 | model=='inception'. 219 | model : str, optional 220 | Which model to load. Must be one of: ['inception'], 'i2v_tag', 'i2v', 221 | 'vgg16', or 'vgg_face'. 222 | layer_i : int, optional 223 | Which layer to use for finding the gradient. E.g. the softmax layer 224 | for inception is -1, for vgg networks it is -2. Use the function 225 | "get_layer_names" to find the layer number that you need. 226 | neuron_i : int, optional 227 | Which neuron to use. -1 for the entire layer. 228 | n_iterations : int, optional 229 | Number of iterations to dream. 230 | save_gif : bool, optional 231 | Save a GIF. 232 | save_images : str, optional 233 | Folder to save images to. 234 | device : str, optional 235 | Which device to use, e.g. ['/cpu:0'] or '/gpu:0'. 236 | **kwargs : dict 237 | See "_apply" for additional parameters. 
238 | 239 | Returns 240 | ------- 241 | imgs : list of np.array 242 | Images of every iteration 243 | """ 244 | net, img, preprocess, deprocess = _setup(input_img, model, downsize) 245 | batch, height, width, *ch = img.shape 246 | 247 | g = tf.Graph() 248 | with tf.Session(graph=g) as sess, g.device(device): 249 | 250 | tf.import_graph_def(net['graph_def'], name='net') 251 | names = [op.name for op in g.get_operations()] 252 | input_name = names[0] + ':0' 253 | x = g.get_tensor_by_name(input_name) 254 | 255 | layer = g.get_tensor_by_name(names[layer_i] + ':0') 256 | layer_shape = sess.run(tf.shape(layer), feed_dict={x: img}) 257 | layer_vec = np.ones(layer_shape) / layer_shape[-1] 258 | layer_vec[..., neuron_i] = 1.0 - (1.0 / layer_shape[-1]) 259 | 260 | ascent = tf.gradients(layer, x) 261 | 262 | imgs = [] 263 | for it_i in range(n_iterations): 264 | print(it_i, np.min(img), np.max(img)) 265 | if neuron_i == -1: 266 | this_res = sess.run( 267 | ascent, feed_dict={x: img})[0] 268 | else: 269 | this_res = sess.run( 270 | ascent, feed_dict={x: img, layer: layer_vec})[0] 271 | 272 | _apply(img, this_res, it_i, **kwargs) 273 | imgs.append(deprocess(img[0])) 274 | 275 | if save_images is not None: 276 | imsave(os.path.join(save_images, 277 | 'frame{}.png'.format(it_i)), imgs[-1]) 278 | 279 | if save_gif is not None: 280 | gif.build_gif(imgs, saveto=save_gif) 281 | 282 | return imgs 283 | 284 | 285 | def guided_dream(input_img, 286 | guide_img=None, 287 | downsize=False, 288 | layers=[162, 183, 184, 247], 289 | label_i=962, 290 | layer_i=-1, 291 | feature_loss_weight=1.0, 292 | tv_loss_weight=1.0, 293 | l2_loss_weight=1.0, 294 | softmax_loss_weight=1.0, 295 | model='inception', 296 | neuron_i=920, 297 | n_iterations=100, 298 | save_gif=None, 299 | save_images='imgs', 300 | device='/cpu:0', 301 | **kwargs): 302 | """Deep Dream v2. Use an optional guide image and other techniques. 303 | 304 | Parameters 305 | ---------- 306 | input_img : np.ndarray 307 | Image to apply deep dream to. Should be 3-dimenionsal H x W x C 308 | RGB uint8 or float32. 309 | guide_img : np.ndarray, optional 310 | Optional image to find features at different layers for. Must pass in 311 | a list of layers that you want to find features for. Then the guided 312 | dream will try to match this images features at those layers. 313 | downsize : bool, optional 314 | Whether or not to downsize the image. Only applies to 315 | model=='inception'. 316 | layers : list, optional 317 | A list of layers to find features for in the "guide_img". 318 | label_i : int, optional 319 | Which label to use for the softmax layer. Use the "get_labels" function 320 | to find the index corresponding the object of interest. If None, not 321 | used. 322 | layer_i : int, optional 323 | Which layer to use for finding the gradient. E.g. the softmax layer 324 | for inception is -1, for vgg networks it is -2. Use the function 325 | "get_layer_names" to find the layer number that you need. 326 | feature_loss_weight : float, optional 327 | Weighting for the feature loss from the guide_img. 328 | tv_loss_weight : float, optional 329 | Total variational loss weighting. Enforces smoothness. 330 | l2_loss_weight : float, optional 331 | L2 loss weighting. Enforces smaller values and reduces saturation. 332 | softmax_loss_weight : float, optional 333 | Softmax loss weighting. Must set label_i. 334 | model : str, optional 335 | Which model to load. Must be one of: ['inception'], 'i2v_tag', 'i2v', 336 | 'vgg16', or 'vgg_face'. 
337 | neuron_i : int, optional 338 | Which neuron to use. -1 for the entire layer. 339 | n_iterations : int, optional 340 | Number of iterations to dream. 341 | save_gif : bool, optional 342 | Save a GIF. 343 | save_images : str, optional 344 | Folder to save images to. 345 | device : str, optional 346 | Which device to use, e.g. ['/cpu:0'] or '/gpu:0'. 347 | **kwargs : dict 348 | See "_apply" for additional parameters. 349 | 350 | Returns 351 | ------- 352 | imgs : list of np.ndarray 353 | Images of the dream. 354 | """ 355 | net, img, preprocess, deprocess = _setup(input_img, model, downsize) 356 | print(img.shape, input_img.shape) 357 | print(img.min(), img.max()) 358 | 359 | if guide_img is not None: 360 | guide_img = preprocess(guide_img.copy(), model)[np.newaxis] 361 | assert(guide_img.shape == img.shape) 362 | batch, height, width, *ch = img.shape 363 | 364 | g = tf.Graph() 365 | with tf.Session(graph=g) as sess, g.device(device): 366 | tf.import_graph_def(net['graph_def'], name='net') 367 | names = [op.name for op in g.get_operations()] 368 | input_name = names[0] + ':0' 369 | x = g.get_tensor_by_name(input_name) 370 | 371 | features = [names[layer_i] + ':0' for layer_i in layers] 372 | feature_loss = tf.Variable(0.0) 373 | for feature_i in features: 374 | layer = g.get_tensor_by_name(feature_i) 375 | if guide_img is None: 376 | feature_loss += tf.reduce_mean(layer) 377 | else: 378 | # Reshape it to 2D vector 379 | layer = tf.reshape(layer, [-1, 1]) 380 | # Do the same for our guide image 381 | guide_layer = sess.run(layer, feed_dict={x: guide_img}) 382 | guide_layer = guide_layer.reshape(-1, 1) 383 | # Now calculate their dot product 384 | correlation = tf.matmul(guide_layer.T, layer) 385 | feature_loss += feature_loss_weight * tf.reduce_mean(correlation) 386 | softmax_loss = tf.Variable(0.0) 387 | if label_i is not None: 388 | layer = g.get_tensor_by_name(names[layer_i] + ':0') 389 | layer_shape = sess.run(tf.shape(layer), feed_dict={x: img}) 390 | layer_vec = np.ones(layer_shape) / layer_shape[-1] 391 | layer_vec[..., neuron_i] = 1.0 - 1.0 / layer_shape[1] 392 | softmax_loss += softmax_loss_weight * tf.reduce_mean(tf.nn.l2_loss(layer - layer_vec)) 393 | 394 | dx = tf.square(x[:, :height - 1, :width - 1, :] - x[:, :height - 1, 1:, :]) 395 | dy = tf.square(x[:, :height - 1, :width - 1, :] - x[:, 1:, :width - 1, :]) 396 | tv_loss = tv_loss_weight * tf.reduce_mean(tf.pow(dx + dy, 1.2)) 397 | l2_loss = l2_loss_weight * tf.reduce_mean(tf.nn.l2_loss(x)) 398 | 399 | ascent = tf.gradients(feature_loss + softmax_loss + tv_loss + l2_loss, x)[0] 400 | sess.run(tf.initialize_all_variables()) 401 | imgs = [] 402 | for it_i in range(n_iterations): 403 | this_res, this_feature_loss, this_softmax_loss, this_tv_loss, this_l2_loss = sess.run( 404 | [ascent, feature_loss, softmax_loss, tv_loss, l2_loss], feed_dict={x: img}) 405 | print('feature:', this_feature_loss, 406 | 'softmax:', this_softmax_loss, 407 | 'tv', this_tv_loss, 408 | 'l2', this_l2_loss) 409 | 410 | _apply(img, -this_res, it_i, **kwargs) 411 | imgs.append(deprocess(img[0])) 412 | 413 | if save_images is not None: 414 | imsave(os.path.join(save_images, 415 | 'frame{}.png'.format(it_i)), imgs[-1]) 416 | 417 | if save_gif is not None: 418 | gif.build_gif(imgs, saveto=save_gif) 419 | 420 | return imgs 421 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/dft.py: -------------------------------------------------------------------------------- 1 | """Summary. 
2 | 3 | #CADL 4 | Copyright Parag K. Mital 2016 5 | """ 6 | import numpy as np 7 | from scipy.signal import hann 8 | 9 | 10 | def ztoc(re, im): 11 | return np.sqrt(re**2 + im**2), np.angle(re + im * 1j) 12 | 13 | 14 | def ctoz(mag, phs): 15 | return mag * np.cos(phs), mag * np.sin(phs) 16 | 17 | 18 | def dft_np(signal, hop_size=256, fft_size=512): 19 | n_hops = len(signal) // hop_size 20 | s = [] 21 | hann_win = hann(fft_size) 22 | for hop_i in range(n_hops): 23 | frame = signal[(hop_i * hop_size):(hop_i * hop_size + fft_size)] 24 | frame = np.pad(frame, (0, fft_size - len(frame)), 'constant') 25 | frame *= hann_win 26 | s.append(frame) 27 | s = np.array(s) 28 | N = s.shape[-1] 29 | k = np.reshape(np.linspace(0.0, 2 * np.pi / N * (N // 2), N // 2), [1, N // 2]) 30 | x = np.reshape(np.linspace(0.0, N - 1, N), [N, 1]) 31 | freqs = np.dot(x, k) 32 | reals = np.dot(s, np.cos(freqs)) * (2.0 / N) 33 | imags = np.dot(s, np.sin(freqs)) * (2.0 / N) 34 | return reals, imags 35 | 36 | 37 | def idft_np(re, im, hop_size=256, fft_size=512): 38 | N = re.shape[1] * 2 39 | k = np.reshape(np.linspace(0.0, 2 * np.pi / N * (N // 2), N // 2), [N // 2, 1]) 40 | x = np.reshape(np.linspace(0.0, N - 1, N), [1, N]) 41 | freqs = np.dot(k, x) 42 | signal = np.zeros((re.shape[0] * hop_size + fft_size,)) 43 | recon = np.dot(re, np.cos(freqs)) + np.dot(im, np.sin(freqs)) 44 | for hop_i, frame in enumerate(recon): 45 | signal[(hop_i * hop_size): (hop_i * hop_size + fft_size)] += frame 46 | return signal 47 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/gan.py: -------------------------------------------------------------------------------- 1 | """Generative Adversarial Network. 2 | 3 | Creative Applications of Deep Learning w/ Tensorflow. 4 | Kadenze, Inc. 5 | Copyright Parag K. Mital, June 2016. 6 | """ 7 | import tensorflow as tf 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | import os 11 | import libs.batch_norm as bn 12 | from libs.utils import * 13 | 14 | 15 | def encoder(x, phase_train, dimensions=[], filter_sizes=[], 16 | convolutional=False, activation=tf.nn.relu, 17 | output_activation=tf.nn.sigmoid, reuse=False): 18 | """Encoder network codes input `x` to layers defined by dimensions. 19 | 20 | Parameters 21 | ---------- 22 | x : tf.Tensor 23 | Input to the encoder network, e.g. tf.Placeholder or tf.Variable 24 | phase_train : tf.Placeholder 25 | Placeholder defining whether the network is in train mode or not. 26 | Used for changing the behavior of batch normalization which updates 27 | its statistics during train mode. 28 | dimensions : list, optional 29 | List of the number of neurons in each layer (convolutional=False) -or- 30 | List of the number of filters in each layer (convolutional=True), e.g. 31 | [100, 100, 100, 100] for a 4-layer deep network with 100 in each layer. 32 | filter_sizes : list, optional 33 | List of the size of the kernel in each layer, e.g.: 34 | [3, 3, 3, 3] is a 4-layer deep network w/ 3 x 3 kernels in every layer. 35 | convolutional : bool, optional 36 | Whether or not to use convolutional layers. 37 | activation : fn, optional 38 | Function for applying an activation, e.g. tf.nn.relu 39 | output_activation : fn, optional 40 | Function for applying an activation on the last layer, e.g. tf.nn.relu 41 | reuse : bool, optional 42 | For each layer's variable scope, whether to reuse existing variables. 
43 | 44 | Returns 45 | ------- 46 | h : tf.Tensor 47 | Output tensor of the encoder 48 | """ 49 | # %% 50 | # ensure 2-d is converted to square tensor. 51 | if convolutional: 52 | x_tensor = to_tensor(x) 53 | else: 54 | x_tensor = tf.reshape( 55 | tensor=x, 56 | shape=[-1, dimensions[0]]) 57 | dimensions = dimensions[1:] 58 | current_input = x_tensor 59 | 60 | for layer_i, n_output in enumerate(dimensions): 61 | with tf.variable_scope(str(layer_i), reuse=reuse): 62 | if convolutional: 63 | h, W = conv2d( 64 | x=current_input, 65 | n_output=n_output, 66 | k_h=filter_sizes[layer_i], 67 | k_w=filter_sizes[layer_i], 68 | padding='SAME', 69 | reuse=reuse) 70 | else: 71 | h, W = linear( 72 | x=current_input, 73 | n_output=n_output, 74 | reuse=reuse) 75 | norm = bn.batch_norm( 76 | x=h, 77 | phase_train=phase_train, 78 | name='bn', 79 | reuse=reuse) 80 | output = activation(norm) 81 | 82 | current_input = output 83 | 84 | flattened = flatten(current_input, name='flatten', reuse=reuse) 85 | 86 | if output_activation is None: 87 | return flattened 88 | else: 89 | return output_activation(flattened) 90 | 91 | 92 | def decoder(z, 93 | phase_train, 94 | dimensions=[], 95 | channels=[], 96 | filter_sizes=[], 97 | convolutional=False, 98 | activation=tf.nn.relu, 99 | output_activation=tf.nn.tanh, 100 | reuse=None): 101 | """Decoder network codes input `x` to layers defined by dimensions. 102 | 103 | In contrast with `encoder`, this requires information on the number of 104 | output channels in each layer for convolution. Otherwise, it is mostly 105 | the same. 106 | 107 | Parameters 108 | ---------- 109 | z : tf.Tensor 110 | Input to the decoder network, e.g. tf.Placeholder or tf.Variable 111 | phase_train : tf.Placeholder 112 | Placeholder defining whether the network is in train mode or not. 113 | Used for changing the behavior of batch normalization which updates 114 | its statistics during train mode. 115 | dimensions : list, optional 116 | List of the number of neurons in each layer (convolutional=False) -or- 117 | List of the number of filters in each layer (convolutional=True), e.g. 118 | [100, 100, 100, 100] for a 4-layer deep network with 100 in each layer. 119 | channels : list, optional 120 | For decoding when convolutional=True, require the number of output 121 | channels in each layer. 122 | filter_sizes : list, optional 123 | List of the size of the kernel in each layer, e.g.: 124 | [3, 3, 3, 3] is a 4-layer deep network w/ 3 x 3 kernels in every layer. 125 | convolutional : bool, optional 126 | Whether or not to use convolutional layers. 127 | activation : fn, optional 128 | Function for applying an activation, e.g. tf.nn.relu 129 | output_activation : fn, optional 130 | Function for applying an activation on the last layer, e.g. tf.nn.relu 131 | reuse : bool, optional 132 | For each layer's variable scope, whether to reuse existing variables. 
133 | 134 | Returns 135 | ------- 136 | h : tf.Tensor 137 | Output tensor of the decoder 138 | """ 139 | 140 | if convolutional: 141 | with tf.variable_scope('fc', reuse=reuse): 142 | z1, W = linear( 143 | x=z, 144 | n_output=channels[0] * dimensions[0][0] * dimensions[0][1], 145 | reuse=reuse) 146 | rsz = tf.reshape( 147 | z1, [-1, dimensions[0][0], dimensions[0][1], channels[0]]) 148 | current_input = activation( 149 | features=bn.batch_norm( 150 | name='bn', 151 | x=rsz, 152 | phase_train=phase_train, 153 | reuse=reuse)) 154 | 155 | dimensions = dimensions[1:] 156 | channels = channels[1:] 157 | filter_sizes = filter_sizes[1:] 158 | else: 159 | current_input = z 160 | 161 | for layer_i, n_output in enumerate(dimensions): 162 | with tf.variable_scope(str(layer_i), reuse=reuse): 163 | 164 | if convolutional: 165 | h, W = deconv2d( 166 | x=current_input, 167 | n_output_h=n_output[0], 168 | n_output_w=n_output[1], 169 | n_output_ch=channels[layer_i], 170 | k_h=filter_sizes[layer_i], 171 | k_w=filter_sizes[layer_i], 172 | padding='SAME', 173 | reuse=reuse) 174 | else: 175 | h, W = linear( 176 | x=current_input, 177 | n_output=n_output, 178 | reuse=reuse) 179 | 180 | if layer_i < len(dimensions) - 1: 181 | norm = bn.batch_norm( 182 | x=h, 183 | phase_train=phase_train, 184 | name='bn', reuse=reuse) 185 | output = activation(norm) 186 | else: 187 | output = h 188 | current_input = output 189 | 190 | if output_activation is None: 191 | return current_input 192 | else: 193 | return output_activation(current_input) 194 | 195 | 196 | def generator(z, phase_train, output_h, output_w, convolutional=True, 197 | n_features=32, rgb=False, reuse=None): 198 | """Simple interface to build a decoder network given the input parameters. 199 | 200 | Parameters 201 | ---------- 202 | z : tf.Tensor 203 | Input to the generator, i.e. tf.Placeholder of tf.Variable 204 | phase_train : tf.Placeholder of type bool 205 | Whether or not the network should be trained (used for Batch Norm). 206 | output_h : int 207 | Final generated height 208 | output_w : int 209 | Final generated width 210 | convolutional : bool, optional 211 | Whether or not to build a convolutional generative network. 212 | n_features : int, optional 213 | Number of channels to use in the last hidden layer. 214 | rgb : bool, optional 215 | Whether or not the final generated image is RGB or not. 216 | reuse : None, optional 217 | Whether or not to reuse the variables if they are already created. 218 | 219 | Returns 220 | ------- 221 | x_tilde : tf.Tensor 222 | Output of the generator network. 
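    Examples
    --------
    A minimal sketch of wiring the generator into a graph; the latent size and
    output resolution below are illustrative choices, not requirements:

    >>> z = tf.placeholder(tf.float32, [None, 100], 'z')
    >>> phase_train = tf.placeholder(tf.bool, name='phase_train')
    >>> G = generator(z, phase_train, output_h=64, output_w=64, rgb=True)
    >>> # G has shape [None, 64, 64, 3] with a tanh range of (-1, 1)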
223 | """ 224 | n_channels = 3 if rgb else 1 225 | with tf.variable_scope('generator', reuse=reuse): 226 | return decoder(z=z, 227 | phase_train=phase_train, 228 | convolutional=convolutional, 229 | filter_sizes=[5, 5, 5, 5, 5], 230 | channels=[n_features * 8, n_features * 4, 231 | n_features * 2, n_features, n_channels], 232 | dimensions=[ 233 | [output_h // 16, output_w // 16], 234 | [output_h // 8, output_w // 8], 235 | [output_h // 4, output_w // 4], 236 | [output_h // 2, output_w // 2], 237 | [output_h, output_w]] 238 | if convolutional else [384, 512, n_features], 239 | activation=tf.nn.relu6, 240 | output_activation=tf.nn.tanh, 241 | reuse=reuse) 242 | 243 | 244 | def discriminator(x, phase_train, convolutional=True, 245 | n_features=32, rgb=False, reuse=False): 246 | """Summary 247 | 248 | Parameters 249 | ---------- 250 | x : TYPE 251 | Description 252 | phase_train : TYPE 253 | Description 254 | convolutional : bool, optional 255 | Description 256 | n_features : int, optional 257 | Description 258 | rgb : bool, optional 259 | Description 260 | reuse : bool, optional 261 | Description 262 | 263 | Returns 264 | ------- 265 | name : TYPE 266 | Description 267 | """ 268 | n_channels = 3 if rgb else 1 269 | with tf.variable_scope('discriminator', reuse=reuse): 270 | return encoder(x=x, 271 | phase_train=phase_train, 272 | convolutional=convolutional, 273 | filter_sizes=[5, 5, 5, 5], 274 | dimensions=[n_features, n_features * 2, 275 | n_features * 4, n_features * 8] 276 | if convolutional 277 | else [n_features, 128, 256], 278 | activation=tf.nn.relu6, 279 | output_activation=None, 280 | reuse=reuse) 281 | 282 | 283 | def GAN(input_shape, n_latent, n_features, rgb, debug=True): 284 | """Summary 285 | 286 | Parameters 287 | ---------- 288 | input_shape : TYPE 289 | Description 290 | n_latent : TYPE 291 | Description 292 | n_features : TYPE 293 | Description 294 | rgb : TYPE 295 | Description 296 | debug : bool, optional 297 | Description 298 | 299 | Returns 300 | ------- 301 | name : TYPE 302 | Description 303 | """ 304 | # Real input samples 305 | # n_features is either the image dimension or flattened number of features 306 | x = tf.placeholder(tf.float32, input_shape, 'x') 307 | x = (x / 127.5) - 1.0 308 | sum_x = tf.image_summary("x", x) 309 | phase_train = tf.placeholder(tf.bool, name='phase_train') 310 | 311 | # Discriminator for real input samples 312 | D_real_logits = discriminator( 313 | x, phase_train, n_features=n_features, rgb=rgb) 314 | D_real = tf.nn.sigmoid(D_real_logits) 315 | sum_D_real = tf.histogram_summary("D_real", D_real) 316 | 317 | # Generator tries to recreate input samples using latent feature vector 318 | z = tf.placeholder(tf.float32, [None, n_latent], 'z') 319 | sum_z = tf.histogram_summary("z", z) 320 | G = generator( 321 | z, phase_train, 322 | output_h=input_shape[1], output_w=input_shape[2], 323 | n_features=n_features, rgb=rgb) 324 | sum_G = tf.image_summary("G", G) 325 | 326 | # Discriminator for generated samples 327 | D_fake_logits = discriminator( 328 | G, phase_train, n_features=n_features, rgb=rgb, reuse=True) 329 | D_fake = tf.nn.sigmoid(D_fake_logits) 330 | sum_D_fake = tf.histogram_summary("D_fake", D_fake) 331 | 332 | with tf.variable_scope('loss'): 333 | # Loss functions 334 | loss_D_real = binary_cross_entropy( 335 | D_real, tf.ones_like(D_real), name='loss_D_real') 336 | loss_D_fake = binary_cross_entropy( 337 | D_fake, tf.zeros_like(D_fake), name='loss_D_fake') 338 | loss_D = tf.reduce_mean((loss_D_real + loss_D_fake) / 2) 339 | loss_G = 
tf.reduce_mean(binary_cross_entropy( 340 | D_fake, tf.ones_like(D_fake), name='loss_G')) 341 | 342 | # Summaries 343 | sum_loss_D_real = tf.histogram_summary("loss_D_real", loss_D_real) 344 | sum_loss_D_fake = tf.histogram_summary("loss_D_fake", loss_D_fake) 345 | sum_loss_D = tf.scalar_summary("loss_D", loss_D) 346 | sum_loss_G = tf.scalar_summary("loss_G", loss_G) 347 | sum_D_real = tf.histogram_summary("D_real", D_real) 348 | sum_D_fake = tf.histogram_summary("D_fake", D_fake) 349 | 350 | return { 351 | 'loss_D': loss_D, 352 | 'loss_G': loss_G, 353 | 'x': x, 354 | 'G': G, 355 | 'z': z, 356 | 'train': phase_train, 357 | 'sums': { 358 | 'G': sum_G, 359 | 'D_real': sum_D_real, 360 | 'D_fake': sum_D_fake, 361 | 'loss_G': sum_loss_G, 362 | 'loss_D': sum_loss_D, 363 | 'loss_D_real': sum_loss_D_real, 364 | 'loss_D_fake': sum_loss_D_fake, 365 | 'z': sum_z, 366 | 'x': sum_x 367 | } 368 | } 369 | 370 | 371 | def train_ds(): 372 | """Summary 373 | 374 | Returns 375 | ------- 376 | name : TYPE 377 | Description 378 | """ 379 | init_lr_g = 1e-4 380 | init_lr_d = 1e-4 381 | n_latent = 100 382 | n_epochs = 1000000 383 | batch_size = 200 384 | n_samples = 15 385 | input_shape = [218, 178, 3] 386 | crop_shape = [64, 64, 3] 387 | crop_factor = 0.8 388 | 389 | from libs.dataset_utils import create_input_pipeline 390 | from libs.datasets import CELEB 391 | 392 | files = CELEB() 393 | batch = create_input_pipeline( 394 | files=files, 395 | batch_size=batch_size, 396 | n_epochs=n_epochs, 397 | crop_shape=crop_shape, 398 | crop_factor=crop_factor, 399 | shape=input_shape) 400 | 401 | gan = GAN(input_shape=[None] + crop_shape, n_features=10, 402 | n_latent=n_latent, rgb=True, debug=False) 403 | 404 | vars_d = [v for v in tf.trainable_variables() 405 | if v.name.startswith('discriminator')] 406 | print('Training discriminator variables:') 407 | [print(v.name) for v in tf.trainable_variables() 408 | if v.name.startswith('discriminator')] 409 | 410 | vars_g = [v for v in tf.trainable_variables() 411 | if v.name.startswith('generator')] 412 | print('Training generator variables:') 413 | [print(v.name) for v in tf.trainable_variables() 414 | if v.name.startswith('generator')] 415 | zs = np.random.uniform( 416 | -1.0, 1.0, [4, n_latent]).astype(np.float32) 417 | zs = make_latent_manifold(zs, n_samples) 418 | 419 | lr_g = tf.placeholder(tf.float32, shape=[], name='learning_rate_g') 420 | lr_d = tf.placeholder(tf.float32, shape=[], name='learning_rate_d') 421 | 422 | try: 423 | from tf.contrib.layers import apply_regularization 424 | d_reg = apply_regularization( 425 | tf.contrib.layers.l2_regularizer(1e-6), vars_d) 426 | g_reg = apply_regularization( 427 | tf.contrib.layers.l2_regularizer(1e-6), vars_g) 428 | except: 429 | d_reg, g_reg = 0, 0 430 | 431 | opt_g = tf.train.AdamOptimizer(lr_g, name='Adam_g').minimize( 432 | gan['loss_G'] + g_reg, var_list=vars_g) 433 | opt_d = tf.train.AdamOptimizer(lr_d, name='Adam_d').minimize( 434 | gan['loss_D'] + d_reg, var_list=vars_d) 435 | 436 | # %% 437 | # We create a session to use the graph 438 | sess = tf.Session() 439 | init_op = tf.initialize_all_variables() 440 | 441 | saver = tf.train.Saver() 442 | sums = gan['sums'] 443 | G_sum_op = tf.merge_summary([ 444 | sums['G'], sums['loss_G'], sums['z'], 445 | sums['loss_D_fake'], sums['D_fake']]) 446 | D_sum_op = tf.merge_summary([ 447 | sums['loss_D'], sums['loss_D_real'], sums['loss_D_fake'], 448 | sums['z'], sums['x'], sums['D_real'], sums['D_fake']]) 449 | writer = tf.train.SummaryWriter("./logs", sess.graph_def) 450 | 451 
| coord = tf.train.Coordinator() 452 | threads = tf.train.start_queue_runners(sess=sess, coord=coord) 453 | sess.run(init_op) 454 | # g = tf.get_default_graph() 455 | # [print(op.name) for op in g.get_operations()] 456 | 457 | if os.path.exists("gan.ckpt"): 458 | saver.restore(sess, "gan.ckpt") 459 | print("GAN model restored.") 460 | 461 | fig, ax = plt.subplots(1, 1, figsize=(10, 10)) 462 | step_i, t_i = 0, 0 463 | loss_d = 1 464 | loss_g = 1 465 | n_loss_d, total_loss_d = 1, 1 466 | n_loss_g, total_loss_g = 1, 1 467 | try: 468 | while not coord.should_stop(): 469 | batch_xs = sess.run(batch) 470 | step_i += 1 471 | batch_zs = np.random.uniform( 472 | -1.0, 1.0, [batch_size, n_latent]).astype(np.float32) 473 | 474 | this_lr_g = min(1e-2, max(1e-6, init_lr_g * (loss_g / loss_d)**2)) 475 | this_lr_d = min(1e-2, max(1e-6, init_lr_d * (loss_d / loss_g)**2)) 476 | # this_lr_d *= ((1.0 - (step_i / 100000)) ** 2) 477 | # this_lr_g *= ((1.0 - (step_i / 100000)) ** 2) 478 | 479 | # if np.random.random() > (loss_g / (loss_d + loss_g)): 480 | if step_i % 3 == 1: 481 | loss_d, _, sum_d = sess.run([gan['loss_D'], opt_d, D_sum_op], 482 | feed_dict={gan['x']: batch_xs, 483 | gan['z']: batch_zs, 484 | gan['train']: True, 485 | lr_d: this_lr_d}) 486 | total_loss_d += loss_d 487 | n_loss_d += 1 488 | writer.add_summary(sum_d, step_i) 489 | print('%04d d* = lr: %0.08f, loss: %08.06f, \t' % 490 | (step_i, this_lr_d, loss_d) + 491 | 'g = lr: %0.08f, loss: %08.06f' % (this_lr_g, loss_g)) 492 | else: 493 | loss_g, _, sum_g = sess.run([gan['loss_G'], opt_g, G_sum_op], 494 | feed_dict={gan['z']: batch_zs, 495 | gan['train']: True, 496 | lr_g: this_lr_g}) 497 | total_loss_g += loss_g 498 | n_loss_g += 1 499 | writer.add_summary(sum_g, step_i) 500 | print('%04d d = lr: %0.08f, loss: %08.06f, \t' % 501 | (step_i, this_lr_d, loss_d) + 502 | 'g* = lr: %0.08f, loss: %08.06f' % (this_lr_g, loss_g)) 503 | 504 | if step_i % 100 == 0: 505 | samples = sess.run(gan['G'], feed_dict={ 506 | gan['z']: zs, 507 | gan['train']: False}) 508 | montage(np.clip((samples + 1) * 127.5, 0, 255).astype(np.uint8), 509 | 'imgs/gan_%08d.png' % t_i) 510 | t_i += 1 511 | 512 | print('generator loss:', total_loss_g / n_loss_g) 513 | print('discriminator loss:', total_loss_d / n_loss_d) 514 | 515 | # Save the variables to disk. 516 | save_path = saver.save(sess, "./gan.ckpt", 517 | global_step=step_i, 518 | write_meta_graph=False) 519 | print("Model saved in file: %s" % save_path) 520 | except tf.errors.OutOfRangeError: 521 | print('Done training -- epoch limit reached') 522 | finally: 523 | # One of the threads has issued an exception. So let's tell all the 524 | # threads to shutdown. 525 | coord.request_stop() 526 | 527 | # Wait until all threads have finished. 528 | coord.join(threads) 529 | 530 | # Clean up the session. 531 | sess.close() 532 | 533 | 534 | if __name__ == '__main__': 535 | train_ds() 536 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/gif.py: -------------------------------------------------------------------------------- 1 | """Utility for creating a GIF. 2 | 3 | Creative Applications of Deep Learning w/ Tensorflow. 4 | Kadenze, Inc. 5 | Copyright Parag K. Mital, June 2016. 
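Note: saving the animation uses matplotlib's 'imagemagick' writer, so ImageMagick
must be installed for `save_gif=True` to work.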
6 | """ 7 | import numpy as np 8 | import matplotlib.pyplot as plt 9 | import matplotlib.animation as animation 10 | 11 | 12 | def build_gif(imgs, interval=0.1, dpi=72, 13 | save_gif=True, saveto='animation.gif', 14 | show_gif=False, cmap=None): 15 | """Take an array or list of images and create a GIF. 16 | 17 | Parameters 18 | ---------- 19 | imgs : np.ndarray or list 20 | List of images to create a GIF of 21 | interval : float, optional 22 | Spacing in seconds between successive images. 23 | dpi : int, optional 24 | Dots per inch. 25 | save_gif : bool, optional 26 | Whether or not to save the GIF. 27 | saveto : str, optional 28 | Filename of GIF to save. 29 | show_gif : bool, optional 30 | Whether or not to render the GIF using plt. 31 | cmap : None, optional 32 | Optional colormap to apply to the images. 33 | 34 | Returns 35 | ------- 36 | ani : matplotlib.animation.ArtistAnimation 37 | The artist animation from matplotlib. Likely not useful. 38 | """ 39 | imgs = np.asarray(imgs) 40 | h, w, *c = imgs[0].shape 41 | fig, ax = plt.subplots(figsize=(np.round(w / dpi), np.round(h / dpi))) 42 | fig.subplots_adjust(bottom=0) 43 | fig.subplots_adjust(top=1) 44 | fig.subplots_adjust(right=1) 45 | fig.subplots_adjust(left=0) 46 | ax.set_axis_off() 47 | 48 | if cmap is not None: 49 | axs = list(map(lambda x: [ 50 | ax.imshow(x, cmap=cmap)], imgs)) 51 | else: 52 | axs = list(map(lambda x: [ 53 | ax.imshow(x)], imgs)) 54 | 55 | ani = animation.ArtistAnimation( 56 | fig, axs, interval=interval*1000, repeat_delay=0, blit=True) 57 | 58 | if save_gif: 59 | ani.save(saveto, writer='imagemagick', dpi=dpi) 60 | 61 | if show_gif: 62 | plt.show() 63 | 64 | return ani 65 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/i2v.py: -------------------------------------------------------------------------------- 1 | """ 2 | Creative Applications of Deep Learning w/ Tensorflow. 3 | Kadenze, Inc. 4 | Copyright Parag K. Mital, June 2016. 5 | """ 6 | import json 7 | import numpy as np 8 | from tensorflow.python.platform import gfile 9 | import tensorflow as tf 10 | import matplotlib.pyplot as plt 11 | from skimage.transform import resize as imresize 12 | from .utils import download 13 | 14 | 15 | def i2v_download(): 16 | """Download a pretrained i2v network.""" 17 | model = download('https://s3.amazonaws.com/cadl/models/illust2vec.tfmodel') 18 | return model 19 | 20 | 21 | def i2v_tag_download(): 22 | """Download a pretrained i2v network.""" 23 | model = download('https://s3.amazonaws.com/cadl/models/illust2vec_tag.tfmodel') 24 | tags = download('https://s3.amazonaws.com/cadl/models/tag_list.json') 25 | return model, tags 26 | 27 | 28 | def get_i2v_model(): 29 | """Get a pretrained i2v network. 30 | 31 | Returns 32 | ------- 33 | net : dict 34 | {'graph_def': graph_def, 'labels': synsets} 35 | where the graph_def is a tf.GraphDef and the synsets 36 | map an integer label from 0-1000 to a list of names 37 | """ 38 | # Download the trained net 39 | model = i2v_download() 40 | 41 | # Load the saved graph 42 | with gfile.GFile(model, 'rb') as f: 43 | graph_def = tf.GraphDef() 44 | try: 45 | graph_def.ParseFromString(f.read()) 46 | except: 47 | print('try adding PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python' + 48 | 'to environment. 
e.g.:\n' + 49 | 'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python ipython\n' + 50 | 'See here for info: ' + 51 | 'https://github.com/tensorflow/tensorflow/issues/582') 52 | 53 | return {'graph_def': graph_def} 54 | 55 | 56 | def get_i2v_tag_model(): 57 | """Get a pretrained i2v tag network. 58 | 59 | Returns 60 | ------- 61 | net : dict 62 | {'graph_def': graph_def, 'labels': synsets} 63 | where the graph_def is a tf.GraphDef and the synsets 64 | map an integer label from 0-1000 to a list of names 65 | """ 66 | # Download the trained net 67 | model, tags = i2v_tag_download() 68 | tags = json.load(open(tags, 'r')) 69 | 70 | # Load the saved graph 71 | with gfile.GFile(model, 'rb') as f: 72 | graph_def = tf.GraphDef() 73 | try: 74 | graph_def.ParseFromString(f.read()) 75 | except: 76 | print('try adding PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python' + 77 | 'to environment. e.g.:\n' + 78 | 'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python ipython\n' + 79 | 'See here for info: ' + 80 | 'https://github.com/tensorflow/tensorflow/issues/582') 81 | 82 | return { 83 | 'graph_def': graph_def, 84 | 'labels': tags, 85 | 'preprocess': preprocess, 86 | 'deprocess': deprocess 87 | } 88 | 89 | 90 | def preprocess(img, crop=True, resize=True, dsize=(224, 224)): 91 | mean_img = np.array([164.76139251, 167.47864617, 181.13838569]) 92 | if img.dtype == np.uint8: 93 | img = (img[..., ::-1] - mean_img).astype(np.float32) 94 | else: 95 | img = img[..., ::-1] * 255.0 - mean_img 96 | 97 | if crop: 98 | short_edge = min(img.shape[:2]) 99 | yy = int((img.shape[0] - short_edge) / 2) 100 | xx = int((img.shape[1] - short_edge) / 2) 101 | crop_img = img[yy: yy + short_edge, xx: xx + short_edge] 102 | else: 103 | crop_img = img 104 | if resize: 105 | norm_img = imresize(crop_img, dsize, preserve_range=True) 106 | else: 107 | norm_img = crop_img 108 | return (norm_img).astype(np.float32) 109 | 110 | 111 | def deprocess(img): 112 | mean_img = np.array([164.76139251, 167.47864617, 181.13838569]) 113 | processed = (img + mean_img)[..., ::-1] 114 | return np.clip(processed, 0, 255).astype(np.uint8) 115 | # return ((img / np.max(np.abs(img))) * 127.5 + 116 | # 127.5).astype(np.uint8) 117 | 118 | 119 | def test_i2v(): 120 | """Loads the i2v network and applies it to a test image. 121 | """ 122 | with tf.Session() as sess: 123 | net = get_i2v_model() 124 | tf.import_graph_def(net['graph_def'], name='i2v') 125 | g = tf.get_default_graph() 126 | names = [op.name for op in g.get_operations()] 127 | x = g.get_tensor_by_name(names[0] + ':0') 128 | softmax = g.get_tensor_by_name(names[-3] + ':0') 129 | 130 | from skimage import data 131 | img = preprocess(data.coffee())[np.newaxis] 132 | res = np.squeeze(softmax.eval(feed_dict={x: img})) 133 | print([(res[idx], net['labels'][idx]) 134 | for idx in res.argsort()[-5:][::-1]]) 135 | 136 | """Let's visualize the network's gradient activation 137 | when backpropagated to the original input image. 
This 138 | is effectively telling us which pixels contribute to the 139 | predicted class or given neuron""" 140 | pools = [name for name in names if 'pool' in name.split('/')[-1]] 141 | fig, axs = plt.subplots(1, len(pools)) 142 | for pool_i, poolname in enumerate(pools): 143 | pool = g.get_tensor_by_name(poolname + ':0') 144 | pool.get_shape() 145 | neuron = tf.reduce_max(pool, 1) 146 | saliency = tf.gradients(neuron, x) 147 | neuron_idx = tf.arg_max(pool, 1) 148 | this_res = sess.run([saliency[0], neuron_idx], 149 | feed_dict={x: img}) 150 | 151 | grad = this_res[0][0] / np.max(np.abs(this_res[0])) 152 | axs[pool_i].imshow((grad * 128 + 128).astype(np.uint8)) 153 | axs[pool_i].set_title(poolname) 154 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/inception.py: -------------------------------------------------------------------------------- 1 | """ 2 | Creative Applications of Deep Learning w/ Tensorflow. 3 | Kadenze, Inc. 4 | Copyright Parag K. Mital, June 2016. 5 | """ 6 | import os 7 | import numpy as np 8 | from tensorflow.python.platform import gfile 9 | import tensorflow as tf 10 | import matplotlib.pyplot as plt 11 | from skimage.transform import resize as imresize 12 | from .utils import download_and_extract_tar, download_and_extract_zip 13 | 14 | 15 | def inception_download(data_dir='inception', version='v5'): 16 | """Download a pretrained inception network. 17 | 18 | Parameters 19 | ---------- 20 | data_dir : str, optional 21 | Location of the pretrained inception network download. 22 | version : str, optional 23 | Version of the model: ['v3'] or 'v5'. 24 | """ 25 | if version == 'v3': 26 | download_and_extract_tar( 27 | 'https://s3.amazonaws.com/cadl/models/inception-2015-12-05.tgz', 28 | data_dir) 29 | return (os.path.join(data_dir, 'classify_image_graph_def.pb'), 30 | os.path.join(data_dir, 'imagenet_synset_to_human_label_map.txt')) 31 | else: 32 | download_and_extract_zip( 33 | 'https://s3.amazonaws.com/cadl/models/inception5h.zip', data_dir) 34 | return (os.path.join(data_dir, 'tensorflow_inception_graph.pb'), 35 | os.path.join(data_dir, 'imagenet_comp_graph_label_strings.txt')) 36 | 37 | 38 | def get_inception_model(data_dir='inception', version='v5'): 39 | """Get a pretrained inception network. 40 | 41 | Parameters 42 | ---------- 43 | data_dir : str, optional 44 | Location of the pretrained inception network download. 45 | version : str, optional 46 | Version of the model: ['v3'] or 'v5'. 47 | 48 | Returns 49 | ------- 50 | net : dict 51 | {'graph_def': graph_def, 'labels': synsets} 52 | where the graph_def is a tf.GraphDef and the synsets 53 | map an integer label from 0-1000 to a list of names 54 | """ 55 | # Download the trained net 56 | model, labels = inception_download(data_dir, version) 57 | 58 | # Parse the ids and synsets 59 | txt = open(labels).readlines() 60 | synsets = [(key, val.strip()) for key, val in enumerate(txt)] 61 | 62 | # Load the saved graph 63 | with gfile.GFile(model, 'rb') as f: 64 | graph_def = tf.GraphDef() 65 | try: 66 | graph_def.ParseFromString(f.read()) 67 | except: 68 | print('try adding PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python' + 69 | 'to environment. 
e.g.:\n' + 70 | 'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python ipython\n' + 71 | 'See here for info: ' + 72 | 'https://github.com/tensorflow/tensorflow/issues/582') 73 | return { 74 | 'graph_def': graph_def, 75 | 'labels': synsets, 76 | 'preprocess': preprocess, 77 | 'deprocess': deprocess 78 | } 79 | 80 | 81 | def preprocess(img, crop=True, resize=True, dsize=(299, 299)): 82 | if img.dtype != np.uint8: 83 | img *= 255.0 84 | 85 | if crop: 86 | crop = np.min(img.shape[:2]) 87 | r = (img.shape[0] - crop) // 2 88 | c = (img.shape[1] - crop) // 2 89 | cropped = img[r: r + crop, c: c + crop] 90 | else: 91 | cropped = img 92 | 93 | if resize: 94 | rsz = imresize(cropped, dsize, preserve_range=True) 95 | else: 96 | rsz = cropped 97 | 98 | if rsz.ndim == 2: 99 | rsz = rsz[..., np.newaxis] 100 | 101 | rsz = rsz.astype(np.float32) 102 | # subtract imagenet mean 103 | return (rsz - 117) 104 | 105 | 106 | def deprocess(img): 107 | return np.clip(img + 117, 0, 255).astype(np.uint8) 108 | 109 | 110 | def test_inception(): 111 | """Loads the inception network and applies it to a test image. 112 | """ 113 | with tf.Session() as sess: 114 | net = get_inception_model() 115 | tf.import_graph_def(net['graph_def'], name='inception') 116 | g = tf.get_default_graph() 117 | names = [op.name for op in g.get_operations()] 118 | x = g.get_tensor_by_name(names[0] + ':0') 119 | softmax = g.get_tensor_by_name(names[-3] + ':0') 120 | 121 | from skimage import data 122 | img = preprocess(data.coffee())[np.newaxis] 123 | res = np.squeeze(softmax.eval(feed_dict={x: img})) 124 | print([(res[idx], net['labels'][idx]) 125 | for idx in res.argsort()[-5:][::-1]]) 126 | 127 | """Let's visualize the network's gradient activation 128 | when backpropagated to the original input image. This 129 | is effectively telling us which pixels contribute to the 130 | predicted class or given neuron""" 131 | pools = [name for name in names if 'pool' in name.split('/')[-1]] 132 | fig, axs = plt.subplots(1, len(pools)) 133 | for pool_i, poolname in enumerate(pools): 134 | pool = g.get_tensor_by_name(poolname + ':0') 135 | pool.get_shape() 136 | neuron = tf.reduce_max(pool, 1) 137 | saliency = tf.gradients(neuron, x) 138 | neuron_idx = tf.arg_max(pool, 1) 139 | this_res = sess.run([saliency[0], neuron_idx], 140 | feed_dict={x: img}) 141 | 142 | grad = this_res[0][0] / np.max(np.abs(this_res[0])) 143 | axs[pool_i].imshow((grad * 128 + 128).astype(np.uint8)) 144 | axs[pool_i].set_title(poolname) 145 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/nb_utils.py: -------------------------------------------------------------------------------- 1 | """Utility for displaying Tensorflow graphs from: 2 | https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/deepdream/deepdream.ipynb 3 | """ 4 | import tensorflow as tf 5 | import numpy as np 6 | from IPython.display import display, HTML 7 | 8 | 9 | def show_graph(graph_def): 10 | # Helper functions for TF Graph visualization 11 | def _strip_consts(graph_def, max_const_size=32): 12 | """Strip large constant values from graph_def.""" 13 | strip_def = tf.GraphDef() 14 | for n0 in graph_def.node: 15 | n = strip_def.node.add() 16 | n.MergeFrom(n0) 17 | if n.op == 'Const': 18 | tensor = n.attr['value'].tensor 19 | size = len(tensor.tensor_content) 20 | if size > max_const_size: 21 | tensor.tensor_content = "".format(size).encode() 22 | return strip_def 23 | 24 | def _rename_nodes(graph_def, 
rename_func): 25 | res_def = tf.GraphDef() 26 | for n0 in graph_def.node: 27 | n = res_def.node.add() 28 | n.MergeFrom(n0) 29 | n.name = rename_func(n.name) 30 | for i, s in enumerate(n.input): 31 | n.input[i] = rename_func(s) if s[0] != '^' else '^' + rename_func(s[1:]) 32 | return res_def 33 | 34 | def _show_entire_graph(graph_def, max_const_size=32): 35 | """Visualize TensorFlow graph.""" 36 | if hasattr(graph_def, 'as_graph_def'): 37 | graph_def = graph_def.as_graph_def() 38 | strip_def = _strip_consts(graph_def, max_const_size=max_const_size) 39 | code = """ 40 | 45 | 46 |
<script>
  function load() {{
    document.getElementById("{id}").pbtxt = {data};
  }}
</script>
<link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
<div style="height:600px">
  <tf-graph-basic id="{id}"></tf-graph-basic>
</div>
49 | """.format(data=repr(str(strip_def)), id='graph' + str(np.random.rand())) 50 | 51 | iframe = """ 52 | 53 | """.format(code.replace('"', '"')) 54 | display(HTML(iframe)) 55 | # Visualizing the network graph. Be sure expand the "mixed" nodes to see their 56 | # internal structure. We are going to visualize "Conv2D" nodes. 57 | tmp_def = _rename_nodes(graph_def, lambda s: "/".join(s.split('_', 1))) 58 | _show_entire_graph(tmp_def) 59 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/stylenet.py: -------------------------------------------------------------------------------- 1 | """Style Net w/ tests for Video Style Net. 2 | 3 | Video Style Net requires OpenCV 3.0.0+ w/ Contrib for Python to be installed. 4 | 5 | Creative Applications of Deep Learning w/ Tensorflow. 6 | Kadenze, Inc. 7 | Copyright Parag K. Mital, June 2016. 8 | """ 9 | import tensorflow as tf 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | import os 13 | from . import vgg16 14 | from . import gif 15 | 16 | 17 | def make_4d(img): 18 | """Create a 4-dimensional N x H x W x C image. 19 | 20 | Parameters 21 | ---------- 22 | img : np.ndarray 23 | Given image as H x W x C or H x W. 24 | 25 | Returns 26 | ------- 27 | img : np.ndarray 28 | N x H x W x C image. 29 | 30 | Raises 31 | ------ 32 | ValueError 33 | Unexpected number of dimensions. 34 | """ 35 | if img.ndim == 2: 36 | img = np.expand_dims(img[np.newaxis], 3) 37 | elif img.ndim == 3: 38 | img = img[np.newaxis] 39 | elif img.ndim == 4: 40 | return img 41 | else: 42 | raise ValueError('Incorrect dimensions for image!') 43 | return img 44 | 45 | 46 | def stylize(content_img, style_img, base_img=None, saveto=None, gif_step=5, 47 | n_iterations=100, style_weight=1.0, content_weight=1.0): 48 | """Stylization w/ the given content and style images. 49 | 50 | Follows the approach in Leon Gatys et al. 51 | 52 | Parameters 53 | ---------- 54 | content_img : np.ndarray 55 | Image to use for finding the content features. 56 | style_img : TYPE 57 | Image to use for finding the style features. 58 | base_img : None, optional 59 | Image to use for the base content. Can be noise or an existing image. 60 | If None, the content image will be used. 61 | saveto : str, optional 62 | Name of GIF image to write to, e.g. "stylization.gif" 63 | gif_step : int, optional 64 | Modulo of iterations to save the current stylization. 65 | n_iterations : int, optional 66 | Number of iterations to run for. 67 | style_weight : float, optional 68 | Weighting on the style features. 69 | content_weight : float, optional 70 | Weighting on the content features. 71 | 72 | Returns 73 | ------- 74 | stylization : np.ndarray 75 | Final iteration of the stylization. 
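    Examples
    --------
    A minimal sketch; 'content.png' and 'style.png' are placeholder filenames,
    not files shipped with this repository:

    >>> content = plt.imread('content.png')
    >>> style = plt.imread('style.png')
    >>> result = stylize(content, style, n_iterations=100, saveto='stylization.gif')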
76 | """ 77 | # Preprocess both content and style images 78 | content_img = make_4d(content_img) 79 | style_img = make_4d(style_img) 80 | if base_img is None: 81 | base_img = content_img 82 | else: 83 | base_img = make_4d(base_img) 84 | 85 | # Get Content and Style features 86 | net = vgg16.get_vgg_model() 87 | g = tf.Graph() 88 | with tf.Session(graph=g) as sess: 89 | tf.import_graph_def(net['graph_def'], name='vgg') 90 | names = [op.name for op in g.get_operations()] 91 | x = g.get_tensor_by_name(names[0] + ':0') 92 | content_layer = 'vgg/conv3_2/conv3_2:0' 93 | content_features = g.get_tensor_by_name( 94 | content_layer).eval(feed_dict={ 95 | x: content_img, 96 | 'vgg/dropout_1/random_uniform:0': [[1.0]], 97 | 'vgg/dropout/random_uniform:0': [[1.0]]}) 98 | style_layers = ['vgg/conv1_1/conv1_1:0', 99 | 'vgg/conv2_1/conv2_1:0', 100 | 'vgg/conv3_1/conv3_1:0', 101 | 'vgg/conv4_1/conv4_1:0', 102 | 'vgg/conv5_1/conv5_1:0'] 103 | style_activations = [] 104 | for style_i in style_layers: 105 | style_activation_i = g.get_tensor_by_name(style_i).eval( 106 | feed_dict={ 107 | x: style_img, 108 | 'vgg/dropout_1/random_uniform:0': [[1.0]], 109 | 'vgg/dropout/random_uniform:0': [[1.0]]}) 110 | style_activations.append(style_activation_i) 111 | style_features = [] 112 | for style_activation_i in style_activations: 113 | s_i = np.reshape(style_activation_i, 114 | [-1, style_activation_i.shape[-1]]) 115 | gram_matrix = np.matmul(s_i.T, s_i) / s_i.size 116 | style_features.append(gram_matrix.astype(np.float32)) 117 | 118 | # Optimize both 119 | g = tf.Graph() 120 | with tf.Session(graph=g) as sess: 121 | net_input = tf.Variable(base_img) 122 | tf.import_graph_def( 123 | net['graph_def'], 124 | name='vgg', 125 | input_map={'images:0': net_input}) 126 | 127 | content_loss = tf.nn.l2_loss((g.get_tensor_by_name(content_layer) - 128 | content_features) / 129 | content_features.size) 130 | style_loss = np.float32(0.0) 131 | for style_layer_i, style_gram_i in zip(style_layers, style_features): 132 | layer_i = g.get_tensor_by_name(style_layer_i) 133 | layer_shape = layer_i.get_shape().as_list() 134 | layer_size = layer_shape[1] * layer_shape[2] * layer_shape[3] 135 | layer_flat = tf.reshape(layer_i, [-1, layer_shape[3]]) 136 | gram_matrix = tf.matmul( 137 | tf.transpose(layer_flat), layer_flat) / layer_size 138 | style_loss = tf.add( 139 | style_loss, tf.nn.l2_loss( 140 | (gram_matrix - style_gram_i) / 141 | np.float32(style_gram_i.size))) 142 | loss = content_weight * content_loss + style_weight * style_loss 143 | optimizer = tf.train.AdamOptimizer(0.01).minimize(loss) 144 | 145 | sess.run(tf.initialize_all_variables()) 146 | imgs = [] 147 | for it_i in range(n_iterations): 148 | _, this_loss, synth = sess.run( 149 | [optimizer, loss, net_input], 150 | feed_dict={ 151 | 'vgg/dropout_1/random_uniform:0': np.ones( 152 | g.get_tensor_by_name( 153 | 'vgg/dropout_1/random_uniform:0' 154 | ).get_shape().as_list()), 155 | 'vgg/dropout/random_uniform:0': np.ones( 156 | g.get_tensor_by_name( 157 | 'vgg/dropout/random_uniform:0' 158 | ).get_shape().as_list()) 159 | }) 160 | print("iteration %d, loss: %f, range: (%f - %f)" % 161 | (it_i, this_loss, np.min(synth), np.max(synth)), end='\r') 162 | if it_i % gif_step == 0: 163 | imgs.append(np.clip(synth[0], 0, 1)) 164 | if saveto is not None: 165 | gif.build_gif(imgs, saveto=saveto) 166 | return np.clip(synth[0], 0, 1) 167 | 168 | 169 | def warp_img(img, dx, dy): 170 | """Apply the motion vectors to the given image. 
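    Each pixel (row, col) of `img` is copied forward to the location offset by
    (dy[row, col], dx[row, col]), clipped to the image bounds.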
171 | 172 | Parameters 173 | ---------- 174 | img : np.ndarray 175 | Input image to apply motion to. 176 | dx : np.ndarray 177 | H x W matrix defining the magnitude of the X vector 178 | dy : np.ndarray 179 | H x W matrix defining the magnitude of the Y vector 180 | 181 | Returns 182 | ------- 183 | img : np.ndarray 184 | Image with pixels warped according to dx, dy. 185 | """ 186 | warped = img.copy() 187 | for row_i in range(img.shape[0]): 188 | for col_i in range(img.shape[1]): 189 | dx_i = int(np.round(dx[row_i, col_i])) 190 | dy_i = int(np.round(dy[row_i, col_i])) 191 | sample_dx = np.clip(dx_i + col_i, 0, img.shape[0] - 1) 192 | sample_dy = np.clip(dy_i + row_i, 0, img.shape[1] - 1) 193 | warped[sample_dy, sample_dx, :] = img[row_i, col_i, :] 194 | return warped 195 | 196 | 197 | def test_video(style_img='arles.jpg', videodir='kurosawa'): 198 | r"""Test for artistic stylization using video. 199 | 200 | This requires the python installation of OpenCV for the Deep Flow algorithm. 201 | If cv2 is not found, then there will be reduced "temporal coherence". 202 | 203 | Unfortunately, installing opencv for python3 is not the easiest thing to do. 204 | OSX users can install this using: 205 | 206 | $ brew install opencv --with-python3 --with-contrib 207 | 208 | then will have to symlink the libraries. I think you can do this w/: 209 | 210 | $ brew link --force opencv3 211 | 212 | But the problems start to arise depending on which python you have 213 | installed, and it is always a mess w/ homebrew. Sorry! 214 | 215 | Your best bet is installing from source. Something along 216 | these lines should get you there: 217 | 218 | $ cd ~ 219 | $ git clone https://github.com/Itseez/opencv.git 220 | $ cd opencv 221 | $ git checkout 3.1.0 222 | $ cd ~ 223 | $ git clone https://github.com/Itseez/opencv_contrib.git 224 | $ cd opencv_contrib 225 | $ git checkout 3.1.0 226 | $ cd ~/opencv 227 | $ mkdir build 228 | $ cd build 229 | $ cmake -D CMAKE_BUILD_TYPE=RELEASE \ 230 | -D CMAKE_INSTALL_PREFIX=/usr/local \ 231 | -D INSTALL_C_EXAMPLES=OFF \ 232 | -D INSTALL_PYTHON_EXAMPLES=OFF \ 233 | -D OPENCV_EXTRA_MODULES_PATH=~/opencv_contrib/modules \ 234 | -D BUILD_EXAMPLES=OFF .. 235 | 236 | Parameters 237 | ---------- 238 | style_img : str, optional 239 | Location to style image 240 | videodir : str, optional 241 | Location to directory containing images of each frame to stylize. 242 | 243 | Returns 244 | ------- 245 | imgs : list of np.ndarray 246 | Stylized images for each frame. 
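    Examples
    --------
    A minimal sketch using the default arguments; 'arles.jpg' and a 'kurosawa'
    directory of '.png' frames must already exist on disk:

    >>> frames = test_video(style_img='arles.jpg', videodir='kurosawa')
    >>> gif.build_gif(frames, saveto='video_stylized.gif')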
247 | """ 248 | has_cv2 = True 249 | try: 250 | import cv2 251 | has_cv2 = True 252 | optflow = cv2.optflow.createOptFlow_DeepFlow() 253 | except ImportError: 254 | has_cv2 = False 255 | 256 | style_img = plt.imread(style_img) 257 | content_files = [os.path.join(videodir, f) 258 | for f in os.listdir(videodir) if f.endswith('.png')] 259 | content_img = plt.imread(content_files[0]) 260 | from scipy.misc import imresize 261 | style_img = imresize(style_img, (448, 448)).astype(np.float32) / 255.0 262 | content_img = imresize(content_img, (448, 448)).astype(np.float32) / 255.0 263 | if has_cv2: 264 | prev_lum = cv2.cvtColor(content_img, cv2.COLOR_RGB2HSV)[:, :, 2] 265 | else: 266 | prev_lum = (content_img[..., 0] * 0.3 + 267 | content_img[..., 1] * 0.59 + 268 | content_img[..., 2] * 0.11) 269 | imgs = [] 270 | stylized = stylize(content_img, style_img, content_weight=5.0, 271 | style_weight=0.5, n_iterations=50) 272 | plt.imsave(fname=content_files[0] + 'stylized.png', arr=stylized) 273 | imgs.append(stylized) 274 | for f in content_files[1:]: 275 | content_img = plt.imread(f) 276 | content_img = imresize(content_img, (448, 448)).astype(np.float32) / 255.0 277 | if has_cv2: 278 | lum = cv2.cvtColor(content_img, cv2.COLOR_RGB2HSV)[:, :, 2] 279 | flow = optflow.calc(prev_lum, lum, None) 280 | warped = warp_img(stylized, flow[..., 0], flow[..., 1]) 281 | stylized = stylize(content_img, style_img, content_weight=5.0, 282 | style_weight=0.5, base_img=warped, n_iterations=50) 283 | else: 284 | lum = (content_img[..., 0] * 0.3 + 285 | content_img[..., 1] * 0.59 + 286 | content_img[..., 2] * 0.11) 287 | stylized = stylize(content_img, style_img, content_weight=5.0, 288 | style_weight=0.5, base_img=None, n_iterations=50) 289 | imgs.append(stylized) 290 | plt.imsave(fname=f + 'stylized.png', arr=stylized) 291 | prev_lum = lum 292 | return imgs 293 | 294 | 295 | def test(): 296 | """Test for artistic stylization.""" 297 | from six.moves import urllib 298 | f = ('https://upload.wikimedia.org/wikipedia/commons/thumb/5/54/' + 299 | 'Claude_Monet%2C_Impression%2C_soleil_levant.jpg/617px-Claude_Monet' + 300 | '%2C_Impression%2C_soleil_levant.jpg?download') 301 | filepath, _ = urllib.request.urlretrieve(f, f.split('/')[-1], None) 302 | style = plt.imread(filepath) 303 | 304 | f = ('https://upload.wikimedia.org/wikipedia/commons/thumb/a/ae/' + 305 | 'El_jard%C3%ADn_de_las_Delicias%2C_de_El_Bosco.jpg/640px-El_jard' + 306 | '%C3%ADn_de_las_Delicias%2C_de_El_Bosco.jpg') 307 | filepath, _ = urllib.request.urlretrieve(f, f.split('/')[-1], None) 308 | content = plt.imread(filepath) 309 | 310 | stylize(content, style) 311 | 312 | 313 | if __name__ == '__main__': 314 | test_video() 315 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/utils.py: -------------------------------------------------------------------------------- 1 | """Utilities used in the Kadenze Academy Course on Deep Learning w/ Tensorflow. 2 | 3 | Creative Applications of Deep Learning w/ Tensorflow. 4 | Kadenze, Inc. 5 | Parag K. Mital 6 | 7 | Copyright Parag K. Mital, June 2016. 8 | """ 9 | import matplotlib.pyplot as plt 10 | import tensorflow as tf 11 | import urllib 12 | import numpy as np 13 | import zipfile 14 | import os 15 | from scipy.io import wavfile 16 | 17 | 18 | def download(path): 19 | """Use urllib to download a file. 
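    If a file with the same name already exists in the working directory, the
    download is skipped and that filename is returned instead.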
20 | 21 | Parameters 22 | ---------- 23 | path : str 24 | Url to download 25 | 26 | Returns 27 | ------- 28 | path : str 29 | Location of downloaded file. 30 | """ 31 | import os 32 | from six.moves import urllib 33 | 34 | fname = path.split('/')[-1] 35 | if os.path.exists(fname): 36 | return fname 37 | 38 | print('Downloading ' + path) 39 | 40 | def progress(count, block_size, total_size): 41 | if count % 20 == 0: 42 | print('Downloaded %02.02f/%02.02f MB' % ( 43 | count * block_size / 1024.0 / 1024.0, 44 | total_size / 1024.0 / 1024.0), end='\r') 45 | 46 | filepath, _ = urllib.request.urlretrieve( 47 | path, filename=fname, reporthook=progress) 48 | return filepath 49 | 50 | 51 | def download_and_extract_tar(path, dst): 52 | """Download and extract a tar file. 53 | 54 | Parameters 55 | ---------- 56 | path : str 57 | Url to tar file to download. 58 | dst : str 59 | Location to save tar file contents. 60 | """ 61 | import tarfile 62 | filepath = download(path) 63 | if not os.path.exists(dst): 64 | os.makedirs(dst) 65 | tarfile.open(filepath, 'r:gz').extractall(dst) 66 | 67 | 68 | def download_and_extract_zip(path, dst): 69 | """Download and extract a zip file. 70 | 71 | Parameters 72 | ---------- 73 | path : str 74 | Url to zip file to download. 75 | dst : str 76 | Location to save zip file contents. 77 | """ 78 | import zipfile 79 | filepath = download(path) 80 | if not os.path.exists(dst): 81 | os.makedirs(dst) 82 | zf = zipfile.ZipFile(file=filepath) 83 | zf.extractall(dst) 84 | 85 | 86 | def load_audio(filename, b_normalize=True): 87 | """Load the audiofile at the provided filename using scipy.io.wavfile. 88 | 89 | Optionally normalizes the audio to the maximum value. 90 | 91 | Parameters 92 | ---------- 93 | filename : str 94 | File to load. 95 | b_normalize : bool, optional 96 | Normalize to the maximum value. 97 | """ 98 | sr, s = wavfile.read(filename) 99 | if b_normalize: 100 | s = s.astype(np.float32) 101 | s = (s / np.max(np.abs(s))) 102 | s -= np.mean(s) 103 | return s 104 | 105 | 106 | def corrupt(x): 107 | """Take an input tensor and add uniform masking. 108 | 109 | Parameters 110 | ---------- 111 | x : Tensor/Placeholder 112 | Input to corrupt. 113 | Returns 114 | ------- 115 | x_corrupted : Tensor 116 | 50 pct of values corrupted. 117 | """ 118 | return tf.mul(x, tf.cast(tf.random_uniform(shape=tf.shape(x), 119 | minval=0, 120 | maxval=2, 121 | dtype=tf.int32), tf.float32)) 122 | 123 | 124 | def interp(l, r, n_samples): 125 | """Intepolate between the arrays l and r, n_samples times. 126 | 127 | Parameters 128 | ---------- 129 | l : np.ndarray 130 | Left edge 131 | r : np.ndarray 132 | Right edge 133 | n_samples : int 134 | Number of samples 135 | 136 | Returns 137 | ------- 138 | arr : np.ndarray 139 | Inteporalted array 140 | """ 141 | return np.array([ 142 | l + step_i / (n_samples - 1) * (r - l) 143 | for step_i in range(n_samples)]) 144 | 145 | 146 | def make_latent_manifold(corners, n_samples): 147 | """Create a 2d manifold out of the provided corners: n_samples * n_samples. 148 | 149 | Parameters 150 | ---------- 151 | corners : list of np.ndarray 152 | The four corners to intepolate. 153 | n_samples : int 154 | Number of samples to use in interpolation. 
155 | 156 | Returns 157 | ------- 158 | arr : np.ndarray 159 | Stacked array of all 2D interpolated samples 160 | """ 161 | left = interp(corners[0], corners[1], n_samples) 162 | right = interp(corners[2], corners[3], n_samples) 163 | 164 | embedding = [] 165 | for row_i in range(n_samples): 166 | embedding.append(interp(left[row_i], right[row_i], n_samples)) 167 | return np.vstack(embedding) 168 | 169 | 170 | def imcrop_tosquare(img): 171 | """Make any image a square image. 172 | 173 | Parameters 174 | ---------- 175 | img : np.ndarray 176 | Input image to crop, assumed at least 2d. 177 | 178 | Returns 179 | ------- 180 | crop : np.ndarray 181 | Cropped image. 182 | """ 183 | size = np.min(img.shape[:2]) 184 | extra = img.shape[:2] - size 185 | crop = img 186 | for i in np.flatnonzero(extra): 187 | crop = np.take(crop, extra[i] // 2 + np.r_[:size], axis=i) 188 | return crop 189 | 190 | 191 | def slice_montage(montage, img_h, img_w, n_imgs): 192 | """Slice a montage image into n_img h x w images. 193 | 194 | Performs the opposite of the montage function. Takes a montage image and 195 | slices it back into a N x H x W x C image. 196 | 197 | Parameters 198 | ---------- 199 | montage : np.ndarray 200 | Montage image to slice. 201 | img_h : int 202 | Height of sliced image 203 | img_w : int 204 | Width of sliced image 205 | n_imgs : int 206 | Number of images to slice 207 | 208 | Returns 209 | ------- 210 | sliced : np.ndarray 211 | Sliced images as 4d array. 212 | """ 213 | sliced_ds = [] 214 | for i in range(int(np.sqrt(n_imgs))): 215 | for j in range(int(np.sqrt(n_imgs))): 216 | sliced_ds.append(montage[ 217 | 1 + i + i * img_h:1 + i + (i + 1) * img_h, 218 | 1 + j + j * img_w:1 + j + (j + 1) * img_w]) 219 | return np.array(sliced_ds) 220 | 221 | 222 | def montage(images, saveto='montage.png'): 223 | """Draw all images as a montage separated by 1 pixel borders. 224 | 225 | Also saves the file to the destination specified by `saveto`. 226 | 227 | Parameters 228 | ---------- 229 | images : numpy.ndarray 230 | Input array to create montage of. Array should be: 231 | batch x height x width x channels. 232 | saveto : str 233 | Location to save the resulting montage image. 234 | 235 | Returns 236 | ------- 237 | m : numpy.ndarray 238 | Montage image. 239 | """ 240 | if isinstance(images, list): 241 | images = np.array(images) 242 | img_h = images.shape[1] 243 | img_w = images.shape[2] 244 | n_plots = int(np.ceil(np.sqrt(images.shape[0]))) 245 | if len(images.shape) == 4 and images.shape[3] == 3: 246 | m = np.ones( 247 | (images.shape[1] * n_plots + n_plots + 1, 248 | images.shape[2] * n_plots + n_plots + 1, 3)) * 0.5 249 | else: 250 | m = np.ones( 251 | (images.shape[1] * n_plots + n_plots + 1, 252 | images.shape[2] * n_plots + n_plots + 1)) * 0.5 253 | for i in range(n_plots): 254 | for j in range(n_plots): 255 | this_filter = i * n_plots + j 256 | if this_filter < images.shape[0]: 257 | this_img = images[this_filter] 258 | m[1 + i + i * img_h:1 + i + (i + 1) * img_h, 259 | 1 + j + j * img_w:1 + j + (j + 1) * img_w] = this_img 260 | plt.imsave(arr=m, fname=saveto) 261 | return m 262 | 263 | 264 | def montage_filters(W): 265 | """Draws all filters (n_input * n_output filters) as a 266 | montage image separated by 1 pixel borders. 267 | 268 | Parameters 269 | ---------- 270 | W : Tensor 271 | Input tensor to create montage of. 272 | 273 | Returns 274 | ------- 275 | m : numpy.ndarray 276 | Montage image. 
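    Examples
    --------
    A minimal sketch, assuming `W` has already been evaluated to a numpy array
    of shape [k_h, k_w, n_input, n_output] (random values used for illustration):

    >>> W = np.random.randn(5, 5, 3, 16)
    >>> m = montage_filters(W)
    >>> plt.imshow(m, cmap='gray')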
277 | """ 278 | W = np.reshape(W, [W.shape[0], W.shape[1], 1, W.shape[2] * W.shape[3]]) 279 | n_plots = int(np.ceil(np.sqrt(W.shape[-1]))) 280 | m = np.ones( 281 | (W.shape[0] * n_plots + n_plots + 1, 282 | W.shape[1] * n_plots + n_plots + 1)) * 0.5 283 | for i in range(n_plots): 284 | for j in range(n_plots): 285 | this_filter = i * n_plots + j 286 | if this_filter < W.shape[-1]: 287 | m[1 + i + i * W.shape[0]:1 + i + (i + 1) * W.shape[0], 288 | 1 + j + j * W.shape[1]:1 + j + (j + 1) * W.shape[1]] = ( 289 | np.squeeze(W[:, :, :, this_filter])) 290 | return m 291 | 292 | 293 | def get_celeb_files(dst='img_align_celeba', max_images=100): 294 | """Download the first 100 images of the celeb dataset. 295 | 296 | Files will be placed in a directory 'img_align_celeba' if one 297 | doesn't exist. 298 | 299 | Returns 300 | ------- 301 | files : list of strings 302 | Locations to the first 100 images of the celeb net dataset. 303 | """ 304 | # Create a directory 305 | if not os.path.exists(dst): 306 | os.mkdir(dst) 307 | 308 | # Now perform the following 100 times: 309 | for img_i in range(1, max_images + 1): 310 | 311 | # create a string using the current loop counter 312 | f = '000%03d.jpg' % img_i 313 | 314 | if not os.path.exists(os.path.join(dst, f)): 315 | 316 | # and get the url with that string appended the end 317 | url = 'https://s3.amazonaws.com/cadl/celeb-align/' + f 318 | 319 | # We'll print this out to the console so we can see how far we've gone 320 | print(url, end='\r') 321 | 322 | # And now download the url to a location inside our new directory 323 | urllib.request.urlretrieve(url, os.path.join(dst, f)) 324 | 325 | files = [os.path.join(dst, file_i) 326 | for file_i in os.listdir(dst) 327 | if '.jpg' in file_i][:max_images] 328 | return files 329 | 330 | 331 | def get_celeb_imgs(max_images=100): 332 | """Load the first `max_images` images of the celeb dataset. 333 | 334 | Returns 335 | ------- 336 | imgs : list of np.ndarray 337 | List of the first 100 images from the celeb dataset 338 | """ 339 | return [plt.imread(f_i) for f_i in get_celeb_files(max_images=max_images)] 340 | 341 | 342 | def gauss(mean, stddev, ksize): 343 | """Use Tensorflow to compute a Gaussian Kernel. 344 | 345 | Parameters 346 | ---------- 347 | mean : float 348 | Mean of the Gaussian (e.g. 0.0). 349 | stddev : float 350 | Standard Deviation of the Gaussian (e.g. 1.0). 351 | ksize : int 352 | Size of kernel (e.g. 16). 353 | 354 | Returns 355 | ------- 356 | kernel : np.ndarray 357 | Computed Gaussian Kernel using Tensorflow. 358 | """ 359 | g = tf.Graph() 360 | with tf.Session(graph=g): 361 | x = tf.linspace(-3.0, 3.0, ksize) 362 | z = (tf.exp(tf.neg(tf.pow(x - mean, 2.0) / 363 | (2.0 * tf.pow(stddev, 2.0)))) * 364 | (1.0 / (stddev * tf.sqrt(2.0 * 3.1415)))) 365 | return z.eval() 366 | 367 | 368 | def gauss2d(mean, stddev, ksize): 369 | """Use Tensorflow to compute a 2D Gaussian Kernel. 370 | 371 | Parameters 372 | ---------- 373 | mean : float 374 | Mean of the Gaussian (e.g. 0.0). 375 | stddev : float 376 | Standard Deviation of the Gaussian (e.g. 1.0). 377 | ksize : int 378 | Size of kernel (e.g. 16). 379 | 380 | Returns 381 | ------- 382 | kernel : np.ndarray 383 | Computed 2D Gaussian Kernel using Tensorflow. 
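    Examples
    --------
    A minimal usage sketch:

    >>> kernel = gauss2d(mean=0.0, stddev=1.0, ksize=16)
    >>> kernel.shape
    (16, 16)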
384 | """ 385 | z = gauss(mean, stddev, ksize) 386 | g = tf.Graph() 387 | with tf.Session(graph=g): 388 | z_2d = tf.matmul(tf.reshape(z, [ksize, 1]), tf.reshape(z, [1, ksize])) 389 | return z_2d.eval() 390 | 391 | 392 | def convolve(img, kernel): 393 | """Use Tensorflow to convolve a 4D image with a 4D kernel. 394 | 395 | Parameters 396 | ---------- 397 | img : np.ndarray 398 | 4-dimensional image shaped N x H x W x C 399 | kernel : np.ndarray 400 | 4-dimensional image shape K_H, K_W, C_I, C_O corresponding to the 401 | kernel's height and width, the number of input channels, and the 402 | number of output channels. Note that C_I should = C. 403 | 404 | Returns 405 | ------- 406 | result : np.ndarray 407 | Convolved result. 408 | """ 409 | g = tf.Graph() 410 | with tf.Session(graph=g): 411 | convolved = tf.nn.conv2d(img, kernel, strides=[1, 1, 1, 1], padding='SAME') 412 | res = convolved.eval() 413 | return res 414 | 415 | 416 | def gabor(ksize=32): 417 | """Use Tensorflow to compute a 2D Gabor Kernel. 418 | 419 | Parameters 420 | ---------- 421 | ksize : int, optional 422 | Size of kernel. 423 | 424 | Returns 425 | ------- 426 | gabor : np.ndarray 427 | Gabor kernel with ksize x ksize dimensions. 428 | """ 429 | g = tf.Graph() 430 | with tf.Session(graph=g): 431 | z_2d = gauss2d(0.0, 1.0, ksize) 432 | ones = tf.ones((1, ksize)) 433 | ys = tf.sin(tf.linspace(-3.0, 3.0, ksize)) 434 | ys = tf.reshape(ys, [ksize, 1]) 435 | wave = tf.matmul(ys, ones) 436 | gabor = tf.mul(wave, z_2d) 437 | return gabor.eval() 438 | 439 | 440 | def build_submission(filename, file_list, optional_file_list=()): 441 | """Helper utility to check homework assignment submissions and package them. 442 | 443 | Parameters 444 | ---------- 445 | filename : str 446 | Output zip file name 447 | file_list : tuple 448 | Tuple of files to include 449 | """ 450 | # check each file exists 451 | for part_i, file_i in enumerate(file_list): 452 | if not os.path.exists(file_i): 453 | print('\nYou are missing the file {}. 
'.format(file_i) + 454 | 'It does not look like you have completed Part {}.'.format( 455 | part_i + 1)) 456 | 457 | def zipdir(path, zf): 458 | for root, dirs, files in os.walk(path): 459 | for file in files: 460 | # make sure the files are part of the necessary file list 461 | if file.endswith(file_list) or file.endswith(optional_file_list): 462 | zf.write(os.path.join(root, file)) 463 | 464 | # create a zip file with the necessary files 465 | zipf = zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED) 466 | zipdir('.', zipf) 467 | zipf.close() 468 | print('Your assignment zip file has been created!') 469 | print('Now submit the file:\n{}\nto Kadenze for grading!'.format( 470 | os.path.abspath(filename))) 471 | 472 | 473 | def normalize(a, s=0.1): 474 | '''Normalize the image range for visualization''' 475 | return np.uint8(np.clip( 476 | (a - a.mean()) / max(a.std(), 1e-4) * s + 0.5, 477 | 0, 1) * 255) 478 | 479 | 480 | # %% 481 | def weight_variable(shape, **kwargs): 482 | '''Helper function to create a weight variable initialized with 483 | a normal distribution 484 | Parameters 485 | ---------- 486 | shape : list 487 | Size of weight variable 488 | ''' 489 | if isinstance(shape, list): 490 | initial = tf.random_normal(tf.pack(shape), mean=0.0, stddev=0.01) 491 | initial.set_shape(shape) 492 | else: 493 | initial = tf.random_normal(shape, mean=0.0, stddev=0.01) 494 | return tf.Variable(initial, **kwargs) 495 | 496 | 497 | # %% 498 | def bias_variable(shape, **kwargs): 499 | '''Helper function to create a bias variable initialized with 500 | a constant value. 501 | Parameters 502 | ---------- 503 | shape : list 504 | Size of weight variable 505 | ''' 506 | if isinstance(shape, list): 507 | initial = tf.random_normal(tf.pack(shape), mean=0.0, stddev=0.01) 508 | initial.set_shape(shape) 509 | else: 510 | initial = tf.random_normal(shape, mean=0.0, stddev=0.01) 511 | return tf.Variable(initial, **kwargs) 512 | 513 | 514 | def binary_cross_entropy(z, x, name=None): 515 | """Binary Cross Entropy measures cross entropy of a binary variable. 516 | 517 | loss(x, z) = - sum_i (x[i] * log(z[i]) + (1 - x[i]) * log(1 - z[i])) 518 | 519 | Parameters 520 | ---------- 521 | z : tf.Tensor 522 | A `Tensor` of the same type and shape as `x`. 523 | x : tf.Tensor 524 | A `Tensor` of type `float32` or `float64`. 525 | """ 526 | with tf.variable_scope(name or 'bce'): 527 | eps = 1e-12 528 | return (-(x * tf.log(z + eps) + 529 | (1. - x) * tf.log(1. - z + eps))) 530 | 531 | 532 | def conv2d(x, n_output, 533 | k_h=5, k_w=5, d_h=2, d_w=2, 534 | padding='SAME', name='conv2d', reuse=None): 535 | """Helper for creating a 2d convolution operation. 536 | 537 | Parameters 538 | ---------- 539 | x : tf.Tensor 540 | Input tensor to convolve. 541 | n_output : int 542 | Number of filters. 
543 | k_h : int, optional 544 | Kernel height 545 | k_w : int, optional 546 | Kernel width 547 | d_h : int, optional 548 | Height stride 549 | d_w : int, optional 550 | Width stride 551 | padding : str, optional 552 | Padding type: "SAME" or "VALID" 553 | name : str, optional 554 | Variable scope 555 | 556 | Returns 557 | ------- 558 | op : tf.Tensor 559 | Output of convolution 560 | """ 561 | with tf.variable_scope(name or 'conv2d', reuse=reuse): 562 | W = tf.get_variable( 563 | name='W', 564 | shape=[k_h, k_w, x.get_shape()[-1], n_output], 565 | initializer=tf.contrib.layers.xavier_initializer_conv2d()) 566 | 567 | conv = tf.nn.conv2d( 568 | name='conv', 569 | input=x, 570 | filter=W, 571 | strides=[1, d_h, d_w, 1], 572 | padding=padding) 573 | 574 | b = tf.get_variable( 575 | name='b', 576 | shape=[n_output], 577 | initializer=tf.constant_initializer(0.0)) 578 | 579 | h = tf.nn.bias_add( 580 | name='h', 581 | value=conv, 582 | bias=b) 583 | 584 | return h, W 585 | 586 | 587 | def deconv2d(x, n_output_h, n_output_w, n_output_ch, n_input_ch=None, 588 | k_h=5, k_w=5, d_h=2, d_w=2, 589 | padding='SAME', name='deconv2d', reuse=None): 590 | """Deconvolution helper. 591 | 592 | Parameters 593 | ---------- 594 | x : tf.Tensor 595 | Input tensor to convolve. 596 | n_output_h : int 597 | Height of output 598 | n_output_w : int 599 | Width of output 600 | n_output_ch : int 601 | Number of filters. 602 | k_h : int, optional 603 | Kernel height 604 | k_w : int, optional 605 | Kernel width 606 | d_h : int, optional 607 | Height stride 608 | d_w : int, optional 609 | Width stride 610 | padding : str, optional 611 | Padding type: "SAME" or "VALID" 612 | name : str, optional 613 | Variable scope 614 | 615 | Returns 616 | ------- 617 | op : tf.Tensor 618 | Output of deconvolution 619 | """ 620 | with tf.variable_scope(name or 'deconv2d', reuse=reuse): 621 | W = tf.get_variable( 622 | name='W', 623 | shape=[k_h, k_h, n_output_ch, n_input_ch or x.get_shape()[-1]], 624 | initializer=tf.contrib.layers.xavier_initializer_conv2d()) 625 | 626 | conv = tf.nn.conv2d_transpose( 627 | name='conv_t', 628 | value=x, 629 | filter=W, 630 | output_shape=tf.pack( 631 | [tf.shape(x)[0], n_output_h, n_output_w, n_output_ch]), 632 | strides=[1, d_h, d_w, 1], 633 | padding=padding) 634 | 635 | conv.set_shape([None, n_output_h, n_output_w, n_output_ch]) 636 | 637 | b = tf.get_variable( 638 | name='b', 639 | shape=[n_output_ch], 640 | initializer=tf.constant_initializer(0.0)) 641 | 642 | h = tf.nn.bias_add(name='h', value=conv, bias=b) 643 | 644 | return h, W 645 | 646 | 647 | def lrelu(features, leak=0.2): 648 | """Leaky rectifier. 649 | 650 | Parameters 651 | ---------- 652 | features : tf.Tensor 653 | Input to apply leaky rectifier to. 654 | leak : float, optional 655 | Percentage of leak. 656 | 657 | Returns 658 | ------- 659 | op : tf.Tensor 660 | Resulting output of applying leaky rectifier activation. 661 | """ 662 | f1 = 0.5 * (1 + leak) 663 | f2 = 0.5 * (1 - leak) 664 | return f1 * features + f2 * abs(features) 665 | 666 | 667 | def linear(x, n_output, name=None, activation=None, reuse=None): 668 | """Fully connected layer. 
669 | 670 | Parameters 671 | ---------- 672 | x : tf.Tensor 673 | Input tensor to connect 674 | n_output : int 675 | Number of output neurons 676 | name : None, optional 677 | Scope to apply 678 | 679 | Returns 680 | ------- 681 | h, W : tf.Tensor, tf.Tensor 682 | Output of fully connected layer and the weight matrix 683 | """ 684 | if len(x.get_shape()) != 2: 685 | x = flatten(x, reuse=reuse) 686 | 687 | n_input = x.get_shape().as_list()[1] 688 | 689 | with tf.variable_scope(name or "fc", reuse=reuse): 690 | W = tf.get_variable( 691 | name='W', 692 | shape=[n_input, n_output], 693 | dtype=tf.float32, 694 | initializer=tf.contrib.layers.xavier_initializer()) 695 | 696 | b = tf.get_variable( 697 | name='b', 698 | shape=[n_output], 699 | dtype=tf.float32, 700 | initializer=tf.constant_initializer(0.0)) 701 | 702 | h = tf.nn.bias_add( 703 | name='h', 704 | value=tf.matmul(x, W), 705 | bias=b) 706 | 707 | if activation: 708 | h = activation(h) 709 | 710 | return h, W 711 | 712 | 713 | def flatten(x, name=None, reuse=None): 714 | """Flatten Tensor to 2-dimensions. 715 | 716 | Parameters 717 | ---------- 718 | x : tf.Tensor 719 | Input tensor to flatten. 720 | name : None, optional 721 | Variable scope for flatten operations 722 | 723 | Returns 724 | ------- 725 | flattened : tf.Tensor 726 | Flattened tensor. 727 | """ 728 | with tf.variable_scope('flatten'): 729 | dims = x.get_shape().as_list() 730 | if len(dims) == 4: 731 | flattened = tf.reshape( 732 | x, 733 | shape=[-1, dims[1] * dims[2] * dims[3]]) 734 | elif len(dims) == 2 or len(dims) == 1: 735 | flattened = x 736 | else: 737 | raise ValueError('Expected n dimensions of 1, 2 or 4. Found:', 738 | len(dims)) 739 | 740 | return flattened 741 | 742 | 743 | def to_tensor(x): 744 | """Convert 2 dim Tensor to a 4 dim Tensor ready for convolution. 745 | 746 | Performs the opposite of flatten(x). If the tensor is already 4-D, this 747 | returns the same as the input, leaving it unchanged. 748 | 749 | Parameters 750 | ---------- 751 | x : tf.Tesnor 752 | Input 2-D tensor. If 4-D already, left unchanged. 753 | 754 | Returns 755 | ------- 756 | x : tf.Tensor 757 | 4-D representation of the input. 758 | 759 | Raises 760 | ------ 761 | ValueError 762 | If the tensor is not 2D or already 4D. 763 | """ 764 | if len(x.get_shape()) == 2: 765 | n_input = x.get_shape().as_list()[1] 766 | x_dim = np.sqrt(n_input) 767 | if x_dim == int(x_dim): 768 | x_dim = int(x_dim) 769 | x_tensor = tf.reshape( 770 | x, [-1, x_dim, x_dim, 1], name='reshape') 771 | elif np.sqrt(n_input / 3) == int(np.sqrt(n_input / 3)): 772 | x_dim = int(np.sqrt(n_input / 3)) 773 | x_tensor = tf.reshape( 774 | x, [-1, x_dim, x_dim, 3], name='reshape') 775 | else: 776 | x_tensor = tf.reshape( 777 | x, [-1, 1, 1, n_input], name='reshape') 778 | elif len(x.get_shape()) == 4: 779 | x_tensor = x 780 | else: 781 | raise ValueError('Unsupported input dimensions') 782 | return x_tensor 783 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/vae.py: -------------------------------------------------------------------------------- 1 | """Convolutional/Variational autoencoder, including demonstration of 2 | training such a network on MNIST, CelebNet and the film, "Sita Sings The Blues" 3 | using an image pipeline. 4 | 5 | Copyright Parag K. 
Mital, January 2016 6 | """ 7 | import tensorflow as tf 8 | import numpy as np 9 | import os 10 | from libs.dataset_utils import create_input_pipeline 11 | from libs.datasets import CELEB, MNIST 12 | from libs.batch_norm import batch_norm 13 | from libs import utils 14 | 15 | 16 | def VAE(input_shape=[None, 784], 17 | n_filters=[64, 64, 64], 18 | filter_sizes=[4, 4, 4], 19 | n_hidden=32, 20 | n_code=2, 21 | activation=tf.nn.tanh, 22 | dropout=False, 23 | denoising=False, 24 | convolutional=False, 25 | variational=False): 26 | """(Variational) (Convolutional) (Denoising) Autoencoder. 27 | 28 | Uses tied weights. 29 | 30 | Parameters 31 | ---------- 32 | input_shape : list, optional 33 | Shape of the input to the network. e.g. for MNIST: [None, 784]. 34 | n_filters : list, optional 35 | Number of filters for each layer. 36 | If convolutional=True, this refers to the total number of output 37 | filters to create for each layer, with each layer's number of output 38 | filters as a list. 39 | If convolutional=False, then this refers to the total number of neurons 40 | for each layer in a fully connected network. 41 | filter_sizes : list, optional 42 | Only applied when convolutional=True. This refers to the ksize (height 43 | and width) of each convolutional layer. 44 | n_hidden : int, optional 45 | Only applied when variational=True. This refers to the first fully 46 | connected layer prior to the variational embedding, directly after 47 | the encoding. After the variational embedding, another fully connected 48 | layer is created with the same size prior to decoding. Set to 0 to 49 | not use an additional hidden layer. 50 | n_code : int, optional 51 | Only applied when variational=True. This refers to the number of 52 | latent Gaussians to sample for creating the inner most encoding. 53 | activation : function, optional 54 | Activation function to apply to each layer, e.g. tf.nn.relu 55 | dropout : bool, optional 56 | Whether or not to apply dropout. If using dropout, you must feed a 57 | value for 'keep_prob', as returned in the dictionary. 1.0 means no 58 | dropout is used. 0.0 means every connection is dropped. Sensible 59 | values are between 0.5-0.8. 60 | denoising : bool, optional 61 | Whether or not to apply denoising. If using denoising, you must feed a 62 | value for 'corrupt_prob', as returned in the dictionary. 1.0 means no 63 | corruption is used. 0.0 means every feature is corrupted. Sensible 64 | values are between 0.5-0.8. 65 | convolutional : bool, optional 66 | Whether or not to use a convolutional network or else a fully connected 67 | network will be created. This effects the n_filters parameter's 68 | meaning. 69 | variational : bool, optional 70 | Whether or not to create a variational embedding layer. This will 71 | create a fully connected layer after the encoding, if `n_hidden` is 72 | greater than 0, then will create a multivariate gaussian sampling 73 | layer, then another fully connected layer. The size of the fully 74 | connected layers are determined by `n_hidden`, and the size of the 75 | sampling layer is determined by `n_code`. 76 | 77 | Returns 78 | ------- 79 | model : dict 80 | { 81 | 'cost': Tensor to optimize. 82 | 'Ws': All weights of the encoder. 83 | 'x': Input Placeholder 84 | 'z': Inner most encoding Tensor (latent features) 85 | 'y': Reconstruction of the Decoder 86 | 'keep_prob': Amount to keep when using Dropout 87 | 'corrupt_prob': Amount to corrupt when using Denoising 88 | 'train': Set to True when training/Applies to Batch Normalization. 
89 | } 90 | """ 91 | # network input / placeholders for train (bn) and dropout 92 | x = tf.placeholder(tf.float32, input_shape, 'x') 93 | phase_train = tf.placeholder(tf.bool, name='phase_train') 94 | keep_prob = tf.placeholder(tf.float32, name='keep_prob') 95 | corrupt_prob = tf.placeholder(tf.float32, [1]) 96 | 97 | if denoising: 98 | current_input = utils.corrupt(x) * corrupt_prob + x * (1 - corrupt_prob) 99 | 100 | # 2d -> 4d if convolution 101 | x_tensor = utils.to_tensor(x) if convolutional else x 102 | current_input = x_tensor 103 | 104 | Ws = [] 105 | shapes = [] 106 | 107 | # Build the encoder 108 | for layer_i, n_output in enumerate(n_filters): 109 | with tf.variable_scope('encoder/{}'.format(layer_i)): 110 | shapes.append(current_input.get_shape().as_list()) 111 | if convolutional: 112 | h, W = utils.conv2d(x=current_input, 113 | n_output=n_output, 114 | k_h=filter_sizes[layer_i], 115 | k_w=filter_sizes[layer_i]) 116 | else: 117 | h, W = utils.linear(x=current_input, 118 | n_output=n_output) 119 | h = activation(batch_norm(h, phase_train, 'bn' + str(layer_i))) 120 | if dropout: 121 | h = tf.nn.dropout(h, keep_prob) 122 | Ws.append(W) 123 | current_input = h 124 | 125 | shapes.append(current_input.get_shape().as_list()) 126 | 127 | with tf.variable_scope('variational'): 128 | if variational: 129 | dims = current_input.get_shape().as_list() 130 | flattened = utils.flatten(current_input) 131 | 132 | if n_hidden: 133 | h = utils.linear(flattened, n_hidden, name='W_fc')[0] 134 | h = activation(batch_norm(h, phase_train, 'fc/bn')) 135 | if dropout: 136 | h = tf.nn.dropout(h, keep_prob) 137 | else: 138 | h = flattened 139 | 140 | z_mu = utils.linear(h, n_code, name='mu')[0] 141 | z_log_sigma = 0.5 * utils.linear(h, n_code, name='log_sigma')[0] 142 | 143 | # Sample from noise distribution p(eps) ~ N(0, 1) 144 | epsilon = tf.random_normal( 145 | tf.pack([tf.shape(x)[0], n_code])) 146 | 147 | # Sample from posterior 148 | z = z_mu + tf.mul(epsilon, tf.exp(z_log_sigma)) 149 | 150 | if n_hidden: 151 | h = utils.linear(z, n_hidden, name='fc_t')[0] 152 | h = activation(batch_norm(h, phase_train, 'fc_t/bn')) 153 | if dropout: 154 | h = tf.nn.dropout(h, keep_prob) 155 | else: 156 | h = z 157 | 158 | size = dims[1] * dims[2] * dims[3] if convolutional else dims[1] 159 | h = utils.linear(h, size, name='fc_t2')[0] 160 | current_input = activation(batch_norm(h, phase_train, 'fc_t2/bn')) 161 | if dropout: 162 | current_input = tf.nn.dropout(current_input, keep_prob) 163 | 164 | if convolutional: 165 | current_input = tf.reshape( 166 | current_input, tf.pack([ 167 | tf.shape(current_input)[0], 168 | dims[1], 169 | dims[2], 170 | dims[3]])) 171 | else: 172 | z = current_input 173 | 174 | shapes.reverse() 175 | n_filters.reverse() 176 | Ws.reverse() 177 | 178 | n_filters += [input_shape[-1]] 179 | 180 | # %% 181 | # Decoding layers 182 | for layer_i, n_output in enumerate(n_filters[1:]): 183 | with tf.variable_scope('decoder/{}'.format(layer_i)): 184 | shape = shapes[layer_i + 1] 185 | if convolutional: 186 | h, W = utils.deconv2d(x=current_input, 187 | n_output_h=shape[1], 188 | n_output_w=shape[2], 189 | n_output_ch=shape[3], 190 | n_input_ch=shapes[layer_i][3], 191 | k_h=filter_sizes[layer_i], 192 | k_w=filter_sizes[layer_i]) 193 | else: 194 | h, W = utils.linear(x=current_input, 195 | n_output=n_output) 196 | h = activation(batch_norm(h, phase_train, 'dec/bn' + str(layer_i))) 197 | if dropout: 198 | h = tf.nn.dropout(h, keep_prob) 199 | current_input = h 200 | 201 | y = current_input 202 | x_flat = 
utils.flatten(x) 203 | y_flat = utils.flatten(y) 204 | 205 | # l2 loss 206 | loss_x = tf.reduce_sum(tf.squared_difference(x_flat, y_flat), 1) 207 | 208 | if variational: 209 | # variational lower bound, kl-divergence 210 | loss_z = -0.5 * tf.reduce_sum( 211 | 1.0 + 2.0 * z_log_sigma - 212 | tf.square(z_mu) - tf.exp(2.0 * z_log_sigma), 1) 213 | 214 | # add l2 loss 215 | cost = tf.reduce_mean(loss_x + loss_z) 216 | else: 217 | # just optimize l2 loss 218 | cost = tf.reduce_mean(loss_x) 219 | 220 | return {'cost': cost, 'Ws': Ws, 221 | 'x': x, 'z': z, 'y': y, 222 | 'keep_prob': keep_prob, 223 | 'corrupt_prob': corrupt_prob, 224 | 'train': phase_train} 225 | 226 | 227 | def train_vae(files, 228 | input_shape, 229 | learning_rate=0.0001, 230 | batch_size=100, 231 | n_epochs=50, 232 | n_examples=10, 233 | crop_shape=[64, 64, 3], 234 | crop_factor=0.8, 235 | n_filters=[100, 100, 100, 100], 236 | n_hidden=256, 237 | n_code=50, 238 | convolutional=True, 239 | variational=True, 240 | filter_sizes=[3, 3, 3, 3], 241 | dropout=True, 242 | keep_prob=0.8, 243 | activation=tf.nn.relu, 244 | img_step=100, 245 | save_step=100, 246 | ckpt_name="vae.ckpt"): 247 | """General purpose training of a (Variational) (Convolutional) Autoencoder. 248 | 249 | Supply a list of file paths to images, and this will do everything else. 250 | 251 | Parameters 252 | ---------- 253 | files : list of strings 254 | List of paths to images. 255 | input_shape : list 256 | Must define what the input image's shape is. 257 | learning_rate : float, optional 258 | Learning rate. 259 | batch_size : int, optional 260 | Batch size. 261 | n_epochs : int, optional 262 | Number of epochs. 263 | n_examples : int, optional 264 | Number of example to use while demonstrating the current training 265 | iteration's reconstruction. Creates a square montage, so make 266 | sure int(sqrt(n_examples))**2 = n_examples, e.g. 16, 25, 36, ... 100. 267 | crop_shape : list, optional 268 | Size to centrally crop the image to. 269 | crop_factor : float, optional 270 | Resize factor to apply before cropping. 271 | n_filters : list, optional 272 | Same as VAE's n_filters. 273 | n_hidden : int, optional 274 | Same as VAE's n_hidden. 275 | n_code : int, optional 276 | Same as VAE's n_code. 277 | convolutional : bool, optional 278 | Use convolution or not. 279 | variational : bool, optional 280 | Use variational layer or not. 281 | filter_sizes : list, optional 282 | Same as VAE's filter_sizes. 283 | dropout : bool, optional 284 | Use dropout or not 285 | keep_prob : float, optional 286 | Percent of keep for dropout. 287 | activation : function, optional 288 | Which activation function to use. 289 | img_step : int, optional 290 | How often to save training images showing the manifold and 291 | reconstruction. 292 | save_step : int, optional 293 | How often to save checkpoints. 294 | ckpt_name : str, optional 295 | Checkpoints will be named as this, e.g. 'model.ckpt' 296 | """ 297 | batch = create_input_pipeline( 298 | files=files, 299 | batch_size=batch_size, 300 | n_epochs=n_epochs, 301 | crop_shape=crop_shape, 302 | crop_factor=crop_factor, 303 | shape=input_shape) 304 | 305 | ae = VAE(input_shape=[None] + crop_shape, 306 | convolutional=convolutional, 307 | variational=variational, 308 | n_filters=n_filters, 309 | n_hidden=n_hidden, 310 | n_code=n_code, 311 | dropout=dropout, 312 | filter_sizes=filter_sizes, 313 | activation=activation) 314 | 315 | # Create a manifold of our inner most layer to show 316 | # example reconstructions. 
This is one way to see 317 | # what the "embedding" or "latent space" of the encoder 318 | # is capable of encoding, though note that this is just 319 | # a random hyperplane within the latent space, and does not 320 | # encompass all possible embeddings. 321 | zs = np.random.uniform( 322 | -1.0, 1.0, [4, n_code]).astype(np.float32) 323 | zs = utils.make_latent_manifold(zs, n_examples) 324 | 325 | optimizer = tf.train.AdamOptimizer( 326 | learning_rate=learning_rate).minimize(ae['cost']) 327 | 328 | # We create a session to use the graph 329 | sess = tf.Session() 330 | saver = tf.train.Saver() 331 | sess.run(tf.initialize_all_variables()) 332 | 333 | # This will handle our threaded image pipeline 334 | coord = tf.train.Coordinator() 335 | 336 | # Ensure no more changes to graph 337 | tf.get_default_graph().finalize() 338 | 339 | # Start up the queues for handling the image pipeline 340 | threads = tf.train.start_queue_runners(sess=sess, coord=coord) 341 | 342 | if os.path.exists(ckpt_name): 343 | saver.restore(sess, ckpt_name) 344 | 345 | # Fit all training data 346 | t_i = 0 347 | batch_i = 0 348 | epoch_i = 0 349 | cost = 0 350 | n_files = len(files) 351 | test_xs = sess.run(batch) / 255.0 352 | utils.montage(test_xs, 'test_xs.png') 353 | try: 354 | while not coord.should_stop() and epoch_i < n_epochs: 355 | batch_i += 1 356 | batch_xs = sess.run(batch) / 255.0 357 | train_cost = sess.run([ae['cost'], optimizer], feed_dict={ 358 | ae['x']: batch_xs, ae['train']: True, 359 | ae['keep_prob']: keep_prob})[0] 360 | print(batch_i, train_cost) 361 | cost += train_cost 362 | if batch_i % n_files == 0: 363 | print('epoch:', epoch_i) 364 | print('average cost:', cost / batch_i) 365 | cost = 0 366 | batch_i = 0 367 | epoch_i += 1 368 | 369 | if batch_i % img_step == 0: 370 | # Plot example reconstructions from latent layer 371 | recon = sess.run( 372 | ae['y'], feed_dict={ 373 | ae['z']: zs, 374 | ae['train']: False, 375 | ae['keep_prob']: 1.0}) 376 | utils.montage(recon.reshape([-1] + crop_shape), 377 | 'manifold_%08d.png' % t_i) 378 | 379 | # Plot example reconstructions 380 | recon = sess.run( 381 | ae['y'], feed_dict={ae['x']: test_xs, 382 | ae['train']: False, 383 | ae['keep_prob']: 1.0}) 384 | print('reconstruction (min, max, mean):', 385 | recon.min(), recon.max(), recon.mean()) 386 | utils.montage(recon.reshape([-1] + crop_shape), 387 | 'reconstruction_%08d.png' % t_i) 388 | t_i += 1 389 | 390 | if batch_i % save_step == 0: 391 | # Save the variables to disk. 392 | saver.save(sess, "./" + ckpt_name, 393 | global_step=batch_i, 394 | write_meta_graph=False) 395 | except tf.errors.OutOfRangeError: 396 | print('Done.') 397 | finally: 398 | # One of the threads has issued an exception. So let's tell all the 399 | # threads to shutdown. 400 | coord.request_stop() 401 | 402 | # Wait until all threads have finished. 403 | coord.join(threads) 404 | 405 | # Clean up the session. 406 | sess.close() 407 | 408 | 409 | # %% 410 | def test_mnist(): 411 | """Train an autoencoder on MNIST. 412 | 413 | This function will train an autoencoder on MNIST and also 414 | save many image files during the training process, demonstrating 415 | the latent space of the inner most dimension of the encoder, 416 | as well as reconstructions of the decoder. 
417 | """ 418 | 419 | # load MNIST 420 | n_code = 2 421 | mnist = MNIST(split=[0.8, 0.1, 0.1]) 422 | ae = VAE(input_shape=[None, 784], n_filters=[512, 256], 423 | n_hidden=64, n_code=n_code, activation=tf.nn.sigmoid, 424 | convolutional=False, variational=True) 425 | 426 | n_examples = 100 427 | zs = np.random.uniform( 428 | -1.0, 1.0, [4, n_code]).astype(np.float32) 429 | zs = utils.make_latent_manifold(zs, n_examples) 430 | 431 | learning_rate = 0.02 432 | optimizer = tf.train.AdamOptimizer( 433 | learning_rate=learning_rate).minimize(ae['cost']) 434 | 435 | # We create a session to use the graph 436 | sess = tf.Session() 437 | sess.run(tf.initialize_all_variables()) 438 | 439 | # Fit all training data 440 | t_i = 0 441 | batch_i = 0 442 | batch_size = 200 443 | n_epochs = 10 444 | test_xs = mnist.test.images[:n_examples] 445 | utils.montage(test_xs.reshape((-1, 28, 28)), 'test_xs.png') 446 | for epoch_i in range(n_epochs): 447 | train_i = 0 448 | train_cost = 0 449 | for batch_xs, _ in mnist.train.next_batch(batch_size): 450 | train_cost += sess.run([ae['cost'], optimizer], feed_dict={ 451 | ae['x']: batch_xs, ae['train']: True, ae['keep_prob']: 1.0})[0] 452 | train_i += 1 453 | if batch_i % 10 == 0: 454 | # Plot example reconstructions from latent layer 455 | recon = sess.run( 456 | ae['y'], feed_dict={ 457 | ae['z']: zs, 458 | ae['train']: False, 459 | ae['keep_prob']: 1.0}) 460 | m = utils.montage(recon.reshape((-1, 28, 28)), 461 | 'manifold_%08d.png' % t_i) 462 | # Plot example reconstructions 463 | recon = sess.run( 464 | ae['y'], feed_dict={ae['x']: test_xs, 465 | ae['train']: False, 466 | ae['keep_prob']: 1.0}) 467 | m = utils.montage(recon.reshape( 468 | (-1, 28, 28)), 'reconstruction_%08d.png' % t_i) 469 | t_i += 1 470 | batch_i += 1 471 | 472 | valid_i = 0 473 | valid_cost = 0 474 | for batch_xs, _ in mnist.valid.next_batch(batch_size): 475 | valid_cost += sess.run([ae['cost']], feed_dict={ 476 | ae['x']: batch_xs, ae['train']: False, ae['keep_prob']: 1.0})[0] 477 | valid_i += 1 478 | print('train:', train_cost / train_i, 'valid:', valid_cost / valid_i) 479 | 480 | 481 | def test_celeb(): 482 | """Train an autoencoder on Celeb Net. 483 | """ 484 | files = CELEB() 485 | train_vae( 486 | files=files, 487 | input_shape=[218, 178, 3], 488 | batch_size=100, 489 | n_epochs=50, 490 | crop_shape=[64, 64, 3], 491 | crop_factor=0.8, 492 | convolutional=True, 493 | variational=True, 494 | n_filters=[100, 100, 100], 495 | n_hidden=250, 496 | n_code=100, 497 | dropout=True, 498 | filter_sizes=[3, 3, 3], 499 | activation=tf.nn.sigmoid, 500 | ckpt_name='celeb.ckpt') 501 | 502 | 503 | def test_sita(): 504 | """Train an autoencoder on Sita Sings The Blues. 
505 | """ 506 | if not os.path.exists('sita'): 507 | os.system('wget http://ossguy.com/sita/Sita_Sings_the_Blues_640x360_XviD.avi') 508 | os.mkdir('sita') 509 | os.system('ffmpeg -i Sita_Sings_the_Blues_640x360_XviD.avi -r 60 -f' + 510 | ' image2 -s 160x90 sita/sita-%08d.jpg') 511 | files = [os.path.join('sita', f) for f in os.listdir('sita')] 512 | 513 | train_vae( 514 | files=files, 515 | input_shape=[90, 160, 3], 516 | batch_size=100, 517 | n_epochs=50, 518 | crop_shape=[90, 160, 3], 519 | crop_factor=1.0, 520 | convolutional=True, 521 | variational=True, 522 | n_filters=[100, 100, 100], 523 | n_hidden=250, 524 | n_code=100, 525 | dropout=True, 526 | filter_sizes=[3, 3, 3], 527 | activation=tf.nn.sigmoid, 528 | ckpt_name='sita.ckpt') 529 | 530 | 531 | if __name__ == '__main__': 532 | test_celeb() 533 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/vaegan.py: -------------------------------------------------------------------------------- 1 | """Convolutional/Variational autoencoder, including demonstration of 2 | training such a network on MNIST, CelebNet and the film, "Sita Sings The Blues" 3 | using an image pipeline. 4 | 5 | Parag K. Mital, Jan 2016 6 | """ 7 | import tensorflow as tf 8 | import numpy as np 9 | import os 10 | from libs.dataset_utils import create_input_pipeline 11 | from libs.datasets import CELEB 12 | from libs.utils import * 13 | 14 | 15 | def encoder(x, n_hidden=None, dimensions=[], filter_sizes=[], 16 | convolutional=False, activation=tf.nn.relu, 17 | output_activation=tf.nn.sigmoid): 18 | """Summary 19 | 20 | Parameters 21 | ---------- 22 | x : TYPE 23 | Description 24 | n_hidden : None, optional 25 | Description 26 | dimensions : list, optional 27 | Description 28 | filter_sizes : list, optional 29 | Description 30 | convolutional : bool, optional 31 | Description 32 | activation : TYPE, optional 33 | Description 34 | output_activation : TYPE, optional 35 | Description 36 | 37 | Returns 38 | ------- 39 | name : TYPE 40 | Description 41 | """ 42 | if convolutional: 43 | x_tensor = to_tensor(x) 44 | else: 45 | x_tensor = tf.reshape( 46 | tensor=x, 47 | shape=[-1, dimensions[0]]) 48 | dimensions = dimensions[1:] 49 | current_input = x_tensor 50 | 51 | Ws = [] 52 | hs = [] 53 | shapes = [] 54 | for layer_i, n_output in enumerate(dimensions): 55 | with tf.variable_scope(str(layer_i)): 56 | shapes.append(current_input.get_shape().as_list()) 57 | if convolutional: 58 | h, W = conv2d( 59 | x=current_input, 60 | n_output=n_output, 61 | k_h=filter_sizes[layer_i], 62 | k_w=filter_sizes[layer_i], 63 | padding='SAME') 64 | else: 65 | h, W = linear( 66 | x=current_input, 67 | n_output=n_output) 68 | h = activation(h) 69 | Ws.append(W) 70 | hs.append(h) 71 | 72 | current_input = h 73 | 74 | shapes.append(h.get_shape().as_list()) 75 | 76 | with tf.variable_scope('flatten'): 77 | flattened = flatten(current_input) 78 | 79 | with tf.variable_scope('hidden'): 80 | if n_hidden: 81 | h, W = linear(flattened, n_hidden, name='linear') 82 | h = activation(h) 83 | else: 84 | h = flattened 85 | 86 | return {'z': h, 'Ws': Ws, 'hs': hs, 'shapes': shapes} 87 | 88 | 89 | def decoder(z, shapes, n_hidden=None, 90 | dimensions=[], filter_sizes=[], 91 | convolutional=False, activation=tf.nn.relu, 92 | output_activation=tf.nn.relu): 93 | """Summary 94 | 95 | Parameters 96 | ---------- 97 | z : TYPE 98 | Description 99 | shapes : TYPE 100 | Description 101 | n_hidden : None, optional 102 | Description 103 | 
dimensions : list, optional 104 | Description 105 | filter_sizes : list, optional 106 | Description 107 | convolutional : bool, optional 108 | Description 109 | activation : TYPE, optional 110 | Description 111 | output_activation : TYPE, optional 112 | Description 113 | 114 | Returns 115 | ------- 116 | name : TYPE 117 | Description 118 | """ 119 | with tf.variable_scope('hidden/1'): 120 | if n_hidden: 121 | h = linear(z, n_hidden, name='linear')[0] 122 | h = activation(h) 123 | else: 124 | h = z 125 | 126 | with tf.variable_scope('hidden/2'): 127 | dims = shapes[0] 128 | size = dims[1] * dims[2] * dims[3] if convolutional else dims[1] 129 | h = linear(h, size, name='linear')[0] 130 | current_input = activation(h) 131 | if convolutional: 132 | current_input = tf.reshape( 133 | current_input, 134 | tf.pack([tf.shape(current_input)[0], dims[1], dims[2], dims[3]])) 135 | 136 | Ws = [] 137 | hs = [] 138 | for layer_i, n_output in enumerate(dimensions[1:]): 139 | with tf.variable_scope('decoder/{}'.format(layer_i)): 140 | if convolutional: 141 | shape = shapes[layer_i + 1] 142 | h, W = deconv2d(x=current_input, 143 | n_output_h=shape[1], 144 | n_output_w=shape[2], 145 | n_output_ch=shape[3], 146 | n_input_ch=shapes[layer_i][3], 147 | k_h=filter_sizes[layer_i], 148 | k_w=filter_sizes[layer_i]) 149 | else: 150 | h, W = linear(x=current_input, 151 | n_output=n_output) 152 | if (layer_i + 1) < len(dimensions): 153 | h = activation(h) 154 | else: 155 | h = output_activation(h) 156 | Ws.append(W) 157 | hs.append(h) 158 | current_input = h 159 | 160 | z = tf.identity(current_input, name="x_tilde") 161 | return {'x_tilde': current_input, 'Ws': Ws, 'hs': hs} 162 | 163 | 164 | def variational_bayes(h, n_code): 165 | """Summary 166 | 167 | Parameters 168 | ---------- 169 | h : TYPE 170 | Description 171 | n_code : TYPE 172 | Description 173 | 174 | Returns 175 | ------- 176 | name : TYPE 177 | Description 178 | """ 179 | z_mu = tf.nn.tanh(linear(h, n_code, name='mu')[0]) 180 | z_log_sigma = 0.5 * tf.nn.tanh(linear(h, n_code, name='log_sigma')[0]) 181 | 182 | # Sample from noise distribution p(eps) ~ N(0, 1) 183 | epsilon = tf.random_normal(tf.pack([tf.shape(h)[0], n_code])) 184 | 185 | # Sample from posterior 186 | z = tf.add(z_mu, tf.mul(epsilon, tf.exp(z_log_sigma)), name='z') 187 | # -log(p(z)/q(z|x)), bits by coding. 
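    # For a diagonal Gaussian q(z|x) = N(mu, sigma^2) with prior p(z) = N(0, I),
    # the KL divergence has the closed form computed below:
    #   D_KL(q||p) = -0.5 * sum(1 + 2*log(sigma) - mu^2 - sigma^2)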
188 | # variational bound coding costs kl(p(z|x)||q(z|x)) 189 | # d_kl(q(z|x)||p(z)) 190 | loss_z = -0.5 * tf.reduce_sum( 191 | 1.0 + 2.0 * z_log_sigma - tf.square(z_mu) - tf.exp(2.0 * z_log_sigma), 192 | 1) 193 | return z, z_mu, z_log_sigma, loss_z 194 | 195 | 196 | def discriminator(x, convolutional=True, 197 | filter_sizes=[5, 5, 5, 5], 198 | activation=tf.nn.relu, 199 | n_filters=[100, 100, 100, 100]): 200 | """Summary 201 | 202 | Parameters 203 | ---------- 204 | x : TYPE 205 | Description 206 | convolutional : bool, optional 207 | Description 208 | filter_sizes : list, optional 209 | Description 210 | n_filters : list, optional 211 | Description 212 | 213 | Returns 214 | ------- 215 | name : TYPE 216 | Description 217 | """ 218 | encoding = encoder(x=x, 219 | convolutional=convolutional, 220 | dimensions=n_filters, 221 | filter_sizes=filter_sizes, 222 | activation=activation) 223 | 224 | # flatten, then linear to 1 value 225 | res = flatten(encoding['z'], name='flatten') 226 | if res.get_shape().as_list()[-1] > 1: 227 | res = linear(res, 1)[0] 228 | 229 | return {'logits': res, 'probs': tf.nn.sigmoid(res), 230 | 'Ws': encoding['Ws'], 'hs': encoding['hs']} 231 | 232 | 233 | def VAE(input_shape=[None, 784], 234 | n_filters=[64, 64, 64], 235 | filter_sizes=[4, 4, 4], 236 | n_hidden=32, 237 | n_code=2, 238 | activation=tf.nn.tanh, 239 | convolutional=False, 240 | variational=False): 241 | """Summary 242 | 243 | Parameters 244 | ---------- 245 | input_shape : list, optional 246 | Description 247 | n_filters : list, optional 248 | Description 249 | filter_sizes : list, optional 250 | Description 251 | n_hidden : int, optional 252 | Description 253 | n_code : int, optional 254 | Description 255 | activation : TYPE, optional 256 | Description 257 | convolutional : bool, optional 258 | Description 259 | variational : bool, optional 260 | Description 261 | 262 | Returns 263 | ------- 264 | name : TYPE 265 | Description 266 | """ 267 | # network input / placeholders for train (bn) 268 | x = tf.placeholder(tf.float32, input_shape, 'x') 269 | 270 | with tf.variable_scope('encoder'): 271 | encoding = encoder(x=x, 272 | n_hidden=n_hidden, 273 | convolutional=convolutional, 274 | dimensions=n_filters, 275 | filter_sizes=filter_sizes, 276 | activation=activation) 277 | 278 | if variational: 279 | with tf.variable_scope('variational'): 280 | z, z_mu, z_log_sigma, loss_z = variational_bayes( 281 | h=encoding['z'], n_code=n_code) 282 | else: 283 | z = encoding['z'] 284 | loss_z = None 285 | 286 | shapes = encoding['shapes'].copy() 287 | shapes.reverse() 288 | n_filters = n_filters.copy() 289 | n_filters.reverse() 290 | n_filters += [input_shape[-1]] 291 | 292 | with tf.variable_scope('generator'): 293 | decoding = decoder(z=z, 294 | shapes=shapes, 295 | n_hidden=n_hidden, 296 | dimensions=n_filters, 297 | filter_sizes=filter_sizes, 298 | convolutional=convolutional, 299 | activation=activation) 300 | 301 | x_tilde = decoding['x_tilde'] 302 | x_flat = flatten(x) 303 | x_tilde_flat = flatten(x_tilde) 304 | 305 | # -log(p(x|z)) 306 | loss_x = tf.reduce_sum(tf.squared_difference(x_flat, x_tilde_flat), 1) 307 | return {'loss_x': loss_x, 'loss_z': loss_z, 'x': x, 'z': z, 308 | 'Ws': encoding['Ws'], 'hs': decoding['hs'], 309 | 'x_tilde': x_tilde} 310 | 311 | 312 | def VAEGAN(input_shape=[None, 784], 313 | n_filters=[64, 64, 64], 314 | filter_sizes=[4, 4, 4], 315 | n_hidden=32, 316 | n_code=2, 317 | activation=tf.nn.tanh, 318 | convolutional=False, 319 | variational=False): 320 | """Summary 321 | 322 | 
Parameters 323 | ---------- 324 | input_shape : list, optional 325 | Description 326 | n_filters : list, optional 327 | Description 328 | filter_sizes : list, optional 329 | Description 330 | n_hidden : int, optional 331 | Description 332 | n_code : int, optional 333 | Description 334 | activation : TYPE, optional 335 | Description 336 | convolutional : bool, optional 337 | Description 338 | variational : bool, optional 339 | Description 340 | 341 | Returns 342 | ------- 343 | name : TYPE 344 | Description 345 | """ 346 | # network input / placeholders for train (bn) 347 | x = tf.placeholder(tf.float32, input_shape, 'x') 348 | z_samp = tf.placeholder(tf.float32, [None, n_code], 'z_samp') 349 | 350 | with tf.variable_scope('encoder'): 351 | encoding = encoder(x=x, 352 | n_hidden=n_hidden, 353 | convolutional=convolutional, 354 | dimensions=n_filters, 355 | filter_sizes=filter_sizes, 356 | activation=activation) 357 | 358 | with tf.variable_scope('variational'): 359 | z, z_mu, z_log_sigma, loss_z = variational_bayes( 360 | h=encoding['z'], n_code=n_code) 361 | 362 | shapes = encoding['shapes'].copy() 363 | shapes.reverse() 364 | n_filters_decoder = n_filters.copy() 365 | n_filters_decoder.reverse() 366 | n_filters_decoder += [input_shape[-1]] 367 | 368 | with tf.variable_scope('generator'): 369 | decoding_actual = decoder(z=z, 370 | shapes=shapes, 371 | n_hidden=n_hidden, 372 | convolutional=convolutional, 373 | dimensions=n_filters_decoder, 374 | filter_sizes=filter_sizes, 375 | activation=activation) 376 | 377 | with tf.variable_scope('generator', reuse=True): 378 | decoding_sampled = decoder(z=z_samp, 379 | shapes=shapes, 380 | n_hidden=n_hidden, 381 | convolutional=convolutional, 382 | dimensions=n_filters_decoder, 383 | filter_sizes=filter_sizes, 384 | activation=activation) 385 | 386 | with tf.variable_scope('discriminator'): 387 | D_real = discriminator(x, 388 | filter_sizes=filter_sizes, 389 | n_filters=n_filters, 390 | activation=activation) 391 | 392 | with tf.variable_scope('discriminator', reuse=True): 393 | D_fake = discriminator(decoding_actual['x_tilde'], 394 | filter_sizes=filter_sizes, 395 | n_filters=n_filters, 396 | activation=activation) 397 | 398 | with tf.variable_scope('discriminator', reuse=True): 399 | D_samp = discriminator(decoding_sampled['x_tilde'], 400 | filter_sizes=filter_sizes, 401 | n_filters=n_filters, 402 | activation=activation) 403 | 404 | with tf.variable_scope('loss'): 405 | # Weights influence of content/style of decoder 406 | gamma = tf.placeholder(tf.float32, name='gamma') 407 | 408 | # Discriminator_l Log Likelihood Loss 409 | loss_D_llike = 0 410 | for h_fake, h_real in zip(D_fake['hs'][3:], D_real['hs'][3:]): 411 | loss_D_llike += tf.reduce_sum( 412 | 0.5 * tf.squared_difference( 413 | flatten(h_fake), flatten(h_real)), 1) 414 | 415 | # GAN Loss 416 | eps = 1e-12 417 | loss_real = tf.reduce_sum(tf.log(D_real['probs'] + eps), 1) 418 | loss_fake = tf.reduce_sum(tf.log(1 - D_fake['probs'] + eps), 1) 419 | loss_samp = tf.reduce_sum(tf.log(1 - D_samp['probs'] + eps), 1) 420 | 421 | loss_GAN = (loss_real + loss_fake + loss_samp) / 3.0 422 | 423 | loss_enc = tf.reduce_mean(loss_z + loss_D_llike) 424 | loss_gen = tf.reduce_mean(gamma * loss_D_llike - loss_GAN) 425 | loss_dis = -tf.reduce_mean(loss_GAN) 426 | 427 | return {'x': x, 'z': z, 'x_tilde': decoding_actual['x_tilde'], 428 | 'z_samp': z_samp, 'x_tilde_samp': decoding_sampled['x_tilde'], 429 | 'loss_real': loss_real, 'loss_fake': loss_fake, 'loss_samp': loss_samp, 430 | 'loss_GAN': loss_GAN, 
'loss_D_llike': loss_D_llike, 431 | 'loss_enc': loss_enc, 'loss_gen': loss_gen, 'loss_dis': loss_dis, 432 | 'gamma': gamma} 433 | 434 | 435 | def train_vaegan(files, 436 | learning_rate=0.00001, 437 | batch_size=64, 438 | n_epochs=250, 439 | n_examples=10, 440 | input_shape=[218, 178, 3], 441 | crop_shape=[64, 64, 3], 442 | crop_factor=0.8, 443 | n_filters=[100, 100, 100, 100], 444 | n_hidden=None, 445 | n_code=128, 446 | convolutional=True, 447 | variational=True, 448 | filter_sizes=[3, 3, 3, 3], 449 | activation=tf.nn.elu, 450 | ckpt_name="vaegan.ckpt"): 451 | """Summary 452 | 453 | Parameters 454 | ---------- 455 | files : TYPE 456 | Description 457 | learning_rate : float, optional 458 | Description 459 | batch_size : int, optional 460 | Description 461 | n_epochs : int, optional 462 | Description 463 | n_examples : int, optional 464 | Description 465 | input_shape : list, optional 466 | Description 467 | crop_shape : list, optional 468 | Description 469 | crop_factor : float, optional 470 | Description 471 | n_filters : list, optional 472 | Description 473 | n_hidden : int, optional 474 | Description 475 | n_code : int, optional 476 | Description 477 | convolutional : bool, optional 478 | Description 479 | variational : bool, optional 480 | Description 481 | filter_sizes : list, optional 482 | Description 483 | activation : TYPE, optional 484 | Description 485 | ckpt_name : str, optional 486 | Description 487 | 488 | Returns 489 | ------- 490 | name : TYPE 491 | Description 492 | """ 493 | 494 | ae = VAEGAN(input_shape=[None] + crop_shape, 495 | convolutional=convolutional, 496 | variational=variational, 497 | n_filters=n_filters, 498 | n_hidden=n_hidden, 499 | n_code=n_code, 500 | filter_sizes=filter_sizes, 501 | activation=activation) 502 | 503 | batch = create_input_pipeline( 504 | files=files, 505 | batch_size=batch_size, 506 | n_epochs=n_epochs, 507 | crop_shape=crop_shape, 508 | crop_factor=crop_factor, 509 | shape=input_shape) 510 | 511 | zs = np.random.randn(4, n_code).astype(np.float32) 512 | zs = make_latent_manifold(zs, n_examples) 513 | 514 | opt_enc = tf.train.AdamOptimizer( 515 | learning_rate=learning_rate).minimize( 516 | ae['loss_enc'], 517 | var_list=[var_i for var_i in tf.trainable_variables() 518 | if var_i.name.startswith('encoder')]) 519 | 520 | opt_gen = tf.train.AdamOptimizer( 521 | learning_rate=learning_rate).minimize( 522 | ae['loss_gen'], 523 | var_list=[var_i for var_i in tf.trainable_variables() 524 | if var_i.name.startswith('generator')]) 525 | 526 | opt_dis = tf.train.AdamOptimizer( 527 | learning_rate=learning_rate).minimize( 528 | ae['loss_dis'], 529 | var_list=[var_i for var_i in tf.trainable_variables() 530 | if var_i.name.startswith('discriminator')]) 531 | 532 | sess = tf.Session() 533 | saver = tf.train.Saver() 534 | sess.run(tf.initialize_all_variables()) 535 | coord = tf.train.Coordinator() 536 | tf.get_default_graph().finalize() 537 | threads = tf.train.start_queue_runners(sess=sess, coord=coord) 538 | 539 | if os.path.exists(ckpt_name): 540 | saver.restore(sess, ckpt_name) 541 | print("VAE model restored.") 542 | 543 | t_i = 0 544 | batch_i = 0 545 | epoch_i = 0 546 | 547 | equilibrium = 0.693 548 | margin = 0.4 549 | 550 | n_files = len(files) 551 | test_xs = sess.run(batch) / 255.0 552 | montage(test_xs, 'test_xs.png') 553 | try: 554 | while not coord.should_stop() or epoch_i < n_epochs: 555 | if batch_i % (n_files // batch_size) == 0: 556 | batch_i = 0 557 | epoch_i += 1 558 | print('---------- EPOCH:', epoch_i) 559 | 560 | batch_i += 1 
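            # Balancing heuristic for the VAE/GAN updates below: run the encoder
            # update while measuring the discriminator's real/fake costs, then
            # compare them to the equilibrium value ln(2) ~= 0.693 (the
            # discriminator's cost when it outputs 0.5). If either cost drifts
            # more than `margin` above equilibrium, skip the generator update;
            # if either drifts more than `margin` below it, skip the
            # discriminator update.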
561 | batch_xs = sess.run(batch) / 255.0 562 | batch_zs = np.random.randn(batch_size, n_code).astype(np.float32) 563 | real_cost, fake_cost, _ = sess.run([ 564 | ae['loss_real'], ae['loss_fake'], opt_enc], 565 | feed_dict={ 566 | ae['x']: batch_xs, 567 | ae['gamma']: 0.5}) 568 | real_cost = -np.mean(real_cost) 569 | fake_cost = -np.mean(fake_cost) 570 | print('real:', real_cost, '/ fake:', fake_cost) 571 | 572 | gen_update = True 573 | dis_update = True 574 | 575 | if real_cost > (equilibrium + margin) or \ 576 | fake_cost > (equilibrium + margin): 577 | gen_update = False 578 | 579 | if real_cost < (equilibrium - margin) or \ 580 | fake_cost < (equilibrium - margin): 581 | dis_update = False 582 | 583 | if not (gen_update or dis_update): 584 | gen_update = True 585 | dis_update = True 586 | 587 | if gen_update: 588 | sess.run(opt_gen, feed_dict={ 589 | ae['x']: batch_xs, 590 | ae['z_samp']: batch_zs, 591 | ae['gamma']: 0.5}) 592 | if dis_update: 593 | sess.run(opt_dis, feed_dict={ 594 | ae['x']: batch_xs, 595 | ae['z_samp']: batch_zs, 596 | ae['gamma']: 0.5}) 597 | 598 | if batch_i % 50 == 0: 599 | 600 | # Plot example reconstructions from latent layer 601 | recon = sess.run( 602 | ae['x_tilde'], feed_dict={ 603 | ae['z']: zs}) 604 | print('recon:', recon.min(), recon.max()) 605 | recon = np.clip(recon / recon.max(), 0, 1) 606 | montage(recon.reshape([-1] + crop_shape), 607 | 'imgs/manifold_%08d.png' % t_i) 608 | 609 | # Plot example reconstructions 610 | recon = sess.run( 611 | ae['x_tilde'], feed_dict={ 612 | ae['x']: test_xs}) 613 | print('recon:', recon.min(), recon.max()) 614 | recon = np.clip(recon / recon.max(), 0, 1) 615 | montage(recon.reshape([-1] + crop_shape), 616 | 'imgs/reconstruction_%08d.png' % t_i) 617 | t_i += 1 618 | 619 | if batch_i % 100 == 0: 620 | # Save the variables to disk. 621 | save_path = saver.save(sess, "./" + ckpt_name, 622 | global_step=batch_i, 623 | write_meta_graph=False) 624 | print("Model saved in file: %s" % save_path) 625 | except tf.errors.OutOfRangeError: 626 | print('Done training -- epoch limit reached') 627 | finally: 628 | # One of the threads has issued an exception. So let's tell all the 629 | # threads to shutdown. 630 | coord.request_stop() 631 | 632 | # Wait until all threads have finished. 633 | coord.join(threads) 634 | 635 | # Clean up the session. 
636 | sess.close() 637 | 638 | 639 | def test_celeb(): 640 | """Summary 641 | 642 | Returns 643 | ------- 644 | name : TYPE 645 | Description 646 | """ 647 | files = CELEB() 648 | train_vaegan( 649 | files=files, 650 | batch_size=64, 651 | n_epochs=100, 652 | crop_shape=[100, 100, 3], 653 | crop_factor=0.8, 654 | input_shape=[218, 178, 3], 655 | convolutional=True, 656 | variational=True, 657 | n_filters=[256, 384, 512, 1024, 2048], 658 | n_hidden=None, 659 | n_code=512, 660 | filter_sizes=[3, 3, 3, 3, 3], 661 | activation=tf.nn.elu, 662 | ckpt_name='celeb.ckpt') 663 | 664 | 665 | def test_sita(): 666 | """Summary 667 | 668 | Returns 669 | ------- 670 | name : TYPE 671 | Description 672 | """ 673 | if not os.path.exists('sita'): 674 | os.system('wget http://ossguy.com/sita/Sita_Sings_the_Blues_640x360_XviD.avi') 675 | os.mkdir('sita') 676 | os.system('ffmpeg -i Sita_Sings_the_Blues_640x360_XviD.avi -r 60 -f' + 677 | ' image2 -s 160x90 sita/sita-%08d.jpg') 678 | files = [os.path.join('sita', f) for f in os.listdir('sita')] 679 | 680 | train_vaegan( 681 | files=files, 682 | batch_size=64, 683 | n_epochs=50, 684 | crop_shape=[90, 160, 3], 685 | crop_factor=1.0, 686 | input_shape=[218, 178, 3], 687 | convolutional=True, 688 | variational=True, 689 | n_filters=[100, 100, 100, 100, 100], 690 | n_hidden=250, 691 | n_code=100, 692 | filter_sizes=[3, 3, 3, 3, 2], 693 | activation=tf.nn.elu, 694 | ckpt_name='sita.ckpt') 695 | 696 | 697 | if __name__ == '__main__': 698 | test_celeb() 699 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/libs/vgg16.py: -------------------------------------------------------------------------------- 1 | """ 2 | Creative Applications of Deep Learning w/ Tensorflow. 3 | Kadenze, Inc. 4 | Copyright Parag K. Mital, June 2016. 5 | """ 6 | import tensorflow as tf 7 | import os 8 | import json 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | from skimage.transform import resize as imresize 12 | from .utils import download 13 | 14 | 15 | def get_vgg_face_model(): 16 | download('https://s3.amazonaws.com/cadl/models/vgg_face.tfmodel') 17 | with open("vgg_face.tfmodel", mode='rb') as f: 18 | graph_def = tf.GraphDef() 19 | try: 20 | graph_def.ParseFromString(f.read()) 21 | except: 22 | print('try adding PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python ' + 23 | 'to environment. e.g.:\n' + 24 | 'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python ipython\n' + 25 | 'See here for info: ' + 26 | 'https://github.com/tensorflow/tensorflow/issues/582') 27 | 28 | download('https://s3.amazonaws.com/cadl/models/vgg_face.json') 29 | labels = json.load(open('vgg_face.json')) 30 | 31 | return { 32 | 'graph_def': graph_def, 33 | 'labels': labels, 34 | 'preprocess': preprocess, 35 | 'deprocess': deprocess 36 | } 37 | 38 | 39 | def get_vgg_model(): 40 | download('https://s3.amazonaws.com/cadl/models/vgg16.tfmodel') 41 | with open("vgg16.tfmodel", mode='rb') as f: 42 | graph_def = tf.GraphDef() 43 | try: 44 | graph_def.ParseFromString(f.read()) 45 | except: 46 | print('try adding PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python ' + 47 | 'to environment. 
e.g.:\n' + 48 | 'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python ipython\n' + 49 | 'See here for info: ' + 50 | 'https://github.com/tensorflow/tensorflow/issues/582') 51 | 52 | download('https://s3.amazonaws.com/cadl/models/synset.txt') 53 | with open('synset.txt') as f: 54 | labels = [(idx, l.strip()) for idx, l in enumerate(f.readlines())] 55 | 56 | return { 57 | 'graph_def': graph_def, 58 | 'labels': labels, 59 | 'preprocess': preprocess, 60 | 'deprocess': deprocess 61 | } 62 | 63 | 64 | def preprocess(img, crop=True, resize=True, dsize=(224, 224)): 65 | if img.dtype == np.uint8: 66 | img = img / 255.0 67 | 68 | if crop: 69 | short_edge = min(img.shape[:2]) 70 | yy = int((img.shape[0] - short_edge) / 2) 71 | xx = int((img.shape[1] - short_edge) / 2) 72 | crop_img = img[yy: yy + short_edge, xx: xx + short_edge] 73 | else: 74 | crop_img = img 75 | 76 | if resize: 77 | norm_img = imresize(crop_img, dsize, preserve_range=True) 78 | else: 79 | norm_img = crop_img 80 | 81 | return (norm_img).astype(np.float32) 82 | 83 | 84 | def deprocess(img): 85 | return np.clip(img * 255, 0, 255).astype(np.uint8) 86 | # return ((img / np.max(np.abs(img))) * 127.5 + 87 | # 127.5).astype(np.uint8) 88 | 89 | 90 | def test_vgg(): 91 | """Loads the VGG network and applies it to a test image. 92 | """ 93 | with tf.Session() as sess: 94 | net = get_vgg_model() 95 | tf.import_graph_def(net['graph_def'], name='vgg') 96 | g = tf.get_default_graph() 97 | names = [op.name for op in g.get_operations()] 98 | input_name = names[0] + ':0' 99 | x = g.get_tensor_by_name(input_name) 100 | softmax = g.get_tensor_by_name(names[-2] + ':0') 101 | 102 | og = plt.imread('bosch.png') 103 | img = preprocess(og)[np.newaxis, ...] 104 | res = np.squeeze(softmax.eval(feed_dict={ 105 | x: img, 106 | 'vgg/dropout_1/random_uniform:0': [[1.0]], 107 | 'vgg/dropout/random_uniform:0': [[1.0]]})) 108 | print([(res[idx], net['labels'][idx]) 109 | for idx in res.argsort()[-5:][::-1]]) 110 | 111 | """Let's visualize the network's gradient activation 112 | when backpropagated to the original input image. This 113 | is effectively telling us which pixels contribute to the 114 | predicted class or given neuron""" 115 | features = [name for name in names if 'BiasAdd' in name.split()[-1]] 116 | from math import sqrt, ceil 117 | n_plots = ceil(sqrt(len(features) + 1)) 118 | fig, axs = plt.subplots(n_plots, n_plots) 119 | plot_i = 0 120 | axs[0][0].imshow(img[0]) 121 | for feature_i, featurename in enumerate(features): 122 | plot_i += 1 123 | feature = g.get_tensor_by_name(featurename + ':0') 124 | neuron = tf.reduce_max(feature, 1) 125 | saliency = tf.gradients(tf.reduce_sum(neuron), x) 126 | neuron_idx = tf.arg_max(feature, 1) 127 | this_res = sess.run([saliency[0], neuron_idx], feed_dict={ 128 | x: img, 129 | 'vgg/dropout_1/random_uniform:0': [[1.0]], 130 | 'vgg/dropout/random_uniform:0': [[1.0]]}) 131 | 132 | grad = this_res[0][0] / np.max(np.abs(this_res[0])) 133 | ax = axs[plot_i // n_plots][plot_i % n_plots] 134 | ax.imshow((grad * 127.5 + 127.5).astype(np.uint8)) 135 | ax.set_title(featurename) 136 | 137 | """Deep Dreaming takes the backpropagated gradient activations 138 | and simply adds it to the image, running the same process again 139 | and again in a loop. 
There are many tricks one can add to this 140 | idea, such as infinitely zooming into the image by cropping and 141 | scaling, adding jitter by randomly moving the image around, or 142 | adding constraints on the total activations.""" 143 | og = plt.imread('street.png') 144 | crop = 2 145 | img = preprocess(og)[np.newaxis, ...] 146 | layer = g.get_tensor_by_name(features[3] + ':0') 147 | n_els = layer.get_shape().as_list()[1] 148 | neuron_i = np.random.randint(1000) 149 | layer_vec = np.zeros((1, n_els)) 150 | layer_vec[0, neuron_i] = 1 151 | neuron = tf.reduce_max(layer, 1) 152 | saliency = tf.gradients(tf.reduce_sum(neuron), x) 153 | for it_i in range(3): 154 | print(it_i) 155 | this_res = sess.run(saliency[0], feed_dict={ 156 | x: img, 157 | layer: layer_vec, 158 | 'vgg/dropout_1/random_uniform:0': [[1.0]], 159 | 'vgg/dropout/random_uniform:0': [[1.0]]}) 160 | grad = this_res[0] / np.mean(np.abs(grad)) 161 | img = img[:, crop:-crop - 1, crop:-crop - 1, :] 162 | img = imresize(img[0], (224, 224))[np.newaxis] 163 | img += grad 164 | plt.imshow(deprocess(img[0])) 165 | 166 | 167 | def test_vgg_face(): 168 | """Loads the VGG network and applies it to a test image. 169 | """ 170 | with tf.Session() as sess: 171 | net = get_vgg_face_model() 172 | x = tf.placeholder(tf.float32, [1, 224, 224, 3], name='x') 173 | tf.import_graph_def(net['graph_def'], name='vgg', 174 | input_map={'Placeholder:0': x}) 175 | g = tf.get_default_graph() 176 | names = [op.name for op in g.get_operations()] 177 | 178 | og = plt.imread('bricks.png')[..., :3] 179 | img = preprocess(og)[np.newaxis, ...] 180 | plt.imshow(img[0]) 181 | plt.show() 182 | 183 | """Let's visualize the network's gradient activation 184 | when backpropagated to the original input image. This 185 | is effectively telling us which pixels contribute to the 186 | predicted class or given neuron""" 187 | features = [name for name in names if 'BiasAdd' in name.split()[-1]] 188 | from math import sqrt, ceil 189 | n_plots = ceil(sqrt(len(features) + 1)) 190 | fig, axs = plt.subplots(n_plots, n_plots) 191 | plot_i = 0 192 | axs[0][0].imshow(img[0]) 193 | for feature_i, featurename in enumerate(features): 194 | plot_i += 1 195 | feature = g.get_tensor_by_name(featurename + ':0') 196 | neuron = tf.reduce_max(feature, 1) 197 | saliency = tf.gradients(tf.reduce_sum(neuron), x) 198 | neuron_idx = tf.arg_max(feature, 1) 199 | this_res = sess.run([saliency[0], neuron_idx], feed_dict={x: img}) 200 | 201 | grad = this_res[0][0] / np.max(np.abs(this_res[0])) 202 | ax = axs[plot_i // n_plots][plot_i % n_plots] 203 | ax.imshow((grad * 127.5 + 127.5).astype(np.uint8)) 204 | ax.set_title(featurename) 205 | plt.waitforbuttonpress() 206 | 207 | """Deep Dreaming takes the backpropagated gradient activations 208 | and simply adds it to the image, running the same process again 209 | and again in a loop. There are many tricks one can add to this 210 | idea, such as infinitely zooming into the image by cropping and 211 | scaling, adding jitter by randomly moving the image around, or 212 | adding constraints on the total activations.""" 213 | og = plt.imread('street.png') 214 | crop = 2 215 | img = preprocess(og)[np.newaxis, ...] 
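        # The loop below performs a few "deep dreaming" steps: pick a layer and a
        # random neuron, compute the saliency gradient of that neuron with respect
        # to the input, crop and resize the image slightly each step (a zoom
        # effect), and add the normalized gradient back into the image.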
216 | layer = g.get_tensor_by_name(features[3] + ':0') 217 | n_els = layer.get_shape().as_list()[1] 218 | neuron_i = np.random.randint(1000) 219 | layer_vec = np.zeros((1, n_els)) 220 | layer_vec[0, neuron_i] = 1 221 | neuron = tf.reduce_max(layer, 1) 222 | saliency = tf.gradients(tf.reduce_sum(neuron), x) 223 | for it_i in range(3): 224 | print(it_i) 225 | this_res = sess.run(saliency[0], feed_dict={ 226 | x: img, 227 | layer: layer_vec, 228 | 'vgg/dropout_1/random_uniform:0': [[1.0]], 229 | 'vgg/dropout/random_uniform:0': [[1.0]]}) 230 | grad = this_res[0] / np.mean(np.abs(grad)) 231 | img = img[:, crop:-crop - 1, crop:-crop - 1, :] 232 | img = imresize(img[0], (224, 224))[np.newaxis] 233 | img += grad 234 | plt.imshow(deprocess(img[0])) 235 | 236 | if __name__ == '__main__': 237 | test_vgg_face() 238 | -------------------------------------------------------------------------------- /Zero-shot Classification by Deep Learning/testclasses_akata.txt: -------------------------------------------------------------------------------- 1 | 001.Black_footed_Albatross 2 | 004.Groove_billed_Ani 3 | 006.Least_Auklet 4 | 008.Rhinoceros_Auklet 5 | 009.Brewer_Blackbird 6 | 014.Indigo_Bunting 7 | 023.Brandt_Cormorant 8 | 029.American_Crow 9 | 031.Black_billed_Cuckoo 10 | 033.Yellow_billed_Cuckoo 11 | 034.Gray_crowned_Rosy_Finch 12 | 035.Purple_Finch 13 | 036.Northern_Flicker 14 | 037.Acadian_Flycatcher 15 | 038.Great_Crested_Flycatcher 16 | 043.Yellow_bellied_Flycatcher 17 | 049.Boat_tailed_Grackle 18 | 051.Horned_Grebe 19 | 053.Western_Grebe 20 | 066.Western_Gull 21 | 072.Pomarine_Jaeger 22 | 079.Belted_Kingfisher 23 | 083.White_breasted_Kingfisher 24 | 084.Red_legged_Kittiwake 25 | 086.Pacific_Loon 26 | 091.Mockingbird 27 | 095.Baltimore_Oriole 28 | 096.Hooded_Oriole 29 | 098.Scott_Oriole 30 | 101.White_Pelican 31 | 102.Western_Wood_Pewee 32 | 103.Sayornis 33 | 112.Great_Grey_Shrike 34 | 114.Black_throated_Sparrow 35 | 119.Field_Sparrow 36 | 121.Grasshopper_Sparrow 37 | 130.Tree_Sparrow 38 | 135.Bank_Swallow 39 | 138.Tree_Swallow 40 | 147.Least_Tern 41 | 156.White_eyed_Vireo 42 | 163.Cape_May_Warbler 43 | 165.Chestnut_sided_Warbler 44 | 166.Golden_winged_Warbler 45 | 180.Wilson_Warbler 46 | 183.Northern_Waterthrush 47 | 185.Bohemian_Waxwing 48 | 186.Cedar_Waxwing 49 | 187.American_Three_toed_Woodpecker 50 | 197.Marsh_Wren 51 | --------------------------------------------------------------------------------
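A minimal sketch of building and stepping the `VAE` graph from libs/vae.py on random data, assuming the TensorFlow 0.x-era API these files target (tf.pack, tf.initialize_all_variables, etc.) and that the snippet is run from the project root so that `libs` is importable; the layer sizes, learning rate, and the random batch are illustrative only, not the settings used in the project notebooks.

    import numpy as np
    import tensorflow as tf
    from libs.vae import VAE

    # Build a small fully connected, variational autoencoder on 784-d inputs.
    ae = VAE(input_shape=[None, 784],
             n_filters=[256, 64],      # layer sizes of the fully connected encoder
             n_hidden=32,
             n_code=2,
             activation=tf.nn.sigmoid,
             convolutional=False,
             variational=True)

    optimizer = tf.train.AdamOptimizer(0.001).minimize(ae['cost'])

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        # One gradient step on a random batch, just to confirm the graph runs.
        batch_xs = np.random.rand(64, 784).astype(np.float32)
        cost, _ = sess.run([ae['cost'], optimizer],
                           feed_dict={ae['x']: batch_xs,
                                      ae['train']: True,
                                      ae['keep_prob']: 1.0})
        print('cost after one step:', cost)

Setting convolutional=True and feeding image-shaped inputs (as train_vae does) follows the same pattern; only input_shape and the batch preparation change.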