├── README.md ├── cs20si ├── vis │ ├── test_1.png │ ├── test_10.png │ ├── test_2.png │ ├── test_3.png │ ├── test_4.png │ ├── test_5.png │ ├── test_6.png │ ├── test_7.png │ ├── test_8.png │ └── test_9.png ├── layer_utils.py ├── autoencoder_complete.py ├── autoencoder.py ├── utils.py ├── train.py ├── layers.py └── layers_complete.py ├── cs224n ├── word2vec │ ├── tsne.png │ ├── word2vec.py │ ├── utils.py │ ├── get_data.py │ └── word2vec_complete.py └── linear_regression │ ├── plot.png │ ├── linear_regression.py │ └── linear_regression_complete.py └── .gitignore /README.md: -------------------------------------------------------------------------------- 1 | # tensorflow_tutorials 2 | Code for TF tutorials (CS20SI and CS224N) 3 | -------------------------------------------------------------------------------- /cs20si/vis/test_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishithbsk/tensorflow_tutorials/HEAD/cs20si/vis/test_1.png -------------------------------------------------------------------------------- /cs20si/vis/test_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishithbsk/tensorflow_tutorials/HEAD/cs20si/vis/test_10.png -------------------------------------------------------------------------------- /cs20si/vis/test_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishithbsk/tensorflow_tutorials/HEAD/cs20si/vis/test_2.png -------------------------------------------------------------------------------- /cs20si/vis/test_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishithbsk/tensorflow_tutorials/HEAD/cs20si/vis/test_3.png -------------------------------------------------------------------------------- /cs20si/vis/test_4.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishithbsk/tensorflow_tutorials/HEAD/cs20si/vis/test_4.png -------------------------------------------------------------------------------- /cs20si/vis/test_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishithbsk/tensorflow_tutorials/HEAD/cs20si/vis/test_5.png -------------------------------------------------------------------------------- /cs20si/vis/test_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishithbsk/tensorflow_tutorials/HEAD/cs20si/vis/test_6.png -------------------------------------------------------------------------------- /cs20si/vis/test_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishithbsk/tensorflow_tutorials/HEAD/cs20si/vis/test_7.png -------------------------------------------------------------------------------- /cs20si/vis/test_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishithbsk/tensorflow_tutorials/HEAD/cs20si/vis/test_8.png -------------------------------------------------------------------------------- /cs20si/vis/test_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishithbsk/tensorflow_tutorials/HEAD/cs20si/vis/test_9.png -------------------------------------------------------------------------------- /cs224n/word2vec/tsne.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishithbsk/tensorflow_tutorials/HEAD/cs224n/word2vec/tsne.png -------------------------------------------------------------------------------- /cs224n/linear_regression/plot.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishithbsk/tensorflow_tutorials/HEAD/cs224n/linear_regression/plot.png -------------------------------------------------------------------------------- /cs20si/layer_utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | def get_deconv2d_output_dims(input_dims, filter_dims, stride_dims, padding): 4 | # Returns the height and width of the output of a deconvolution layer. 5 | batch_size, input_h, input_w, num_channels_in = input_dims 6 | filter_h, filter_w, num_channels_out = filter_dims 7 | stride_h, stride_w = stride_dims 8 | 9 | # Compute the height in the output, based on the padding. 10 | if padding == 'SAME': 11 | out_h = input_h * stride_h 12 | elif padding == 'VALID': 13 | out_h = (input_h - 1) * stride_h + filter_h 14 | 15 | # Compute the width in the output, based on the padding. 16 | if padding == 'SAME': 17 | out_w = input_w * stride_w 18 | elif padding == 'VALID': 19 | out_w = (input_w - 1) * stride_w + filter_w 20 | 21 | return [batch_size, out_h, out_w, num_channels_out] 22 | -------------------------------------------------------------------------------- /cs224n/linear_regression/linear_regression.py: -------------------------------------------------------------------------------- 1 | # linear regression 2 | # Author: Nishith Khandwala (nishith@stanford.edu) 3 | # Adapted from https://github.com/hans/ipython-notebooks/ 4 | 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import numpy as np 10 | import tensorflow as tf 11 | import matplotlib 12 | matplotlib.use('TKAgg') 13 | from matplotlib import pyplot as plt 14 | 15 | ''' 16 | Good ole linear regression: find the best linear fit to our data 17 | ''' 18 | 19 | def generate_dataset(): 20 | # data is generated by y = 2x + e 21 | # where 'e' 
is sampled from a normal distribution 22 | x_batch = np.linspace(-1, 1, 101) 23 | y_batch = 2 * x_batch + np.random.randn(*x_batch.shape) * 0.3 24 | return x_batch, y_batch 25 | 26 | def linear_regression(): 27 | return NotImplementedError 28 | 29 | def run(): 30 | pass 31 | x_batch, y_batch = generate_dataset() 32 | 33 | plt.figure(1) 34 | plt.scatter(x_batch, y_batch) 35 | plt.plot(x_batch, y_pred_batch) 36 | plt.savefig('plot.png') 37 | 38 | if __name__ == '__main__': 39 | run() 40 | -------------------------------------------------------------------------------- /cs20si/autoencoder_complete.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from layers import * 4 | 5 | def encoder(input): 6 | conv1 = conv(input, 'conv1', [3, 3, 1], [2, 2]) 7 | conv2 = conv(conv1, 'conv2', [3, 3, 8], [2, 2]) 8 | conv3 = conv(conv2, 'conv3', [3, 3, 8], [2, 2]) 9 | fc_enc = fc(conv3, 'fc_enc', 100, non_linear_fn=None) 10 | return fc_enc 11 | 12 | def decoder(input): 13 | fc_dec = fc(input, 'fc_dec', 128) 14 | fc_dec_reshaped = tf.reshape(fc_dec, [-1, 4, 4, 8]) 15 | deconv1 = deconv(fc_dec_reshaped, 'deconv1', [3, 3, 8], [2, 2]) 16 | deconv2 = deconv(deconv1, 'deconv2', [8, 8, 1], [2, 2], padding='VALID') 17 | deconv3 = deconv(deconv2, 'deconv3', [7, 7, 1], [1, 1], padding='VALID', 18 | non_linear_fn=tf.sigmoid) 19 | return deconv3 20 | 21 | def autoencoder(input_shape): 22 | input_image = tf.placeholder(tf.float32, 23 | input_shape, 24 | name='input_image') 25 | 26 | with tf.variable_scope('autoencoder') as scope: 27 | encoding = encoder(input_image) 28 | reconstructed_image = decoder(encoding) 29 | return input_image, reconstructed_image 30 | 31 | -------------------------------------------------------------------------------- /cs20si/autoencoder.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from layers import * 4 | 5 | def 
encoder(input): 6 | # Create a conv network with 3 conv layers and 1 FC layer 7 | # Conv 1: filter: [3, 3, 1], stride: [2, 2], relu 8 | 9 | # Conv 2: filter: [3, 3, 8], stride: [2, 2], relu 10 | 11 | # Conv 3: filter: [3, 3, 8], stride: [2, 2], relu 12 | 13 | # FC: output_dim: 100, no non-linearity 14 | raise NotImplementedError 15 | 16 | def decoder(input): 17 | # Create a deconv network with 1 FC layer and 3 deconv layers 18 | # FC: output dim: 128, relu 19 | 20 | # Reshape to [batch_size, 4, 4, 8] 21 | 22 | # Deconv 1: filter: [3, 3, 8], stride: [2, 2], relu 23 | 24 | # Deconv 2: filter: [8, 8, 1], stride: [2, 2], padding: valid, relu 25 | 26 | # Deconv 3: filter: [7, 7, 1], stride: [1, 1], padding: valid, sigmoid 27 | raise NotImplementedError 28 | 29 | def autoencoder(input_shape): 30 | # Define place holder with input shape 31 | 32 | # Define variable scope for autoencoder 33 | with tf.variable_scope('autoencoder') as scope: 34 | # Pass input to encoder to obtain encoding 35 | 36 | # Pass encoding into decoder to obtain reconstructed image 37 | 38 | # Return input image (placeholder) and reconstructed image 39 | pass 40 | -------------------------------------------------------------------------------- /cs20si/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import tensorflow 4 | import numpy as np 5 | 6 | import matplotlib 7 | matplotlib.use('TKAgg') 8 | from matplotlib import pyplot as plt 9 | 10 | from tensorflow.examples.tutorials.mnist import input_data 11 | 12 | mnist_image_shape = [28, 28, 1] 13 | 14 | def load_dataset(): 15 | return input_data.read_data_sets('MNIST_data') 16 | 17 | def get_next_batch(dataset, batch_size): 18 | # dataset should be mnist.(train/val/test) 19 | batch, _ = dataset.next_batch(batch_size) 20 | batch_shape = [batch_size] + mnist_image_shape 21 | return np.reshape(batch, batch_shape) 22 | 23 | def visualize(_original, _reconstructions, num_visualize): 24 
| vis_folder = './vis/' 25 | if not os.path.exists(vis_folder): 26 | os.makedirs(vis_folder) 27 | 28 | original = _original[:num_visualize] 29 | reconstructions = _reconstructions[:num_visualize] 30 | 31 | count = 1 32 | for (orig, rec) in zip(original, reconstructions): 33 | orig = np.reshape(orig, (mnist_image_shape[0], 34 | mnist_image_shape[1])) 35 | rec = np.reshape(rec, (mnist_image_shape[0], 36 | mnist_image_shape[1])) 37 | f, ax = plt.subplots(1,2) 38 | ax[0].imshow(orig, cmap='gray') 39 | ax[1].imshow(rec, cmap='gray') 40 | plt.savefig(vis_folder + "test_%d.png" % count) 41 | count += 1 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /cs20si/train.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from utils import * 4 | from autoencoder import * 5 | 6 | batch_size = 100 7 | batch_shape = (batch_size, 28, 28, 1) 8 | num_visualize = 10 9 | 10 | lr = 0.01 11 | num_epochs = 50 12 | 13 | def calculate_loss(original, reconstructed): 14 | return tf.div(tf.reduce_sum(tf.square(tf.sub(reconstructed, 15 | original))), 16 | tf.constant(float(batch_size))) 17 | 18 | def train(dataset): 19 | input_image, reconstructed_image = autoencoder(batch_shape) 20 | loss = calculate_loss(input_image, reconstructed_image) 21 | optimizer = tf.train.GradientDescentOptimizer(lr).minimize(loss) 22 | 23 | init = tf.global_variables_initializer() 24 | with tf.Session() as session: 25 | session.run(init) 26 | 27 | dataset_size = len(dataset.train.images) 28 | print "Dataset size:", dataset_size 29 | num_iters = (num_epochs * dataset_size)/batch_size 30 | print "Num iters:", 
num_iters 31 | for step in xrange(num_iters): 32 | input_batch = get_next_batch(dataset.train, batch_size) 33 | loss_val, _ = session.run([loss, optimizer], 34 | feed_dict={input_image: input_batch}) 35 | if step % 1000 == 0: 36 | print "Loss at step", step, ":", loss_val 37 | 38 | test_batch = get_next_batch(dataset.test, batch_size) 39 | reconstruction = session.run(reconstructed_image, 40 | feed_dict={input_image: test_batch}) 41 | visualize(test_batch, reconstruction, num_visualize) 42 | 43 | if __name__ == '__main__': 44 | dataset = load_dataset() 45 | train(dataset) 46 | 47 | -------------------------------------------------------------------------------- /cs224n/word2vec/word2vec.py: -------------------------------------------------------------------------------- 1 | # word2vec 2 | # Author: Nishith Khandwala (nishith@stanford.edu) 3 | # Adapted from https://www.tensorflow.org/tutorials/word2vec/ 4 | 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import math 10 | import numpy as np 11 | import tensorflow as tf 12 | 13 | from utils import * 14 | 15 | ''' 16 | Consider the following sentence: 17 | "the first cs224n homework was a lot of fun" 18 | 19 | With a window size of 1, we have the dataset: 20 | ([the, cs224n], first), ([lot, fun], of) ... 21 | 22 | Remember that Skipgram tries to predict each context word from 23 | its target word, and so the task becomes to predict 'the' and 24 | 'cs224n' from first, 'lot' and 'fun' from 'of' and so on. 25 | 26 | Our dataset now becomes: 27 | (first, the), (first, cs224n), (of, lot), (of, fun) ... 28 | ''' 29 | 30 | # Let's define some constants first 31 | batch_size = 128 32 | vocabulary_size = 50000 33 | embedding_size = 128 # Dimension of the embedding vector. 34 | num_sampled = 64 # Number of negative examples to sample. 35 | 36 | ''' 37 | load_data loads the already preprocessed training and val data. 
38 | 39 | train data is a list of (batch_input, batch_labels) pairs. 40 | val data is a list of all validation inputs. 41 | reverse_dictionary is a python dict from word index to word 42 | ''' 43 | train_data, val_data, reverse_dictionary = load_data() 44 | print("Number of training examples:", len(train_data)*batch_size) 45 | print("Number of validation examples:", len(val_data)) 46 | 47 | def skipgram(): 48 | raise NotImplementedError 49 | 50 | def run(): 51 | raise NotImplementedError 52 | 53 | # Let's start training 54 | final_embeddings = run() 55 | 56 | # Visualize the embeddings. 57 | visualize_embeddings(final_embeddings, reverse_dictionary) 58 | 59 | -------------------------------------------------------------------------------- /cs224n/word2vec/utils.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use('TKAgg') 3 | from matplotlib import pyplot as plt 4 | import cPickle as pickle 5 | import numpy as np 6 | 7 | from sklearn.manifold import TSNE 8 | 9 | def load_data(): 10 | train_data_path = './data/train.p' 11 | val_data_path = './data/val.p' 12 | reverse_dictionary_path = './data/reverse_dictionary.p' 13 | 14 | train_data = pickle.load(open(train_data_path, 'rb')) 15 | print "Loaded train data!" 16 | val_data = pickle.load(open(val_data_path, 'rb')) 17 | print "Loaded val data!" 18 | reverse_dictionary = pickle.load(open(reverse_dictionary_path, 'rb')) 19 | print "Loaded reverse dictionary!" 
20 | return train_data, val_data, reverse_dictionary 21 | 22 | def print_closest_words(val_index, nearest, reverse_dictionary): 23 | val_word = reverse_dictionary[val_index] 24 | log_str = "Nearest to %s:" % val_word 25 | for k in xrange(len(nearest)): 26 | close_word = reverse_dictionary[nearest[k]] 27 | log_str = "%s %s," % (log_str, close_word) 28 | print(log_str) 29 | 30 | def plot_with_labels(low_dim_embs, labels, filename='tsne.png'): 31 | assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings" 32 | plt.figure(figsize=(18, 18)) # in inches 33 | for i, label in enumerate(labels): 34 | x, y = low_dim_embs[i, :] 35 | plt.scatter(x, y) 36 | plt.annotate(label, 37 | xy=(x, y), 38 | xytext=(5, 2), 39 | textcoords='offset points', 40 | ha='right', 41 | va='bottom') 42 | 43 | plt.savefig(filename) 44 | 45 | def visualize_embeddings(final_embeddings, reverse_dictionary): 46 | tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000) 47 | plot_only = 500 48 | low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only, :]) 49 | labels = [reverse_dictionary[i] for i in xrange(plot_only)] 50 | plot_with_labels(low_dim_embs, labels) 51 | -------------------------------------------------------------------------------- /cs224n/linear_regression/linear_regression_complete.py: -------------------------------------------------------------------------------- 1 | # linear regression 2 | # Author: Nishith Khandwala (nishith@stanford.edu) 3 | # Adapted from https://github.com/hans/ipython-notebooks/ 4 | 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import numpy as np 10 | import tensorflow as tf 11 | import matplotlib 12 | matplotlib.use('TKAgg') 13 | from matplotlib import pyplot as plt 14 | 15 | ''' 16 | Good ole linear regression: find the best linear fit to our data 17 | ''' 18 | 19 | def generate_dataset(): 20 | # data is generated by y = 2x + e 21 | # where 'e' 
is sampled from a normal distribution 22 | x_batch = np.linspace(-1, 1, 101) 23 | y_batch = 2 * x_batch + np.random.randn(*x_batch.shape) * 0.3 24 | return x_batch, y_batch 25 | 26 | def linear_regression(): 27 | x = tf.placeholder(tf.float32, shape=(None,), name="x") 28 | y = tf.placeholder(tf.float32, shape=(None,), name="y") 29 | # Note: The second argument shape=(None,) indicates that 30 | # these variables take on a 1-dimensional value of a dynamic 31 | # size. We can use the None value in this case to allow for 32 | # arbitrary batch sizes. 33 | 34 | with tf.variable_scope('lreg') as scope: 35 | w = tf.Variable(np.random.normal(), name="W") 36 | y_pred = tf.multiply(w, x) 37 | 38 | loss = tf.reduce_mean(tf.square(y_pred - y)) 39 | return x, y, y_pred, loss 40 | 41 | def run(): 42 | x_batch, y_batch = generate_dataset() 43 | 44 | x, y, y_pred, loss = linear_regression() 45 | 46 | optimizer = tf.train.GradientDescentOptimizer(0.1).minimize(loss) 47 | 48 | init = tf.global_variables_initializer() 49 | with tf.Session() as session: 50 | session.run(init) 51 | 52 | feed_dict = {x: x_batch, y: y_batch} 53 | for _ in range(30): 54 | loss_val, _ = session.run([loss, optimizer], feed_dict) 55 | print("loss:", loss_val) 56 | 57 | y_pred_batch = session.run(y_pred, {x: x_batch}) 58 | 59 | plt.figure(1) 60 | plt.scatter(x_batch, y_batch) 61 | plt.plot(x_batch, y_pred_batch) 62 | plt.savefig('plot.png') 63 | 64 | if __name__ == '__main__': 65 | run() 66 | -------------------------------------------------------------------------------- /cs20si/layers.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from layer_utils import get_deconv2d_output_dims 4 | 5 | def conv(input, name, filter_dims, stride_dims, padding='SAME', 6 | non_linear_fn=tf.nn.relu): 7 | input_dims = input.get_shape().as_list() 8 | assert(len(input_dims) == 4) # batch_size, height, width, num_channels_in 9 | assert(len(filter_dims) == 3) 
# height, width and num_channels out 10 | assert(len(stride_dims) == 2) # stride height and width 11 | 12 | num_channels_in = input_dims[-1] 13 | filter_h, filter_w, num_channels_out = filter_dims 14 | stride_h, stride_w = stride_dims 15 | 16 | # Define a variable scope for the conv layer 17 | with tf.variable_scope(name) as scope: 18 | # Create filter weight variable 19 | 20 | # Create bias variable 21 | 22 | # Define the convolution flow graph 23 | 24 | # Add bias to conv output 25 | 26 | # Apply non-linearity (if asked) and return output 27 | pass 28 | 29 | def deconv(input, name, filter_dims, stride_dims, padding='SAME', 30 | non_linear_fn=tf.nn.relu): 31 | input_dims = input.get_shape().as_list() 32 | assert(len(input_dims) == 4) # batch_size, height, width, num_channels_in 33 | assert(len(filter_dims) == 3) # height, width and num_channels out 34 | assert(len(stride_dims) == 2) # stride height and width 35 | 36 | num_channels_in = input_dims[-1] 37 | filter_h, filter_w, num_channels_out = filter_dims 38 | stride_h, stride_w = stride_dims 39 | # Let's step into this function 40 | output_dims = get_deconv2d_output_dims(input_dims, 41 | filter_dims, 42 | stride_dims, 43 | padding) 44 | 45 | # Define a variable scope for the deconv layer 46 | with tf.variable_scope(name) as scope: 47 | # Create filter weight variable 48 | # Note that num_channels_out and in positions are flipped for deconv. 
49 | 50 | # Create bias variable 51 | 52 | # Define the deconv flow graph 53 | 54 | # Add bias to deconv output 55 | 56 | # Apply non-linearity (if asked) and return output 57 | pass 58 | 59 | def max_pool(input, name, filter_dims, stride_dims, padding='SAME'): 60 | assert(len(filter_dims) == 2) # filter height and width 61 | assert(len(stride_dims) == 2) # stride height and width 62 | 63 | filter_h, filter_w = filter_dims 64 | stride_h, stride_w = stride_dims 65 | 66 | # Define the max pool flow graph and return output 67 | pass 68 | 69 | def fc(input, name, out_dim, non_linear_fn=tf.nn.relu): 70 | assert(type(out_dim) == int) 71 | 72 | # Define a variable scope for the FC layer 73 | with tf.variable_scope(name) as scope: 74 | input_dims = input.get_shape().as_list() 75 | # the input to the fc layer should be flattened 76 | if len(input_dims) == 4: 77 | # for eg. the output of a conv layer 78 | batch_size, input_h, input_w, num_channels = input_dims 79 | # ignore the batch dimension 80 | in_dim = input_h * input_w * num_channels 81 | flat_input = tf.reshape(input, [batch_size, in_dim]) 82 | else: 83 | in_dim = input_dims[-1] 84 | flat_input = input 85 | 86 | # Create weight variable 87 | 88 | # Create bias variable 89 | 90 | # Define FC flow graph 91 | 92 | # Apply non-linearity (if asked) and return output 93 | pass 94 | -------------------------------------------------------------------------------- /cs20si/layers_complete.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from layer_utils import get_deconv2d_output_dims 4 | 5 | def conv(input, name, filter_dims, stride_dims, padding='SAME', 6 | non_linear_fn=tf.nn.relu): 7 | input_dims = input.get_shape().as_list() 8 | assert(len(input_dims) == 4) # batch_size, height, width, num_channels_in 9 | assert(len(filter_dims) == 3) # height, width and num_channels out 10 | assert(len(stride_dims) == 2) # stride height and width 11 | 12 | 
num_channels_in = input_dims[-1] 13 | filter_h, filter_w, num_channels_out = filter_dims 14 | stride_h, stride_w = stride_dims 15 | 16 | with tf.variable_scope(name) as scope: 17 | weights = tf.get_variable('weights', [filter_h, 18 | filter_w, 19 | num_channels_in, 20 | num_channels_out]) 21 | biases = tf.get_variable('biases', [num_channels_out]) 22 | out = tf.nn.conv2d(input, 23 | weights, 24 | [1, stride_h, stride_w, 1], 25 | padding=padding) 26 | out = tf.nn.bias_add(out, biases) 27 | if non_linear_fn: 28 | return non_linear_fn(out, name=scope.name) 29 | else: 30 | return out 31 | 32 | def deconv(input, name, filter_dims, stride_dims, padding='SAME', 33 | non_linear_fn=tf.nn.relu): 34 | input_dims = input.get_shape().as_list() 35 | assert(len(input_dims) == 4) # batch_size, height, width, num_channels_in 36 | assert(len(filter_dims) == 3) # height, width and num_channels out 37 | assert(len(stride_dims) == 2) # stride height and width 38 | 39 | num_channels_in = input_dims[-1] 40 | filter_h, filter_w, num_channels_out = filter_dims 41 | stride_h, stride_w = stride_dims 42 | output_dims = get_deconv2d_output_dims(input_dims, 43 | filter_dims, 44 | stride_dims, 45 | padding) 46 | 47 | with tf.variable_scope(name) as scope: 48 | # note that num_channels_out and in positions are flipped for deconv. 
49 | weights = tf.get_variable('weights', [filter_h, 50 | filter_w, 51 | num_channels_out, 52 | num_channels_in]) 53 | biases = tf.get_variable('biases', [num_channels_out]) 54 | out = tf.nn.conv2d_transpose(input, 55 | weights, 56 | output_dims, 57 | [1, stride_h, stride_w, 1], 58 | padding=padding) 59 | out = tf.nn.bias_add(out, biases) 60 | if non_linear_fn: 61 | return non_linear_fn(out, name=scope.name) 62 | else: 63 | return out 64 | 65 | def max_pool(input, name, filter_dims, stride_dims, padding='SAME'): 66 | assert(len(filter_dims) == 2) # filter height and width 67 | assert(len(stride_dims) == 2) # stride height and width 68 | 69 | filter_h, filter_w = filter_dims 70 | stride_h, stride_w = stride_dims 71 | return tf.nn.max_pool(input, 72 | [1, filter_h, filter_w, 1], 73 | [1, stride_h, stride_w, 1], 74 | padding=padding) 75 | 76 | def fc(input, name, out_dim, non_linear_fn=tf.nn.relu): 77 | assert(type(out_dim) == int) 78 | 79 | with tf.variable_scope(name) as scope: 80 | input_dims = input.get_shape().as_list() 81 | # the input to the fc layer should be flattened 82 | if len(input_dims) == 4: 83 | # for eg. 
the output of a conv layer 84 | batch_size, input_h, input_w, num_channels = input_dims 85 | # ignore the batch dimension 86 | in_dim = input_h * input_w * num_channels 87 | flat_input = tf.reshape(input, [batch_size, in_dim]) 88 | else: 89 | in_dim = input_dims[-1] 90 | flat_input = input 91 | 92 | weights = tf.get_variable('weights', [in_dim, out_dim]) 93 | biases = tf.get_variable('biases', [out_dim]) 94 | out = tf.nn.xw_plus_b(flat_input, weights, biases) 95 | if non_linear_fn: 96 | return non_linear_fn(out, name=scope.name) 97 | else: 98 | return out 99 | 100 | -------------------------------------------------------------------------------- /cs224n/word2vec/get_data.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | import collections 7 | import os 8 | import random 9 | import cPickle as pickle 10 | import zipfile 11 | 12 | import numpy as np 13 | from six.moves import urllib 14 | from six.moves import xrange # pylint: disable=redefined-builtin 15 | from tqdm import tqdm 16 | 17 | # Step 1: Download the data. 18 | url = 'http://mattmahoney.net/dc/' 19 | 20 | def maybe_download(filename, expected_bytes): 21 | """Download a file if not present, and make sure it's the right size.""" 22 | if not os.path.exists(filename): 23 | filename, _ = urllib.request.urlretrieve(url + filename, filename) 24 | statinfo = os.stat(filename) 25 | if statinfo.st_size == expected_bytes: 26 | print('Found and verified', filename) 27 | else: 28 | print(statinfo.st_size) 29 | raise Exception( 30 | 'Failed to verify ' + filename + '. Can you get to it with a browser?') 31 | return filename 32 | 33 | filename = maybe_download('text8.zip', 31344016) 34 | 35 | # Read the data into a list of strings. 
def read_data(filename):
    """Return the words of the first file stored in the zip archive `filename`.

    The first archive member is read, converted to a native string and split
    on whitespace.
    """
    with zipfile.ZipFile(filename) as archive:
        first_member = archive.namelist()[0]
        text = tf.compat.as_str(archive.read(first_member))
    return text.split()


words = read_data(filename)
print('Data size', len(words))

# Step 2: Build the dictionary and replace rare words with UNK token.
vocabulary_size = 50000


def build_dataset(words):
    """Encode `words` as integer ids over the most frequent vocabulary.

    Index 0 is reserved for 'UNK'; the `vocabulary_size - 1` most common
    words get the following indices in order of decreasing frequency.

    Returns:
        data: list with one index per input word (0 for out-of-vocabulary).
        count: list whose first entry is ['UNK', <number of unknown words>],
            followed by the (word, frequency) pairs from `most_common`.
        dictionary: dict mapping word -> index.
        reverse_dictionary: dict mapping index -> word.
    """
    count = [['UNK', -1]]
    count.extend(collections.Counter(words).most_common(vocabulary_size - 1))

    # Indices are handed out in insertion order: 'UNK' first, then by rank.
    dictionary = {}
    for word, _ in count:
        dictionary[word] = len(dictionary)

    data = []
    unk_count = 0
    for word in words:
        index = dictionary.get(word)
        if index is None:
            # Word fell outside the vocabulary: map it to dictionary['UNK'].
            index = 0
            unk_count += 1
        data.append(index)

    # Replace the -1 placeholder with the real number of unknown words.
    count[0][1] = unk_count
    reverse_dictionary = {index: word for word, index in dictionary.items()}
    return data, count, dictionary, reverse_dictionary


data, count, dictionary, reverse_dictionary = build_dataset(words)
del words  # Hint to reduce memory.

data_index = 0
# Step 3: Function to generate a training batch for the skip-gram model.
def generate_batch(batch_size, num_skips, skip_window):
    """Build one skip-gram batch from the module-level `data` stream.

    A window of `2 * skip_window + 1` word ids slides over `data`, starting
    at the module-level cursor `data_index` (wrapping around at the end).
    For every window position the centre word is emitted `num_skips` times,
    each time paired with a different, randomly chosen context word from the
    same window.

    Args:
        batch_size: number of (input, label) pairs; must be a multiple of
            `num_skips`.
        num_skips: how many context words to sample per centre word; must
            not exceed `2 * skip_window`.
        skip_window: number of words considered on each side of the centre.

    Returns:
        (batch, labels): int32 arrays of shape (batch_size,) holding the
        centre word ids and (batch_size, 1) holding the context word ids.

    Side effects:
        Advances the global `data_index` so the next call continues where
        this one stopped.
    """
    global data_index
    assert batch_size % num_skips == 0
    assert num_skips <= 2 * skip_window
    batch = np.empty((batch_size,), dtype=np.int32)
    labels = np.empty((batch_size, 1), dtype=np.int32)
    span = 2 * skip_window + 1  # [ skip_window target skip_window ]

    # `window` (not `buffer`) so the Python 2 builtin is not shadowed.
    window = collections.deque(maxlen=span)
    for _ in range(span):
        window.append(data[data_index])
        data_index = (data_index + 1) % len(data)

    for i in range(batch_size // num_skips):
        target = skip_window  # target label at the center of the window
        targets_to_avoid = [skip_window]
        for j in range(num_skips):
            # Resample until we hit a context position not used yet.
            while target in targets_to_avoid:
                target = random.randint(0, span - 1)
            targets_to_avoid.append(target)
            batch[i * num_skips + j] = window[skip_window]
            labels[i * num_skips + j, 0] = window[target]
        # Slide the window one word to the right.
        window.append(data[data_index])
        data_index = (data_index + 1) % len(data)

    # Backtrack by one window so the words that ended this batch's window
    # become centre words in the next batch instead of being skipped.
    data_index = (data_index + len(data) - span) % len(data)
    return batch, labels


batch_size = 128
skip_window = 1  # How many words to consider left and right.
num_skips = 2  # How many times to reuse an input to generate a label.

# We pick a random validation set to sample nearest neighbors. Here we limit the
# validation samples to the words that have a low numeric ID, which by
# construction are also the most frequent.
valid_size = 16  # Random set of words to evaluate similarity on.
valid_window = 100  # Only pick dev samples in the head of the distribution.
valid_examples = np.random.choice(valid_window, valid_size, replace=False)

# Step 5: Begin training.
108 | num_steps = 30000 109 | training_data = [] 110 | for step in tqdm(range(num_steps)): 111 | batch_inputs, batch_labels = generate_batch(batch_size, num_skips, skip_window) 112 | training_data.append((batch_inputs, batch_labels)) 113 | 114 | data_folder = './data' 115 | if not os.path.exists(data_folder): 116 | os.makedirs(data_folder) 117 | print('Saving training data...') 118 | pickle.dump(training_data, open( "./data/train.p", "wb" )) 119 | print('Saving validation data...') 120 | pickle.dump(valid_examples, open("./data/val.p", "wb")) 121 | print('Saving reverse_dictionary') 122 | pickle.dump(reverse_dictionary, open("./data/reverse_dictionary.p", "wb")) 123 | -------------------------------------------------------------------------------- /cs224n/word2vec/word2vec_complete.py: -------------------------------------------------------------------------------- 1 | # word2vec 2 | # Author: Nishith Khandwala (nishith@stanford.edu) 3 | # Adapted from https://www.tensorflow.org/tutorials/word2vec/ 4 | 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import math 10 | import numpy as np 11 | import tensorflow as tf 12 | 13 | from utils import * 14 | 15 | ''' 16 | Consider the following sentence: 17 | "the first cs224n homework was a lot of fun" 18 | 19 | With a window size of 1, we have the dataset: 20 | ([the, cs224n], first), ([lot, fun], of) ... 21 | 22 | Remember that Skipgram tries to predict each context word from 23 | its target word, and so the task becomes to predict 'the' and 24 | 'cs224n' from first, 'lot' and 'fun' from 'of' and so on. 25 | 26 | Our dataset now becomes: 27 | (first, the), (first, cs224n), (of, lot), (of, fun) ... 28 | ''' 29 | 30 | # Let's define some constants first 31 | batch_size = 128 32 | vocabulary_size = 50000 33 | embedding_size = 128 # Dimension of the embedding vector. 34 | num_sampled = 64 # Number of negative examples to sample. 
35 | 36 | ''' 37 | load_data loads the already preprocessed training and val data. 38 | 39 | train data is a list of (batch_input, batch_labels) pairs. 40 | val data is a list of all validation inputs. 41 | reverse_dictionary is a python dict from word index to word 42 | ''' 43 | train_data, val_data, reverse_dictionary = load_data() 44 | print("Number of training examples:", len(train_data)*batch_size) 45 | print("Number of validation examples:", len(val_data)) 46 | 47 | def skipgram(): 48 | batch_inputs = tf.placeholder(tf.int32, shape=[batch_size]) 49 | batch_labels = tf.placeholder(tf.int32, shape=[batch_size, 1]) 50 | val_dataset = tf.constant(val_data, dtype=tf.int32) 51 | 52 | with tf.variable_scope('word2vec') as scope: 53 | embeddings = tf.Variable(tf.random_uniform([vocabulary_size, 54 | embedding_size], 55 | -1.0, 1.0)) 56 | batch_embeddings = tf.nn.embedding_lookup(embeddings, batch_inputs) 57 | 58 | weights = tf.Variable(tf.truncated_normal([vocabulary_size, 59 | embedding_size], 60 | stddev=1.0/math.sqrt(embedding_size))) 61 | biases = tf.Variable(tf.zeros([vocabulary_size])) 62 | 63 | # This objective is maximized when the model assigns high probabilities 64 | # to the real words, and low probabilities to noise words. 
65 | loss = tf.reduce_mean(tf.nn.nce_loss(weights=weights, 66 | biases=biases, 67 | labels=batch_labels, 68 | inputs=batch_embeddings, 69 | num_sampled=num_sampled, 70 | num_classes=vocabulary_size)) 71 | 72 | 73 | norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True)) 74 | normalized_embeddings = embeddings/norm 75 | 76 | val_embeddings = tf.nn.embedding_lookup(normalized_embeddings, 77 | val_dataset) 78 | similarity = tf.matmul(val_embeddings, 79 | normalized_embeddings, transpose_b=True) 80 | 81 | return batch_inputs, batch_labels, normalized_embeddings, similarity, loss 82 | 83 | def run(): 84 | # load model 85 | batch_inputs, batch_labels, normalized_embeddings, similarity, loss = skipgram() 86 | optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss) 87 | 88 | init = tf.global_variables_initializer() 89 | with tf.Session() as session: 90 | session.run(init) 91 | 92 | average_loss = 0 93 | for step, batch_data in enumerate(train_data): 94 | inputs, labels = batch_data 95 | feed_dict = {batch_inputs: inputs, batch_labels: labels} 96 | 97 | _, loss_val = session.run([optimizer, loss], feed_dict=feed_dict) 98 | average_loss += loss_val 99 | 100 | if step % 1000 == 0: 101 | if step > 0: 102 | average_loss /= 1000 103 | print("Average loss at step ", step, ": ", average_loss) 104 | average_loss = 0 105 | 106 | if step % 5000 == 0: 107 | sim = similarity.eval() 108 | for i in xrange(len(val_data)): 109 | top_k = 8 # number of nearest neighbors 110 | nearest = (-sim[i, :]).argsort()[1:top_k + 1] 111 | print_closest_words(val_data[i], nearest, reverse_dictionary) 112 | 113 | final_embeddings = normalized_embeddings.eval() 114 | return final_embeddings 115 | 116 | # Let's start training 117 | final_embeddings = run() 118 | 119 | # Visualize the embeddings. 120 | visualize_embeddings(final_embeddings, reverse_dictionary) 121 | 122 | --------------------------------------------------------------------------------