├── .gitignore ├── README.md ├── ch05_implementing_nn_pytorch ├── Ch05_PyTorch.ipynb └── data │ └── train │ ├── img_0.jpg │ ├── img_1.jpg │ ├── img_2.jpg │ ├── img_3.jpg │ └── labels.npy ├── ch06_beyond_gradient_descent └── Ch06_Beyond_GD.ipynb ├── ch07_cnn └── Ch07_CNN.ipynb ├── ch08_embedding_representation └── Ch08_Embedding.ipynb ├── ch09_models_for_sequence_analysis ├── Ch09_01_POS_Tagger.ipynb ├── Ch09_02_RNN.ipynb ├── Ch09_03_LSTM_Sentiment.ipynb └── Ch09_04_Dissecting_NTN.ipynb ├── ch10_generative_models └── Ch10_Generative.ipynb ├── ch12_memory_augmented_nn └── Ch12_MemoryAugmented.ipynb ├── ch13_deep_reinforcement_learning ├── Ch13_01_RL_Pole_Cart.ipynb └── Ch13_02_RL_DQN_Breakout.ipynb └── first_edition_archive ├── archive ├── README.md ├── ast_edits.py ├── autoencoder_mnist.py ├── autoencoder_tsne.py ├── autoencoder_vs_pca.py ├── cifar10_input.py ├── convnet_cifar.py ├── convnet_cifar_bn.py ├── convnet_mnist.py ├── denoising_autoencoder_mnist.py ├── dnc │ ├── mem_ops.py │ ├── preprocess.py │ ├── test_babi.py │ └── train_babi.py ├── download_tweets.py ├── feed_forward_network-[THEANO] │ ├── feed_forward_network.py │ ├── hidden_layer.py │ ├── mnist_feed_forward_sgd.py │ └── softmax_layer.py ├── feedforward_pos.py ├── imdb_bn_lstm.py ├── imdb_lstm.py ├── imdb_ohlstm.py ├── input_data.py ├── input_word_data.py ├── linear_interpolation.py ├── logistic_network-[THEANO] │ ├── logistic_network.py │ └── mnist_logistic_sgd.py ├── logistic_regression.py ├── logistic_regression.py.ipynb ├── lstm.py ├── multilayer_perceptron.ipynb ├── multilayer_perceptron.py ├── neural_style │ ├── main.py │ ├── stylize.py │ └── vgg.py ├── one_layer_autoencoder.py ├── optimzer_mlp.py ├── random_walk.py ├── read_16M_tweet_data.py ├── read_imdb_data.py ├── read_pos_data.py ├── read_tweet_data.py ├── report.txt ├── requirements.txt ├── seq2seq │ ├── INFO.md │ ├── INTRO.md │ ├── data_utils.py │ ├── extract_data.py │ ├── nmt_lr_plot.png │ ├── nmt_perplexity_plot.png │ ├── old_seq2seq.py │ ├── output.txt │ ├── perplexity_data.txt │ ├── seq2seq.py │ ├── seq2seq_model.py │ ├── tmp_seq2seq_model.py │ └── translate.py ├── skipgram.py ├── text8.zip ├── tf_upgrade.py ├── tsne.png ├── twitter_lstm.py └── word2vec_fast.py └── fdl_examples ├── __init__.py ├── chapter3 ├── __init__.py ├── logistic_regression_updated.py └── multilayer_perceptron_updated.py ├── chapter4 ├── __init__.py ├── frozen_mlp_checkpoint │ ├── checkpoint │ ├── events.out.tfevents.1503341411.Nikhils-MacBook-Pro.local │ ├── model-checkpoint-547800.data-00000-of-00001 │ ├── model-checkpoint-547800.index │ ├── model-checkpoint-547800.meta │ ├── model-checkpoint-548350.data-00000-of-00001 │ ├── model-checkpoint-548350.index │ ├── model-checkpoint-548350.meta │ ├── model-checkpoint-548900.data-00000-of-00001 │ ├── model-checkpoint-548900.index │ ├── model-checkpoint-548900.meta │ ├── model-checkpoint-549450.data-00000-of-00001 │ ├── model-checkpoint-549450.index │ ├── model-checkpoint-549450.meta │ ├── model-checkpoint-550000.data-00000-of-00001 │ ├── model-checkpoint-550000.index │ └── model-checkpoint-550000.meta └── linear_interpolation_updated.py ├── chapter9 ├── dqn.py ├── dqn_plot_final.png └── policy_gradient_cartpole.py └── datatools ├── __init__.py └── input_data.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Data files 2 | *.gz 3 | data/ 4 | !data/README.md 5 | !ch05_implementing_nn_pytorch/data/ 6 | 7 | # babi saved model 8 | dnc/babi-model 9 | 10 | # Python 11 | *.pyc 12 | 13 | # Mac OS X 
custom attribute files
14 | .DS_Store
15 | 
16 | # Logs
17 | *logs*/
18 | 
19 | .ipynb_checkpoints
20 | MNIST
21 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Fundamentals of Deep Learning
2 | 
3 | This repository is the code companion to [Fundamentals of Deep Learning, Second Edition](https://www.amazon.com/Fundamentals-Deep-Learning-Next-Generation-Intelligence/dp/1491925612 "Fundamentals of Deep Learning") by Nithin Buduma, [Nikhil Buduma](https://github.com/darksigma "Nikhil Buduma"), and [Joe Papa](https://github.com/joe-papa "Joe Papa"), with contributions from [Nicholas Locascio](https://github.com/nicholaslocascio "Nicholas Locascio"). Contributions to the text and code have also been made by [Mostafa Samir](https://github.com/Mostafa-Samir "Mostafa Samir"), [Surya Bhupatiraju](https://github.com/suryabhupa "Surya Bhupatiraju"), and [Anish Athalye](https://github.com/anishathalye "Anish Athalye"). All algorithms in the Second Edition are implemented in [PyTorch](https://www.pytorch.org/ "PyTorch"), one of the most popular machine learning frameworks.
4 | 
5 | ## Guide to the repository
6 | 
7 | This repo contains the code from the Second Edition of Fundamentals of Deep Learning, published in 2022. Code from the First Edition is archived in the first_edition_archive folder. Where a chapter of the Second Edition has accompanying code, it lives in the correspondingly named folder. Most examples are provided as Google Colab notebooks; in some cases, .py files are also included so the examples can be run more conveniently outside a notebook.
8 | 
9 | ## Setting up your development environment
10 | You can run the Google Colab notebooks directly from this GitHub repo. [See instructions on using Google Colab with GitHub here](https://colab.research.google.com/github/googlecolab/colabtools/blob/master/notebooks/colab-github-demo.ipynb "Using Google Colab with Github").
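If you prefer to run the notebooks locally rather than in Colab, you will need a Python environment with at least PyTorch installed (individual notebooks may pull in additional libraries). The snippet below is a minimal editorial sketch for sanity-checking such an environment before opening the notebooks; it is not an official setup script from the book.

```python
# Minimal environment check (assumes the torch package is already installed,
# e.g. by following the install selector on pytorch.org).
import torch

print("PyTorch version:", torch.__version__)          # prints the installed version string
print("CUDA available:", torch.cuda.is_available())   # False simply means the notebooks will run on CPU
```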
11 | 12 | -------------------------------------------------------------------------------- /ch05_implementing_nn_pytorch/data/train/img_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/ch05_implementing_nn_pytorch/data/train/img_0.jpg -------------------------------------------------------------------------------- /ch05_implementing_nn_pytorch/data/train/img_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/ch05_implementing_nn_pytorch/data/train/img_1.jpg -------------------------------------------------------------------------------- /ch05_implementing_nn_pytorch/data/train/img_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/ch05_implementing_nn_pytorch/data/train/img_2.jpg -------------------------------------------------------------------------------- /ch05_implementing_nn_pytorch/data/train/img_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/ch05_implementing_nn_pytorch/data/train/img_3.jpg -------------------------------------------------------------------------------- /ch05_implementing_nn_pytorch/data/train/labels.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/ch05_implementing_nn_pytorch/data/train/labels.npy -------------------------------------------------------------------------------- /ch09_models_for_sequence_analysis/Ch09_02_RNN.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Ch09_02_RNN.ipynb","provenance":[{"file_id":"1XL97FXDkJDFMjM4M_FjCslCrdcanzDE8","timestamp":1641846356338}],"authorship_tag":"ABX9TyM1mSoOVpEqGcIzGXAfkEg8"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["# PyTorch Primitives for RNN Models"],"metadata":{"id":"JdVhVjN-llAA"}},{"cell_type":"code","source":["import torch\n","import torch.nn as nn"],"metadata":{"id":"ZWlouOwETInM"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["cell_1 = nn.RNNCell(input_size = 10,\n"," hidden_size = 20, \n"," nonlinearity='tanh')\n","\n","cell_2 = nn.LSTMCell(input_size = 10,\n"," hidden_size = 20)\n","\n","cell_3 = nn.GRUCell(input_size = 10, \n"," hidden_size = 20)"],"metadata":{"id":"IWsgDePImGGp"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Stacking recurrent units\n","cell_1 = nn.LSTMCell(input_size = 10, \n"," hidden_size = 20)\n","cell_2 = nn.LSTMCell(input_size = 20, \n"," hidden_size = 20)\n","\n","full_cell = nn.Sequential(cell_1, cell_2)"],"metadata":{"id":"czzPCIotmPEU"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Run LSTM Cell"],"metadata":{"id":"7ruPWI82TVHf"}},{"cell_type":"code","source":["input = torch.randn(2, 3, 10) # (time_steps, batch, 
input_size)\n","hx_init = torch.randn(3, 20) # hidden state of size: (batch_size, hidden_size)\n","# output of output gate\n","cx_init = torch.randn(3, 20) # cell state of size: (batch_size, hidden_size)\n","# output of write gate\n","output = []\n","\n","# loop over time_steps\n","hx, cx = hx_init, cx_init\n","for t in range(input.size()[0]):\n"," hx, cx = cell_1(input[t], (hx, cx)) # input[t] is size (batch_size, input_size)\n"," hx2, cx2 = cell_2(hx, (hx, cx)) # input[t] is size (batch_size, input_size)\n"," output.append(hx2)\n","output = torch.stack(output, dim=0) # shape is (time_steps, batch_size, input_size)"],"metadata":{"id":"ICdrzWnATYlf"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# multi-layer RNN and LSTM\n","multi_layer_rnn = nn.RNN(input_size = 10, \n"," hidden_size = 20, \n"," num_layers = 2,\n"," nonlinearity = 'tanh')\n","\n","multi_layer_lstm = nn.LSTM(input_size = 10, \n"," hidden_size = 20, \n"," num_layers = 2)"],"metadata":{"id":"noeIWbGtUuX6"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["output2 = []\n","\n","# loop over time_steps\n","hx, cx = hx_init, cx_init\n","for t in range(input.size()[0]):\n"," hx, cx = cell_1(input[t], (hx, cx)) # input[t] is size (batch_size, input_size)\n"," hx2, cx2 = cell_2(hx, (hx, cx)) # input[t] is size (batch_size, input_size)\n"," output2.append(hx2)\n","output2 = torch.stack(output2, dim=0) # shape is (time_steps, batch_size, input_size)"],"metadata":{"id":"CrfO853DUk2j"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["torch.all(output == output2)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"9wmZH08eVN9M","executionInfo":{"status":"ok","timestamp":1642042045904,"user_tz":300,"elapsed":93,"user":{"displayName":"Joe Papa","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"00487850786587503652"}},"outputId":"f0ebbccd-9689-4b14-d1f1-4633e5b70524"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["tensor(True)"]},"metadata":{},"execution_count":19}]},{"cell_type":"code","source":["# multi-layer RNN and LSTM with other settings\n","multi_layer_rnn = nn.RNN(input_size = 10, \n"," hidden_size = 20, \n"," num_layers = 2,\n"," nonlinearity = 'tanh',\n"," batch_first = False,\n"," dropout = 0.5)\n","\n","multi_layer_lstm = nn.LSTM(input_size = 10, \n"," hidden_size = 20, \n"," num_layers = 2,\n"," batch_first = False,\n"," dropout = 0.5)"],"metadata":{"id":"QB1Nwd8dXFNK"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Apply dropouts \n","# If dropout parameter is non-zero, the model introduces \n","# a Dropout layer on # the outputs of each LSTM layer \n","# except the last layer, with dropout probability equal to dropout. 
\n","# Default: 0\n","input_size = 32\n","\n","cell_1 = nn.LSTM(input_size,\n"," hidden_size = 10, \n"," num_layers=2,\n"," dropout = 1.0)"],"metadata":{"id":"HRJSX9fBmPHE"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["rnn = nn.LSTM(input_size = 32, \n"," hidden_size = 20, \n"," num_layers = 1,\n"," batch_first= False)\n","\n","inputs = torch.randn((32, 32, 32))\n","output, states = rnn(inputs)"],"metadata":{"id":"jNQYHSVIRQXZ","executionInfo":{"status":"ok","timestamp":1642046030147,"user_tz":300,"elapsed":130,"user":{"displayName":"Joe Papa","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"00487850786587503652"}}},"execution_count":25,"outputs":[]},{"cell_type":"code","source":["# LSTM in action"],"metadata":{"id":"6FPrCOwxYOwm","executionInfo":{"status":"ok","timestamp":1642046030981,"user_tz":300,"elapsed":109,"user":{"displayName":"Joe Papa","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"00487850786587503652"}}},"execution_count":26,"outputs":[]},{"cell_type":"code","source":["input = torch.randn(5, 3, 10) # (time_steps, batch, input_size)\n","h_0 = torch.randn(2, 3, 20) # (n_layers, batch_size, hidden_size)\n","c_0 = torch.randn(2, 3, 20) # (n_layers, batch_size, hidden_size)\n","\n","rnn = nn.LSTM(10, 20, 2) # (input_size, hidden_size, num_layers)\n","output_n, (hn, cn) = rnn(input, (h_0, c_0))"],"metadata":{"id":"2u7pR2sxYrdD","executionInfo":{"status":"ok","timestamp":1642046048985,"user_tz":300,"elapsed":108,"user":{"displayName":"Joe Papa","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"00487850786587503652"}}},"execution_count":28,"outputs":[]},{"cell_type":"code","source":[""],"metadata":{"id":"CaEGEWJRZGK1"},"execution_count":null,"outputs":[]}]} -------------------------------------------------------------------------------- /ch12_memory_augmented_nn/Ch12_MemoryAugmented.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Ch12_MemoryAugmented.ipynb", 7 | "provenance": [] 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | }, 13 | "language_info": { 14 | "name": "python" 15 | } 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "source": [ 21 | "#Implementing the DNC in PyTorch" 22 | ], 23 | "metadata": { 24 | "id": "AmTCQQ41h1gs" 25 | } 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "source": [ 30 | "# PyTorch Code\n", 31 | "Only the code printed in the book has been converted to PyTorch. This notebook contains that code.\n", 32 | "\n", 33 | "However, the entire code set needed to run the training and testing of bAbI has not yet been converted. 
For reference, the complete codeset can be found at https://github.com/darksigma/Fundamentals-of-Deep-Learning-Book/tree/master/first_edition_archive/archive/dnc\n", 34 | "\n", 35 | "The folder above contains the files referenced in the book:\n", 36 | "- mem_ops.py\n", 37 | "- preprocess.py\n", 38 | "- train_babi.py\n", 39 | "- test_babi.py" 40 | ], 41 | "metadata": { 42 | "id": "2XvSCp2f7foV" 43 | } 44 | }, 45 | { 46 | "cell_type": "code", 47 | "source": [ 48 | "import torch\n", 49 | "def Lt(L, wwt, p, N):\n", 50 | "\n", 51 | " L_t = torch.zeros((N,N), dtype=torch.float32)\n", 52 | " for i in range(N):\n", 53 | " for j in range(N):\n", 54 | " if i == j:\n", 55 | " continue\n", 56 | " mask = torch.zeros((N,N), dtype=torch.float32)\n", 57 | " mask[i,j] = 1.0\n", 58 | " \n", 59 | " link_t = (1 - wwt[i] - wwt[j]) * L[i,j] + \\\n", 60 | " wwt[i] * p[j]\n", 61 | " L_t += mask * link_t\n", 62 | " return L_t" 63 | ], 64 | "metadata": { 65 | "id": "g-Fp-Zyr8lpV" 66 | }, 67 | "execution_count": null, 68 | "outputs": [] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "source": [ 73 | "# sample test code\n", 74 | "N = 10\n", 75 | "L = torch.randn((N,N))\n", 76 | "wwt = torch.randn(N)\n", 77 | "p = torch.randn(N)\n", 78 | "\n", 79 | "L_t = Lt(L, wwt, p, N)" 80 | ], 81 | "metadata": { 82 | "id": "gGxEXcFe8pFd" 83 | }, 84 | "execution_count": null, 85 | "outputs": [] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "source": [ 90 | "def Lt(L, wwt, p, N):\n", 91 | " \"\"\"\n", 92 | " returns the updated link matrix given the previous one along\n", 93 | " with the updated write weightings and the previous precedence\n", 94 | " vector\n", 95 | " \"\"\"\n", 96 | " def pairwise_add(v):\n", 97 | " \"\"\"\n", 98 | " returns the matrix of pairs - adding the elements of v to\n", 99 | " themselves\n", 100 | " \"\"\"\n", 101 | " n = v.shape[0]\n", 102 | " # a NxN matrix of duplicates of u along the columns\n", 103 | " V = v.repeat(1,n) \n", 104 | " return V + V\n", 105 | "\n", 106 | " # expand dimensions of wwt and p to make matmul behave as outer\n", 107 | " # product\n", 108 | " wwt = torch.unsqueeze(wwt, 1)\n", 109 | " p = torch.unsqueeze(p, 0)\n", 110 | "\n", 111 | " I = torch.eye(N, dtype=torch.float32)\n", 112 | " return (((1 - pairwise_add(wwt)) * L +\n", 113 | " torch.matmul(wwt, p)) * (1 - I))" 114 | ], 115 | "metadata": { 116 | "id": "RiUrrSGF_S-V" 117 | }, 118 | "execution_count": null, 119 | "outputs": [] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "source": [ 124 | "# sample test code\n", 125 | "N = 10\n", 126 | "L = torch.randn((N,N))\n", 127 | "wwt = torch.randn(N)\n", 128 | "p = torch.randn(N)\n", 129 | "\n", 130 | "L_t = Lt(L, wwt, p, N)" 131 | ], 132 | "metadata": { 133 | "id": "gRhe00Pn_X-i" 134 | }, 135 | "execution_count": null, 136 | "outputs": [] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "source": [ 141 | "# sample test input\n", 142 | "ut = torch.randn((N))\n", 143 | "\n", 144 | "sorted_ut, free_list = torch.topk(-1*ut, N)\n", 145 | "sorted_ut *= -1" 146 | ], 147 | "metadata": { 148 | "id": "1ZqiwjFz9FEU" 149 | }, 150 | "execution_count": null, 151 | "outputs": [] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "source": [ 156 | "# sample test code\n", 157 | "out_of_location_at = torch.rand(N)\n", 158 | "\n", 159 | "empty_at = torch.empty(N)\n", 160 | "a_t = empty_at.scatter(0, free_list, out_of_location_at)" 161 | ], 162 | "metadata": { 163 | "id": "2VU11cTz9Hv0" 164 | }, 165 | "execution_count": null, 166 | "outputs": [] 167 | } 168 | ] 169 | } 
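A note on the allocation-weighting cells near the end of the notebook above: they demonstrate the `torch.topk` sort and the `scatter` step separately, with random placeholder values standing in for the out-of-location weights. A possible way to combine the two steps into a single function, following the formula used by `at()` in the first edition's `mem_ops.py` (reproduced later in this archive), is sketched below. The function name and the exclusive-cumulative-product workaround are editorial assumptions, not book code; PyTorch's `torch.cumprod` has no `exclusive` flag, so the sketch shifts the sorted usage vector by one position instead.

```python
import torch

def allocation_weighting(ut, N):
    """Hypothetical PyTorch port of at() from mem_ops.py (a sketch, not book code)."""
    # sort usages in ascending order; free_list holds their original indices
    sorted_ut, free_list = torch.topk(-1 * ut, N)
    sorted_ut = -1 * sorted_ut

    # exclusive cumulative product: the first factor is 1 and the last usage drops out
    shifted = torch.cat([torch.ones(1), sorted_ut[:-1]])
    out_of_location_at = (1 - sorted_ut) * torch.cumprod(shifted, dim=0)

    # scatter the weights back to their original (unsorted) memory locations
    return torch.zeros(N).scatter(0, free_list, out_of_location_at)

# smoke test with usages in [0, 1)
a_t = allocation_weighting(torch.rand(10), 10)
```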
-------------------------------------------------------------------------------- /first_edition_archive/archive/README.md: -------------------------------------------------------------------------------- 1 | # Fundamentals of Deep Learning 2 | 3 | This repository is the code companion to my book "Fundamentals of Deep Learning." All algorithms are implemented in [Tensorflow](https://www.tensorflow.org/ "Tensorflow"), Google's new machine intelligence library. 4 | 5 | ## TODO 6 | 7 | ### Networks 8 | 9 | - Logistic Regression (Nikhil) 10 | - Multilayer Perceptron (Nikhil) 11 | - Convolutional Network (Nikhil) 12 | - Neural Style (Anish) 13 | - Autoencoder (Hassan) 14 | - Denoising Autoencoder (Hassan) 15 | - Convolutional Autoencoder (Hassan) 16 | - RNN (Nikhil) 17 | - LSTM Network (Nikhil) 18 | - GRU Network (Nikhil) 19 | - LSTM + Attention (Nikhil) 20 | - RCNN (Nikhil) 21 | - Memory Networks (Nikhil) 22 | - Pointer Networks 23 | - Neural Turing Machines 24 | - Neural Programmer 25 | - DQN 26 | - LSTM-DQN 27 | - Deep Convolutional Inverse Graphics Network 28 | - Highway Networks 29 | - Deep Residual Networks 30 | 31 | ### Embedding 32 | 33 | - Word2Vec (Nikhil) 34 | - Skip-gram/CBoW 35 | - GloVe (Nikhil) 36 | - Skip-thought Vectors (Nikhil) 37 | 38 | ### Optimizers 39 | 40 | - MLP + Momentum 41 | - MLP + RMSProp 42 | - MLP + ADAM 43 | - MLP + FTRL 44 | - MLP + ADADELTA 45 | -------------------------------------------------------------------------------- /first_edition_archive/archive/autoencoder_tsne.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/archive/autoencoder_tsne.py -------------------------------------------------------------------------------- /first_edition_archive/archive/autoencoder_vs_pca.py: -------------------------------------------------------------------------------- 1 | from sklearn import decomposition 2 | from matplotlib import pyplot as plt 3 | import tensorflow as tf 4 | import autoencoder_mnist as ae 5 | import argparse, input_data 6 | import numpy as np 7 | # model-checkpoint-0349-191950 8 | 9 | def scatter(codes, labels): 10 | colors = [ 11 | ('#27ae60', 'o'), 12 | ('#2980b9', 'o'), 13 | ('#8e44ad', 'o'), 14 | ('#f39c12', 'o'), 15 | ('#c0392b', 'o'), 16 | ('#27ae60', 'x'), 17 | ('#2980b9', 'x'), 18 | ('#8e44ad', 'x'), 19 | ('#c0392b', 'x'), 20 | ('#f39c12', 'x'), 21 | ] 22 | for num in xrange(10): 23 | plt.scatter([codes[:,0][i] for i in xrange(len(labels)) if labels[i] == num], 24 | [codes[:,1][i] for i in xrange(len(labels)) if labels[i] == num], 7, 25 | label=str(num), color = colors[num][0], marker=colors[num][1]) 26 | plt.legend() 27 | plt.show() 28 | 29 | if __name__ == '__main__': 30 | 31 | parser = argparse.ArgumentParser(description='Test various optimization strategies') 32 | parser.add_argument('savepath', nargs=1, type=str) 33 | args = parser.parse_args() 34 | 35 | print "\nPULLING UP MNIST DATA" 36 | mnist = input_data.read_data_sets("data/", one_hot=False) 37 | print mnist.test.labels 38 | 39 | # print "\nSTARTING PCA" 40 | # pca = decomposition.PCA(n_components=2) 41 | # pca.fit(mnist.train.images) 42 | # 43 | # print "\nGENERATING PCA CODES AND RECONSTRUCTION" 44 | # pca_codes = pca.transform(mnist.test.images) 45 | # print pca_codes 46 | # 47 | # scatter(pca_codes, mnist.test.labels) 48 | 49 | with tf.Graph().as_default(): 50 | 51 | with 
tf.variable_scope("autoencoder_model"): 52 | 53 | x = tf.placeholder("float", [None, 784]) # mnist data image of shape 28*28=784 54 | phase_train = tf.placeholder(tf.bool) 55 | 56 | code = ae.encoder(x, 2, phase_train) 57 | 58 | output = ae.decoder(code, 2, phase_train) 59 | 60 | cost, train_summary_op = ae.loss(output, x) 61 | 62 | global_step = tf.Variable(0, name='global_step', trainable=False) 63 | 64 | train_op = ae.training(cost, global_step) 65 | 66 | eval_op, in_im_op, out_im_op, val_summary_op = ae.evaluate(output, x) 67 | 68 | saver = tf.train.Saver() 69 | 70 | sess = tf.Session() 71 | 72 | 73 | print "\nSTARTING AUTOENCODER\n", args.savepath[0] 74 | sess = tf.Session() 75 | saver = tf.train.Saver() 76 | saver.restore(sess, args.savepath[0]) 77 | 78 | print "\nGENERATING AE CODES AND RECONSTRUCTION" 79 | ae_codes, ae_reconstruction = sess.run([code, output], feed_dict={x: mnist.test.images * np.random.randint(2, size=(784)), phase_train: True}) 80 | 81 | scatter(ae_codes, mnist.test.labels) 82 | 83 | plt.imshow(ae_reconstruction[0].reshape((28,28)), cmap=plt.cm.gray) 84 | plt.show() 85 | -------------------------------------------------------------------------------- /first_edition_archive/archive/convnet_cifar.py: -------------------------------------------------------------------------------- 1 | import cifar10_input 2 | cifar10_input.maybe_download_and_extract() 3 | 4 | import tensorflow as tf 5 | import numpy as np 6 | import time, os 7 | 8 | # Architecture 9 | n_hidden_1 = 256 10 | n_hidden_2 = 256 11 | 12 | # Parameters 13 | learning_rate = 0.001 14 | training_epochs = 1000 15 | batch_size = 128 16 | display_step = 1 17 | 18 | def inputs(eval_data=True): 19 | data_dir = os.path.join('data/cifar10_data', 'cifar-10-batches-bin') 20 | return cifar10_input.inputs(eval_data=eval_data, data_dir=data_dir, 21 | batch_size=batch_size) 22 | 23 | def distorted_inputs(): 24 | data_dir = os.path.join('data/cifar10_data', 'cifar-10-batches-bin') 25 | return cifar10_input.distorted_inputs(data_dir=data_dir, 26 | batch_size=batch_size) 27 | 28 | def filter_summary(V, weight_shape): 29 | ix = weight_shape[0] 30 | iy = weight_shape[1] 31 | cx, cy = 8, 8 32 | V_T = tf.transpose(V, (3, 0, 1, 2)) 33 | tf.image_summary("filters", V_T, max_images=64) 34 | 35 | def conv2d(input, weight_shape, bias_shape, visualize=False): 36 | incoming = weight_shape[0] * weight_shape[1] * weight_shape[2] 37 | weight_init = tf.random_normal_initializer(stddev=(2.0/incoming)**0.5) 38 | W = tf.get_variable("W", weight_shape, initializer=weight_init) 39 | if visualize: 40 | filter_summary(W, weight_shape) 41 | bias_init = tf.constant_initializer(value=0) 42 | b = tf.get_variable("b", bias_shape, initializer=bias_init) 43 | return tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(input, W, strides=[1, 1, 1, 1], padding='SAME'), b)) 44 | 45 | def max_pool(input, k=2): 46 | return tf.nn.max_pool(input, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME') 47 | 48 | def layer(input, weight_shape, bias_shape): 49 | weight_init = tf.random_normal_initializer(stddev=(2.0/weight_shape[0])**0.5) 50 | bias_init = tf.constant_initializer(value=0) 51 | W = tf.get_variable("W", weight_shape, 52 | initializer=weight_init) 53 | b = tf.get_variable("b", bias_shape, 54 | initializer=bias_init) 55 | return tf.nn.relu(tf.matmul(input, W) + b) 56 | 57 | def inference(x, keep_prob): 58 | 59 | with tf.variable_scope("conv_1"): 60 | conv_1 = conv2d(x, [5, 5, 3, 64], [64], visualize=True) 61 | pool_1 = max_pool(conv_1) 62 | 63 | with 
tf.variable_scope("conv_2"): 64 | conv_2 = conv2d(pool_1, [5, 5, 64, 64], [64]) 65 | pool_2 = max_pool(conv_2) 66 | 67 | with tf.variable_scope("fc_1"): 68 | 69 | dim = 1 70 | for d in pool_2.get_shape()[1:].as_list(): 71 | dim *= d 72 | 73 | pool_2_flat = tf.reshape(pool_2, [-1, dim]) 74 | fc_1 = layer(pool_2_flat, [dim, 384], [384]) 75 | 76 | # apply dropout 77 | fc_1_drop = tf.nn.dropout(fc_1, keep_prob) 78 | 79 | with tf.variable_scope("fc_2"): 80 | 81 | fc_2 = layer(fc_1_drop, [384, 192], [192]) 82 | 83 | # apply dropout 84 | fc_2_drop = tf.nn.dropout(fc_2, keep_prob) 85 | 86 | with tf.variable_scope("output"): 87 | output = layer(fc_2_drop, [192, 10], [10]) 88 | 89 | return output 90 | 91 | 92 | def loss(output, y): 93 | xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(output, tf.cast(y, tf.int64)) 94 | loss = tf.reduce_mean(xentropy) 95 | return loss 96 | 97 | def training(cost, global_step): 98 | tf.scalar_summary("cost", cost) 99 | optimizer = tf.train.AdamOptimizer(learning_rate) 100 | train_op = optimizer.minimize(cost, global_step=global_step) 101 | return train_op 102 | 103 | def evaluate(output, y): 104 | correct_prediction = tf.equal(tf.cast(tf.argmax(output, 1), dtype=tf.int32), y) 105 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 106 | tf.scalar_summary("validation error", (1.0 - accuracy)) 107 | return accuracy 108 | 109 | if __name__ == '__main__': 110 | 111 | 112 | 113 | with tf.device("/gpu:0"): 114 | 115 | with tf.Graph().as_default(): 116 | 117 | with tf.variable_scope("cifar_conv_model"): 118 | 119 | x = tf.placeholder("float", [None, 24, 24, 3]) 120 | y = tf.placeholder("int32", [None]) 121 | keep_prob = tf.placeholder(tf.float32) # dropout probability 122 | 123 | distorted_images, distorted_labels = distorted_inputs() 124 | val_images, val_labels = inputs() 125 | 126 | output = inference(x, keep_prob) 127 | 128 | cost = loss(output, y) 129 | 130 | global_step = tf.Variable(0, name='global_step', trainable=False) 131 | 132 | train_op = training(cost, global_step) 133 | 134 | eval_op = evaluate(output, y) 135 | 136 | summary_op = tf.merge_all_summaries() 137 | 138 | saver = tf.train.Saver() 139 | 140 | sess = tf.Session() 141 | 142 | summary_writer = tf.train.SummaryWriter("conv_cifar_logs/", 143 | graph_def=sess.graph_def) 144 | 145 | 146 | init_op = tf.initialize_all_variables() 147 | 148 | sess.run(init_op) 149 | 150 | tf.train.start_queue_runners(sess=sess) 151 | 152 | # Training cycle 153 | for epoch in range(training_epochs): 154 | 155 | avg_cost = 0. 156 | total_batch = int(cifar10_input.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN/batch_size) 157 | # Loop over all batches 158 | for i in range(total_batch): 159 | # Fit training using batch data 160 | 161 | train_x, train_y = sess.run([distorted_images, distorted_labels]) 162 | 163 | _, new_cost = sess.run([train_op, cost], feed_dict={x: train_x, y: train_y, keep_prob: 0.5}) 164 | # Compute average loss 165 | avg_cost += new_cost/total_batch 166 | # print "Epoch %d, minibatch %d of %d. Cost = %0.4f." 
%(epoch, i, total_batch, new_cost) 167 | 168 | # Display logs per epoch step 169 | if epoch % display_step == 0: 170 | print "Epoch:", '%04d' % (epoch+1), "cost =", "{:.9f}".format(avg_cost) 171 | 172 | val_x, val_y = sess.run([val_images, val_labels]) 173 | 174 | accuracy = sess.run(eval_op, feed_dict={x: val_x, y: val_y, keep_prob: 1}) 175 | 176 | print "Validation Error:", (1 - accuracy) 177 | 178 | summary_str = sess.run(summary_op, feed_dict={x: train_x, y: train_y, keep_prob: 1}) 179 | summary_writer.add_summary(summary_str, sess.run(global_step)) 180 | 181 | saver.save(sess, "conv_cifar_logs/model-checkpoint", global_step=global_step) 182 | 183 | 184 | print "Optimization Finished!" 185 | 186 | val_x, val_y = sess.run([val_images, val_labels]) 187 | accuracy = sess.run(eval_op, feed_dict={x: val_x, y: val_y, keep_prob: 1}) 188 | 189 | print "Test Accuracy:", accuracy 190 | -------------------------------------------------------------------------------- /first_edition_archive/archive/convnet_mnist.py: -------------------------------------------------------------------------------- 1 | import input_data 2 | mnist = input_data.read_data_sets("data/", one_hot=True) 3 | 4 | import tensorflow as tf 5 | import time 6 | 7 | # Architecture 8 | n_hidden_1 = 256 9 | n_hidden_2 = 256 10 | 11 | # Parameters 12 | learning_rate = 0.0001 13 | training_epochs = 1000 14 | batch_size = 100 15 | display_step = 1 16 | 17 | def conv2d(input, weight_shape, bias_shape): 18 | incoming = weight_shape[0] * weight_shape[1] * weight_shape[2] 19 | weight_init = tf.random_normal_initializer(stddev=(2.0/incoming)**0.5) 20 | W = tf.get_variable("W", weight_shape, initializer=weight_init) 21 | bias_init = tf.constant_initializer(value=0) 22 | b = tf.get_variable("b", bias_shape, initializer=bias_init) 23 | return tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(input, W, strides=[1, 1, 1, 1], padding='SAME'), b)) 24 | 25 | def max_pool(input, k=2): 26 | return tf.nn.max_pool(input, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME') 27 | 28 | def layer(input, weight_shape, bias_shape): 29 | weight_init = tf.random_normal_initializer(stddev=(2.0/weight_shape[0])**0.5) 30 | bias_init = tf.constant_initializer(value=0) 31 | W = tf.get_variable("W", weight_shape, 32 | initializer=weight_init) 33 | b = tf.get_variable("b", bias_shape, 34 | initializer=bias_init) 35 | return tf.nn.relu(tf.matmul(input, W) + b) 36 | 37 | 38 | def inference(x, keep_prob): 39 | 40 | x = tf.reshape(x, shape=[-1, 28, 28, 1]) 41 | with tf.variable_scope("conv_1"): 42 | conv_1 = conv2d(x, [5, 5, 1, 32], [32]) 43 | pool_1 = max_pool(conv_1) 44 | 45 | with tf.variable_scope("conv_2"): 46 | conv_2 = conv2d(pool_1, [5, 5, 32, 64], [64]) 47 | pool_2 = max_pool(conv_2) 48 | 49 | with tf.variable_scope("fc"): 50 | pool_2_flat = tf.reshape(pool_2, [-1, 7 * 7 * 64]) 51 | fc_1 = layer(pool_2_flat, [7*7*64, 1024], [1024]) 52 | 53 | # apply dropout 54 | fc_1_drop = tf.nn.dropout(fc_1, keep_prob) 55 | 56 | with tf.variable_scope("output"): 57 | output = layer(fc_1_drop, [1024, 10], [10]) 58 | 59 | return output 60 | 61 | 62 | def loss(output, y): 63 | xentropy = tf.nn.softmax_cross_entropy_with_logits(output, y) 64 | loss = tf.reduce_mean(xentropy) 65 | return loss 66 | 67 | def training(cost, global_step): 68 | tf.scalar_summary("cost", cost) 69 | optimizer = tf.train.AdamOptimizer(learning_rate) 70 | train_op = optimizer.minimize(cost, global_step=global_step) 71 | return train_op 72 | 73 | 74 | def evaluate(output, y): 75 | correct_prediction = 
tf.equal(tf.argmax(output, 1), tf.argmax(y, 1)) 76 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 77 | tf.scalar_summary("validation error", (1.0 - accuracy)) 78 | return accuracy 79 | 80 | if __name__ == '__main__': 81 | 82 | with tf.device("/gpu:0"): 83 | 84 | with tf.Graph().as_default(): 85 | 86 | with tf.variable_scope("mnist_conv_model"): 87 | 88 | x = tf.placeholder("float", [None, 784]) # mnist data image of shape 28*28=784 89 | y = tf.placeholder("float", [None, 10]) # 0-9 digits recognition => 10 classes 90 | keep_prob = tf.placeholder(tf.float32) # dropout probability 91 | 92 | output = inference(x, keep_prob) 93 | 94 | cost = loss(output, y) 95 | 96 | global_step = tf.Variable(0, name='global_step', trainable=False) 97 | 98 | train_op = training(cost, global_step) 99 | 100 | eval_op = evaluate(output, y) 101 | 102 | summary_op = tf.merge_all_summaries() 103 | 104 | saver = tf.train.Saver() 105 | 106 | sess = tf.Session() 107 | 108 | summary_writer = tf.train.SummaryWriter("conv_mnist_logs/", 109 | graph_def=sess.graph_def) 110 | 111 | 112 | init_op = tf.initialize_all_variables() 113 | 114 | sess.run(init_op) 115 | 116 | 117 | # Training cycle 118 | for epoch in range(training_epochs): 119 | 120 | avg_cost = 0. 121 | total_batch = int(mnist.train.num_examples/batch_size) 122 | # Loop over all batches 123 | for i in range(total_batch): 124 | minibatch_x, minibatch_y = mnist.train.next_batch(batch_size) 125 | # Fit training using batch data 126 | sess.run(train_op, feed_dict={x: minibatch_x, y: minibatch_y, keep_prob: 0.5}) 127 | # Compute average loss 128 | avg_cost += sess.run(cost, feed_dict={x: minibatch_x, y: minibatch_y, keep_prob: 0.5})/total_batch 129 | # Display logs per epoch step 130 | if epoch % display_step == 0: 131 | print "Epoch:", '%04d' % (epoch+1), "cost =", "{:.9f}".format(avg_cost) 132 | 133 | accuracy = sess.run(eval_op, feed_dict={x: mnist.validation.images, y: mnist.validation.labels, keep_prob: 1}) 134 | 135 | print "Validation Error:", (1 - accuracy) 136 | 137 | summary_str = sess.run(summary_op, feed_dict={x: minibatch_x, y: minibatch_y, keep_prob: 0.5}) 138 | summary_writer.add_summary(summary_str, sess.run(global_step)) 139 | 140 | saver.save(sess, "conv_mnist_logs/model-checkpoint", global_step=global_step) 141 | 142 | 143 | print "Optimization Finished!" 
144 | 145 | 146 | accuracy = sess.run(eval_op, feed_dict={x: mnist.test.images, y: mnist.test.labels, keep_prob: 1}) 147 | 148 | print "Test Accuracy:", accuracy 149 | -------------------------------------------------------------------------------- /first_edition_archive/archive/dnc/mem_ops.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | def init_memory(N, W, R): 5 | """ 6 | returns the initial values of the memory matrix, usage vector, 7 | precedence vector, link matrix, read weightings, write weightings, 8 | and the read vectors 9 | """ 10 | 11 | M0 = tf.fill([N, W], 1e-6) 12 | u0 = tf.zeros([N]) 13 | p0 = tf.zeros([N]) 14 | L0 = tf.zeros([N, N]) 15 | wr0 = tf.fill([N, R], 1e-6) # initial read weightings 16 | ww0 = tf.fill([N], 1e-6) # initial write weightings 17 | r0 = tf.fill([W, R], 1e-6) # initial read vector 18 | 19 | return M0, u0, p0, L0, wr0, ww0, r0 20 | 21 | 22 | def parse_interface(zeta, N, W, R): 23 | """ 24 | returns the individual components of the interface vector 25 | """ 26 | cursor = 0 # keeps track of how far we parsed into zeta 27 | kr, cursor = tf.reshape(zeta[cursor:cursor + W*R], [W, R]), cursor + W*R 28 | br, cursor = zeta[cursor:cursor + R], cursor + R 29 | kw, cursor = tf.reshape(zeta[cursor: cursor + W], [W, 1]), cursor + W 30 | bw, cursor = zeta[cursor], cursor + 1 31 | e, cursor = zeta[cursor: cursor + W], cursor + W 32 | v, cursor = zeta[cursor: cursor + W], cursor + W 33 | f, cursor = zeta[cursor: cursor + R], cursor + R 34 | ga, cursor = zeta[cursor], cursor + 1 35 | gw, cursor = zeta[cursor], cursor + 1 36 | pi = tf.reshape(zeta[cursor:], [3, R]) 37 | 38 | # transforming the parsed components into their correct values 39 | oneplus = lambda z: 1 + tf.nn.softplus(z) 40 | 41 | e = tf.nn.sigmoid(e) 42 | f = tf.nn.sigmoid(f) 43 | ga = tf.nn.sigmoid(ga) 44 | gw = tf.nn.sigmoid(gw) 45 | br = oneplus(br) 46 | bw = oneplus(bw) 47 | pi = tf.nn.softmax(pi, 0) 48 | 49 | return kr, br, kw, bw, e, v, f, ga, gw, pi 50 | 51 | 52 | def C(M, k, b): 53 | """ 54 | Content-based addressing weightings 55 | """ 56 | M_normalized = tf.nn.l2_normalize(M, 1) 57 | k_normalized = tf.nn.l2_normalize(k, 0) 58 | similarity = tf.matmul(M_normalized, k_normalized) 59 | 60 | return tf.nn.softmax(similarity * b, 0) 61 | 62 | 63 | def ut(u, f, wr, ww): 64 | """ 65 | returns the updated usage vector given the previous one along with 66 | free gates and previous read and write weightings 67 | """ 68 | psi_t = tf.reduce_prod(1 - f * wr, 1) 69 | return (u + ww - u * ww) * psi_t 70 | 71 | 72 | def at(ut, N): 73 | """ 74 | returns the allocation weighting given the updated usage vector 75 | """ 76 | sorted_ut, free_list = tf.nn.top_k(-1 * ut, N) 77 | sorted_ut *= -1 # brings the usages to the original positive values 78 | 79 | # the exclusive argument makes the first element in the cumulative 80 | # product a 1 instead of the first element in the given tensor 81 | sorted_ut_cumprod = tf.cumprod(sorted_ut, exclusive=True) 82 | out_of_location_at = (1 - sorted_ut) * sorted_ut_cumprod 83 | 84 | empty_at_container = tf.TensorArray(tf.float32, N) 85 | full_at_container = empty_at_container.scatter(free_list, out_of_location_at) 86 | 87 | return full_at_container.pack() 88 | 89 | 90 | def wwt(ct, at, gw, ga): 91 | """ 92 | returns the upadted write weightings given allocation and content-based 93 | weightings along with the write and allocation gates 94 | """ 95 | ct = tf.squeeze(ct) 96 | return gw * (ga * 
at + (1 - ga) * ct) 97 | 98 | 99 | def Lt(L, wwt, p, N): 100 | """ 101 | returns the updated link matrix given the previous one along with 102 | the updated write weightings and the previous precedence vector 103 | """ 104 | def pairwise_add(v): 105 | """ 106 | returns the matrix of pairwe-adding the elements of v to themselves 107 | """ 108 | n = v.get_shape().as_list()[0] 109 | V = tf.concat(1, [v] * n) # a NxN matrix of duplicates of u along the columns 110 | return V + V 111 | 112 | # expand dimensions of wwt and p to make matmul behave as outer product 113 | wwt = tf.expand_dims(wwt, 1) 114 | p = tf.expand_dims(p, 0) 115 | 116 | I = tf.constant(np.identity(N, dtype=np.float32)) 117 | return ((1 - pairwise_add(wwt)) * L + tf.matmul(wwt, p)) * (1 - I) 118 | 119 | 120 | def pt(wwt, p): 121 | """ 122 | returns the updated precedence vector given the new write weightings and 123 | the previous precedence vector 124 | """ 125 | return (1 - tf.reduce_sum(wwt)) * p + wwt 126 | 127 | 128 | def Mt(M, wwt, e, v): 129 | """ 130 | returns the updated memory matrix given the previous one, the new write 131 | weightings, and the erase and write vectors 132 | """ 133 | # expand the dims of wwt, e, and v to make matmul 134 | # behave as outer product 135 | wwt = tf.expand_dims(wwt, 1) 136 | e = tf.expand_dims(e, 0) 137 | v = tf.expand_dims(v, 0) 138 | 139 | return M * (1 - tf.matmul(wwt, e)) + tf.matmul(wwt, v) 140 | 141 | 142 | def wrt(wr, Lt, ct, pi): 143 | """ 144 | returns the updated read weightings given the previous ones, the new link 145 | matrix, a content-based weighting, and the read modes 146 | """ 147 | ft = tf.matmul(Lt, wr) 148 | bt = tf.matmul(Lt, wr, transpose_a=True) 149 | 150 | return pi[0] * bt + pi[1] * ct + pi[2] * ft 151 | 152 | 153 | def rt(Mt, wrt): 154 | """ 155 | returns the new read vectors given the new memory matrix and the new read 156 | weightings 157 | """ 158 | return tf.matmul(Mt, wrt, transpose_a=True) 159 | -------------------------------------------------------------------------------- /first_edition_archive/archive/dnc/preprocess.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pickle 3 | import getopt 4 | import urllib2 5 | import tarfile 6 | import numpy as np 7 | from shutil import rmtree 8 | from os import listdir, mkdir 9 | from os.path import join, isfile, isdir, dirname, basename, normpath, realpath, exists, getsize 10 | 11 | def llprint(message): 12 | sys.stdout.write(message) 13 | sys.stdout.flush() 14 | 15 | 16 | def create_dictionary(files_list): 17 | """ 18 | creates a dictionary of unique lexicons in the dataset and their mapping to numbers 19 | 20 | Parameters: 21 | ---------- 22 | files_list: list 23 | the list of files to scan through 24 | 25 | Returns: dict 26 | the constructed dictionary of lexicons 27 | """ 28 | 29 | lexicons_dict = {} 30 | id_counter = 0 31 | 32 | llprint("Creating Dictionary ... 0/%d" % (len(files_list))) 33 | 34 | for indx, filename in enumerate(files_list): 35 | with open(filename, 'r') as fobj: 36 | for line in fobj: 37 | 38 | # first seperate . and ? away from words into seperate lexicons 39 | line = line.replace('.', ' .') 40 | line = line.replace('?', ' ?') 41 | line = line.replace(',', ' ') 42 | 43 | for word in line.split(): 44 | if not word.lower() in lexicons_dict and word.isalpha(): 45 | lexicons_dict[word.lower()] = id_counter 46 | id_counter += 1 47 | 48 | llprint("\rCreating Dictionary ... 
%d/%d" % ((indx + 1), len(files_list))) 49 | 50 | print "\rCreating Dictionary ... Done!" 51 | return lexicons_dict 52 | 53 | 54 | def encode_data(files_list, lexicons_dictionary, length_limit=None): 55 | """ 56 | encodes the dataset into its numeric form given a constructed dictionary 57 | 58 | Parameters: 59 | ---------- 60 | files_list: list 61 | the list of files to scan through 62 | lexicons_dictionary: dict 63 | the mappings of unique lexicons 64 | 65 | Returns: tuple (dict, int) 66 | the data in its numeric form, maximum story length 67 | """ 68 | 69 | files = {} 70 | story_inputs = None 71 | story_outputs = None 72 | stories_lengths = [] 73 | answers_flag = False # a flag to specify when to put data into outputs list 74 | limit = length_limit if not length_limit is None else float("inf") 75 | 76 | llprint("Encoding Data ... 0/%d" % (len(files_list))) 77 | 78 | for indx, filename in enumerate(files_list): 79 | 80 | files[filename] = [] 81 | 82 | with open(filename, 'r') as fobj: 83 | for line in fobj: 84 | 85 | # first seperate . and ? away from words into seperate lexicons 86 | line = line.replace('.', ' .') 87 | line = line.replace('?', ' ?') 88 | line = line.replace(',', ' ') 89 | 90 | answers_flag = False # reset as answers end by end of line 91 | 92 | for i, word in enumerate(line.split()): 93 | 94 | if word == '1' and i == 0: 95 | # beginning of a new story 96 | if not story_inputs is None: 97 | stories_lengths.append(len(story_inputs)) 98 | if len(story_inputs) <= limit: 99 | files[filename].append({ 100 | 'inputs':story_inputs, 101 | 'outputs': story_outputs 102 | }) 103 | story_inputs = [] 104 | story_outputs = [] 105 | 106 | if word.isalpha() or word == '?' or word == '.': 107 | if not answers_flag: 108 | story_inputs.append(lexicons_dictionary[word.lower()]) 109 | else: 110 | story_inputs.append(lexicons_dictionary['-']) 111 | story_outputs.append(lexicons_dictionary[word.lower()]) 112 | 113 | # set the answers_flags if a question mark is encountered 114 | if not answers_flag: 115 | answers_flag = (word == '?') 116 | 117 | llprint("\rEncoding Data ... %d/%d" % (indx + 1, len(files_list))) 118 | 119 | print "\rEncoding Data ... Done!" 
120 | return files, stories_lengths 121 | 122 | 123 | if __name__ == '__main__': 124 | task_dir = dirname(realpath(__file__)) 125 | options,_ = getopt.getopt(sys.argv[1:], '', ['length_limit=']) 126 | data_dir = join(task_dir, "../data/babi-en-10k/") 127 | joint_train = True 128 | length_limit = None 129 | files_list = [] 130 | 131 | if not exists(join(task_dir, 'data')): 132 | mkdir(join(task_dir, 'data')) 133 | 134 | for opt in options: 135 | if opt[0] == '--length_limit': 136 | length_limit = int(opt[1]) 137 | 138 | """if data_dir is None: 139 | raise ValueError("data_dir argument cannot be None")""" 140 | 141 | for entryname in listdir(data_dir): 142 | entry_path = join(data_dir, entryname) 143 | if isfile(entry_path): 144 | files_list.append(entry_path) 145 | 146 | lexicon_dictionary = create_dictionary(files_list) 147 | lexicon_count = len(lexicon_dictionary) 148 | 149 | # append used punctuation to dictionary 150 | lexicon_dictionary['?'] = lexicon_count 151 | lexicon_dictionary['.'] = lexicon_count + 1 152 | lexicon_dictionary['-'] = lexicon_count + 2 153 | 154 | encoded_files, stories_lengths = encode_data(files_list, lexicon_dictionary, length_limit) 155 | 156 | stories_lengths = np.array(stories_lengths) 157 | length_limit = np.max(stories_lengths) if length_limit is None else length_limit 158 | print "Total Number of stories: %d" % (len(stories_lengths)) 159 | print "Number of stories with lengthes > %d: %d (%% %.2f) [discarded]" % (length_limit, np.sum(stories_lengths > length_limit), np.mean(stories_lengths > length_limit) * 100.0) 160 | print "Number of Remaining Stories: %d" % (len(stories_lengths[stories_lengths <= length_limit])) 161 | 162 | processed_data_dir = join(task_dir, 'data', basename(normpath(data_dir))) 163 | train_data_dir = join(processed_data_dir, 'train') 164 | test_data_dir = join(processed_data_dir, 'test') 165 | if exists(processed_data_dir) and isdir(processed_data_dir): 166 | rmtree(processed_data_dir) 167 | 168 | mkdir(processed_data_dir) 169 | mkdir(train_data_dir) 170 | mkdir(test_data_dir) 171 | 172 | llprint("Saving processed data to disk ... 
") 173 | 174 | pickle.dump(lexicon_dictionary, open(join(processed_data_dir, 'lexicon-dict.pkl'), 'wb')) 175 | 176 | joint_train_data = [] 177 | 178 | for filename in encoded_files: 179 | if filename.endswith("test.txt"): 180 | pickle.dump(encoded_files[filename], open(join(test_data_dir, basename(filename) + '.pkl'), 'wb')) 181 | elif filename.endswith("train.txt"): 182 | joint_train_data.extend(encoded_files[filename]) 183 | 184 | pickle.dump(joint_train_data, open(join(train_data_dir, 'train.pkl'), 'wb')) 185 | 186 | llprint("Done!\n") 187 | -------------------------------------------------------------------------------- /first_edition_archive/archive/dnc/train_babi.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import getopt 4 | import shutil 5 | import pickle 6 | import sys 7 | import os 8 | 9 | from mem_ops import * 10 | 11 | def llprint(message): 12 | sys.stdout.write(message) 13 | sys.stdout.flush() 14 | 15 | def load(path): 16 | return pickle.load(open(path, 'rb')) 17 | 18 | def onehot(index, size): 19 | vec = np.zeros(size, dtype=np.float32) 20 | index = int(index) 21 | vec[index] = 1.0 22 | return vec 23 | 24 | def prepare_sample(sample, target_code, word_space_size): 25 | """ 26 | prepares the input/output sequence of a sample story by encoding it 27 | into one-hot vectors and generates the necessary loss weights 28 | """ 29 | input_vec = np.array(sample[0]['inputs'], dtype=np.float32) 30 | output_vec = np.array(sample[0]['inputs'], dtype=np.float32) 31 | seq_len = input_vec.shape[0] 32 | weights_vec = np.zeros(seq_len, dtype=np.float32) 33 | 34 | target_mask = (input_vec == target_code) 35 | output_vec[target_mask] = sample[0]['outputs'] 36 | weights_vec[target_mask] = 1.0 37 | 38 | input_vec = np.array([onehot(code, word_space_size) for code in input_vec]) 39 | output_vec = np.array([onehot(code, word_space_size) for code in output_vec]) 40 | 41 | return ( 42 | np.reshape(input_vec, (-1, word_space_size)), 43 | np.reshape(output_vec, (-1, word_space_size)), 44 | seq_len, 45 | np.reshape(weights_vec, (-1, 1)) 46 | ) 47 | 48 | task_dir = os.path.dirname(os.path.realpath(__file__)) 49 | llprint("Loading Data ... 
") 50 | lexicon_dict = load(os.path.join(task_dir, "data/babi-en-10k/lexicon-dict.pkl")) 51 | data = load(os.path.join(task_dir, "data/babi-en-10k/train/train.pkl")) 52 | llprint("Done!\n") 53 | 54 | # the model parameters 55 | N = 256; W = 64; R = 4 # memory parameters 56 | X = Y = 159 # input/output size 57 | NN = 256 # controller's network output size 58 | zeta_size = R*W + 3*W + 5*R + 3 59 | # training parameters 60 | iterations = 100000 61 | learning_rate = 1e-4 62 | momentum = 0.9 63 | 64 | def network(step_input, state): 65 | """ 66 | defines the recurrent neural network operation 67 | """ 68 | global NN 69 | step_input = tf.expand_dims(step_input, 0) 70 | lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(NN) 71 | 72 | return lstm_cell(step_input, state) 73 | 74 | # START: Computaional Graph 75 | graph = tf.Graph() 76 | with graph.as_default(): 77 | # optimizer 78 | optimizer = tf.train.RMSPropOptimizer(learning_rate, momentum=momentum) 79 | 80 | # placeholders 81 | input_data = tf.placeholder(tf.float32, [None, X]) 82 | target_output = tf.placeholder(tf.float32, [None, Y]) 83 | loss_weights = tf.placeholder(tf.float32, [None, 1]) 84 | sequence_length = tf.placeholder(tf.int32) 85 | 86 | initial_nn_state = tf.nn.rnn_cell.BasicLSTMCell(NN).zero_state(1, tf.float32) 87 | 88 | empty_unpacked_inputs = tf.TensorArray(tf.float32, sequence_length) 89 | unpacked_inputs = empty_unpacked_inputs.unpack(input_data) 90 | outputs_container = tf.TensorArray(tf.float32, sequence_length) # accumelates the step outputs 91 | t = tf.constant(0, dtype=tf.int32) 92 | 93 | def step_op(time, memory_state, controller_state, inputs, outputs): 94 | """ 95 | defines the operation of one step of the sequence 96 | """ 97 | global N, W, R 98 | 99 | step_input = inputs.read(time) 100 | M, u, p, L, wr, ww, r = memory_state 101 | 102 | with tf.variable_scope('controller'): 103 | Xt = tf.concat(0, [step_input, tf.reshape(r, [-1])]) 104 | nn_output, nn_state = network(Xt, controller_state) 105 | std = lambda input_size: np.min(0.01, np.sqrt(2. 
/ input_size)) 106 | W_y = tf.get_variable('W_y', [NN, Y], tf.float32, tf.truncated_normal_initializer(stddev=std(NN))) 107 | W_zeta = tf.get_variable('W_zeta', [NN, zeta_size], tf.float32, tf.truncated_normal_initializer(stddev=std(NN))) 108 | 109 | pre_output = tf.matmul(nn_output, W_y) 110 | zeta = tf.squeeze(tf.matmul(nn_output, W_zeta)) 111 | kr, br, kw, bw, e, v, f, ga, gw, pi = parse_interface(zeta, N, W, R) 112 | 113 | # write head operations 114 | u_t = ut(u, f, wr, ww) 115 | a_t = at(u_t, N) 116 | cw_t = C(M, kw, bw) 117 | ww_t = wwt(cw_t, a_t, gw, ga) 118 | M_t = Mt(M, ww_t, e, v) 119 | L_t = Lt(L, ww_t, p, N) 120 | p_t = pt(ww_t, p) 121 | 122 | # read heads operations 123 | cr_t = C(M_t, kr, br) 124 | wr_t = wrt(wr, L_t, cr_t, pi) 125 | r_t = rt(M_t, wr_t) 126 | 127 | W_r = tf.get_variable('W_r', [W*R, Y], tf.float32, tf.truncated_normal_initializer(stddev=std(W*R))) 128 | flat_rt = tf.reshape(r_t, [-1]) 129 | final_output = pre_output + tf.matmul(tf.expand_dims(flat_rt, 0), W_r) 130 | updated_outputs = outputs.write(time, tf.squeeze(final_output)) 131 | 132 | return time + 1, (M_t, u_t, p_t, L_t, wr_t, ww_t, r_t), nn_state, inputs, updated_outputs 133 | 134 | _, _, _, _, final_outputs = tf.while_loop( 135 | cond = lambda time, *_: time < sequence_length, 136 | body = step_op, 137 | loop_vars=(t, init_memory(N,W,R), initial_nn_state, unpacked_inputs, outputs_container), 138 | parallel_iterations=32, 139 | swap_memory=True 140 | ) 141 | 142 | # pack the individual steps outputs into a single (sequence_length x Y) tensor 143 | packed_output = final_outputs.pack() 144 | 145 | loss = tf.reduce_mean( 146 | loss_weights * tf.nn.softmax_cross_entropy_with_logits(packed_output, target_output) 147 | ) 148 | gradients = optimizer.compute_gradients(loss) 149 | # clipping the gradients value to avoid explosion 150 | for i, (grad, var) in enumerate(gradients): 151 | if grad is not None: 152 | gradients[i] = (tf.clip_by_value(grad, -10, 10), var) 153 | apply_grads = optimizer.apply_gradients(gradients) 154 | # END: Computational Graph 155 | 156 | # Reading command line arguments and adapting parameters 157 | options,_ = getopt.getopt(sys.argv[1:], '', ['iterations=']) 158 | for opt in options: 159 | iterations = int(opt[1]) 160 | 161 | with tf.Session(graph=graph) as session: 162 | 163 | session.run(tf.initialize_all_variables()) 164 | 165 | last_100_losses = [] 166 | print "" 167 | for i in range(iterations): 168 | 169 | llprint("\rIteration %d/%d" % (i, iterations)) 170 | 171 | sample = np.random.choice(data, 1) 172 | input_seq, target_seq, seq_len, weights = prepare_sample(sample, lexicon_dict['-'], 159) 173 | 174 | loss_value,_, = session.run([loss, apply_grads], feed_dict={ 175 | input_data: input_seq, 176 | target_output: target_seq, 177 | sequence_length: seq_len, 178 | loss_weights: weights 179 | }) 180 | 181 | last_100_losses.append(loss_value) 182 | if i % 100 == 0: 183 | print "\n\tAvg. 
Cross-Entropy Loss: %.6f" % (np.mean(last_100_losses)) 184 | last_100_losses = [] 185 | 186 | model_path = os.path.join(task_dir, 'babi-model') 187 | if os.path.exists(model_path): 188 | shutil.rmtree(model_path) 189 | os.mkdir(model_path) 190 | tf.train.Saver().save(session, os.path.join(model_path, 'model.ckpt')) 191 | -------------------------------------------------------------------------------- /first_edition_archive/archive/download_tweets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | import urllib 5 | import re 6 | import json 7 | 8 | import socket 9 | socket.setdefaulttimeout(10) 10 | 11 | from bs4 import BeautifulSoup 12 | 13 | cache = {} 14 | 15 | for line in open(sys.argv[1]): 16 | fields = line.rstrip('\n').split('\t') 17 | sid = fields[0] 18 | uid = fields[1] 19 | 20 | #url = 'http://twitter.com/%s/status/%s' % (uid, sid) 21 | #print url 22 | 23 | tweet = None 24 | text = "Not Available" 25 | if cache.has_key(sid): 26 | text = cache[sid] 27 | else: 28 | try: 29 | f = urllib.urlopen("http://twitter.com/%s/status/%s" % (uid, sid)) 30 | #Thanks to Arturo! 31 | html = f.read().replace("", "") + "" 32 | soup = BeautifulSoup(html) 33 | 34 | jstt = soup.find_all("p", "js-tweet-text") 35 | tweets = list(set([x.get_text() for x in jstt])) 36 | #print len(tweets) 37 | #print tweets 38 | if(len(tweets)) > 1: 39 | continue 40 | 41 | text = tweets[0] 42 | cache[sid] = tweets[0] 43 | 44 | for j in soup.find_all("input", "json-data", id="init-data"): 45 | js = json.loads(j['value']) 46 | if(js.has_key("embedData")): 47 | tweet = js["embedData"]["status"] 48 | text = js["embedData"]["status"]["text"] 49 | cache[sid] = text 50 | break 51 | except Exception: 52 | continue 53 | 54 | if(tweet != None and tweet["id_str"] != sid): 55 | text = "Not Available" 56 | cache[sid] = "Not Available" 57 | text = text.replace('\n', ' ',) 58 | text = re.sub(r'\s+', ' ', text) 59 | #print json.dumps(tweet, indent=2) 60 | print "\t".join(fields + [text]).encode('utf-8') 61 | -------------------------------------------------------------------------------- /first_edition_archive/archive/feed_forward_network-[THEANO]/feed_forward_network.py: -------------------------------------------------------------------------------- 1 | """ 2 | We will use this class to represent a simple feedforward neural 3 | network. Here, we'll use this class to crack the MNIST handwritten 4 | digit dataset problem, but this class has been constructed so 5 | that it can be reappropriated to any use! 6 | 7 | References: 8 | - textbooks: "Pattern Recognition and Machine Learning", Christopher M. Bishop, section 4.3.2 9 | - websites: http://deeplearning.net/tutorial, Lisa Lab 10 | """ 11 | 12 | import numpy as np 13 | import theano.tensor as T 14 | import theano 15 | 16 | from hidden_layer import HiddenLayer as HL 17 | from softmax_layer import SoftmaxLayer as SL 18 | 19 | 20 | 21 | class FeedForwardNetwork(object): 22 | """ 23 | The feed forward neural network is described mostly input 24 | data in the form of a minibatch, a list of hidden layers, 25 | and a softmax layer to make predictions. 26 | """ 27 | 28 | def __init__ (self, random_gen, input, input_dim, output_dim, hidden_layer_sizes): 29 | """ 30 | We first initialize the feed forward network with some important 31 | information. 
32 | 33 | PARAM random_gen : numpy.random.RandomState 34 | A random number generator used to properly initialize the weights 35 | of this neural network 36 | 37 | PARAM input : theano.tensor.TensorType 38 | A symbolic variable that we'll use a minibatch of data 39 | 40 | PARAM input_dim : int 41 | This will represent the number of input neurons in our model (size 42 | of a single training example's input vector) 43 | 44 | PARAM ouptut_dim : int 45 | This will represent the number of neurons in the output layer (i.e. 46 | the number of possible classifications for the input) 47 | 48 | Param hidden_layers : List[int] 49 | This will represent an ordered list of number of neurons in each 50 | hidden layer of our network. The first element corresponds to the 51 | first hidden layer and the last element corresponds to the last. 52 | This list cannot be empty 53 | """ 54 | 55 | # We'll keep track of these sizes internally in case we need them later 56 | self.hidden_layer_sizes = hidden_layer_sizes 57 | 58 | # Now we'll build all of our hidden layers 59 | self.hidden_layers = [] 60 | for i in xrange(len(hidden_layer_sizes)): 61 | if i == 0: 62 | hidden_layer = HL( 63 | input=input, 64 | input_dim=input_dim, 65 | output_dim=hidden_layer_sizes[i], 66 | random_gen=random_gen, 67 | ) 68 | self.hidden_layers.append(hidden_layer) 69 | else: 70 | hidden_layer = HL( 71 | input=self.hidden_layers[i - 1].output, 72 | input_dim=hidden_layer_sizes[i - 1], 73 | output_dim=hidden_layer_sizes[i], 74 | random_gen=random_gen, 75 | ) 76 | 77 | self.softmax_layer = SL( 78 | input=self.hidden_layers[-1].output, 79 | input_dim=hidden_layer_sizes[-1], 80 | output_dim=output_dim 81 | ) 82 | 83 | # Let's grab the output of the softmax layer and use that as our output 84 | self.output = self.softmax_layer.output 85 | 86 | # Now let's look at what our final prediction should be 87 | self.predicted = T.argmax(self.output, axis=1) 88 | 89 | def feed_forward_network_cost(self, y, lambda_l2=0): 90 | """ 91 | Here we express the cost incurred by an example given the correct 92 | distribution 93 | 94 | PARAM y : theano.tensor.TensorType 95 | These are the correct answers, and we compute the cost with 96 | respect to this ground truth (over the entire minibatch). 
This 97 | means that y is of size (minibatch_size, output_dim) 98 | 99 | PARAM lambda : float 100 | This is the L2 regularization parameter that we use to penalize large 101 | values for components of W, thus discouraging potential overfitting 102 | """ 103 | # Calculate the log probabilities of the softmax output 104 | log_probabilities = T.log(self.output) 105 | 106 | # We use these log probabilities to compute the negative log likelihood 107 | negative_log_likelihood = -T.mean(log_probabilities[T.arange(y.shape[0]), y]) 108 | 109 | # Compute the L2 regularization component of the cost function 110 | hl_squared_sum = (self.hidden_layers[0].W ** 2).sum() 111 | for hidden_layer in self.hidden_layers[1:]: 112 | hl_squared_sum += (hidden_layer.W ** 2).sum() 113 | 114 | sl_squared_sum = (self.softmax_layer.W ** 2).sum() 115 | 116 | l2_regularization = lambda_l2 * (hl_squared_sum + sl_squared_sum) 117 | 118 | # Return a symbolic description of the cost function 119 | return negative_log_likelihood + l2_regularization 120 | 121 | def error_rate(self, y): 122 | """ 123 | Here we return the error rate of the model over a set of given labels 124 | (perhaps in a minibatch, in the validation set, or the test set) 125 | 126 | PARAM y : theano.tensor.TensorType 127 | These are the correct answers, and we compute the cost with 128 | respect to this ground truth (over the entire minibatch). This 129 | means that y is of size (minibatch_size, output_dim) 130 | """ 131 | 132 | # Make sure y is of the correct dimension 133 | assert y.ndim == self.predicted.ndim 134 | 135 | # Make sure that y contains values of the correct data type (ints) 136 | assert y.dtype.startswith('int') 137 | 138 | # Return the error rate on the data 139 | return T.mean(T.neq(self.predicted, y)) 140 | 141 | 142 | 143 | 144 | -------------------------------------------------------------------------------- /first_edition_archive/archive/feed_forward_network-[THEANO]/hidden_layer.py: -------------------------------------------------------------------------------- 1 | """ 2 | We will use this class to represent a tanh hidden layer. 3 | This will be a building block for a simplefeedforward neural 4 | network. 5 | 6 | References: 7 | - textbooks: "Pattern Recognition and Machine Learning", Christopher M. Bishop, section 4.3.2 8 | - websites: http://deeplearning.net/tutorial, Lisa Lab 9 | """ 10 | 11 | import numpy as np 12 | import theano.tensor as T 13 | import theano 14 | 15 | class HiddenLayer(object): 16 | """ 17 | The hidden layer class is described by two parameters (which 18 | we will want to learn). The first is a incoming weight matrix. 19 | We'll refer to this weight matrix as W. The second is a bias 20 | vector b. Refer to the text if you want to learn more about how 21 | this layer works. Let's get started! 22 | """ 23 | 24 | def __init__(self, input, input_dim, output_dim, random_gen): 25 | """ 26 | We first initialize the hidden layer object with some important 27 | information. 28 | 29 | PARAM input : theano.tensor.TensorType 30 | A symbolic variable that we'll use to describe incoming data from 31 | the previous layer 32 | 33 | PARAM input_dim : int 34 | This will represent the number of neurons in the previous layer 35 | 36 | PARAM ouptut_dim : int 37 | This will represent the number of neurons in the hidden layer 38 | 39 | PARAM random_gen : numpy.random.RandomState 40 | A random number generator used to properly initialize the weights. 
41 | For a tanh activation function, the literature suggests that the 42 | incoming weights should be sampled from the uniform distribution 43 | [-sqrt(6./(input_dim + output_dim)), sqrt(6./(input_dim + output_dim)] 44 | """ 45 | 46 | # We initialize the weight matrix W of size (input_dim, output_dim) 47 | self.W = theano.shared( 48 | value=np.asarray( 49 | random_gen.uniform( 50 | low=-np.sqrt(6. / (input_dim + output_dim)), 51 | high=np.sqrt(6. / (input_dim + output_dim)), 52 | size=(input_dim, output_dim) 53 | ), 54 | dtype=theano.config.floatX 55 | ), 56 | name='W', 57 | borrow=True 58 | ) 59 | 60 | # We initialize a bias vector for the neurons of the output layer 61 | self.b = theano.shared( 62 | value=np.zeros(output_dim), 63 | name='b', 64 | borrow='True' 65 | ) 66 | 67 | # Symbolic description of the incoming logits 68 | logit = T.dot(input, self.W) + self.b 69 | 70 | # Symbolic description of the outputs of the hidden layer neurons 71 | self.output = T.tanh(logit) 72 | 73 | -------------------------------------------------------------------------------- /first_edition_archive/archive/feed_forward_network-[THEANO]/softmax_layer.py: -------------------------------------------------------------------------------- 1 | """ 2 | We will use this class to represent a simple softmax layer. 3 | This will be a building block for a simplefeedforward neural 4 | network. 5 | 6 | References: 7 | - textbooks: "Pattern Recognition and Machine Learning", Christopher M. Bishop, section 4.3.2 8 | - websites: http://deeplearning.net/tutorial, Lisa Lab 9 | """ 10 | 11 | import numpy as np 12 | import theano.tensor as T 13 | import theano 14 | 15 | class SoftmaxLayer(object): 16 | """ 17 | The softmax layer class is described by two parameters (which 18 | we will want to learn). The first is a incoming weight matrix. 19 | We'll refer to this weight matrix as W. The second is a bias 20 | vector b. Refer to the text if you want to learn more about how 21 | this layer works. Let's get started! 22 | """ 23 | 24 | def __init__(self, input, input_dim, output_dim): 25 | """ 26 | We first initialize the softmax layer object with some important 27 | information. 28 | 29 | PARAM input : theano.tensor.TensorType 30 | A symbolic variable that we'll use to describe incoming data from 31 | the previous layer 32 | 33 | PARAM input_dim : int 34 | This will represent the number of neurons in the previous layer 35 | 36 | PARAM ouptut_dim : int 37 | This will represent the number of neurons in the softmax layer (i.e. 
38 | the number of possible classifications for the input) 39 | """ 40 | 41 | # We initialize the weight matrix W of size (input_dim, output_dim) 42 | self.W = theano.shared( 43 | value=np.zeros((input_dim, output_dim)), 44 | name='W', 45 | borrow=True 46 | ) 47 | 48 | # We initialize a bias vector for the neurons of the output layer 49 | self.b = theano.shared( 50 | value=np.zeros(output_dim), 51 | name='b', 52 | borrow='True' 53 | ) 54 | 55 | # Symbolic description of how to compute class membership probabilities 56 | self.output = T.nnet.softmax(T.dot(input, self.W) + self.b) 57 | 58 | # Symbolic description of the final prediction 59 | self.predicted = T.argmax(self.output, axis=1) 60 | 61 | 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /first_edition_archive/archive/imdb_bn_lstm.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from lstm import BNLSTMCell 3 | import read_imdb_data as data 4 | 5 | training_epochs = 1000 6 | batch_size = 32 7 | display_step = 1 8 | 9 | def embedding_layer(input, weight_shape): 10 | weight_init = tf.random_normal_initializer(stddev=(1.0/weight_shape[0])**0.5) 11 | E = tf.get_variable("E", weight_shape, 12 | initializer=weight_init) 13 | # E_exp = tf.expand_dims(E, 0) 14 | # E_tiled= tf.tile(E_exp, [32, 1, 1]) 15 | # return tf.batch_matmul(input, E_exp) 16 | incoming = tf.cast(input, tf.int32) 17 | embeddings = tf.nn.embedding_lookup(E, incoming) 18 | return embeddings 19 | 20 | def lstm(input, hidden_dim, keep_prob, phase_train): 21 | lstm = BNLSTMCell(hidden_dim, phase_train) 22 | lstm_outputs, state = tf.nn.dynamic_rnn(lstm, input, dtype=tf.float32) 23 | return tf.squeeze(tf.slice(lstm_outputs, [0, tf.shape(lstm_outputs)[1]-1, 0], [tf.shape(lstm_outputs)[0], 1, tf.shape(lstm_outputs)[2]])) 24 | 25 | def layer_batch_norm(x, n_out, phase_train): 26 | beta_init = tf.constant_initializer(value=0.0, dtype=tf.float32) 27 | gamma_init = tf.constant_initializer(value=1.0, dtype=tf.float32) 28 | 29 | beta = tf.get_variable("beta", [n_out], initializer=beta_init) 30 | gamma = tf.get_variable("gamma", [n_out], initializer=gamma_init) 31 | 32 | batch_mean, batch_var = tf.nn.moments(x, [0], name='moments') 33 | ema = tf.train.ExponentialMovingAverage(decay=0.9) 34 | ema_apply_op = ema.apply([batch_mean, batch_var]) 35 | ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var) 36 | def mean_var_with_update(): 37 | with tf.control_dependencies([ema_apply_op]): 38 | return tf.identity(batch_mean), tf.identity(batch_var) 39 | mean, var = tf.cond(phase_train, 40 | mean_var_with_update, 41 | lambda: (ema_mean, ema_var)) 42 | 43 | reshaped_x = tf.reshape(x, [-1, 1, 1, n_out]) 44 | normed = tf.nn.batch_norm_with_global_normalization(reshaped_x, mean, var, 45 | beta, gamma, 1e-3, True) 46 | return tf.reshape(normed, [-1, n_out]) 47 | 48 | def layer(input, weight_shape, bias_shape, phase_train): 49 | weight_init = tf.random_normal_initializer(stddev=(1.0/weight_shape[0])**0.5) 50 | bias_init = tf.constant_initializer(value=0) 51 | W = tf.get_variable("W", weight_shape, 52 | initializer=weight_init) 53 | b = tf.get_variable("b", bias_shape, 54 | initializer=bias_init) 55 | logits = tf.matmul(input, W) + b 56 | return tf.nn.sigmoid(layer_batch_norm(logits, weight_shape[1], phase_train)) 57 | 58 | def inference(input, phase_train): 59 | embedding = embedding_layer(input, [10000, 128]) 60 | lstm_output = lstm(embedding, 128, 0.8, phase_train) 
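# Shape walk-through for the pipeline above and the layer on the next line (a
# descriptive note added for clarity; "batch" is the minibatch dimension):
#   input       -> [batch, 100]       integer word ids (see the placeholder in __main__)
#   embedding   -> [batch, 100, 128]  after the embedding lookup
#   lstm_output -> [batch, 128]       output of the BN-LSTM at the last time step
# The fully connected layer below then maps [batch, 128] -> [batch, 2].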
61 | output = layer(lstm_output, [128, 2], [2], phase_train) 62 | return output 63 | 64 | def loss(output, y): 65 | xentropy = tf.nn.softmax_cross_entropy_with_logits(output, y) 66 | loss = tf.reduce_mean(xentropy) 67 | train_loss_summary_op = tf.scalar_summary("train_cost", loss) 68 | val_loss_summary_op = tf.scalar_summary("val_cost", loss) 69 | return loss, train_loss_summary_op, val_loss_summary_op 70 | 71 | def training(cost, global_step): 72 | optimizer = tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-08, 73 | use_locking=False, name='Adam') 74 | gvs = optimizer.compute_gradients(cost) 75 | capped_gvs = [(None if grad is None else tf.clip_by_value(grad, -10., 10.), var) for grad, var in gvs] 76 | train_op = optimizer.apply_gradients(capped_gvs, global_step=global_step) 77 | return train_op 78 | 79 | def evaluate(output, y): 80 | correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1)) 81 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 82 | accuracy_summary_op = tf.scalar_summary("accuracy", accuracy) 83 | return accuracy, accuracy_summary_op 84 | 85 | if __name__ == '__main__': 86 | 87 | with tf.Graph().as_default(): 88 | with tf.device('/gpu:0'): 89 | x = tf.placeholder("float", [None, 100]) 90 | y = tf.placeholder("float", [None, 2]) 91 | phase_train = tf.placeholder(tf.bool) 92 | 93 | output = inference(x, phase_train) 94 | 95 | cost, train_loss_summary_op, val_loss_summary_op = loss(output, y) 96 | 97 | global_step = tf.Variable(0, name='global_step', trainable=False) 98 | 99 | train_op = training(cost, global_step) 100 | 101 | eval_op, eval_summary_op = evaluate(output, y) 102 | 103 | saver = tf.train.Saver(max_to_keep=100) 104 | 105 | sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)) 106 | 107 | summary_writer = tf.train.SummaryWriter("imdb_bnlstm_logs/", 108 | graph=sess.graph) 109 | 110 | init_op = tf.initialize_all_variables() 111 | 112 | sess.run(init_op) 113 | 114 | for epoch in range(training_epochs): 115 | 116 | avg_cost = 0. 117 | total_batch = int(data.train.num_examples/batch_size) 118 | print "Total of %d minbatches in epoch %d" % (total_batch, epoch) 119 | # Loop over all batches 120 | for i in range(total_batch): 121 | minibatch_x, minibatch_y = data.train.minibatch(batch_size) 122 | # Fit training using batch data 123 | _, new_cost, train_summary = sess.run([train_op, cost, train_loss_summary_op], feed_dict={x: minibatch_x, y: minibatch_y, phase_train: True}) 124 | summary_writer.add_summary(train_summary, sess.run(global_step)) 125 | # Compute average loss 126 | avg_cost += new_cost/total_batch 127 | print "Training cost for batch %d in epoch %d was:" % (i, epoch), new_cost 128 | # Display logs per epoch step 129 | if epoch % display_step == 0: 130 | print "Epoch:", '%04d' % (epoch+1), "cost =", "{:.9f}".format(avg_cost) 131 | val_x, val_y = data.val.minibatch(data.val.num_examples) 132 | val_accuracy, val_summary, val_loss_summary = sess.run([eval_op, eval_summary_op, val_loss_summary_op], feed_dict={x: val_x, y: val_y, phase_train: False}) 133 | summary_writer.add_summary(val_summary, sess.run(global_step)) 134 | summary_writer.add_summary(val_loss_summary, sess.run(global_step)) 135 | print "Validation Accuracy:", val_accuracy 136 | 137 | saver.save(sess, "imdb_bnlstm_logs/model-checkpoint-" + '%04d' % (epoch+1), global_step=global_step) 138 | 139 | 140 | print "Optimization Finished!" 
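# Note on training() above: gradients are clipped element-wise to [-10, 10]
# with tf.clip_by_value before apply_gradients, a common safeguard against
# exploding gradients in recurrent networks. A rough modern equivalent (a
# hypothetical PyTorch sketch, not part of this archive) would be calling
# torch.nn.utils.clip_grad_value_(model.parameters(), clip_value=10.0)
# between loss.backward() and optimizer.step().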
141 | -------------------------------------------------------------------------------- /first_edition_archive/archive/imdb_lstm.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from lstm import LSTMCell 3 | import read_imdb_data as data 4 | 5 | training_epochs = 1000 6 | batch_size = 32 7 | display_step = 1 8 | 9 | def embedding_layer(input, weight_shape): 10 | weight_init = tf.random_normal_initializer(stddev=(1.0/weight_shape[0])**0.5) 11 | E = tf.get_variable("E", weight_shape, 12 | initializer=weight_init) 13 | # E_exp = tf.expand_dims(E, 0) 14 | # E_tiled= tf.tile(E_exp, [32, 1, 1]) 15 | # return tf.batch_matmul(input, E_exp) 16 | incoming = tf.cast(input, tf.int32) 17 | embeddings = tf.nn.embedding_lookup(E, incoming) 18 | return embeddings 19 | 20 | def lstm(input, hidden_dim, keep_prob, phase_train): 21 | lstm = tf.nn.rnn_cell.BasicLSTMCell(hidden_dim) 22 | dropout_lstm = tf.nn.rnn_cell.DropoutWrapper(lstm, input_keep_prob=keep_prob, output_keep_prob=keep_prob) 23 | # stacked_lstm = tf.nn.rnn_cell.MultiRNNCell([dropout_lstm] * 2, state_is_tuple=True) 24 | lstm_outputs, state = tf.nn.dynamic_rnn(dropout_lstm, input, dtype=tf.float32) 25 | #return tf.squeeze(tf.slice(lstm_outputs, [0, tf.shape(lstm_outputs)[1]-1, 0], [tf.shape(lstm_outputs)[0], 1, tf.shape(lstm_outputs)[2]])) 26 | return tf.reduce_max(lstm_outputs, reduction_indices=[1]) 27 | 28 | def layer_batch_norm(x, n_out, phase_train): 29 | beta_init = tf.constant_initializer(value=0.0, dtype=tf.float32) 30 | gamma_init = tf.constant_initializer(value=1.0, dtype=tf.float32) 31 | 32 | beta = tf.get_variable("beta", [n_out], initializer=beta_init) 33 | gamma = tf.get_variable("gamma", [n_out], initializer=gamma_init) 34 | 35 | batch_mean, batch_var = tf.nn.moments(x, [0], name='moments') 36 | ema = tf.train.ExponentialMovingAverage(decay=0.9) 37 | ema_apply_op = ema.apply([batch_mean, batch_var]) 38 | ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var) 39 | def mean_var_with_update(): 40 | with tf.control_dependencies([ema_apply_op]): 41 | return tf.identity(batch_mean), tf.identity(batch_var) 42 | mean, var = tf.cond(phase_train, 43 | mean_var_with_update, 44 | lambda: (ema_mean, ema_var)) 45 | 46 | reshaped_x = tf.reshape(x, [-1, 1, 1, n_out]) 47 | normed = tf.nn.batch_norm_with_global_normalization(reshaped_x, mean, var, 48 | beta, gamma, 1e-3, True) 49 | return tf.reshape(normed, [-1, n_out]) 50 | 51 | def layer(input, weight_shape, bias_shape, phase_train): 52 | weight_init = tf.random_normal_initializer(stddev=(1.0/weight_shape[0])**0.5) 53 | bias_init = tf.constant_initializer(value=0) 54 | W = tf.get_variable("W", weight_shape, 55 | initializer=weight_init) 56 | b = tf.get_variable("b", bias_shape, 57 | initializer=bias_init) 58 | logits = tf.matmul(input, W) + b 59 | return tf.nn.sigmoid(layer_batch_norm(logits, weight_shape[1], phase_train)) 60 | 61 | def inference(input, phase_train): 62 | embedding = embedding_layer(input, [30000, 512]) 63 | lstm_output = lstm(embedding, 512, 0.5, phase_train) 64 | output = layer(lstm_output, [512, 2], [2], phase_train) 65 | return output 66 | 67 | def loss(output, y): 68 | xentropy = tf.nn.softmax_cross_entropy_with_logits(output, y) 69 | loss = tf.reduce_mean(xentropy) 70 | train_loss_summary_op = tf.scalar_summary("train_cost", loss) 71 | val_loss_summary_op = tf.scalar_summary("val_cost", loss) 72 | return loss, train_loss_summary_op, val_loss_summary_op 73 | 74 | def training(cost, global_step): 
75 | optimizer = tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-08, 76 | use_locking=False, name='Adam') 77 | train_op = optimizer.minimize(cost, global_step=global_step) 78 | return train_op 79 | 80 | def evaluate(output, y): 81 | correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1)) 82 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 83 | accuracy_summary_op = tf.scalar_summary("accuracy", accuracy) 84 | return accuracy, accuracy_summary_op 85 | 86 | if __name__ == '__main__': 87 | 88 | with tf.Graph().as_default(): 89 | with tf.device('/gpu:0'): 90 | x = tf.placeholder("float", [None, 500]) 91 | y = tf.placeholder("float", [None, 2]) 92 | phase_train = tf.placeholder(tf.bool) 93 | 94 | output = inference(x, phase_train) 95 | 96 | cost, train_loss_summary_op, val_loss_summary_op = loss(output, y) 97 | 98 | global_step = tf.Variable(0, name='global_step', trainable=False) 99 | 100 | train_op = training(cost, global_step) 101 | 102 | eval_op, eval_summary_op = evaluate(output, y) 103 | 104 | saver = tf.train.Saver(max_to_keep=100) 105 | 106 | sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)) 107 | 108 | summary_writer = tf.train.SummaryWriter("imdb_lstm_logs/", 109 | graph=sess.graph) 110 | 111 | init_op = tf.initialize_all_variables() 112 | 113 | sess.run(init_op) 114 | 115 | for epoch in range(training_epochs): 116 | 117 | avg_cost = 0. 118 | total_batch = int(data.train.num_examples/batch_size) 119 | print "Total of %d minbatches in epoch %d" % (total_batch, epoch) 120 | # Loop over all batches 121 | for i in range(total_batch): 122 | minibatch_x, minibatch_y = data.train.minibatch(batch_size) 123 | # Fit training using batch data 124 | _, new_cost, train_summary = sess.run([train_op, cost, train_loss_summary_op], feed_dict={x: minibatch_x, y: minibatch_y, phase_train: True}) 125 | summary_writer.add_summary(train_summary, sess.run(global_step)) 126 | # Compute average loss 127 | avg_cost += new_cost/total_batch 128 | print "Training cost for batch %d in epoch %d was:" % (i, epoch), new_cost 129 | if i % 100 == 0: 130 | print "Epoch:", '%04d' % (epoch+1), "Minibatch:", '%04d' % (i+1), "cost =", "{:.9f}".format((avg_cost * total_batch)/(i+1)) 131 | val_x, val_y = data.val.minibatch(data.val.num_examples) 132 | val_accuracy, val_summary, val_loss_summary = sess.run([eval_op, eval_summary_op, val_loss_summary_op], feed_dict={x: val_x, y: val_y, phase_train: False}) 133 | summary_writer.add_summary(val_summary, sess.run(global_step)) 134 | summary_writer.add_summary(val_loss_summary, sess.run(global_step)) 135 | print "Validation Accuracy:", val_accuracy 136 | 137 | saver.save(sess, "imdb_lstm_logs/model-checkpoint-" + '%04d' % (epoch+1), global_step=global_step) 138 | # Display logs per epoch step 139 | # if epoch % display_step == 0: 140 | # print "Epoch:", '%04d' % (epoch+1), "cost =", "{:.9f}".format(avg_cost) 141 | # val_x, val_y = data.val.minibatch(data.val.num_examples) 142 | # val_accuracy, val_summary, val_loss_summary = sess.run([eval_op, eval_summary_op, val_loss_summary_op], feed_dict={x: val_x, y: val_y, phase_train: False}) 143 | # summary_writer.add_summary(val_summary, sess.run(global_step)) 144 | # summary_writer.add_summary(val_loss_summary, sess.run(global_step)) 145 | # print "Validation Accuracy:", val_accuracy 146 | # 147 | # saver.save(sess, "imdb_lstm_logs/model-checkpoint-" + '%04d' % (epoch+1), global_step=global_step) 148 | 149 | 150 | print 
"Optimization Finished!" 151 | -------------------------------------------------------------------------------- /first_edition_archive/archive/input_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Functions for downloading and reading MNIST data.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | import gzip 20 | import os 21 | import tensorflow.python.platform 22 | import numpy 23 | from six.moves import urllib 24 | from six.moves import xrange # pylint: disable=redefined-builtin 25 | import tensorflow as tf 26 | SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/' 27 | def maybe_download(filename, work_directory): 28 | """Download the data from Yann's website, unless it's already here.""" 29 | if not os.path.exists(work_directory): 30 | os.mkdir(work_directory) 31 | filepath = os.path.join(work_directory, filename) 32 | if not os.path.exists(filepath): 33 | filepath, _ = urllib.request.urlretrieve(SOURCE_URL + filename, filepath) 34 | statinfo = os.stat(filepath) 35 | print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') 36 | return filepath 37 | def _read32(bytestream): 38 | dt = numpy.dtype(numpy.uint32).newbyteorder('>') 39 | return numpy.frombuffer(bytestream.read(4), dtype=dt)[0] 40 | def extract_images(filename): 41 | """Extract the images into a 4D uint8 numpy array [index, y, x, depth].""" 42 | print('Extracting', filename) 43 | with gzip.open(filename) as bytestream: 44 | magic = _read32(bytestream) 45 | if magic != 2051: 46 | raise ValueError( 47 | 'Invalid magic number %d in MNIST image file: %s' % 48 | (magic, filename)) 49 | num_images = _read32(bytestream) 50 | rows = _read32(bytestream) 51 | cols = _read32(bytestream) 52 | buf = bytestream.read(rows * cols * num_images) 53 | data = numpy.frombuffer(buf, dtype=numpy.uint8) 54 | data = data.reshape(num_images, rows, cols, 1) 55 | return data 56 | def dense_to_one_hot(labels_dense, num_classes=10): 57 | """Convert class labels from scalars to one-hot vectors.""" 58 | num_labels = labels_dense.shape[0] 59 | index_offset = numpy.arange(num_labels) * num_classes 60 | labels_one_hot = numpy.zeros((num_labels, num_classes)) 61 | labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1 62 | return labels_one_hot 63 | def extract_labels(filename, one_hot=False): 64 | """Extract the labels into a 1D uint8 numpy array [index].""" 65 | print('Extracting', filename) 66 | with gzip.open(filename) as bytestream: 67 | magic = _read32(bytestream) 68 | if magic != 2049: 69 | raise ValueError( 70 | 'Invalid magic number %d in MNIST label file: %s' % 71 | (magic, filename)) 72 | num_items = _read32(bytestream) 73 | buf = bytestream.read(num_items) 74 | labels = 
numpy.frombuffer(buf, dtype=numpy.uint8) 75 | if one_hot: 76 | return dense_to_one_hot(labels) 77 | return labels 78 | class DataSet(object): 79 | def __init__(self, images, labels, fake_data=False, one_hot=False, 80 | dtype=tf.float32): 81 | """Construct a DataSet. 82 | one_hot arg is used only if fake_data is true. `dtype` can be either 83 | `uint8` to leave the input as `[0, 255]`, or `float32` to rescale into 84 | `[0, 1]`. 85 | """ 86 | dtype = tf.as_dtype(dtype).base_dtype 87 | if dtype not in (tf.uint8, tf.float32): 88 | raise TypeError('Invalid image dtype %r, expected uint8 or float32' % 89 | dtype) 90 | if fake_data: 91 | self._num_examples = 10000 92 | self.one_hot = one_hot 93 | else: 94 | assert images.shape[0] == labels.shape[0], ( 95 | 'images.shape: %s labels.shape: %s' % (images.shape, 96 | labels.shape)) 97 | self._num_examples = images.shape[0] 98 | # Convert shape from [num examples, rows, columns, depth] 99 | # to [num examples, rows*columns] (assuming depth == 1) 100 | assert images.shape[3] == 1 101 | images = images.reshape(images.shape[0], 102 | images.shape[1] * images.shape[2]) 103 | if dtype == tf.float32: 104 | # Convert from [0, 255] -> [0.0, 1.0]. 105 | images = images.astype(numpy.float32) 106 | images = numpy.multiply(images, 1.0 / 255.0) 107 | self._images = images 108 | self._labels = labels 109 | self._epochs_completed = 0 110 | self._index_in_epoch = 0 111 | @property 112 | def images(self): 113 | return self._images 114 | @property 115 | def labels(self): 116 | return self._labels 117 | @property 118 | def num_examples(self): 119 | return self._num_examples 120 | @property 121 | def epochs_completed(self): 122 | return self._epochs_completed 123 | def next_batch(self, batch_size, fake_data=False): 124 | """Return the next `batch_size` examples from this data set.""" 125 | if fake_data: 126 | fake_image = [1] * 784 127 | if self.one_hot: 128 | fake_label = [1] + [0] * 9 129 | else: 130 | fake_label = 0 131 | return [fake_image for _ in xrange(batch_size)], [ 132 | fake_label for _ in xrange(batch_size)] 133 | start = self._index_in_epoch 134 | self._index_in_epoch += batch_size 135 | if self._index_in_epoch > self._num_examples: 136 | # Finished epoch 137 | self._epochs_completed += 1 138 | # Shuffle the data 139 | perm = numpy.arange(self._num_examples) 140 | numpy.random.shuffle(perm) 141 | self._images = self._images[perm] 142 | self._labels = self._labels[perm] 143 | # Start next epoch 144 | start = 0 145 | self._index_in_epoch = batch_size 146 | assert batch_size <= self._num_examples 147 | end = self._index_in_epoch 148 | return self._images[start:end], self._labels[start:end] 149 | def read_data_sets(train_dir, fake_data=False, one_hot=False, dtype=tf.float32): 150 | class DataSets(object): 151 | pass 152 | data_sets = DataSets() 153 | if fake_data: 154 | def fake(): 155 | return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype) 156 | data_sets.train = fake() 157 | data_sets.validation = fake() 158 | data_sets.test = fake() 159 | return data_sets 160 | TRAIN_IMAGES = 'train-images-idx3-ubyte.gz' 161 | TRAIN_LABELS = 'train-labels-idx1-ubyte.gz' 162 | TEST_IMAGES = 't10k-images-idx3-ubyte.gz' 163 | TEST_LABELS = 't10k-labels-idx1-ubyte.gz' 164 | VALIDATION_SIZE = 5000 165 | local_file = maybe_download(TRAIN_IMAGES, train_dir) 166 | train_images = extract_images(local_file) 167 | local_file = maybe_download(TRAIN_LABELS, train_dir) 168 | train_labels = extract_labels(local_file, one_hot=one_hot) 169 | local_file = 
maybe_download(TEST_IMAGES, train_dir) 170 | test_images = extract_images(local_file) 171 | local_file = maybe_download(TEST_LABELS, train_dir) 172 | test_labels = extract_labels(local_file, one_hot=one_hot) 173 | validation_images = train_images[:VALIDATION_SIZE] 174 | validation_labels = train_labels[:VALIDATION_SIZE] 175 | train_images = train_images[VALIDATION_SIZE:] 176 | train_labels = train_labels[VALIDATION_SIZE:] 177 | data_sets.train = DataSet(train_images, train_labels, dtype=dtype) 178 | data_sets.validation = DataSet(validation_images, validation_labels, 179 | dtype=dtype) 180 | data_sets.test = DataSet(test_images, test_labels, dtype=dtype) 181 | return data_sets -------------------------------------------------------------------------------- /first_edition_archive/archive/input_word_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import collections 21 | import math 22 | import os 23 | import random 24 | import zipfile 25 | 26 | import numpy as np 27 | from six.moves import urllib 28 | from six.moves import xrange # pylint: disable=redefined-builtin 29 | import tensorflow as tf 30 | 31 | # Step 1: Download the data. 32 | url = 'http://mattmahoney.net/dc/' 33 | 34 | def maybe_download(filename, expected_bytes): 35 | """Download a file if not present, and make sure it's the right size.""" 36 | if not os.path.exists(filename): 37 | filename, _ = urllib.request.urlretrieve(url + filename, filename) 38 | statinfo = os.stat(filename) 39 | if statinfo.st_size == expected_bytes: 40 | print('Found and verified', filename) 41 | else: 42 | print(statinfo.st_size) 43 | raise Exception( 44 | 'Failed to verify ' + filename + '. Can you get to it with a browser?') 45 | return filename 46 | 47 | filename = maybe_download('text8.zip', 31344016) 48 | 49 | 50 | # Read the data into a list of strings. 51 | def read_data(filename): 52 | """Extract the first file enclosed in a zip file as a list of words""" 53 | with zipfile.ZipFile(filename) as f: 54 | data = tf.compat.as_str(f.read(f.namelist()[0])).split() 55 | return data 56 | 57 | words = read_data(filename) 58 | data_size = len(words) 59 | print('Data size', data_size) 60 | 61 | # Step 2: Build the dictionary and replace rare words with UNK token. 
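# As a worked example (a hypothetical toy corpus, added for illustration):
# for words = ['the', 'quick', 'quick', 'the', 'the'], build_dataset below
# would return
#   count              -> [['UNK', 0], ('the', 3), ('quick', 2)]
#   dictionary         -> {'UNK': 0, 'the': 1, 'quick': 2}
#   data               -> [1, 2, 2, 1, 1]   (the corpus re-encoded as word ids)
#   reverse_dictionary -> {0: 'UNK', 1: 'the', 2: 'quick'}
# Any word outside the vocabulary_size most frequent words is mapped to UNK (id 0).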
62 | vocabulary_size = 10000 63 | 64 | def build_dataset(words): 65 | count = [['UNK', -1]] 66 | count.extend(collections.Counter(words).most_common(vocabulary_size - 1)) 67 | dictionary = dict() 68 | for word, _ in count: 69 | dictionary[word] = len(dictionary) 70 | data = list() 71 | unk_count = 0 72 | for word in words: 73 | if word in dictionary: 74 | index = dictionary[word] 75 | else: 76 | index = 0 # dictionary['UNK'] 77 | unk_count += 1 78 | data.append(index) 79 | count[0][1] = unk_count 80 | reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys())) 81 | return data, count, dictionary, reverse_dictionary 82 | 83 | data, count, dictionary, reverse_dictionary = build_dataset(words) 84 | del words # Hint to reduce memory. 85 | print('Most common words (+UNK)', count[:5]) 86 | print('Sample data', data[:10], [reverse_dictionary[i] for i in data[:10]]) 87 | 88 | data_index = 0 89 | 90 | 91 | # Step 3: Function to generate a training batch for the skip-gram model. 92 | def generate_batch(batch_size, num_skips, skip_window): 93 | global data_index 94 | assert batch_size % num_skips == 0 95 | assert num_skips <= 2 * skip_window 96 | batch = np.ndarray(shape=(batch_size), dtype=np.int32) 97 | labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32) 98 | span = 2 * skip_window + 1 # [ skip_window target skip_window ] 99 | buffer = collections.deque(maxlen=span) 100 | for _ in range(span): 101 | buffer.append(data[data_index]) 102 | data_index = (data_index + 1) % len(data) 103 | for i in range(batch_size // num_skips): 104 | target = skip_window # target label at the center of the buffer 105 | targets_to_avoid = [ skip_window ] 106 | for j in range(num_skips): 107 | while target in targets_to_avoid: 108 | target = random.randint(0, span - 1) 109 | targets_to_avoid.append(target) 110 | batch[i * num_skips + j] = buffer[skip_window] 111 | labels[i * num_skips + j, 0] = buffer[target] 112 | buffer.append(data[data_index]) 113 | data_index = (data_index + 1) % len(data) 114 | return batch, labels 115 | 116 | batch, labels = generate_batch(batch_size=8, num_skips=2, skip_window=1) 117 | for i in range(8): 118 | print(batch[i], reverse_dictionary[batch[i]], 119 | '->', labels[i, 0], reverse_dictionary[labels[i, 0]]) 120 | 121 | def plot_with_labels(low_dim_embs, labels, filename='tsne.png'): 122 | assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings" 123 | plt.figure(figsize=(18, 18)) #in inches 124 | for i, label in enumerate(labels): 125 | x, y = low_dim_embs[i,:] 126 | plt.scatter(x, y) 127 | plt.annotate(label, 128 | xy=(x, y), 129 | xytext=(5, 2), 130 | textcoords='offset points', 131 | ha='right', 132 | va='bottom') 133 | 134 | plt.savefig(filename) 135 | -------------------------------------------------------------------------------- /first_edition_archive/archive/linear_interpolation.py: -------------------------------------------------------------------------------- 1 | from fdl_examples.datatools import input_data 2 | mnist = input_data.read_data_sets("data/", one_hot=True) 3 | 4 | import tensorflow as tf 5 | import numpy as np 6 | from fdl_examples.chapter3.multilayer_perceptron_updated import inference, loss 7 | 8 | import matplotlib.pyplot as plt 9 | 10 | sess = tf.Session() 11 | 12 | x = tf.placeholder("float", [None, 784]) # mnist data image of shape 28*28=784 13 | y = tf.placeholder("float", [None, 10]) # 0-9 digits recognition => 10 classes 14 | 15 | with tf.variable_scope("mlp_model") as scope: 16 | 17 | output_opt = inference(x) 18 | cost_opt 
= loss(output_opt, y) 19 | 20 | saver = tf.train.Saver() 21 | 22 | scope.reuse_variables() 23 | 24 | var_list_opt = ["hidden_1/W", "hidden_1/b", "hidden_2/W", "hidden_2/b", "output/W", "output/b"] 25 | var_list_opt = [tf.get_variable(v) for v in var_list_opt] 26 | 27 | saver.restore(sess, "model-checkpoint-547800") 28 | 29 | 30 | with tf.variable_scope("mlp_init") as scope: 31 | 32 | output_rand = inference(x) 33 | cost_rand = loss(output_rand, y) 34 | 35 | scope.reuse_variables() 36 | 37 | var_list_rand = ["hidden_1/W", "hidden_1/b", "hidden_2/W", "hidden_2/b", "output/W", "output/b"] 38 | var_list_rand = [tf.get_variable(v) for v in var_list_rand] 39 | 40 | init_op = tf.initialize_variables(var_list_rand) 41 | 42 | sess.run(init_op) 43 | 44 | 45 | feed_dict = { 46 | x: mnist.test.images, 47 | y: mnist.test.labels, 48 | } 49 | 50 | print(sess.run([cost_opt, cost_rand], feed_dict=feed_dict)) 51 | 52 | with tf.variable_scope("mlp_inter") as scope: 53 | 54 | alpha = tf.placeholder("float", [1, 1]) 55 | 56 | h1_W_inter = var_list_opt[0] * (1 - alpha) + var_list_rand[0] * (alpha) 57 | h1_b_inter = var_list_opt[1] * (1 - alpha) + var_list_rand[1] * (alpha) 58 | h2_W_inter = var_list_opt[2] * (1 - alpha) + var_list_rand[2] * (alpha) 59 | h2_b_inter = var_list_opt[3] * (1 - alpha) + var_list_rand[3] * (alpha) 60 | o_W_inter = var_list_opt[4] * (1 - alpha) + var_list_rand[4] * (alpha) 61 | o_b_inter = var_list_opt[5] * (1 - alpha) + var_list_rand[5] * (alpha) 62 | 63 | h1_inter = tf.nn.relu(tf.matmul(x, h1_W_inter) + h1_b_inter) 64 | h2_inter = tf.nn.relu(tf.matmul(h1_inter, h2_W_inter) + h2_b_inter) 65 | o_inter = tf.nn.relu(tf.matmul(h2_inter, o_W_inter) + o_b_inter) 66 | 67 | cost_inter = loss(o_inter, y) 68 | tf.scalar_summary("interpolated_cost", cost_inter) 69 | 70 | 71 | summary_writer = tf.train.SummaryWriter("linear_interp_logs/", 72 | graph_def=sess.graph_def) 73 | summary_op = tf.merge_all_summaries() 74 | results = [] 75 | for a in np.arange(-2, 2, 0.01): 76 | feed_dict = { 77 | x: mnist.test.images, 78 | y: mnist.test.labels, 79 | alpha: [[a]], 80 | } 81 | 82 | cost, summary_str = sess.run([cost_inter, summary_op], feed_dict=feed_dict) 83 | summary_writer.add_summary(summary_str, (a + 2)/0.01) 84 | results.append(cost) 85 | 86 | plt.plot(np.arange(-2, 2, 0.01), results, 'ro') 87 | plt.ylabel('Incurred Error') 88 | plt.xlabel('Alpha') 89 | plt.show() 90 | 91 | 92 | -------------------------------------------------------------------------------- /first_edition_archive/archive/logistic_network-[THEANO]/logistic_network.py: -------------------------------------------------------------------------------- 1 | """ 2 | We will use this class to represent a simple logistic regression 3 | classifier. We'll represent this in Theano as a neural network 4 | with no hidden layers. This is our first attempt at building a 5 | neural network model to solve interesting problems. Here, we'll 6 | use this class to crack the MNIST handwritten digit dataset problem, 7 | but this class has been constructed so that it can be reappropriated 8 | to any use! 9 | 10 | References: 11 | - textbooks: "Pattern Recognition and Machine Learning", Christopher M. Bishop, section 4.3.2 12 | - websites: http://deeplearning.net/tutorial, Lisa Lab 13 | """ 14 | 15 | import numpy as np 16 | import theano.tensor as T 17 | import theano 18 | 19 | class LogisticNetwork(object): 20 | """ 21 | The logistic regression class is described by two parameters (which 22 | we will want to learn). The first is a weight matrix. 
We'll refer to 23 | this weight matrix as W. The second is a bias vector b. Refer to the 24 | text if you want to learn more about how this network works. Let's get 25 | started! 26 | """ 27 | 28 | def __init__(self, input, input_dim, output_dim): 29 | """ 30 | We first initialize the logistic network object with some important 31 | information. 32 | 33 | PARAM input : theano.tensor.TensorType 34 | A symbolic variable that we'll use to represent one minibatch of our 35 | dataset 36 | 37 | PARAM input_dim : int 38 | This will represent the number of input neurons in our model 39 | 40 | PARAM ouptut_dim : int 41 | This will represent the number of neurons in the output layer (i.e. 42 | the number of possible classifications for the input) 43 | """ 44 | 45 | # We initialize the weight matrix W of size (input_dim, output_dim) 46 | self.W = theano.shared( 47 | value=np.zeros((input_dim, output_dim)), 48 | name='W', 49 | borrow=True 50 | ) 51 | 52 | # We initialize a bias vector for the neurons of the output layer 53 | self.b = theano.shared( 54 | value=np.zeros(output_dim), 55 | name='b', 56 | borrow='True' 57 | ) 58 | 59 | # Symbolic description of how to compute class membership probabilities 60 | self.output = T.nnet.softmax(T.dot(input, self.W) + self.b) 61 | 62 | # Symbolic description of the final prediction 63 | self.predicted = T.argmax(self.output, axis=1) 64 | 65 | def logistic_network_cost(self, y, lambda_l2=0): 66 | """ 67 | Here we express the cost incurred by an example given the correct 68 | distribution 69 | 70 | PARAM y : theano.tensor.TensorType 71 | These are the correct answers, and we compute the cost with 72 | respect to this ground truth (over the entire minibatch). This 73 | means that y is of size (minibatch_size, output_dim) 74 | 75 | PARAM lambda : float 76 | This is the L2 regularization parameter that we use to penalize large 77 | values for components of W, thus discouraging potential overfitting 78 | """ 79 | # Calculate the log probabilities of the softmax output 80 | log_probabilities = T.log(self.output) 81 | 82 | # We use these log probabilities to compute the negative log likelihood 83 | negative_log_likelihood = -T.mean(log_probabilities[T.arange(y.shape[0]), y]) 84 | 85 | # Compute the L2 regularization component of the cost function 86 | l2_regularization = lambda_l2 * (self.W ** 2).sum() 87 | 88 | # Return a symbolic description of the cost function 89 | return negative_log_likelihood + l2_regularization 90 | 91 | def error_rate(self, y): 92 | """ 93 | Here we return the error rate of the model over a set of given labels 94 | (perhaps in a minibatch, in the validation set, or the test set) 95 | 96 | PARAM y : theano.tensor.TensorType 97 | These are the correct answers, and we compute the cost with 98 | respect to this ground truth (over the entire minibatch). 
This 99 | means that y is of size (minibatch_size, output_dim) 100 | """ 101 | 102 | # Make sure y is of the correct dimension 103 | assert y.ndim == self.predicted.ndim 104 | 105 | # Make sure that y contains values of the correct data type (ints) 106 | assert y.dtype.startswith('int') 107 | 108 | # Return the error rate on the data 109 | return T.mean(T.neq(self.predicted, y)) 110 | 111 | 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /first_edition_archive/archive/logistic_network-[THEANO]/mnist_logistic_sgd.py: -------------------------------------------------------------------------------- 1 | """ 2 | We'll now use the LogisticNetwork object we built in logistic_network.py in 3 | order to tackle the MNIST dataset challenge. We will use minibatch gradient 4 | descent to train this simplistic network model. 5 | 6 | References: 7 | - textbooks: "Pattern Recognition and Machine Learning", Christopher M. Bishop, section 4.3.2 8 | - websites: http://deeplearning.net/tutorial, Lisa Lab 9 | """ 10 | 11 | __docformat__ = 'restructedtext en' 12 | 13 | import cPickle 14 | import gzip 15 | import os 16 | import time 17 | import urllib 18 | from theano import function, shared, config 19 | import theano.tensor as T 20 | import numpy as np 21 | import logistic_network 22 | 23 | 24 | # Let's start off by defining some constants 25 | # EXPERIMENT!!! Play around the the learning rate! 26 | LEARNING_RATE = 0.2 27 | N_EPOCHS = 1000 28 | DATASET = 'mnist.pkl.gz' 29 | BATCH_SIZE = 600 30 | 31 | # Time to check if we have the data and if we don't, let's download it 32 | print "... LOADING DATA ..." 33 | 34 | data_path = os.path.join( 35 | os.path.split(__file__)[0], 36 | "..", 37 | "data", 38 | DATASET 39 | ) 40 | 41 | if (not os.path.isfile(data_path)): 42 | import urllib 43 | origin = ( 44 | 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz' 45 | ) 46 | print 'Downloading data from %s' % origin 47 | urllib.urlretrieve(origin, data_path) 48 | 49 | # Time to build our models 50 | print "... BUILDING MODEL ..." 51 | 52 | # Load the dataset 53 | data_file = gzip.open(data_path, 'rb') 54 | training_set, validation_set, test_set = cPickle.load(data_file) 55 | data_file.close() 56 | 57 | # Define a quick function to established a shared dataset (for efficiency) 58 | 59 | def shared_dataset(data_xy): 60 | """ 61 | We store the data in a shared variable because it allows Theano to copy it 62 | into GPU memory (if GPU utilization is enabled). By default, if a variable is 63 | not shared, it is moved to GPU at every use. This results in a big performance 64 | hit because that means the data will be copied one minibatch at a time. Instead, 65 | if we use shared variables, we don't have to worry about copying data 66 | repeatedly. 
67 | """ 68 | 69 | data_x, data_y = data_xy 70 | shared_x = shared(np.asarray(data_x, dtype=config.floatX), borrow=True) 71 | shared_y = shared(np.asarray(data_y, dtype='int32'), borrow=True) 72 | return shared_x, shared_y 73 | 74 | # We now instantiate the shared datasets 75 | training_set_x , training_set_y = shared_dataset(training_set) 76 | validation_set_x, validation_set_y = shared_dataset(validation_set) 77 | test_set_x, test_set_y = shared_dataset(test_set) 78 | 79 | # Lets compute the number of minibatches for training, validation, and testing 80 | n_training_batches = training_set_x.get_value(borrow=True).shape[0] / BATCH_SIZE 81 | n_validation_batches = validation_set_x.get_value(borrow=True).shape[0] / BATCH_SIZE 82 | n_test_batches = test_set_x.get_value(borrow=True).shape[0] / BATCH_SIZE 83 | 84 | # Now it's time for us to build the model! 85 | #Let's start of with an index to the minibatch we're using 86 | index = T.lscalar() 87 | 88 | # Generate symbolic variables for the input (a minibatch) 89 | x = T.dmatrix('x') 90 | y = T.ivector('y') 91 | 92 | # Construct the logistic network model 93 | # Keep in mind MNIST image is of size (28, 28) 94 | # Also number of output class is is 10 (digits 0, 1, ..., 9) 95 | model = logistic_network.LogisticNetwork(input=x, input_dim=28*28, output_dim=10) 96 | 97 | # Obtain a symbolic expression for the objective function 98 | # EXPERIMENT!!! Play around with L2 regression parameter! 99 | objective = model.logistic_network_cost(y, lambda_l2=0.0001) 100 | 101 | # Obtain a symbolic expression for the error incurred 102 | error = model.error_rate(y) 103 | 104 | # Compute symbolic gradients of objective with respect to model parameters 105 | dW, db = T.grad(objective, model.W), T.grad(objective, model.b) 106 | 107 | # Compile theano function for training with a minibatch 108 | train_model = function( 109 | inputs=[index], 110 | outputs=objective, 111 | updates=[ 112 | (model.W, model.W - LEARNING_RATE * dW), 113 | (model.b, model.b - LEARNING_RATE * db) 114 | ], 115 | givens={ 116 | x : training_set_x[index * BATCH_SIZE : (index + 1) * BATCH_SIZE], 117 | y : training_set_y[index * BATCH_SIZE : (index + 1) * BATCH_SIZE] 118 | } 119 | ) 120 | 121 | # Compile theano functions for validation and testing 122 | validate_model = function( 123 | inputs=[index], 124 | outputs=error, 125 | givens={ 126 | x : validation_set_x[index * BATCH_SIZE : (index + 1) * BATCH_SIZE], 127 | y : validation_set_y[index * BATCH_SIZE : (index + 1) * BATCH_SIZE] 128 | } 129 | ) 130 | 131 | test_model = function( 132 | inputs=[index], 133 | outputs=error, 134 | givens={ 135 | x : test_set_x[index * BATCH_SIZE : (index + 1) * BATCH_SIZE], 136 | y : test_set_y[index * BATCH_SIZE : (index + 1) * BATCH_SIZE] 137 | } 138 | ) 139 | 140 | 141 | # Let's set up the early stopping parameters (based on the validation set) 142 | 143 | # Must look at this many examples no matter what 144 | patience = 5000 145 | 146 | # Wait this much longer if a new best is found 147 | patience_increase = 2 148 | 149 | # This is when an improvement is significant 150 | improvement_threshold = 0.995 151 | 152 | # We go through this number of minbatches before we check on the validation set 153 | validation_freq = min(n_training_batches, patience / 2) 154 | 155 | # We keep of the best loss on the validation set here 156 | best_loss = np.inf 157 | 158 | # We also keep track of the epoch we are in 159 | epoch = 0 160 | 161 | # A boolean flag that propagates when patience has been exceeded 162 | 
exceeded_patience = False 163 | 164 | # Now we're ready to start training the model 165 | print "... TRAINING MODEL ..." 166 | start_time = time.clock() 167 | while (epoch < N_EPOCHS) and not exceeded_patience: 168 | epoch = epoch + 1 169 | for minibatch_index in xrange(n_training_batches): 170 | minibatch_objective = train_model(minibatch_index) 171 | iteration = (epoch - 1) * n_training_batches + minibatch_index 172 | 173 | if (iteration + 1) % validation_freq == 0: 174 | # Compute loss on validation set 175 | validation_losses = [validate_model(i) for i in xrange(n_validation_batches)] 176 | validation_loss = np.mean(validation_losses) 177 | 178 | print 'epoch %i, minibatch %i/%i, validation error: %f %%' % ( 179 | epoch, 180 | minibatch_index + 1, 181 | n_training_batches, 182 | validation_loss * 100 183 | ) 184 | 185 | if validation_loss < best_loss: 186 | if validation_loss < best_loss * improvement_threshold: 187 | patience = max(patience, iteration * patience_increase) 188 | best_loss = validation_loss 189 | 190 | if patience <= iteration: 191 | exceeded_patience = True 192 | break 193 | end_time = time.clock() 194 | 195 | # Let's compute how well we do on the test set 196 | test_losses = [test_model(i) for i in xrange(n_test_batches)] 197 | test_loss = np.mean(test_losses) 198 | 199 | # Print out the results! 200 | print '\n' 201 | print 'Optimization complete with best validation score of %f %%' % (best_loss * 100) 202 | print 'And with a test score of %f %%' % (test_loss * 100) 203 | print '\n' 204 | print 'The code ran for %d epochs and for a total time of %.1f seconds' % (epoch, end_time - start_time) 205 | print '\n' 206 | 207 | -------------------------------------------------------------------------------- /first_edition_archive/archive/logistic_regression.py: -------------------------------------------------------------------------------- 1 | import input_data 2 | mnist = input_data.read_data_sets("data/", one_hot=True) 3 | 4 | import tensorflow as tf 5 | import time 6 | 7 | 8 | # Parameters 9 | learning_rate = 0.01 10 | training_epochs = 60 11 | batch_size = 100 12 | display_step = 1 13 | 14 | def inference(x): 15 | init = tf.constant_initializer(value=0) 16 | W = tf.get_variable("W", [784, 10], 17 | initializer=init) 18 | b = tf.get_variable("b", [10], 19 | initializer=init) 20 | output = tf.nn.softmax(tf.matmul(x, W) + b) 21 | 22 | w_hist = tf.histogram_summary("weights", W) 23 | b_hist = tf.histogram_summary("biases", b) 24 | y_hist = tf.histogram_summary("output", output) 25 | 26 | return output 27 | 28 | def loss(output, y): 29 | dot_product = y * tf.log(output) 30 | 31 | # Reduction along axis 0 collapses each column into a single 32 | # value, whereas reduction along axis 1 collapses each row 33 | # into a single value. In general, reduction along axis i 34 | # collapses the ith dimension of a tensor to size 1. 
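# For example (a hypothetical 2x3 tensor, added for illustration):
#   t = [[1., 2., 3.],
#        [4., 5., 6.]]
#   reduce_sum(t, 0) -> [5., 7., 9.]   (each column collapsed to one value)
#   reduce_sum(t, 1) -> [6., 15.]      (each row collapsed to one value)
# Here dot_product has shape [batch_size, 10], so reducing along axis 1
# yields one cross-entropy value per example in the minibatch.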
35 | xentropy = -tf.reduce_sum(dot_product, reduction_indices=1) 36 | 37 | loss = tf.reduce_mean(xentropy) 38 | 39 | return loss 40 | 41 | def training(cost, global_step): 42 | 43 | tf.scalar_summary("cost", cost) 44 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 45 | train_op = optimizer.minimize(cost, global_step=global_step) 46 | 47 | return train_op 48 | 49 | 50 | def evaluate(output, y): 51 | correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1)) 52 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 53 | 54 | tf.scalar_summary("validation error", (1.0 - accuracy)) 55 | 56 | return accuracy 57 | 58 | if __name__ == '__main__': 59 | 60 | with tf.Graph().as_default(): 61 | 62 | x = tf.placeholder("float", [None, 784]) # mnist data image of shape 28*28=784 63 | y = tf.placeholder("float", [None, 10]) # 0-9 digits recognition => 10 classes 64 | 65 | 66 | output = inference(x) 67 | 68 | cost = loss(output, y) 69 | 70 | global_step = tf.Variable(0, name='global_step', trainable=False) 71 | 72 | train_op = training(cost, global_step) 73 | 74 | eval_op = evaluate(output, y) 75 | 76 | summary_op = tf.merge_all_summaries() 77 | 78 | saver = tf.train.Saver() 79 | 80 | sess = tf.Session() 81 | 82 | summary_writer = tf.train.SummaryWriter("logistic_logs/", 83 | graph_def=sess.graph_def) 84 | 85 | 86 | init_op = tf.initialize_all_variables() 87 | 88 | sess.run(init_op) 89 | 90 | 91 | # Training cycle 92 | for epoch in range(training_epochs): 93 | 94 | avg_cost = 0. 95 | total_batch = int(mnist.train.num_examples/batch_size) 96 | # Loop over all batches 97 | for i in range(total_batch): 98 | minibatch_x, minibatch_y = mnist.train.next_batch(batch_size) 99 | # Fit training using batch data 100 | sess.run(train_op, feed_dict={x: minibatch_x, y: minibatch_y}) 101 | # Compute average loss 102 | avg_cost += sess.run(cost, feed_dict={x: minibatch_x, y: minibatch_y})/total_batch 103 | # Display logs per epoch step 104 | if epoch % display_step == 0: 105 | print("Epoch:", '%04d' % (epoch+1), "cost =", "{:.9f}".format(avg_cost)) 106 | 107 | accuracy = sess.run(eval_op, feed_dict={x: mnist.validation.images, y: mnist.validation.labels}) 108 | 109 | print("Validation Error:", (1 - accuracy)) 110 | 111 | summary_str = sess.run(summary_op, feed_dict={x: minibatch_x, y: minibatch_y}) 112 | summary_writer.add_summary(summary_str, sess.run(global_step)) 113 | 114 | saver.save(sess, "logistic_logs/model-checkpoint", global_step=global_step) 115 | 116 | 117 | print("Optimization Finished!") 118 | 119 | 120 | accuracy = sess.run(eval_op, feed_dict={x: mnist.test.images, y: mnist.test.labels}) 121 | 122 | print("Test Accuracy:", accuracy) 123 | -------------------------------------------------------------------------------- /first_edition_archive/archive/lstm.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import tensorflow as tf 4 | from tensorflow.python.ops.rnn_cell import RNNCell 5 | 6 | class LSTMCell(RNNCell): 7 | '''Vanilla LSTM implemented with same initializations as BN-LSTM''' 8 | def __init__(self, num_units): 9 | self.num_units = num_units 10 | 11 | @property 12 | def state_size(self): 13 | return (self.num_units, self.num_units) 14 | 15 | @property 16 | def output_size(self): 17 | return self.num_units 18 | 19 | def __call__(self, x, state, scope=None): 20 | with tf.variable_scope(scope or type(self).__name__): 21 | c, h = state 22 | 23 | # Keep W_xh and W_hh separate here as well 
to reuse initialization methods 24 | x_size = x.get_shape().as_list()[1] 25 | print x.get_shape().as_list() 26 | W_xh = tf.get_variable('W_xh', 27 | [x_size, 4 * self.num_units], 28 | initializer=orthogonal_initializer()) 29 | W_hh = tf.get_variable('W_hh', 30 | [self.num_units, 4 * self.num_units], 31 | initializer=bn_lstm_identity_initializer(0.95)) 32 | bias = tf.get_variable('bias', [4 * self.num_units]) 33 | 34 | # hidden = tf.matmul(x, W_xh) + tf.matmul(h, W_hh) + bias 35 | # improve speed by concat. 36 | concat = tf.concat(1, [x, h]) 37 | W_both = tf.concat(0, [W_xh, W_hh]) 38 | hidden = tf.matmul(concat, W_both) + bias 39 | 40 | i, j, f, o = tf.split(1, 4, hidden) 41 | 42 | new_c = c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j) 43 | new_h = tf.tanh(new_c) * tf.sigmoid(o) 44 | 45 | return new_h, (new_c, new_h) 46 | 47 | class BNLSTMCell(RNNCell): 48 | '''Batch normalized LSTM as described in arxiv.org/abs/1603.09025''' 49 | def __init__(self, num_units, training): 50 | self.num_units = num_units 51 | self.training = training 52 | 53 | @property 54 | def state_size(self): 55 | return (self.num_units, self.num_units) 56 | 57 | @property 58 | def output_size(self): 59 | return self.num_units 60 | 61 | def __call__(self, x, state, scope=None): 62 | with tf.variable_scope(scope or type(self).__name__): 63 | c, h = state 64 | 65 | x_size = x.get_shape().as_list()[1] 66 | W_xh = tf.get_variable('W_xh', 67 | [x_size, 4 * self.num_units], 68 | initializer=orthogonal_initializer()) 69 | W_hh = tf.get_variable('W_hh', 70 | [self.num_units, 4 * self.num_units], 71 | initializer=bn_lstm_identity_initializer(0.95)) 72 | bias = tf.get_variable('bias', [4 * self.num_units]) 73 | 74 | xh = tf.matmul(x, W_xh) 75 | hh = tf.matmul(h, W_hh) 76 | 77 | bn_xh = batch_norm(xh, 'xh', self.training) 78 | bn_hh = batch_norm(hh, 'hh', self.training) 79 | 80 | hidden = bn_xh + bn_hh + bias 81 | 82 | i, j, f, o = tf.split(1, 4, hidden) 83 | 84 | new_c = c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j) 85 | bn_new_c = batch_norm(new_c, 'c', self.training) 86 | 87 | new_h = tf.tanh(bn_new_c) * tf.sigmoid(o) 88 | 89 | return new_h, (new_c, new_h) 90 | 91 | def orthogonal(shape): 92 | flat_shape = (shape[0], np.prod(shape[1:])) 93 | a = np.random.normal(0.0, 1.0, flat_shape) 94 | u, _, v = np.linalg.svd(a, full_matrices=False) 95 | q = u if u.shape == flat_shape else v 96 | return q.reshape(shape) 97 | 98 | def bn_lstm_identity_initializer(scale): 99 | def _initializer(shape, dtype=tf.float32, partition_info=None): 100 | '''Ugly cause LSTM params calculated in one matrix multiply''' 101 | size = shape[0] 102 | # gate (j) is identity 103 | t = np.zeros(shape) 104 | t[:, size:size * 2] = np.identity(size) * scale 105 | t[:, :size] = orthogonal([size, size]) 106 | t[:, size * 2:size * 3] = orthogonal([size, size]) 107 | t[:, size * 3:] = orthogonal([size, size]) 108 | return tf.constant(t, dtype) 109 | 110 | return _initializer 111 | 112 | def orthogonal_initializer(): 113 | def _initializer(shape, dtype=tf.float32, partition_info=None): 114 | return tf.constant(orthogonal(shape), dtype) 115 | return _initializer 116 | 117 | def batch_norm(x, name_scope, training, epsilon=1e-3, decay=0.999): 118 | '''Assume 2d [batch, values] tensor''' 119 | 120 | with tf.variable_scope(name_scope): 121 | size = x.get_shape().as_list()[1] 122 | 123 | scale = tf.get_variable('scale', [size], initializer=tf.constant_initializer(0.1)) 124 | offset = tf.get_variable('offset', [size]) 125 | 126 | pop_mean = tf.get_variable('pop_mean', [size], 
initializer=tf.zeros_initializer, trainable=False) 127 | pop_var = tf.get_variable('pop_var', [size], initializer=tf.ones_initializer, trainable=False) 128 | batch_mean, batch_var = tf.nn.moments(x, [0]) 129 | 130 | train_mean_op = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay)) 131 | train_var_op = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay)) 132 | 133 | def batch_statistics(): 134 | with tf.control_dependencies([train_mean_op, train_var_op]): 135 | return tf.nn.batch_normalization(x, batch_mean, batch_var, offset, scale, epsilon) 136 | 137 | def population_statistics(): 138 | return tf.nn.batch_normalization(x, pop_mean, pop_var, offset, scale, epsilon) 139 | 140 | return tf.cond(training, batch_statistics, population_statistics) 141 | -------------------------------------------------------------------------------- /first_edition_archive/archive/multilayer_perceptron.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "'''\n", 12 | "A Multilayer Perceptron implementation example using TensorFlow library.\n", 13 | "This example is using the MNIST database of handwritten digits (http://yann.lecun.com/exdb/mnist/)\n", 14 | "\n", 15 | "Author: Aymeric Damien\n", 16 | "Project: https://github.com/aymericdamien/TensorFlow-Examples/\n", 17 | "'''\n", 18 | "\n", 19 | "# Import MINST data\n", 20 | "import input_data\n", 21 | "mnist = input_data.read_data_sets(\"/tmp/data/\", one_hot=True)\n", 22 | "\n", 23 | "import tensorflow as tf\n", 24 | "\n", 25 | "# Parameters\n", 26 | "learning_rate = 0.001\n", 27 | "training_epochs = 15\n", 28 | "batch_size = 100\n", 29 | "display_step = 1\n", 30 | "\n", 31 | "# Network Parameters\n", 32 | "n_hidden_1 = 256 # 1st layer num features\n", 33 | "n_hidden_2 = 256 # 2nd layer num features\n", 34 | "n_input = 784 # MNIST data input (img shape: 28*28)\n", 35 | "n_classes = 10 # MNIST total classes (0-9 digits)\n", 36 | "\n", 37 | "# tf Graph input\n", 38 | "x = tf.placeholder(\"float\", [None, n_input])\n", 39 | "y = tf.placeholder(\"float\", [None, n_classes])\n", 40 | "\n", 41 | "# Create model\n", 42 | "def multilayer_perceptron(_X, _weights, _biases):\n", 43 | " layer_1 = tf.nn.relu(tf.add(tf.matmul(_X, _weights['h1']), _biases['b1'])) #Hidden layer with RELU activation\n", 44 | " layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, _weights['h2']), _biases['b2'])) #Hidden layer with RELU activation\n", 45 | " return tf.matmul(layer_2, _weights['out']) + _biases['out']\n", 46 | "\n", 47 | "# Store layers weight & bias\n", 48 | "weights = {\n", 49 | " 'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),\n", 50 | " 'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),\n", 51 | " 'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))\n", 52 | "}\n", 53 | "biases = {\n", 54 | " 'b1': tf.Variable(tf.random_normal([n_hidden_1])),\n", 55 | " 'b2': tf.Variable(tf.random_normal([n_hidden_2])),\n", 56 | " 'out': tf.Variable(tf.random_normal([n_classes]))\n", 57 | "}\n", 58 | "\n", 59 | "# Construct model\n", 60 | "pred = multilayer_perceptron(x, weights, biases)\n", 61 | "\n", 62 | "# Define loss and optimizer\n", 63 | "cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y)) # Softmax loss\n", 64 | "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # Adam Optimizer\n", 65 | 
"\n", 66 | "# Initializing the variables\n", 67 | "init = tf.initialize_all_variables()\n", 68 | "\n", 69 | "# Launch the graph\n", 70 | "with tf.Session() as sess:\n", 71 | " sess.run(init)\n", 72 | "\n", 73 | " # Training cycle\n", 74 | " for epoch in range(training_epochs):\n", 75 | " avg_cost = 0.\n", 76 | " total_batch = int(mnist.train.num_examples/batch_size)\n", 77 | " # Loop over all batches\n", 78 | " for i in range(total_batch):\n", 79 | " batch_xs, batch_ys = mnist.train.next_batch(batch_size)\n", 80 | " # Fit training using batch data\n", 81 | " sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys})\n", 82 | " # Compute average loss\n", 83 | " avg_cost += sess.run(cost, feed_dict={x: batch_xs, y: batch_ys})/total_batch\n", 84 | " # Display logs per epoch step\n", 85 | " if epoch % display_step == 0:\n", 86 | " print \"Epoch:\", '%04d' % (epoch+1), \"cost=\", \"{:.9f}\".format(avg_cost)\n", 87 | "\n", 88 | " print \"Optimization Finished!\"\n", 89 | "\n", 90 | " # Test model\n", 91 | " correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))\n", 92 | " # Calculate accuracy\n", 93 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, \"float\"))\n", 94 | " print \"Accuracy:\", accuracy.eval({x: mnist.test.images, y: mnist.test.labels})\n" 95 | ] 96 | } 97 | ], 98 | "metadata": { 99 | "kernelspec": { 100 | "display_name": "Python 2", 101 | "language": "python", 102 | "name": "python2" 103 | }, 104 | "language_info": { 105 | "codemirror_mode": { 106 | "name": "ipython", 107 | "version": 2 108 | }, 109 | "file_extension": ".py", 110 | "mimetype": "text/x-python", 111 | "name": "python", 112 | "nbconvert_exporter": "python", 113 | "pygments_lexer": "ipython2", 114 | "version": "2.7.9" 115 | } 116 | }, 117 | "nbformat": 4, 118 | "nbformat_minor": 0 119 | } 120 | -------------------------------------------------------------------------------- /first_edition_archive/archive/multilayer_perceptron.py: -------------------------------------------------------------------------------- 1 | from fdl_examples.datatools import input_data 2 | mnist = input_data.read_data_sets("data/", one_hot=True) 3 | 4 | import tensorflow as tf 5 | import time, shutil, os 6 | 7 | # Architecture 8 | n_hidden_1 = 256 9 | n_hidden_2 = 256 10 | 11 | # Parameters 12 | learning_rate = 0.01 13 | training_epochs = 1000 14 | batch_size = 100 15 | display_step = 1 16 | 17 | def layer(input, weight_shape, bias_shape): 18 | weight_init = tf.random_normal_initializer(stddev=(2.0/weight_shape[0])**0.5) 19 | bias_init = tf.constant_initializer(value=0) 20 | W = tf.get_variable("W", weight_shape, 21 | initializer=weight_init) 22 | b = tf.get_variable("b", bias_shape, 23 | initializer=bias_init) 24 | return tf.nn.relu(tf.matmul(input, W) + b) 25 | 26 | def inference(x): 27 | with tf.variable_scope("hidden_1"): 28 | hidden_1 = layer(x, [784, n_hidden_1], [n_hidden_1]) 29 | 30 | with tf.variable_scope("hidden_2"): 31 | hidden_2 = layer(hidden_1, [n_hidden_1, n_hidden_2], [n_hidden_2]) 32 | 33 | with tf.variable_scope("output"): 34 | output = layer(hidden_2, [n_hidden_2, 10], [10]) 35 | 36 | return output 37 | 38 | def loss(output, y): 39 | xentropy = tf.nn.softmax_cross_entropy_with_logits(output, y) 40 | loss = tf.reduce_mean(xentropy) 41 | return loss 42 | 43 | def training(cost, global_step): 44 | tf.scalar_summary("cost", cost) 45 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 46 | train_op = optimizer.minimize(cost, global_step=global_step) 47 | return train_op 48 | 49 | 50 | def 
evaluate(output, y): 51 | correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1)) 52 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 53 | tf.scalar_summary("validation error", (1.0 - accuracy)) 54 | return accuracy 55 | 56 | if __name__ == '__main__': 57 | 58 | if os.path.exists("mlp_logs/"): 59 | shutil.rmtree("mlp_logs/") 60 | 61 | with tf.Graph().as_default(): 62 | 63 | with tf.variable_scope("mlp_model"): 64 | 65 | x = tf.placeholder("float", [None, 784]) # mnist data image of shape 28*28=784 66 | y = tf.placeholder("float", [None, 10]) # 0-9 digits recognition => 10 classes 67 | 68 | 69 | output = inference(x) 70 | 71 | cost = loss(output, y) 72 | 73 | global_step = tf.Variable(0, name='global_step', trainable=False) 74 | 75 | train_op = training(cost, global_step) 76 | 77 | eval_op = evaluate(output, y) 78 | 79 | summary_op = tf.merge_all_summaries() 80 | 81 | saver = tf.train.Saver() 82 | 83 | sess = tf.Session() 84 | 85 | summary_writer = tf.train.SummaryWriter("mlp_logs/", 86 | graph_def=sess.graph_def) 87 | 88 | 89 | init_op = tf.initialize_all_variables() 90 | 91 | sess.run(init_op) 92 | 93 | # saver.restore(sess, "mlp_logs/model-checkpoint-66000") 94 | 95 | 96 | # Training cycle 97 | for epoch in range(training_epochs): 98 | 99 | avg_cost = 0. 100 | total_batch = int(mnist.train.num_examples/batch_size) 101 | # Loop over all batches 102 | for i in range(total_batch): 103 | minibatch_x, minibatch_y = mnist.train.next_batch(batch_size) 104 | # Fit training using batch data 105 | sess.run(train_op, feed_dict={x: minibatch_x, y: minibatch_y}) 106 | # Compute average loss 107 | avg_cost += sess.run(cost, feed_dict={x: minibatch_x, y: minibatch_y})/total_batch 108 | # Display logs per epoch step 109 | if epoch % display_step == 0: 110 | print("Epoch:", '%04d' % (epoch+1), "cost =", "{:.9f}".format(avg_cost)) 111 | 112 | accuracy = sess.run(eval_op, feed_dict={x: mnist.validation.images, y: mnist.validation.labels}) 113 | 114 | print("Validation Error:", (1 - accuracy)) 115 | 116 | summary_str = sess.run(summary_op, feed_dict={x: minibatch_x, y: minibatch_y}) 117 | summary_writer.add_summary(summary_str, sess.run(global_step)) 118 | 119 | saver.save(sess, "mlp_logs/model-checkpoint", global_step=global_step) 120 | 121 | 122 | print("Optimization Finished!") 123 | 124 | 125 | accuracy = sess.run(eval_op, feed_dict={x: mnist.test.images, y: mnist.test.labels}) 126 | 127 | print("Test Accuracy:", accuracy) 128 | -------------------------------------------------------------------------------- /first_edition_archive/archive/neural_style/main.py: -------------------------------------------------------------------------------- 1 | ##################################################################################### 2 | # Code borrowed with permissions for reuse with modification from Anish Athalye # 3 | ##################################################################################### 4 | 5 | 6 | from stylize import * 7 | 8 | import numpy as np 9 | import scipy.misc 10 | 11 | import math 12 | from argparse import ArgumentParser 13 | 14 | # default arguments 15 | CONTENT_WEIGHT = 5e0 16 | STYLE_WEIGHT = 1e2 17 | TV_WEIGHT = 1e2 18 | LEARNING_RATE = 1e1 19 | STYLE_SCALE = 1.0 20 | ITERATIONS = 1000 21 | VGG_PATH = 'imagenet-vgg-verydeep-19.mat' 22 | 23 | 24 | def build_parser(): 25 | parser = ArgumentParser() 26 | parser.add_argument('--content', 27 | dest='content', help='content image', 28 | metavar='CONTENT', required=True) 29 | 
parser.add_argument('--styles', 30 | dest='styles', 31 | nargs='+', help='one or more style images', 32 | metavar='STYLE', required=True) 33 | parser.add_argument('--output', 34 | dest='output', help='output path', 35 | metavar='OUTPUT', required=True) 36 | parser.add_argument('--iterations', type=int, 37 | dest='iterations', help='iterations', 38 | metavar='ITERATIONS', default=ITERATIONS) 39 | parser.add_argument('--width', type=int, 40 | dest='width', help='output width', 41 | metavar='WIDTH') 42 | parser.add_argument('--style-scales', type=float, 43 | dest='style_scales', 44 | nargs='+', help='one or more style scales', 45 | metavar='STYLE_SCALE') 46 | parser.add_argument('--network', 47 | dest='network', help='path to network parameters', 48 | metavar='VGG_PATH', default=VGG_PATH) 49 | parser.add_argument('--content-weight', type=float, 50 | dest='content_weight', help='content weight', 51 | metavar='CONTENT_WEIGHT', default=CONTENT_WEIGHT) 52 | parser.add_argument('--style-weight', type=float, 53 | dest='style_weight', help='style weight', 54 | metavar='STYLE_WEIGHT', default=STYLE_WEIGHT) 55 | parser.add_argument('--style-blend-weights', type=float, 56 | dest='style_blend_weights', help='style blending weights', 57 | nargs='+', metavar='STYLE_BLEND_WEIGHT') 58 | parser.add_argument('--tv-weight', type=float, 59 | dest='tv_weight', help='total variation regularization weight', 60 | metavar='TV_WEIGHT', default=TV_WEIGHT) 61 | parser.add_argument('--learning-rate', type=float, 62 | dest='learning_rate', help='learning rate', 63 | metavar='LEARNING_RATE', default=LEARNING_RATE) 64 | parser.add_argument('--initial', 65 | dest='initial', help='initial image', 66 | metavar='INITIAL') 67 | parser.add_argument('--print-iterations', type=int, 68 | dest='print_iterations', help='statistics printing frequency', 69 | metavar='PRINT_ITERATIONS') 70 | parser.add_argument('--checkpoint-iterations', type=int, 71 | dest='checkpoint_iterations', help='checkpoint frequency', 72 | metavar='CHECKPOINT_ITERATIONS') 73 | return parser 74 | 75 | 76 | def main(): 77 | parser = build_parser() 78 | options = parser.parse_args() 79 | 80 | content_image = imread(options.content) 81 | style_images = [imread(style) for style in options.styles] 82 | 83 | width = options.width 84 | if width is not None: 85 | new_shape = (int(math.floor(float(content_image.shape[0]) / 86 | content_image.shape[1] * width)), width) 87 | content_image = scipy.misc.imresize(content_image, new_shape) 88 | target_shape = content_image.shape 89 | for i in range(len(style_images)): 90 | style_scale = STYLE_SCALE 91 | if options.style_scales is not None: 92 | style_scale = options.style_scales[i] 93 | style_images[i] = scipy.misc.imresize(style_images[i], style_scale * 94 | target_shape[1] / style_images[i].shape[1]) 95 | 96 | style_blend_weights = options.style_blend_weights 97 | if style_blend_weights is None: 98 | # default is equal weights 99 | style_blend_weights = [1.0/len(style_images) for _ in style_images] 100 | else: 101 | total_blend_weight = sum(style_blend_weights) 102 | style_blend_weights = [weight/total_blend_weight 103 | for weight in style_blend_weights] 104 | 105 | initial = options.initial 106 | if initial is not None: 107 | initial = scipy.misc.imresize(imread(initial), content_image.shape[:2]) 108 | 109 | image = stylize(options.network, initial, content_image, style_images, 110 | options.iterations, options.content_weight, options.style_weight, 111 | style_blend_weights, options.tv_weight, options.learning_rate, 112 | 
print_iterations=options.print_iterations, 113 | checkpoint_iterations=options.checkpoint_iterations) 114 | imsave(options.output, image) 115 | 116 | 117 | def imread(path): 118 | return scipy.misc.imread(path).astype(np.float) 119 | 120 | 121 | def imsave(path, img): 122 | img = np.clip(img, 0, 255).astype(np.uint8) 123 | scipy.misc.imsave(path, img) 124 | 125 | 126 | if __name__ == '__main__': 127 | main() -------------------------------------------------------------------------------- /first_edition_archive/archive/neural_style/stylize.py: -------------------------------------------------------------------------------- 1 | ##################################################################################### 2 | # Code borrowed with permissions for reuse with modification from Anish Athalye # 3 | ##################################################################################### 4 | 5 | import vgg 6 | 7 | import tensorflow as tf 8 | import numpy as np 9 | 10 | from sys import stderr 11 | 12 | CONTENT_LAYER = 'relu4_2' 13 | STYLE_LAYERS = ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1') 14 | 15 | 16 | def stylize(network, initial, content, styles, iterations, 17 | content_weight, style_weight, style_blend_weights, tv_weight, 18 | learning_rate, print_iterations=None, checkpoint_iterations=None): 19 | shape = (1,) + content.shape 20 | style_shapes = [(1,) + style.shape for style in styles] 21 | content_features = {} 22 | style_features = [{} for _ in styles] 23 | 24 | # compute content features in feedforward mode 25 | g = tf.Graph() 26 | with g.as_default(), g.device('/cpu:0'), tf.Session() as sess: 27 | image = tf.placeholder('float', shape=shape) 28 | net, mean_pixel = vgg.net(network, image) 29 | content_pre = np.array([vgg.preprocess(content, mean_pixel)]) 30 | content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval( 31 | feed_dict={image: content_pre}) 32 | 33 | # compute style features in feedforward mode 34 | for i in range(len(styles)): 35 | g = tf.Graph() 36 | with g.as_default(), g.device('/cpu:0'), tf.Session() as sess: 37 | image = tf.placeholder('float', shape=style_shapes[i]) 38 | net, _ = vgg.net(network, image) 39 | style_pre = np.array([vgg.preprocess(styles[i], mean_pixel)]) 40 | for layer in STYLE_LAYERS: 41 | features = net[layer].eval(feed_dict={image: style_pre}) 42 | features = np.reshape(features, (-1, features.shape[3])) 43 | gram = np.matmul(features.T, features) / features.size 44 | style_features[i][layer] = gram 45 | 46 | # make stylized image using backpropogation 47 | with tf.Graph().as_default(): 48 | if initial is None: 49 | noise = np.random.normal(size=shape, scale=np.std(content) * 0.1) 50 | initial = tf.random_normal(shape) * 0.256 51 | else: 52 | initial = np.array([vgg.preprocess(initial, mean_pixel)]) 53 | initial = initial.astype('float32') 54 | image = tf.Variable(initial) 55 | net, _ = vgg.net(network, image) 56 | 57 | # content loss 58 | content_loss = content_weight * (2 * tf.nn.l2_loss( 59 | net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) / 60 | content_features[CONTENT_LAYER].size) 61 | # style loss 62 | style_loss = 0 63 | for i in range(len(styles)): 64 | style_losses = [] 65 | for style_layer in STYLE_LAYERS: 66 | layer = net[style_layer] 67 | _, height, width, number = map(lambda i: i.value, layer.get_shape()) 68 | size = height * width * number 69 | feats = tf.reshape(layer, (-1, number)) 70 | gram = tf.matmul(tf.transpose(feats), feats) / size 71 | style_gram = style_features[i][style_layer] 72 | style_losses.append(2 * 
tf.nn.l2_loss(gram - style_gram) / style_gram.size) 73 | style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses) 74 | # total variation denoising 75 | tv_y_size = _tensor_size(image[:,1:,:,:]) 76 | tv_x_size = _tensor_size(image[:,:,1:,:]) 77 | tv_loss = tv_weight * 2 * ( 78 | (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) / 79 | tv_y_size) + 80 | (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) / 81 | tv_x_size)) 82 | # overall loss 83 | loss = content_loss + style_loss + tv_loss 84 | 85 | # optimizer setup 86 | train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss) 87 | 88 | def print_progress(i, last=False): 89 | if print_iterations is not None: 90 | if i is not None and i % print_iterations == 0 or last: 91 | print >> stderr, ' content loss: %g' % content_loss.eval() 92 | print >> stderr, ' style loss: %g' % style_loss.eval() 93 | print >> stderr, ' tv loss: %g' % tv_loss.eval() 94 | print >> stderr, ' total loss: %g' % loss.eval() 95 | 96 | # optimization 97 | best_loss = float('inf') 98 | best = None 99 | with tf.Session() as sess: 100 | sess.run(tf.initialize_all_variables()) 101 | for i in range(iterations): 102 | print_progress(i) 103 | print >> stderr, 'Iteration %d/%d' % (i + 1, iterations) 104 | train_step.run() 105 | if (checkpoint_iterations is not None and 106 | i % checkpoint_iterations == 0) or i == iterations - 1: 107 | this_loss = loss.eval() 108 | if this_loss < best_loss: 109 | best_loss = this_loss 110 | best = image.eval() 111 | print_progress(None, i == iterations - 1) 112 | return vgg.unprocess(best.reshape(shape[1:]), mean_pixel) 113 | 114 | 115 | def _tensor_size(tensor): 116 | from operator import mul 117 | return reduce(mul, (d.value for d in tensor.get_shape()), 1) -------------------------------------------------------------------------------- /first_edition_archive/archive/neural_style/vgg.py: -------------------------------------------------------------------------------- 1 | ##################################################################################### 2 | # Code borrowed with permissions for reuse with modification from Anish Athalye # 3 | ##################################################################################### 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | import scipy.io 8 | 9 | 10 | def net(data_path, input_image): 11 | layers = ( 12 | 'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1', 13 | 14 | 'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2', 15 | 16 | 'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 17 | 'relu3_3', 'conv3_4', 'relu3_4', 'pool3', 18 | 19 | 'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 20 | 'relu4_3', 'conv4_4', 'relu4_4', 'pool4', 21 | 22 | 'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 23 | 'relu5_3', 'conv5_4', 'relu5_4' 24 | ) 25 | 26 | data = scipy.io.loadmat(data_path) 27 | mean = data['normalization'][0][0][0] 28 | mean_pixel = np.mean(mean, axis=(0, 1)) 29 | weights = data['layers'][0] 30 | 31 | net = {} 32 | current = input_image 33 | for i, name in enumerate(layers): 34 | kind = name[:4] 35 | if kind == 'conv': 36 | kernels, bias = weights[i][0][0][0][0] 37 | # matconvnet: weights are [width, height, in_channels, out_channels] 38 | # tensorflow: weights are [height, width, in_channels, out_channels] 39 | kernels = np.transpose(kernels, (1, 0, 2, 3)) 40 | bias = bias.reshape(-1) 41 | current = _conv_layer(current, kernels, bias) 42 | elif kind == 'relu': 43 | current = tf.nn.relu(current) 44 | elif kind == 
'pool': 45 | current = _pool_layer(current) 46 | net[name] = current 47 | 48 | assert len(net) == len(layers) 49 | return net, mean_pixel 50 | 51 | 52 | def _conv_layer(input, weights, bias): 53 | conv = tf.nn.conv2d(input, tf.constant(weights), strides=(1, 1, 1, 1), 54 | padding='SAME') 55 | return tf.nn.bias_add(conv, bias) 56 | 57 | 58 | def _pool_layer(input): 59 | return tf.nn.max_pool(input, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1), 60 | padding='SAME') 61 | 62 | 63 | def preprocess(image, mean_pixel): 64 | return image - mean_pixel 65 | 66 | 67 | def unprocess(image, mean_pixel): 68 | return image + mean_pixel -------------------------------------------------------------------------------- /first_edition_archive/archive/one_layer_autoencoder.py: -------------------------------------------------------------------------------- 1 | ''' A one-layer autoencoder using TensorFlow library''' 2 | import tensorflow as tf 3 | import numpy as np 4 | import math 5 | #import pandas as pd 6 | #import sys 7 | 8 | input = np.array([[2.0, 1.0, 1.0, 2.0], 9 | [-2.0, 1.0, -1.0, 2.0], 10 | [0.0, 1.0, 0.0, 2.0], 11 | [0.0, -1.0, 0.0, -2.0], 12 | [0.0, -1.0, 0.0, -2.0]]) 13 | 14 | # Code here for importing data from file 15 | 16 | noisy_input = input + .2 * np.random.random_sample((input.shape)) - .1 17 | output = input 18 | 19 | # Scale to [0,1] 20 | scaled_input_1 = np.divide((noisy_input-noisy_input.min()), (noisy_input.max()-noisy_input.min())) 21 | scaled_output_1 = np.divide((output-output.min()), (output.max()-output.min())) 22 | # Scale to [-1,1] 23 | scaled_input_2 = (scaled_input_1*2)-1 24 | scaled_output_2 = (scaled_output_1*2)-1 25 | 26 | input_data = scaled_input_2 27 | output_data = scaled_output_2 28 | 29 | # Autoencoder with 1 hidden layer 30 | n_samp, n_input = input_data.shape 31 | n_hidden = 2 32 | 33 | x = tf.placeholder("float", [None, n_input]) 34 | # Weights and biases to hidden layer 35 | Wh = tf.Variable(tf.random_uniform((n_input, n_hidden), -1.0 / math.sqrt(n_input), 1.0 / math.sqrt(n_input))) 36 | bh = tf.Variable(tf.zeros([n_hidden])) 37 | h = tf.nn.tanh(tf.matmul(x,Wh) + bh) 38 | # Weights and biases to output layer 39 | Wo = tf.transpose(Wh) # tied weights 40 | bo = tf.Variable(tf.zeros([n_input])) 41 | y = tf.nn.tanh(tf.matmul(h,Wo) + bo) 42 | # Objective functions 43 | y_ = tf.placeholder("float", [None,n_input]) 44 | cross_entropy = -tf.reduce_sum(y_*tf.log(y)) 45 | meansq = tf.reduce_mean(tf.square(y_-y)) 46 | train_step = tf.train.GradientDescentOptimizer(0.05).minimize(meansq) 47 | 48 | init = tf.initialize_all_variables() 49 | sess = tf.Session() 50 | sess.run(init) 51 | 52 | n_rounds = 5000 53 | batch_size = min(50, n_samp) 54 | 55 | for i in range(n_rounds): 56 | sample = np.random.randint(n_samp, size=batch_size) 57 | batch_xs = input_data[sample][:] 58 | batch_ys = output_data[sample][:] 59 | sess.run(train_step, feed_dict={x: batch_xs, y_:batch_ys}) 60 | if i % 100 == 0: 61 | print i, sess.run(cross_entropy, feed_dict={x: batch_xs, y_:batch_ys}), sess.run(meansq, feed_dict={x: batch_xs, y_:batch_ys}) 62 | 63 | print "Target:" 64 | print output_data 65 | print "Final activations:" 66 | print sess.run(y, feed_dict={x: input_data}) 67 | print "Final weights (input => hidden layer)" 68 | print sess.run(Wh) 69 | print "Final biases (input => hidden layer)" 70 | print sess.run(bh) 71 | print "Final biases (hidden layer => output)" 72 | print sess.run(bo) 73 | print "Final activations of hidden layer" 74 | print sess.run(h, feed_dict={x: input_data}) 75 | 
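The one-layer autoencoder above ties its decoder weights to the transpose of its encoder weights (`Wo = tf.transpose(Wh)`). For readers working from the Second Edition's PyTorch notebooks, here is a minimal sketch of the same tied-weight idea, assuming the same toy input, scaling, and mean-squared objective as the script above; it is an illustrative rewrite, not code from the archive.

```python
# Minimal tied-weight autoencoder sketch in PyTorch (illustrative, not archive code).
import torch
import torch.nn as nn
import torch.nn.functional as F

class TiedAutoencoder(nn.Module):
    def __init__(self, n_input=4, n_hidden=2):
        super().__init__()
        bound = 1.0 / n_input ** 0.5
        # Single weight matrix shared by encoder and decoder (tied weights)
        self.W = nn.Parameter(torch.empty(n_input, n_hidden).uniform_(-bound, bound))
        self.bh = nn.Parameter(torch.zeros(n_hidden))
        self.bo = nn.Parameter(torch.zeros(n_input))

    def forward(self, x):
        h = torch.tanh(x @ self.W + self.bh)         # encoder
        return torch.tanh(h @ self.W.t() + self.bo)  # decoder reuses W transposed

if __name__ == "__main__":
    x = torch.tensor([[2.0, 1.0, 1.0, 2.0],
                      [-2.0, 1.0, -1.0, 2.0],
                      [0.0, 1.0, 0.0, 2.0],
                      [0.0, -1.0, 0.0, -2.0],
                      [0.0, -1.0, 0.0, -2.0]])
    noisy = x + 0.2 * torch.rand_like(x) - 0.1                         # same noise model as above
    inp = 2 * (noisy - noisy.min()) / (noisy.max() - noisy.min()) - 1  # scale to [-1, 1]
    target = 2 * (x - x.min()) / (x.max() - x.min()) - 1
    model = TiedAutoencoder()
    opt = torch.optim.SGD(model.parameters(), lr=0.05)
    for step in range(5000):
        opt.zero_grad()
        loss = F.mse_loss(model(inp), target)  # mean squared reconstruction error
        loss.backward()
        opt.step()
    print("final reconstruction loss:", loss.item())
```

Tying the decoder to the transpose of the encoder halves the number of weight parameters and acts as a mild regularizer, which is why the archived script reuses `Wh` rather than learning a separate output matrix.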
-------------------------------------------------------------------------------- /first_edition_archive/archive/optimzer_mlp.py: -------------------------------------------------------------------------------- 1 | import input_data 2 | mnist = input_data.read_data_sets("data/", one_hot=True) 3 | 4 | import tensorflow as tf 5 | import time, argparse 6 | 7 | # Architecture 8 | n_hidden_1 = 256 9 | n_hidden_2 = 256 10 | 11 | # Parameters 12 | training_epochs = 500 13 | batch_size = 100 14 | display_step = 1 15 | 16 | def layer(input, weight_shape, bias_shape): 17 | weight_init = tf.random_normal_initializer(stddev=(2.0/weight_shape[0])**0.5) 18 | bias_init = tf.constant_initializer(value=0) 19 | W = tf.get_variable("W", weight_shape, 20 | initializer=weight_init) 21 | b = tf.get_variable("b", bias_shape, 22 | initializer=bias_init) 23 | return tf.nn.relu(tf.matmul(input, W) + b) 24 | 25 | def inference(x): 26 | with tf.variable_scope("hidden_1"): 27 | hidden_1 = layer(x, [784, n_hidden_1], [n_hidden_1]) 28 | 29 | with tf.variable_scope("hidden_2"): 30 | hidden_2 = layer(hidden_1, [n_hidden_1, n_hidden_2], [n_hidden_2]) 31 | 32 | with tf.variable_scope("output"): 33 | output = layer(hidden_2, [n_hidden_2, 10], [10]) 34 | 35 | return output 36 | 37 | def loss(output, y): 38 | xentropy = tf.nn.softmax_cross_entropy_with_logits(output, y) 39 | loss = tf.reduce_mean(xentropy) 40 | return loss 41 | 42 | def training(cost, global_step, optimizer): 43 | tf.scalar_summary("cost", cost) 44 | train_op = None 45 | print optimizer 46 | if optimizer == "sgd": 47 | learning_rate = 0.01 48 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 49 | train_op = optimizer.minimize(cost, global_step=global_step) 50 | if optimizer == "momentum": 51 | learning_rate = 0.01 52 | momentum = 0.9 53 | optimizer = tf.train.MomentumOptimizer(learning_rate, momentum) 54 | train_op = optimizer.minimize(cost, global_step=global_step) 55 | return train_op 56 | 57 | 58 | def evaluate(output, y): 59 | correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1)) 60 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 61 | tf.scalar_summary("validation error", (1.0 - accuracy)) 62 | return accuracy 63 | 64 | if __name__ == '__main__': 65 | 66 | parser = argparse.ArgumentParser(description='Test various optimization strategies') 67 | parser.add_argument('optimizer', nargs=1, type=str) 68 | args = parser.parse_args() 69 | 70 | with tf.Graph().as_default(): 71 | 72 | with tf.variable_scope("mlp_model"): 73 | 74 | x = tf.placeholder("float", [None, 784]) # mnist data image of shape 28*28=784 75 | y = tf.placeholder("float", [None, 10]) # 0-9 digits recognition => 10 classes 76 | 77 | 78 | output = inference(x) 79 | 80 | cost = loss(output, y) 81 | 82 | global_step = tf.Variable(0, name='global_step', trainable=False) 83 | 84 | train_op = training(cost, global_step, args.optimizer[0]) 85 | 86 | eval_op = evaluate(output, y) 87 | 88 | summary_op = tf.merge_all_summaries() 89 | 90 | saver = tf.train.Saver() 91 | 92 | sess = tf.Session() 93 | 94 | summary_writer = tf.train.SummaryWriter("mlp_logs_%s/" % args.optimizer[0], 95 | graph_def=sess.graph_def) 96 | 97 | 98 | init_op = tf.initialize_all_variables() 99 | 100 | sess.run(init_op) 101 | 102 | # saver.restore(sess, "mlp_logs/model-checkpoint-66000") 103 | 104 | 105 | # Training cycle 106 | for epoch in range(training_epochs): 107 | 108 | avg_cost = 0. 
109 | total_batch = int(mnist.train.num_examples/batch_size) 110 | # Loop over all batches 111 | for i in range(total_batch): 112 | minibatch_x, minibatch_y = mnist.train.next_batch(batch_size) 113 | # Fit training using batch data 114 | sess.run(train_op, feed_dict={x: minibatch_x, y: minibatch_y}) 115 | # Compute average loss 116 | avg_cost += sess.run(cost, feed_dict={x: minibatch_x, y: minibatch_y})/total_batch 117 | # Display logs per epoch step 118 | if epoch % display_step == 0: 119 | print "Epoch:", '%04d' % (epoch+1), "cost =", "{:.9f}".format(avg_cost) 120 | 121 | accuracy = sess.run(eval_op, feed_dict={x: mnist.validation.images, y: mnist.validation.labels}) 122 | 123 | print "Validation Error:", (1 - accuracy) 124 | 125 | summary_str = sess.run(summary_op, feed_dict={x: minibatch_x, y: minibatch_y}) 126 | summary_writer.add_summary(summary_str, sess.run(global_step)) 127 | 128 | saver.save(sess, "mlp_logs_%s/model-checkpoint" % args.optimizer[0], global_step=global_step) 129 | 130 | 131 | print "Optimization Finished!" 132 | 133 | 134 | accuracy = sess.run(eval_op, feed_dict={x: mnist.test.images, y: mnist.test.labels}) 135 | 136 | print "Test Accuracy:", accuracy 137 | -------------------------------------------------------------------------------- /first_edition_archive/archive/random_walk.py: -------------------------------------------------------------------------------- 1 | import random 2 | import matplotlib.pyplot as plt 3 | import matplotlib.gridspec as gridspec 4 | 5 | step_range = 10 6 | momentum_range = [0.1, 0.5, 0.9, 0.99] 7 | 8 | step_choices = range(-1 * step_range, step_range + 1) 9 | rand_walk = [random.choice(step_choices) for x in xrange(100)] 10 | 11 | 12 | x = range(len(rand_walk)) 13 | zeros = [0 for i in x] 14 | 15 | import numpy as np 16 | yrange = 1.5 * np.max(rand_walk) 17 | 18 | fig = plt.figure(1) 19 | gs = gridspec.GridSpec(3, 4) 20 | ax = plt.subplot(gs[0, 1:3]) 21 | ax.set_title("No Momentum") 22 | plt.xlabel("steps") 23 | plt.plot(x, rand_walk, 'b', x, zeros, 'k') 24 | plt.ylim((-yrange, yrange)) 25 | 26 | 27 | momentum = momentum_range[0] 28 | momentum_rand_walk = [random.choice(step_choices)] 29 | 30 | for i in xrange(len(rand_walk) - 1): 31 | prev = momentum_rand_walk[-1] 32 | momentum_rand_walk.append(momentum * prev + (1 - momentum) * random.choice(step_choices)) 33 | 34 | ax = plt.subplot(gs[1,:2]) 35 | ax.set_title("Momentum = %s" % momentum_range[0]) 36 | plt.plot(x, momentum_rand_walk, 'r', x, zeros, 'k') 37 | plt.ylim((-yrange, yrange)) 38 | 39 | momentum = momentum_range[1] 40 | momentum_rand_walk = [random.choice(step_choices)] 41 | 42 | for i in xrange(len(rand_walk) - 1): 43 | prev = momentum_rand_walk[-1] 44 | momentum_rand_walk.append(momentum * prev + (1 - momentum) * random.choice(step_choices)) 45 | 46 | ax = plt.subplot(gs[1,2:]) 47 | ax.set_title("Momentum = %s" % momentum_range[1]) 48 | plt.plot(x, momentum_rand_walk, 'r', x, zeros, 'k') 49 | plt.ylim((-yrange, yrange)) 50 | 51 | momentum = momentum_range[2] 52 | momentum_rand_walk = [random.choice(step_choices)] 53 | 54 | for i in xrange(len(rand_walk) - 1): 55 | prev = momentum_rand_walk[-1] 56 | momentum_rand_walk.append(momentum * prev + (1 - momentum) * random.choice(step_choices)) 57 | 58 | ax = plt.subplot(gs[2,:2]) 59 | ax.set_title("Momentum = %s" % momentum_range[2]) 60 | plt.plot(x, momentum_rand_walk, 'r', x, zeros, 'k') 61 | plt.ylim((-yrange, yrange)) 62 | 63 | momentum = momentum_range[3] 64 | momentum_rand_walk = [random.choice(step_choices)] 65 | 66 | for i 
in xrange(len(rand_walk) - 1): 67 | prev = momentum_rand_walk[-1] 68 | momentum_rand_walk.append(momentum * prev + (1 - momentum) * random.choice(step_choices)) 69 | 70 | ax = plt.subplot(gs[2,2:]) 71 | ax.set_title("Momentum = %s" % momentum_range[3]) 72 | plt.plot(x, momentum_rand_walk, 'r', x, zeros, 'k') 73 | plt.ylim((-yrange, yrange)) 74 | 75 | fig.tight_layout() 76 | 77 | plt.show() 78 | -------------------------------------------------------------------------------- /first_edition_archive/archive/read_16M_tweet_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cPickle as pickle 3 | import csv, leveldb, os 4 | 5 | char_2_index = {} 6 | index_2_char = {} 7 | 8 | sentiment_2_index = {} 9 | index_2_sentiment = {} 10 | 11 | POSITIVE = 'positive' 12 | NEGATIVE = 'negative' 13 | 14 | train_dataset_raw = [] 15 | val_dataset_raw = [] 16 | 17 | db = None 18 | train_minibatches = 0 19 | 20 | if not os.path.isdir("data/twitter/tweetdb"): 21 | 22 | db = leveldb.LevelDB("data/twitter/tweetdb") 23 | 24 | 25 | with open('data/twitter/training.1600000.processed.noemoticon.csv', 'rb') as f: 26 | reader = csv.reader(f) 27 | train_dataset_raw = list(reader) 28 | np.random.shuffle(train_dataset_raw) 29 | 30 | with open('data/twitter/testdata.manual.2009.06.14.csv', 'rb') as f: 31 | reader = csv.reader(f) 32 | val_dataset_raw = list(reader) 33 | np.random.shuffle(val_dataset_raw) 34 | 35 | counter_c = 0 36 | counter_s = 0 37 | max_row = 0 38 | for row in train_dataset_raw + val_dataset_raw: 39 | # print row[0], row[5] 40 | max_row = max(len(row[5]), max_row) 41 | if row[0] not in sentiment_2_index: 42 | sentiment_2_index[row[0]] = counter_s 43 | index_2_sentiment[counter_s] = row[0] 44 | counter_s += 1 45 | for char in row[5]: 46 | if char not in char_2_index: 47 | char_2_index[char] = counter_c 48 | index_2_char[counter_c] = char 49 | counter_c += 1 50 | 51 | print "Dataset has max length %d" % max_row 52 | 53 | print index_2_char 54 | 55 | print index_2_sentiment 56 | 57 | train_minibatches = 0 58 | inputs = [] 59 | tags = [] 60 | for row in train_dataset_raw: 61 | if len(row[5]) > 200: 62 | continue 63 | # print row[1] 64 | if row[0] == '4': 65 | tags.append(1) 66 | elif row[0] == '0': 67 | tags.append(0) 68 | else: 69 | print "ERROR ON:", row 70 | continue 71 | 72 | print row[0], row[5] 73 | 74 | cur_input = [] 75 | for char in row[5]: 76 | cur_input.append(char_2_index[char]) 77 | cur_input = np.eye(len(char_2_index.keys()))[cur_input] 78 | init_len = len(cur_input) 79 | if 200 - init_len > 0: 80 | zero = np.zeros((200 - init_len, len(char_2_index.keys()))) 81 | cur_input = np.concatenate((cur_input, zero)) 82 | inputs.append(cur_input) 83 | 84 | print len(tags) 85 | 86 | if len(inputs) == 256: 87 | print "FINISH MINIBATCH %d, INSERT INTO DB" % train_minibatches 88 | inputs = np.array(inputs, dtype=np.float32) 89 | tags = np.eye(2, dtype=np.float32)[tags] 90 | db.Put("train_inputs_" + str(train_minibatches), inputs) 91 | db.Put("train_tags_" + str(train_minibatches), tags) 92 | train_minibatches += 1 93 | 94 | inputs = [] 95 | tags = [] 96 | 97 | db.Put("n_minibatches", pickle.dumps(train_minibatches)) 98 | 99 | inputs = [] 100 | tags = [] 101 | 102 | for row in val_dataset_raw: 103 | if len(row[5]) > 200: 104 | continue 105 | # print row[1] 106 | if row[0] == '4': 107 | tags.append(1) 108 | elif row[0] == '0': 109 | tags.append(0) 110 | else: 111 | print "ERROR ON:", row 112 | continue 113 | cur_input = [] 114 | for char in 
row[5]: 115 | cur_input.append(char_2_index[char]) 116 | cur_input = np.eye(len(char_2_index.keys()))[cur_input] 117 | init_len = len(cur_input) 118 | if 200 - init_len > 0: 119 | zero = np.zeros((200 - init_len, len(char_2_index.keys()))) 120 | cur_input = np.concatenate((cur_input, zero)) 121 | inputs.append(cur_input) 122 | 123 | if len(inputs) == 256: 124 | inputs = np.array(inputs, dtype=np.float32) 125 | tags = np.eye(2, dtype=np.float32)[tags] 126 | db.Put("val_inputs_0", inputs) 127 | db.Put("val_tags_0", tags) 128 | break 129 | else: 130 | db = leveldb.LevelDB("data/twitter/tweetdb") 131 | train_minibatches = pickle.loads(db.Get("n_minibatches")) 132 | 133 | 134 | 135 | 136 | 137 | class TweetDataset: 138 | def __init__(self, db, max_minibatch, prefix): 139 | self.ptr = 0 140 | self.prefix = prefix 141 | self.max_minibatch = max_minibatch 142 | 143 | def minibatch(self): 144 | inputs, tags = np.fromstring(db.Get(self.prefix + "_inputs_" + str(self.ptr)), dtype=np.float32).reshape((-1, 200, 194)), np.fromstring(db.Get(self.prefix + "_tags_" + str(self.ptr)), dtype=np.float32).reshape((-1, 2)) 145 | self.ptr = (self.ptr + 1) % self.max_minibatch 146 | return inputs, tags 147 | 148 | 149 | print "Start train dataset loading" 150 | 151 | train = TweetDataset(db, train_minibatches, "train") 152 | 153 | print "Start val dataset loading" 154 | 155 | val = TweetDataset(db, 1, "val") 156 | 157 | print "Finish dataset loading" 158 | -------------------------------------------------------------------------------- /first_edition_archive/archive/read_imdb_data.py: -------------------------------------------------------------------------------- 1 | import tflearn 2 | from tflearn.data_utils import to_categorical, pad_sequences 3 | from tflearn.datasets import imdb 4 | import numpy as np 5 | 6 | # IMDB Dataset loading 7 | train, test, _ = imdb.load_data(path='data/imdb.pkl', n_words=30000, 8 | valid_portion=0.1) 9 | trainX, trainY = train 10 | testX, testY = test 11 | 12 | # Data preprocessing 13 | # Sequence padding 14 | trainX = pad_sequences(trainX, maxlen=500, value=0.) 15 | testX = pad_sequences(testX, maxlen=500, value=0.) 
16 | # Converting labels to binary vectors 17 | trainY = to_categorical(trainY, nb_classes=2) 18 | testY = to_categorical(testY, nb_classes=2) 19 | 20 | 21 | 22 | class IMDBDataset(): 23 | def __init__(self, X, Y): 24 | self.num_examples = len(X) 25 | self.inputs = X 26 | self.tags = Y 27 | self.ptr = 0 28 | 29 | 30 | def minibatch(self, size): 31 | ret = None 32 | if self.ptr + size < len(self.inputs): 33 | ret = self.inputs[self.ptr:self.ptr+size], self.tags[self.ptr:self.ptr+size] 34 | else: 35 | ret = np.concatenate((self.inputs[self.ptr:], self.inputs[:size-len(self.inputs[self.ptr:])])), np.concatenate((self.tags[self.ptr:], self.tags[:size-len(self.tags[self.ptr:])])) 36 | self.ptr = (self.ptr + size) % len(self.inputs) 37 | 38 | return ret 39 | # return np.eye(10000)[ret[0]], ret[1] 40 | 41 | 42 | train = IMDBDataset(trainX, trainY) 43 | val = IMDBDataset(testX, testY) 44 | -------------------------------------------------------------------------------- /first_edition_archive/archive/read_pos_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import gensim, leveldb, os, re 3 | 4 | db = None 5 | 6 | tags_to_index = {} 7 | index_to_tags = {} 8 | train_dataset_raw = {} 9 | train_dataset = [] 10 | test_dataset_raw = {} 11 | test_dataset = [] 12 | dataset_vocab = {} 13 | 14 | print "LOADING PRETRAINED WORD2VEC MODEL... " 15 | if not os.path.isdir("data/word2vecdb"): 16 | model = gensim.models.Word2Vec.load_word2vec_format('/Users/nikhilbuduma/Downloads/GoogleNews-vectors-negative300.bin', binary=True) 17 | db = leveldb.LevelDB("data/word2vecdb") 18 | 19 | try: 20 | os.remove("data/pos_data/pos.train.processed.txt") 21 | except OSError: 22 | pass 23 | 24 | try: 25 | os.remove("data/pos_data/pos.test.processed.txt") 26 | except OSError: 27 | pass 28 | 29 | with open("data/pos_data/pos.train.txt") as f: 30 | train_dataset_raw = f.readlines() 31 | train_dataset_raw = [element.split() for element in train_dataset_raw if len(element.split()) > 0] 32 | 33 | counter = 0 34 | while counter < len(train_dataset_raw): 35 | pair = train_dataset_raw[counter] 36 | if counter < len(train_dataset_raw) - 1: 37 | next_pair = train_dataset_raw[counter + 1] 38 | if (pair[0] + "_" + next_pair[0] in model) and (pair[1] == next_pair[1]): 39 | train_dataset.append([pair[0] + "_" + next_pair[0], pair[1]]) 40 | counter += 2 41 | continue 42 | 43 | word = re.sub("\d", "#", pair[0]) 44 | word = re.sub("-", "_", word) 45 | 46 | if word in model: 47 | train_dataset.append([word, pair[1]]) 48 | counter += 1 49 | continue 50 | 51 | if "_" in word: 52 | subwords = word.split("_") 53 | for subword in subwords: 54 | if not (subword.isspace() or len(subword) == 0): 55 | train_dataset.append([subword, pair[1]]) 56 | counter += 1 57 | continue 58 | 59 | train_dataset.append([word, pair[1]]) 60 | counter += 1 61 | 62 | with open('data/pos_data/pos.train.processed.txt', 'w') as train_file: 63 | for item in train_dataset: 64 | train_file.write("%s\n" % (item[0] + " " + item[1])) 65 | 66 | 67 | with open("data/pos_data/pos.test.txt") as f: 68 | test_dataset_raw = f.readlines() 69 | test_dataset_raw = [element.split() for element in test_dataset_raw if len(element.split()) > 0] 70 | 71 | counter = 0 72 | while counter < len(test_dataset_raw): 73 | pair = test_dataset_raw[counter] 74 | if counter < len(test_dataset_raw) - 1: 75 | next_pair = test_dataset_raw[counter + 1] 76 | if (pair[0] + "_" + next_pair[0] in model) and (pair[1] == next_pair[1]): 77 | 
test_dataset.append([pair[0] + "_" + next_pair[0], pair[1]]) 78 | counter += 2 79 | continue 80 | 81 | word = re.sub("\d", "#", pair[0]) 82 | word = re.sub("-", "_", word) 83 | 84 | if word in model: 85 | test_dataset.append([word, pair[1]]) 86 | counter += 1 87 | continue 88 | 89 | if "_" in word: 90 | subwords = word.split("_") 91 | for subword in subwords: 92 | if not (subword.isspace() or len(subword) == 0): 93 | test_dataset.append([subword, pair[1]]) 94 | counter += 1 95 | continue 96 | 97 | test_dataset.append([word, pair[1]]) 98 | counter += 1 99 | 100 | with open('data/pos_data/pos.test.processed.txt', 'w') as test_file: 101 | for item in test_dataset: 102 | test_file.write("%s\n" % (item[0] + " " + item[1])) 103 | 104 | counter = 0 105 | for pair in train_dataset + test_dataset: 106 | dataset_vocab[pair[0]] = 1 107 | if pair[1] not in tags_to_index: 108 | tags_to_index[pair[1]] = counter 109 | index_to_tags[counter] = pair[1] 110 | counter += 1 111 | 112 | nonmodel_cache = {} 113 | 114 | counter = 1 115 | total = len(dataset_vocab.keys()) 116 | for word in dataset_vocab: 117 | if counter % 100 == 0: 118 | print "Inserted %d words out of %d total" % (counter, total) 119 | if word in model: 120 | db.Put(word, model[word]) 121 | elif word in nonmodel_cache: 122 | db.Put(word, nonmodel_cache[word]) 123 | else: 124 | print word 125 | nonmodel_cache[word] = np.random.uniform(-0.25, 0.25, 300).astype(np.float32) 126 | db.Put(word, nonmodel_cache[word]) 127 | counter += 1 128 | else: 129 | db = leveldb.LevelDB("data/word2vecdb") 130 | 131 | with open("data/pos_data/pos.train.processed.txt") as f: 132 | train_dataset = f.readlines() 133 | train_dataset = [element.split() for element in train_dataset if len(element.split()) > 0] 134 | 135 | with open("data/pos_data/pos.test.processed.txt") as f: 136 | test_dataset = f.readlines() 137 | test_dataset = [element.split() for element in test_dataset if len(element.split()) > 0] 138 | 139 | counter = 0 140 | for pair in train_dataset + test_dataset: 141 | dataset_vocab[pair[0]] = 1 142 | if pair[1] not in tags_to_index: 143 | tags_to_index[pair[1]] = counter 144 | index_to_tags[counter] = pair[1] 145 | counter += 1 146 | 147 | 148 | 149 | 150 | class POSDataset(): 151 | def __init__(self, db, dataset, tags_to_index, get_all=False): 152 | self.db = db 153 | self.inputs = [] 154 | self.tags = [] 155 | self.ptr = 0 156 | self.n = 0 157 | self.get_all = get_all 158 | 159 | for pair in dataset: 160 | self.inputs.append(np.fromstring(db.Get(pair[0]), dtype=np.float32)) 161 | self.tags.append(tags_to_index[pair[1]]) 162 | 163 | self.inputs = np.array(self.inputs, dtype=np.float32) 164 | self.tags = np.eye(len(tags_to_index.keys()))[self.tags] 165 | 166 | def prepare_n_gram(self, n): 167 | self.n = n 168 | 169 | def minibatch(self, size): 170 | batch_inputs = [] 171 | batch_tags = [] 172 | if self.get_all: 173 | counter = 0 174 | while counter < len(self.inputs) - self.n + 1: 175 | batch_inputs.append(self.inputs[counter:counter+self.n].flatten()) 176 | batch_tags.append(self.tags[counter + self.n - 1]) 177 | counter += 1 178 | elif self.ptr + size < len(self.inputs) - self.n: 179 | counter = self.ptr 180 | while counter < self.ptr + size: 181 | batch_inputs.append(self.inputs[counter:counter+self.n].flatten()) 182 | batch_tags.append(self.tags[counter + self.n - 1]) 183 | counter += 1 184 | else: 185 | counter = self.ptr 186 | while counter < len(self.inputs) - self.n + 1: 187 | batch_inputs.append(self.inputs[counter:counter+self.n].flatten()) 188 | 
batch_tags.append(self.tags[counter + self.n - 1]) 189 | counter += 1 190 | 191 | counter2 = 0 192 | while counter2 < size - counter + self.ptr: 193 | batch_inputs.append(self.inputs[counter2:counter2+self.n].flatten()) 194 | batch_tags.append(self.tags[counter2 + self.n - 1]) 195 | counter2 += 1 196 | 197 | self.ptr = (self.ptr + size) % (len(self.inputs) - self.n) 198 | return np.array(batch_inputs, dtype=np.float32), np.array(batch_tags) 199 | 200 | 201 | 202 | train = POSDataset(db, train_dataset, tags_to_index) 203 | test = POSDataset(db, test_dataset, tags_to_index, get_all=True) 204 | -------------------------------------------------------------------------------- /first_edition_archive/archive/read_tweet_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cPickle as pickle 3 | import csv, leveldb, os 4 | 5 | char_2_index = {} 6 | index_2_char = {} 7 | 8 | sentiment_2_index = {} 9 | index_2_sentiment = {} 10 | 11 | POSITIVE = 'positive' 12 | NEGATIVE = 'negative' 13 | 14 | dataset_original = [] 15 | train_dataset_raw = [] 16 | val_dataset_raw = [] 17 | 18 | db = None 19 | train_minibatches = 0 20 | 21 | if not os.path.isdir("data/twitter/tweetdb"): 22 | 23 | db = leveldb.LevelDB("data/twitter/tweetdb") 24 | 25 | 26 | with open('data/twitter/airlines_tweets.csv', 'rb') as f: 27 | reader = csv.reader(f) 28 | dataset_original = list(reader) 29 | dataset_original = dataset_original[1:] 30 | np.random.shuffle(dataset_original) 31 | 32 | 33 | counter_c = 0 34 | counter_s = 0 35 | max_row = 0 36 | 37 | val_neg = [] 38 | val_pos = [] 39 | 40 | for row in dataset_original: 41 | print row[1], row[10] 42 | if (row[1] == "positive" or row[1] == "negative") and len(row[10])<200 : 43 | if row[1] == "positive" and len(val_pos) < 128: 44 | val_pos.append(row) 45 | elif row[1] == "negative" and len(val_neg) < 128: 46 | val_neg.append(row) 47 | else: 48 | train_dataset_raw.append(row) 49 | max_row = max(len(row[10]), max_row) 50 | if row[1] not in sentiment_2_index: 51 | sentiment_2_index[row[1]] = counter_s 52 | index_2_sentiment[counter_s] = row[1] 53 | counter_s += 1 54 | for char in row[10]: 55 | if char not in char_2_index: 56 | char_2_index[char] = counter_c 57 | index_2_char[counter_c] = char 58 | counter_c += 1 59 | 60 | 61 | val_dataset_raw = val_neg + val_pos 62 | 63 | np.random.shuffle(train_dataset_raw) 64 | np.random.shuffle(val_dataset_raw) 65 | 66 | 67 | print "Dataset has max length %d" % max_row 68 | 69 | print index_2_char 70 | 71 | print index_2_sentiment 72 | 73 | train_minibatches = 0 74 | inputs = [] 75 | tags = [] 76 | for row in train_dataset_raw: 77 | if len(row[10]) > 200: 78 | continue 79 | # print row[1] 80 | if row[1] == 'positive': 81 | tags.append(1) 82 | elif row[1] == 'negative': 83 | tags.append(0) 84 | else: 85 | print "ERROR ON:", row 86 | continue 87 | 88 | print row[1], row[10] 89 | 90 | cur_input = [] 91 | for char in row[10]: 92 | cur_input.append(char_2_index[char]) 93 | cur_input = np.eye(len(char_2_index.keys()))[cur_input] 94 | init_len = len(cur_input) 95 | if 200 - init_len > 0: 96 | zero = np.zeros((200 - init_len, len(char_2_index.keys()))) 97 | cur_input = np.concatenate((cur_input, zero)) 98 | inputs.append(cur_input) 99 | 100 | print len(tags) 101 | 102 | if len(inputs) == 256: 103 | print "FINISH MINIBATCH %d, INSERT INTO DB" % train_minibatches 104 | inputs = np.array(inputs, dtype=np.float32) 105 | tags = np.eye(2, dtype=np.float32)[tags] 106 | db.Put("train_inputs_" + 
str(train_minibatches), inputs) 107 | db.Put("train_tags_" + str(train_minibatches), tags) 108 | train_minibatches += 1 109 | 110 | inputs = [] 111 | tags = [] 112 | 113 | db.Put("n_minibatches", pickle.dumps(train_minibatches)) 114 | 115 | inputs = [] 116 | tags = [] 117 | 118 | for row in val_dataset_raw: 119 | if len(row[10]) > 200: 120 | continue 121 | # print row[1] 122 | if row[1] == 'positive': 123 | tags.append(1) 124 | elif row[1] == 'negative': 125 | tags.append(0) 126 | else: 127 | print "ERROR ON:", row 128 | continue 129 | cur_input = [] 130 | for char in row[10]: 131 | cur_input.append(char_2_index[char]) 132 | cur_input = np.eye(len(char_2_index.keys()))[cur_input] 133 | init_len = len(cur_input) 134 | if 200 - init_len > 0: 135 | zero = np.zeros((200 - init_len, len(char_2_index.keys()))) 136 | cur_input = np.concatenate((cur_input, zero)) 137 | inputs.append(cur_input) 138 | 139 | if len(inputs) == 256: 140 | inputs = np.array(inputs, dtype=np.float32) 141 | tags = np.eye(2, dtype=np.float32)[tags] 142 | db.Put("val_inputs_0", inputs) 143 | db.Put("val_tags_0", tags) 144 | break 145 | else: 146 | db = leveldb.LevelDB("data/twitter/tweetdb") 147 | train_minibatches = pickle.loads(db.Get("n_minibatches")) 148 | 149 | 150 | 151 | 152 | 153 | class TweetDataset: 154 | def __init__(self, db, max_minibatch, prefix): 155 | self.ptr = 0 156 | self.prefix = prefix 157 | self.max_minibatch = max_minibatch 158 | 159 | def minibatch(self): 160 | inputs, tags = np.fromstring(db.Get(self.prefix + "_inputs_" + str(self.ptr)), dtype=np.float32).reshape((-1, 200, 155)), np.fromstring(db.Get(self.prefix + "_tags_" + str(self.ptr)), dtype=np.float32).reshape((-1, 2)) 161 | self.ptr = (self.ptr + 1) % self.max_minibatch 162 | return inputs, tags 163 | 164 | 165 | print "Start train dataset loading" 166 | 167 | train = TweetDataset(db, train_minibatches, "train") 168 | 169 | print "Start val dataset loading" 170 | 171 | val = TweetDataset(db, 1, "val") 172 | 173 | print "Finish dataset loading" 174 | -------------------------------------------------------------------------------- /first_edition_archive/archive/report.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 2 | Processing file 'linear_interpolation.py' 3 | outputting to 'linear_interpolation_updated.py' 4 | -------------------------------------------------------------------------------- 5 | 6 | 'linear_interpolation.py' Line 40 7 | -------------------------------------------------------------------------------- 8 | 9 | Renamed function 'tf.initialize_variables' to 'tf.variables_initializer' 10 | 11 | Old: init_op = tf.initialize_variables(var_list_rand) 12 | ~~~~~~~~~~~~~~~~~~~~~~~ 13 | New: init_op = tf.variables_initializer(var_list_rand) 14 | ~~~~~~~~~~~~~~~~~~~~~~~~ 15 | 16 | 'linear_interpolation.py' Line 73 17 | -------------------------------------------------------------------------------- 18 | 19 | Renamed function 'tf.merge_all_summaries' to 'tf.summary.merge_all' 20 | 21 | Old: summary_op = tf.merge_all_summaries() 22 | ~~~~~~~~~~~~~~~~~~~~~~ 23 | New: summary_op = tf.summary.merge_all() 24 | ~~~~~~~~~~~~~~~~~~~~ 25 | 26 | 'linear_interpolation.py' Line 68 27 | -------------------------------------------------------------------------------- 28 | 29 | Renamed function 'tf.scalar_summary' to 'tf.summary.scalar' 30 | 31 | Old: tf.scalar_summary("interpolated_cost", cost_inter) 32 | ~~~~~~~~~~~~~~~~~ 33 | 
New: tf.summary.scalar("interpolated_cost", cost_inter) 34 | ~~~~~~~~~~~~~~~~~ 35 | 36 | 'linear_interpolation.py' Line 71 37 | -------------------------------------------------------------------------------- 38 | 39 | Renamed function 'tf.train.SummaryWriter' to 'tf.summary.FileWriter' 40 | 41 | Old: summary_writer = tf.train.SummaryWriter("linear_interp_logs/", 42 | ~~~~~~~~~~~~~~~~~~~~~~ 43 | New: summary_writer = tf.summary.FileWriter("linear_interp_logs/", 44 | ~~~~~~~~~~~~~~~~~~~~~ 45 | 46 | 47 | -------------------------------------------------------------------------------- /first_edition_archive/archive/requirements.txt: -------------------------------------------------------------------------------- 1 | appnope==0.1.0 2 | bleach==1.5.0 3 | cycler==0.10.0 4 | decorator==4.1.2 5 | Django==1.11.4 6 | h5py==2.7.0 7 | html5lib==0.9999999 8 | image==1.5.11 9 | ipython==6.1.0 10 | ipython-genutils==0.2.0 11 | jedi==0.10.2 12 | Markdown==2.6.9 13 | matplotlib==2.0.2 14 | numpy==1.13.1 15 | olefile==0.44 16 | pexpect==4.2.1 17 | pickleshare==0.7.4 18 | Pillow==4.2.1 19 | prompt-toolkit==1.0.15 20 | protobuf==3.4.0 21 | ptyprocess==0.5.2 22 | Pygments==2.2.0 23 | pyparsing==2.2.0 24 | python-dateutil==2.6.1 25 | pytz==2017.2 26 | simplegeneric==0.8.1 27 | six==1.10.0 28 | tensorflow==1.3.0 29 | tensorflow-tensorboard==0.1.4 30 | traitlets==4.3.2 31 | wcwidth==0.1.7 32 | Werkzeug==0.12.2 33 | -------------------------------------------------------------------------------- /first_edition_archive/archive/seq2seq/INTRO.md: -------------------------------------------------------------------------------- 1 | Sequence-to-sequence, or Seq2Seq, problems have become a mainstay of modern deep learning. These are problems where the input is a non-empty variable-length sequence of tokens or data points, and our output is similarly a non-empty variable-length sequence of tokens. Many interesting and relevant problems can be phrased as sequence-to-sequence problems. For example, the canonical example of such a problem is machine translation. Here, we are tasked with designing a system that is able to translate phrases or sentences in one language to another automatically. In this case, the input sequence is a sequence of words in our source language, while the output sequence is a sequence of words in our target language. 2 | 3 | More generally, we can imagine different scenarios where we want to translate from one representation of data to another. For instance, part-of-speech tagging can be solved as a sequence-to-sequence problem, where the input sequence is a sequence of tokens, and the output sequence is the label for each token, such as a noun, verb, preposition, or something else. Other tasks can be considered as special cases of sequence-to-sequence tasks, such as sentiment analysis, where our output sequence is simply a single token denoting some metric of sentiment. 4 | 5 | However, sequence-to-sequence tasks are not restricted to language-related tasks. Arithmetic operations can be considered sequence-to-sequence problems, where the input sequence can be an array of numbers and the output is their sum, for instance. Some simple algorithmic examples include inputting an array of integers, and outputting the sorted array. Even more complex problems, such as the traveling salesman problem, can be thought of as sequence-to-sequence problems.
In this case, the input is a list of cities and their coordinates on a map, and the output is the ordering of the cities that minimizes the total distance traveled if we were to visit each city in the specified order. Finally, we need not have explicitly sequential data in order to use the techniques that are used to solve sequence-to-sequence problems. As long as we process our data or input sequentially, it is possible to use these techniques to tackle these problems. For example, we can process images by sequentially considering pixels or adjacent regions of the image, and then label smaller parts of the image. 6 | 7 | There are many more problems that can be cast, albeit sometimes inefficiently, as sequence-to-sequence problems. We encourage the reader to think of other problems that can be treated as sequence-to-sequence problems and to try using the techniques discussed in the following sections. 8 | 9 | With these problems in mind, it is now important to consider classes of models that would be useful in tackling these problems. Recurrent neural networks, such as LSTMs, are well equipped for handling sequential data, such as text or time-series data. They are amenable to learning from variable-length sequences in the input, output, or both, making them suitable to apply to any of the problems described above. Moreover, as we've seen, LSTMs are designed to handle longer-term dependencies between the inputs, and this is especially important for some of these problems. With these preliminaries in place, we can introduce specific architectures used by researchers to solve these problems. 10 | 11 | The most common approach to solving sequence-to-sequence problems is to design an encoder-decoder network. With this approach, we use the first part of the network, or the encoder network, to compress the input sequence into a fixed-size embedding vector. We use the second part of the network, or the decoder network, to decode this embedding vector into the output sequence. A high-level diagram of this process can be seen in Figure X.
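To make the encoder-decoder description above concrete, here is a minimal sketch in PyTorch, the framework used by the Second Edition notebooks. It is an illustration rather than the model used elsewhere in this archive: the vocabulary sizes, embedding and hidden dimensions, and the random batch at the end are assumptions chosen only to show the flow of tensors.

```python
# Minimal encoder-decoder (seq2seq) sketch in PyTorch; all sizes are illustrative assumptions.
import torch
import torch.nn as nn
import torch.nn.functional as F

class Seq2Seq(nn.Module):
    def __init__(self, src_vocab=1000, tgt_vocab=1000, embed_dim=64, hidden_dim=128):
        super().__init__()
        self.src_embed = nn.Embedding(src_vocab, embed_dim)
        self.tgt_embed = nn.Embedding(tgt_vocab, embed_dim)
        self.encoder = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.decoder = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.out = nn.Linear(hidden_dim, tgt_vocab)

    def forward(self, src_tokens, tgt_tokens):
        # Encoder: compress the entire source sequence into a fixed-size state (h, c).
        _, state = self.encoder(self.src_embed(src_tokens))
        # Decoder: unroll from that state over the target tokens (teacher forcing).
        dec_out, _ = self.decoder(self.tgt_embed(tgt_tokens), state)
        return self.out(dec_out)  # logits over the target vocabulary at every step

if __name__ == "__main__":
    model = Seq2Seq()
    src = torch.randint(0, 1000, (8, 12))     # batch of 8 source sequences, length 12
    tgt = torch.randint(0, 1000, (8, 16))     # batch of 8 target sequences, length 16
    dec_in, labels = tgt[:, :-1], tgt[:, 1:]  # decoder input is the target shifted right
    logits = model(src, dec_in)               # shape: (8, 15, 1000)
    loss = F.cross_entropy(logits.reshape(-1, 1000), labels.reshape(-1))
    print(logits.shape, loss.item())
```

The state tuple handed from the encoder to the decoder plays the role of the fixed-size embedding vector described in the paragraph above.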
12 | -------------------------------------------------------------------------------- /first_edition_archive/archive/seq2seq/extract_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | def main(): 4 | fname = "perplexity_data.txt" 5 | content = [] 6 | with open(fname) as f: 7 | content = f.readlines() 8 | 9 | steps, lr, st, perpl = [], [], [], [] 10 | for line in content: 11 | if "global step" in line: 12 | line_arr = line.split(" ") 13 | steps.append(int(line_arr[2])) 14 | lr.append(float(line_arr[5])) 15 | st.append(float(line_arr[7])) 16 | perpl.append(float(line_arr[9])) 17 | 18 | # for i in range(len(steps)): 19 | # print lr[i] 20 | # for i in range(len(steps)): 21 | # print st[i] 22 | for i in range(len(steps)): 23 | print perpl[i] 24 | 25 | main() 26 | 27 | -------------------------------------------------------------------------------- /first_edition_archive/archive/seq2seq/nmt_lr_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/archive/seq2seq/nmt_lr_plot.png -------------------------------------------------------------------------------- /first_edition_archive/archive/seq2seq/nmt_perplexity_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/archive/seq2seq/nmt_perplexity_plot.png -------------------------------------------------------------------------------- /first_edition_archive/archive/seq2seq/output.txt: -------------------------------------------------------------------------------- 1 | Preparing WMT data in /hdfs/mscog/t-subhup/seq2seq/data 2 | Extracting tar file /hdfs/mscog/t-subhup/seq2seq/data/training-giga-fren.tar 3 | -------------------------------------------------------------------------------- /first_edition_archive/archive/skipgram.py: -------------------------------------------------------------------------------- 1 | import input_word_data as data 2 | import numpy as np 3 | import tensorflow as tf 4 | from sklearn.manifold import TSNE 5 | import matplotlib.pyplot as plt 6 | 7 | # TRAINING PARAMETERS 8 | batch_size = 32 # Number of training examples per batch 9 | embedding_size = 128 # Dimension of embedding vectors 10 | skip_window = 5 # Window size for context to the left and right of target 11 | num_skips = 4 # How many times to reuse target to generate a label for context. 
12 | batches_per_epoch = data.data_size*num_skips/batch_size # Number of batches per epoch of training 13 | training_epochs = 5 # Number of epochs to utilize for training 14 | neg_size = 64 # Number of negative samples to use for NCE 15 | display_step = 2000 # Frequency with which to print statistics 16 | val_step = 10000 # Frequency with which to perform validation 17 | learning_rate = 0.1 # Learning rate for SGD 18 | 19 | print "Epochs: %d, Batches per epoch: %d, Examples per batch: %d" % (training_epochs, batches_per_epoch, batch_size) 20 | 21 | # NEAREST NEIGHBORS VALIDATION PARAMETERS 22 | val_size = 20 23 | val_dist_span = 500 24 | val_examples = np.random.choice(val_dist_span, val_size, replace=False) 25 | top_match = 8 26 | plot_num = 500 27 | 28 | 29 | def embedding_layer(x, embedding_shape): 30 | with tf.variable_scope("embedding"): 31 | embedding_init = tf.random_uniform(embedding_shape, -1.0, 1.0) 32 | embedding_matrix = tf.get_variable("E", initializer=embedding_init) 33 | return tf.nn.embedding_lookup(embedding_matrix, x), embedding_matrix 34 | 35 | def noise_contrastive_loss(embedding_lookup, weight_shape, bias_shape, y): 36 | with tf.variable_scope("nce"): 37 | nce_weight_init = tf.truncated_normal(weight_shape, stddev=1.0/(weight_shape[1])**0.5) 38 | nce_bias_init = tf.zeros(bias_shape) 39 | nce_W = tf.get_variable("W", initializer=nce_weight_init) 40 | nce_b = tf.get_variable("b", initializer=nce_bias_init) 41 | 42 | total_loss = tf.nn.nce_loss(nce_W, nce_b, embedding_lookup, y, neg_size, data.vocabulary_size) 43 | return tf.reduce_mean(total_loss) 44 | 45 | def training(cost, global_step): 46 | with tf.variable_scope("training"): 47 | summary_op = tf.scalar_summary("cost", cost) 48 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 49 | train_op = optimizer.minimize(cost, global_step=global_step) 50 | return train_op, summary_op 51 | 52 | def validation(embedding_matrix, x_val): 53 | norm = tf.reduce_sum(embedding_matrix**2, 1, keep_dims=True)**0.5 54 | normalized = embedding_matrix/norm 55 | val_embeddings = tf.nn.embedding_lookup(normalized, x_val) 56 | cosine_similarity = tf.matmul(val_embeddings, normalized, transpose_b=True) 57 | return normalized, cosine_similarity 58 | 59 | if __name__ == '__main__': 60 | 61 | with tf.Graph().as_default(): 62 | 63 | with tf.variable_scope("skipgram_model"): 64 | 65 | x = tf.placeholder(tf.int32, shape=[batch_size]) 66 | y = tf.placeholder(tf.int32, [batch_size, 1]) 67 | val = tf.constant(val_examples, dtype=tf.int32) 68 | global_step = tf.Variable(0, name='global_step', trainable=False) 69 | 70 | e_lookup, e_matrix = embedding_layer(x, [data.vocabulary_size, embedding_size]) 71 | 72 | cost = noise_contrastive_loss(e_lookup, [data.vocabulary_size, embedding_size], [data.vocabulary_size], y) 73 | 74 | train_op, summary_op = training(cost, global_step) 75 | 76 | val_op = validation(e_matrix, val) 77 | 78 | sess = tf.Session() 79 | 80 | train_writer = tf.train.SummaryWriter("skipgram_logs/", graph=sess.graph) 81 | 82 | init_op = tf.initialize_all_variables() 83 | 84 | sess.run(init_op) 85 | 86 | step = 0 87 | avg_cost = 0 88 | 89 | for epoch in xrange(training_epochs): 90 | for minibatch in xrange(batches_per_epoch): 91 | 92 | step +=1 93 | 94 | minibatch_x, minibatch_y = data.generate_batch(batch_size, num_skips, skip_window) 95 | feed_dict = {x : minibatch_x, y : minibatch_y} 96 | 97 | _, new_cost, train_summary = sess.run([train_op, cost, summary_op], feed_dict=feed_dict) 98 | train_writer.add_summary(train_summary, 
sess.run(global_step)) 99 | # Compute average loss 100 | avg_cost += new_cost/display_step 101 | 102 | if step % display_step == 0: 103 | print "Elapsed:", str(step), "batches. Cost =", "{:.9f}".format(avg_cost) 104 | avg_cost = 0 105 | 106 | if step % val_step == 0: 107 | _, similarity = sess.run(val_op) 108 | for i in xrange(val_size): 109 | val_word = data.reverse_dictionary[val_examples[i]] 110 | neighbors = (-similarity[i, :]).argsort()[1:top_match+1] 111 | print_str = "Nearest neighbor of %s:" % val_word 112 | for k in xrange(top_match): 113 | print_str += " %s," % data.reverse_dictionary[neighbors[k]] 114 | print print_str[:-1] 115 | 116 | final_embeddings, _ = sess.run(val_op) 117 | 118 | 119 | tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000) 120 | plot_embeddings = np.asfarray(final_embeddings[:plot_num,:], dtype='float') 121 | low_dim_embs = tsne.fit_transform(plot_embeddings) 122 | labels = [data.reverse_dictionary[i] for i in xrange(plot_num)] 123 | data.plot_with_labels(low_dim_embs, labels) 124 | -------------------------------------------------------------------------------- /first_edition_archive/archive/text8.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/archive/text8.zip -------------------------------------------------------------------------------- /first_edition_archive/archive/tsne.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/archive/tsne.png -------------------------------------------------------------------------------- /first_edition_archive/archive/twitter_lstm.py: -------------------------------------------------------------------------------- 1 | import time 2 | import uuid 3 | import os 4 | import numpy as np 5 | import tensorflow as tf 6 | from tensorflow.python import control_flow_ops 7 | from tensorflow.python.ops.rnn import dynamic_rnn 8 | from tensorflow.python.ops.rnn_cell import MultiRNNCell 9 | from lstm import LSTMCell, BNLSTMCell, orthogonal_initializer 10 | import read_tweet_data as data 11 | from sklearn.metrics import confusion_matrix 12 | 13 | batch_size = 256 14 | hidden_size = 32 15 | 16 | def layer_batch_norm(x, n_out, phase_train): 17 | beta_init = tf.constant_initializer(value=0.0, dtype=tf.float32) 18 | gamma_init = tf.constant_initializer(value=1.0, dtype=tf.float32) 19 | 20 | beta = tf.get_variable("beta", [n_out], initializer=beta_init) 21 | gamma = tf.get_variable("gamma", [n_out], initializer=gamma_init) 22 | 23 | batch_mean, batch_var = tf.nn.moments(x, [0], name='moments') 24 | ema = tf.train.ExponentialMovingAverage(decay=0.9) 25 | ema_apply_op = ema.apply([batch_mean, batch_var]) 26 | ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var) 27 | def mean_var_with_update(): 28 | with tf.control_dependencies([ema_apply_op]): 29 | return tf.identity(batch_mean), tf.identity(batch_var) 30 | mean, var = control_flow_ops.cond(phase_train, 31 | mean_var_with_update, 32 | lambda: (ema_mean, ema_var)) 33 | 34 | reshaped_x = tf.reshape(x, [-1, 1, 1, n_out]) 35 | normed = tf.nn.batch_norm_with_global_normalization(reshaped_x, mean, var, 36 | beta, gamma, 1e-3, True) 37 | return tf.reshape(normed, [-1, n_out]) 38 | 39 | def layer(input, weight_shape,
bias_shape, phase_train): 40 | weight_init = tf.random_normal_initializer(stddev=(1.0/weight_shape[0])**0.5) 41 | bias_init = tf.constant_initializer(value=0) 42 | W = tf.get_variable("W", weight_shape, 43 | initializer=weight_init) 44 | b = tf.get_variable("b", bias_shape, 45 | initializer=bias_init) 46 | logits = tf.matmul(input, W) + b 47 | return tf.nn.sigmoid(layer_batch_norm(logits, weight_shape[1], phase_train)) 48 | 49 | with tf.device('/gpu:0'): 50 | x_inp = tf.placeholder(tf.float32, [None, 200, 155]) 51 | training = tf.placeholder(tf.bool) 52 | 53 | lstm = BNLSTMCell(hidden_size, training) 54 | 55 | #c, h 56 | initialState = ( 57 | tf.random_normal([batch_size, hidden_size], stddev=0.1), 58 | tf.random_normal([batch_size, hidden_size], stddev=0.1)) 59 | 60 | outputs, state = dynamic_rnn(lstm, x_inp, initial_state=initialState) 61 | 62 | _, final_hidden = state 63 | 64 | intermediary = layer(final_hidden, [hidden_size, 2], [2], training) 65 | 66 | y = tf.nn.softmax(intermediary) 67 | 68 | y_ = tf.placeholder(tf.float32, [None, 2]) 69 | 70 | cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1])) 71 | 72 | optimizer = tf.train.AdamOptimizer() 73 | gvs = optimizer.compute_gradients(cross_entropy) 74 | capped_gvs = [(None if grad is None else tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs] 75 | train_step = optimizer.apply_gradients(capped_gvs) 76 | 77 | correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1)) 78 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 79 | 80 | # Summaries 81 | a_summary = tf.scalar_summary("accuracy", accuracy) 82 | tr_acc = tf.scalar_summary("train_accuracy", accuracy) 83 | xe_summary = tf.scalar_summary("xe_loss", cross_entropy) 84 | val_summary_op = tf.scalar_summary("val_loss", cross_entropy) 85 | for (grad, var), (capped_grad, _) in zip(gvs, capped_gvs): 86 | if grad is not None: 87 | tf.histogram_summary('grad/{}'.format(var.name), capped_grad) 88 | tf.histogram_summary('capped_fraction/{}'.format(var.name), 89 | tf.nn.zero_fraction(grad - capped_grad)) 90 | tf.histogram_summary('weight/{}'.format(var.name), var) 91 | 92 | init = tf.initialize_all_variables() 93 | 94 | sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)) 95 | sess.run(init) 96 | 97 | logdir = 'airline_logs/' 98 | print('logging to ' + logdir) 99 | writer = tf.train.SummaryWriter(logdir, sess.graph) 100 | 101 | current_time = time.time() 102 | print("Using population statistics (training: False) at test time gives worse results than batch statistics") 103 | 104 | for i in range(100000): 105 | t_batch_xs, t_batch_ys = data.train.minibatch() 106 | loss, xe_str, _, train_preds, train_acc = sess.run([cross_entropy, xe_summary, train_step, y, tr_acc], feed_dict={x_inp: t_batch_xs, y_: t_batch_ys, training: True}) 107 | step_time = time.time() - current_time 108 | writer.add_summary(xe_str, i) 109 | writer.add_summary(train_acc, i) 110 | current_time = time.time() 111 | if i % 100 == 0: 112 | batch_xs, batch_ys = data.val.minibatch() 113 | a_str, val_summary, preds = sess.run([a_summary, val_summary_op, y], feed_dict={x_inp: batch_xs, y_: batch_ys, training: False}) 114 | 115 | 116 | print train_preds[:10], t_batch_ys[:10] 117 | 118 | 119 | cnf_matrix = confusion_matrix(np.argmax(train_preds, axis=1), np.argmax(t_batch_ys, axis=1)) 120 | print "Traning Confusion Matrix:", cnf_matrix.tolist() 121 | 122 | 123 | print preds[:10], batch_ys[:10] 124 | 125 | 126 | cnf_matrix = 
confusion_matrix(np.argmax(preds, axis=1), np.argmax(batch_ys, axis=1)) 127 | print "Validation Confusion Matrix:", cnf_matrix.tolist() 128 | writer.add_summary(a_str, i) 129 | writer.add_summary(val_summary, i) 130 | print(loss, step_time) 131 | -------------------------------------------------------------------------------- /first_edition_archive/archive/word2vec_fast.py: -------------------------------------------------------------------------------- 1 | import dbm, os 2 | import cPickle as pickle 3 | from gensim.models import Word2Vec 4 | import numpy as np 5 | 6 | def save_model(model, directory): 7 | model.init_sims() # making sure syn0norm is initialised 8 | if not os.path.exists(directory): 9 | os.makedirs(directory) 10 | # Saving indexes as DBM'ed dictionary 11 | word_to_index = dbm.open(os.path.join(directory, 'word_to_index'), 'n') 12 | index_to_word = dbm.open(os.path.join(directory, 'index_to_word'), 'n') 13 | for key in model.vocab.keys(): 14 | word_to_index[key.encode('utf8')] = pickle.dumps(model.vocab[key]) 15 | index_to_word[str(model.vocab[key].index)] = key.encode('utf8') 16 | word_to_index.close() 17 | index_to_word.close() 18 | # Memory-mapping normalised word vectors 19 | syn0norm_m = np.memmap(os.path.join(directory, 'syn0norm.dat'), dtype='float32', mode='w+', shape=model.syn0norm.shape) 20 | syn0norm_m[:] = model.syn0norm[:] 21 | syn0norm_m.flush() 22 | # And pickling model object, witout data 23 | vocab, syn0norm, syn0, index2word = model.vocab, model.syn0norm, model.syn0, model.index2word 24 | model.vocab, model.syn0norm, model.syn0, model.index2word = None, None, None, None 25 | model_f = open(os.path.join(directory, 'model.pickle'), 'w') 26 | pickle.dump(model, model_f) 27 | model_f.close() 28 | model.vocab, model.syn0norm, model.syn0, model.index2word = vocab, syn0norm, syn0, index2word 29 | 30 | def load_model(directory): 31 | model = pickle.load(open(os.path.join(directory, 'model.pickle'))) 32 | model.vocab = DBMPickledDict(os.path.join(directory, 'word_to_index')) 33 | model.index2word = DBMPickledDict(os.path.join(directory, 'index_to_word')) 34 | model.syn0norm = np.memmap(os.path.join(directory, 'syn0norm.dat'), dtype='float32', mode='r', shape=(len(model.vocab.keys()), model.layer1_size)) 35 | model.syn0 = model.syn0norm 36 | return model 37 | 38 | 39 | class DBMPickledDict(dict): 40 | def __init__(self, dbm_file): 41 | self._dbm = dbm.open(dbm_file, 'r') 42 | def __setitem__(self, key, value): 43 | raise Exception("Read-only vocabulary") 44 | def __delitem__(self, key): 45 | raise Exception("Read-only vocabulary") 46 | def __iter__(self): 47 | return iter(self._dbm.keys()) 48 | def __len__(self): 49 | return len(self._dbm) 50 | def __contains__(self, key): 51 | if isinstance(key, int): 52 | key = str(key) 53 | return key in self._dbm 54 | def __getitem__(self, key): 55 | if isinstance(key, int): 56 | key = str(key) 57 | return self._dbm[key] 58 | else: 59 | return pickle.loads(self._dbm[key]) 60 | def keys(self): 61 | return self._dbm.keys() 62 | def values(self): 63 | return [self._dbm[key] for key in self._dbm.keys()] 64 | def itervalues(self): 65 | return (self._dbm[key] for key in self._dbm.keys()) 66 | -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/__init__.py -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter3/__init__.py -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter3/logistic_regression_updated.py: -------------------------------------------------------------------------------- 1 | from fdl_examples.datatools import input_data 2 | mnist = input_data.read_data_sets("../../data/", one_hot=True) 3 | 4 | import tensorflow as tf 5 | import time, shutil, os 6 | # inference() and loss() for the logistic regression model are defined below 7 | 8 | # Parameters 9 | learning_rate = 0.01 10 | training_epochs = 60 11 | batch_size = 100 12 | display_step = 1 13 | 14 | def inference(x): 15 | init = tf.constant_initializer(value=0) 16 | W = tf.get_variable("W", [784, 10], 17 | initializer=init) 18 | b = tf.get_variable("b", [10], 19 | initializer=init) 20 | output = tf.nn.softmax(tf.matmul(x, W) + b) 21 | 22 | w_hist = tf.summary.histogram("weights", W) 23 | b_hist = tf.summary.histogram("biases", b) 24 | y_hist = tf.summary.histogram("output", output) 25 | 26 | return output 27 | 28 | def loss(output, y): 29 | dot_product = y * tf.log(output) 30 | 31 | # Reduction along axis 0 collapses each column into a single 32 | # value, whereas reduction along axis 1 collapses each row 33 | # into a single value. In general, reduction along axis i 34 | # collapses the ith dimension of a tensor to size 1. 35 | xentropy = -tf.reduce_sum(dot_product, axis=1) 36 | 37 | loss = tf.reduce_mean(xentropy) 38 | 39 | return loss 40 | 41 | def training(cost, global_step): 42 | 43 | tf.summary.scalar("cost", cost) 44 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 45 | train_op = optimizer.minimize(cost, global_step=global_step) 46 | 47 | return train_op 48 | 49 | 50 | def evaluate(output, y): 51 | correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1)) 52 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 53 | 54 | tf.summary.scalar("validation error", (1.0 - accuracy)) 55 | 56 | return accuracy 57 | 58 | if __name__ == '__main__': 59 | if os.path.exists("logistic_logs/"): 60 | shutil.rmtree("logistic_logs/") 61 | 62 | with tf.Graph().as_default(): 63 | 64 | x = tf.placeholder("float", [None, 784]) # mnist data image of shape 28*28=784 65 | y = tf.placeholder("float", [None, 10]) # 0-9 digits recognition => 10 classes 66 | 67 | 68 | output = inference(x) 69 | 70 | cost = loss(output, y) 71 | 72 | global_step = tf.Variable(0, name='global_step', trainable=False) 73 | 74 | train_op = training(cost, global_step) 75 | 76 | eval_op = evaluate(output, y) 77 | 78 | summary_op = tf.summary.merge_all() 79 | 80 | saver = tf.train.Saver() 81 | 82 | sess = tf.Session() 83 | 84 | summary_writer = tf.summary.FileWriter("logistic_logs/", 85 | graph_def=sess.graph_def) 86 | 87 | 88 | init_op = tf.global_variables_initializer() 89 | 90 | sess.run(init_op) 91 | 92 | 93 | # Training cycle 94 | for epoch in range(training_epochs): 95 | 96 | avg_cost = 0.
97 | total_batch = int(mnist.train.num_examples/batch_size) 98 | # Loop over all batches 99 | for i in range(total_batch): 100 | minibatch_x, minibatch_y = mnist.train.next_batch(batch_size) 101 | # Fit training using batch data 102 | sess.run(train_op, feed_dict={x: minibatch_x, y: minibatch_y}) 103 | # Compute average loss 104 | avg_cost += sess.run(cost, feed_dict={x: minibatch_x, y: minibatch_y})/total_batch 105 | # Display logs per epoch step 106 | if epoch % display_step == 0: 107 | print("Epoch:", '%04d' % (epoch+1), "cost =", "{:.9f}".format(avg_cost)) 108 | 109 | accuracy = sess.run(eval_op, feed_dict={x: mnist.validation.images, y: mnist.validation.labels}) 110 | 111 | print("Validation Error:", (1 - accuracy)) 112 | 113 | summary_str = sess.run(summary_op, feed_dict={x: minibatch_x, y: minibatch_y}) 114 | summary_writer.add_summary(summary_str, sess.run(global_step)) 115 | 116 | saver.save(sess, "logistic_logs/model-checkpoint", global_step=global_step) 117 | 118 | 119 | print("Optimization Finished!") 120 | 121 | 122 | accuracy = sess.run(eval_op, feed_dict={x: mnist.test.images, y: mnist.test.labels}) 123 | 124 | print("Test Accuracy:", accuracy) 125 | -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter3/multilayer_perceptron_updated.py: -------------------------------------------------------------------------------- 1 | from fdl_examples.datatools import input_data 2 | mnist = input_data.read_data_sets("../../data/", one_hot=True) 3 | 4 | import tensorflow as tf 5 | import time, shutil, os 6 | 7 | # Architecture 8 | n_hidden_1 = 256 9 | n_hidden_2 = 256 10 | 11 | # Parameters 12 | learning_rate = 0.01 13 | training_epochs = 1000 14 | batch_size = 100 15 | display_step = 1 16 | 17 | def layer(input, weight_shape, bias_shape): 18 | weight_init = tf.random_normal_initializer(stddev=(2.0/weight_shape[0])**0.5) 19 | bias_init = tf.constant_initializer(value=0) 20 | W = tf.get_variable("W", weight_shape, 21 | initializer=weight_init) 22 | b = tf.get_variable("b", bias_shape, 23 | initializer=bias_init) 24 | return tf.nn.relu(tf.matmul(input, W) + b) 25 | 26 | def inference(x): 27 | with tf.variable_scope("hidden_1"): 28 | hidden_1 = layer(x, [784, n_hidden_1], [n_hidden_1]) 29 | 30 | with tf.variable_scope("hidden_2"): 31 | hidden_2 = layer(hidden_1, [n_hidden_1, n_hidden_2], [n_hidden_2]) 32 | 33 | with tf.variable_scope("output"): 34 | output = layer(hidden_2, [n_hidden_2, 10], [10]) 35 | 36 | return output 37 | 38 | def loss(output, y): 39 | xentropy = tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=y) 40 | loss = tf.reduce_mean(xentropy) 41 | return loss 42 | 43 | def training(cost, global_step): 44 | tf.summary.scalar("cost", cost) 45 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 46 | train_op = optimizer.minimize(cost, global_step=global_step) 47 | return train_op 48 | 49 | 50 | def evaluate(output, y): 51 | correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1)) 52 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 53 | tf.summary.scalar("validation", accuracy) 54 | return accuracy 55 | 56 | if __name__ == '__main__': 57 | 58 | if os.path.exists("mlp_logs/"): 59 | shutil.rmtree("mlp_logs/") 60 | 61 | with tf.Graph().as_default(): 62 | 63 | with tf.variable_scope("mlp_model"): 64 | 65 | x = tf.placeholder("float", [None, 784]) # mnist data image of shape 28*28=784 66 | y = tf.placeholder("float", [None, 10]) # 0-9 digits recognition => 10 
classes 67 | 68 | 69 | output = inference(x) 70 | 71 | cost = loss(output, y) 72 | 73 | global_step = tf.Variable(0, name='global_step', trainable=False) 74 | 75 | train_op = training(cost, global_step) 76 | 77 | eval_op = evaluate(output, y) 78 | 79 | summary_op = tf.summary.merge_all() 80 | 81 | saver = tf.train.Saver() 82 | 83 | sess = tf.Session() 84 | 85 | summary_writer = tf.summary.FileWriter("mlp_logs/", 86 | graph_def=sess.graph_def) 87 | 88 | 89 | init_op = tf.global_variables_initializer() 90 | 91 | sess.run(init_op) 92 | 93 | # saver.restore(sess, "mlp_logs/model-checkpoint-66000") 94 | 95 | 96 | # Training cycle 97 | for epoch in range(training_epochs): 98 | 99 | avg_cost = 0. 100 | total_batch = int(mnist.train.num_examples/batch_size) 101 | # Loop over all batches 102 | for i in range(total_batch): 103 | minibatch_x, minibatch_y = mnist.train.next_batch(batch_size) 104 | # Fit training using batch data 105 | sess.run(train_op, feed_dict={x: minibatch_x, y: minibatch_y}) 106 | # Compute average loss 107 | avg_cost += sess.run(cost, feed_dict={x: minibatch_x, y: minibatch_y})/total_batch 108 | # Display logs per epoch step 109 | if epoch % display_step == 0: 110 | print("Epoch:", '%04d' % (epoch+1), "cost =", "{:.9f}".format(avg_cost)) 111 | 112 | accuracy = sess.run(eval_op, feed_dict={x: mnist.validation.images, y: mnist.validation.labels}) 113 | 114 | print("Validation Error:", (1 - accuracy)) 115 | 116 | summary_str = sess.run(summary_op, feed_dict={x: minibatch_x, y: minibatch_y}) 117 | summary_writer.add_summary(summary_str, sess.run(global_step)) 118 | 119 | saver.save(sess, "mlp_logs/model-checkpoint", global_step=global_step) 120 | 121 | 122 | print("Optimization Finished!") 123 | 124 | 125 | accuracy = sess.run(eval_op, feed_dict={x: mnist.test.images, y: mnist.test.labels}) 126 | 127 | print("Test Accuracy:", accuracy) 128 | -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/__init__.py -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "model-checkpoint-550000" 2 | all_model_checkpoint_paths: "model-checkpoint-547800" 3 | all_model_checkpoint_paths: "model-checkpoint-548350" 4 | all_model_checkpoint_paths: "model-checkpoint-548900" 5 | all_model_checkpoint_paths: "model-checkpoint-549450" 6 | all_model_checkpoint_paths: "model-checkpoint-550000" 7 | -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/events.out.tfevents.1503341411.Nikhils-MacBook-Pro.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/events.out.tfevents.1503341411.Nikhils-MacBook-Pro.local -------------------------------------------------------------------------------- 
/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-547800.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-547800.data-00000-of-00001 -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-547800.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-547800.index -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-547800.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-547800.meta -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548350.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548350.data-00000-of-00001 -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548350.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548350.index -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548350.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548350.meta -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548900.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548900.data-00000-of-00001 -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548900.index: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548900.index -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548900.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548900.meta -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-549450.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-549450.data-00000-of-00001 -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-549450.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-549450.index -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-549450.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-549450.meta -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-550000.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-550000.data-00000-of-00001 -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-550000.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-550000.index -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-550000.meta: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-550000.meta -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/linear_interpolation_updated.py: -------------------------------------------------------------------------------- 1 | from fdl_examples.datatools import input_data 2 | mnist = input_data.read_data_sets("../../data/", one_hot=True) 3 | 4 | import tensorflow as tf 5 | import numpy as np 6 | from fdl_examples.chapter3.multilayer_perceptron_updated import inference, loss 7 | 8 | import matplotlib.pyplot as plt 9 | 10 | sess = tf.Session() 11 | 12 | x = tf.placeholder("float", [None, 784]) # mnist data image of shape 28*28=784 13 | y = tf.placeholder("float", [None, 10]) # 0-9 digits recognition => 10 classes 14 | 15 | 16 | saver = tf.train.import_meta_graph('frozen_mlp_checkpoint/model-checkpoint-547800.meta') 17 | saver.restore(sess, 'frozen_mlp_checkpoint/model-checkpoint-547800') 18 | 19 | var_list_opt = [None, None, None, None, None, None] 20 | name_2_index = { 21 | "mlp_model/hidden_1/W:0" : 0, 22 | "mlp_model/hidden_1/b:0" : 1, 23 | "mlp_model/hidden_2/W:0" : 2, 24 | "mlp_model/hidden_2/b:0" : 3, 25 | "mlp_model/output/W:0" : 4, 26 | "mlp_model/output/b:0" : 5 27 | } 28 | 29 | for v in tf.trainable_variables(): 30 | if v.name in name_2_index: 31 | index = name_2_index[v.name] 32 | var_list_opt[index] = v 33 | h1_opt = tf.nn.relu(tf.matmul(x, var_list_opt[0]) + var_list_opt[1]) # forward pass through the restored (trained) weights 34 | h2_opt = tf.nn.relu(tf.matmul(h1_opt, var_list_opt[2]) + var_list_opt[3]) 35 | cost_opt = loss(tf.nn.relu(tf.matmul(h2_opt, var_list_opt[4]) + var_list_opt[5]), y) 36 | with tf.variable_scope("mlp_init") as scope: 37 | 38 | output_rand = inference(x) 39 | cost_rand = loss(output_rand, y) 40 | 41 | scope.reuse_variables() 42 | 43 | var_list_rand = ["hidden_1/W", "hidden_1/b", "hidden_2/W", "hidden_2/b", "output/W", "output/b"] 44 | var_list_rand = [tf.get_variable(v) for v in var_list_rand] 45 | 46 | init_op = tf.variables_initializer(var_list_rand) 47 | 48 | sess.run(init_op) 49 | 50 | 51 | feed_dict = { 52 | x: mnist.test.images, 53 | y: mnist.test.labels, 54 | } 55 | 56 | print(sess.run([cost_opt, cost_rand], feed_dict=feed_dict)) 57 | 58 | with tf.variable_scope("mlp_inter") as scope: 59 | 60 | alpha = tf.placeholder("float", [1, 1]) 61 | 62 | h1_W_inter = var_list_opt[0] * (1 - alpha) + var_list_rand[0] * (alpha) 63 | h1_b_inter = var_list_opt[1] * (1 - alpha) + var_list_rand[1] * (alpha) 64 | h2_W_inter = var_list_opt[2] * (1 - alpha) + var_list_rand[2] * (alpha) 65 | h2_b_inter = var_list_opt[3] * (1 - alpha) + var_list_rand[3] * (alpha) 66 | o_W_inter = var_list_opt[4] * (1 - alpha) + var_list_rand[4] * (alpha) 67 | o_b_inter = var_list_opt[5] * (1 - alpha) + var_list_rand[5] * (alpha) 68 | 69 | h1_inter = tf.nn.relu(tf.matmul(x, h1_W_inter) + h1_b_inter) 70 | h2_inter = tf.nn.relu(tf.matmul(h1_inter, h2_W_inter) + h2_b_inter) 71 | o_inter = tf.nn.relu(tf.matmul(h2_inter, o_W_inter) + o_b_inter) 72 | 73 | cost_inter = loss(o_inter, y) 74 | tf.summary.scalar("interpolated_cost", cost_inter) 75 | 76 | 77 | summary_writer = tf.summary.FileWriter("linear_interp_logs/", 78 | graph_def=sess.graph_def) 79 | summary_op = tf.summary.merge_all() 80 | results = [] 81 | for a in np.arange(-2, 2, 0.01): 82 | feed_dict = { 83 | x: mnist.test.images, 84 | y: mnist.test.labels, 85 | alpha: [[a]], 86 | } 87 | 88 | cost, summary_str = sess.run([cost_inter, summary_op], feed_dict=feed_dict) 89 | summary_writer.add_summary(summary_str, (a + 2)/0.01) 90 | results.append(cost) 91 | 92 | plt.plot(np.arange(-2, 2,
0.01), results, 'ro') 93 | plt.ylabel('Incurred Error') 94 | plt.xlabel('Alpha') 95 | plt.show() 96 | 97 | 98 | -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter9/dqn_plot_final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter9/dqn_plot_final.png -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/datatools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/datatools/__init__.py -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/datatools/input_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Functions for downloading and reading MNIST data.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | import gzip 20 | import os 21 | import tensorflow.python.platform 22 | import numpy 23 | from six.moves import urllib 24 | from six.moves import xrange # pylint: disable=redefined-builtin 25 | import tensorflow as tf 26 | SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/' 27 | def maybe_download(filename, work_directory): 28 | """Download the data from Yann's website, unless it's already here.""" 29 | if not os.path.exists(work_directory): 30 | os.mkdir(work_directory) 31 | filepath = os.path.join(work_directory, filename) 32 | if not os.path.exists(filepath): 33 | filepath, _ = urllib.request.urlretrieve(SOURCE_URL + filename, filepath) 34 | statinfo = os.stat(filepath) 35 | print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') 36 | return filepath 37 | def _read32(bytestream): 38 | dt = numpy.dtype(numpy.uint32).newbyteorder('>') 39 | return numpy.frombuffer(bytestream.read(4), dtype=dt)[0] 40 | def extract_images(filename): 41 | """Extract the images into a 4D uint8 numpy array [index, y, x, depth].""" 42 | print('Extracting', filename) 43 | with gzip.open(filename) as bytestream: 44 | magic = _read32(bytestream) 45 | if magic != 2051: 46 | raise ValueError( 47 | 'Invalid magic number %d in MNIST image file: %s' % 48 | (magic, filename)) 49 | num_images = _read32(bytestream) 50 | rows = _read32(bytestream) 51 | cols = _read32(bytestream) 52 | buf = bytestream.read(rows * cols * num_images) 53 | data = numpy.frombuffer(buf, dtype=numpy.uint8) 54 | data = data.reshape(num_images, rows, cols, 1) 55 | return data 56 | def dense_to_one_hot(labels_dense, num_classes=10): 57 | """Convert class labels from scalars to one-hot vectors.""" 58 | num_labels = labels_dense.shape[0] 59 | index_offset = numpy.arange(num_labels) * num_classes 60 | labels_one_hot = numpy.zeros((num_labels, num_classes)) 61 | labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1 62 | return labels_one_hot 63 | def extract_labels(filename, one_hot=False): 64 | """Extract the labels into a 1D uint8 numpy array [index].""" 65 | print('Extracting', filename) 66 | with gzip.open(filename) as bytestream: 67 | magic = _read32(bytestream) 68 | if magic != 2049: 69 | raise ValueError( 70 | 'Invalid magic number %d in MNIST label file: %s' % 71 | (magic, filename)) 72 | num_items = _read32(bytestream) 73 | buf = bytestream.read(num_items) 74 | labels = numpy.frombuffer(buf, dtype=numpy.uint8) 75 | if one_hot: 76 | return dense_to_one_hot(labels) 77 | return labels 78 | class DataSet(object): 79 | def __init__(self, images, labels, fake_data=False, one_hot=False, 80 | dtype=tf.float32): 81 | """Construct a DataSet. 82 | one_hot arg is used only if fake_data is true. `dtype` can be either 83 | `uint8` to leave the input as `[0, 255]`, or `float32` to rescale into 84 | `[0, 1]`. 
85 | """ 86 | dtype = tf.as_dtype(dtype).base_dtype 87 | if dtype not in (tf.uint8, tf.float32): 88 | raise TypeError('Invalid image dtype %r, expected uint8 or float32' % 89 | dtype) 90 | if fake_data: 91 | self._num_examples = 10000 92 | self.one_hot = one_hot 93 | else: 94 | assert images.shape[0] == labels.shape[0], ( 95 | 'images.shape: %s labels.shape: %s' % (images.shape, 96 | labels.shape)) 97 | self._num_examples = images.shape[0] 98 | # Convert shape from [num examples, rows, columns, depth] 99 | # to [num examples, rows*columns] (assuming depth == 1) 100 | assert images.shape[3] == 1 101 | images = images.reshape(images.shape[0], 102 | images.shape[1] * images.shape[2]) 103 | if dtype == tf.float32: 104 | # Convert from [0, 255] -> [0.0, 1.0]. 105 | images = images.astype(numpy.float32) 106 | images = numpy.multiply(images, 1.0 / 255.0) 107 | self._images = images 108 | self._labels = labels 109 | self._epochs_completed = 0 110 | self._index_in_epoch = 0 111 | @property 112 | def images(self): 113 | return self._images 114 | @property 115 | def labels(self): 116 | return self._labels 117 | @property 118 | def num_examples(self): 119 | return self._num_examples 120 | @property 121 | def epochs_completed(self): 122 | return self._epochs_completed 123 | def next_batch(self, batch_size, fake_data=False): 124 | """Return the next `batch_size` examples from this data set.""" 125 | if fake_data: 126 | fake_image = [1] * 784 127 | if self.one_hot: 128 | fake_label = [1] + [0] * 9 129 | else: 130 | fake_label = 0 131 | return [fake_image for _ in xrange(batch_size)], [ 132 | fake_label for _ in xrange(batch_size)] 133 | start = self._index_in_epoch 134 | self._index_in_epoch += batch_size 135 | if self._index_in_epoch > self._num_examples: 136 | # Finished epoch 137 | self._epochs_completed += 1 138 | # Shuffle the data 139 | perm = numpy.arange(self._num_examples) 140 | numpy.random.shuffle(perm) 141 | self._images = self._images[perm] 142 | self._labels = self._labels[perm] 143 | # Start next epoch 144 | start = 0 145 | self._index_in_epoch = batch_size 146 | assert batch_size <= self._num_examples 147 | end = self._index_in_epoch 148 | return self._images[start:end], self._labels[start:end] 149 | def read_data_sets(train_dir, fake_data=False, one_hot=False, dtype=tf.float32): 150 | class DataSets(object): 151 | pass 152 | data_sets = DataSets() 153 | if fake_data: 154 | def fake(): 155 | return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype) 156 | data_sets.train = fake() 157 | data_sets.validation = fake() 158 | data_sets.test = fake() 159 | return data_sets 160 | TRAIN_IMAGES = 'train-images-idx3-ubyte.gz' 161 | TRAIN_LABELS = 'train-labels-idx1-ubyte.gz' 162 | TEST_IMAGES = 't10k-images-idx3-ubyte.gz' 163 | TEST_LABELS = 't10k-labels-idx1-ubyte.gz' 164 | VALIDATION_SIZE = 5000 165 | local_file = maybe_download(TRAIN_IMAGES, train_dir) 166 | train_images = extract_images(local_file) 167 | local_file = maybe_download(TRAIN_LABELS, train_dir) 168 | train_labels = extract_labels(local_file, one_hot=one_hot) 169 | local_file = maybe_download(TEST_IMAGES, train_dir) 170 | test_images = extract_images(local_file) 171 | local_file = maybe_download(TEST_LABELS, train_dir) 172 | test_labels = extract_labels(local_file, one_hot=one_hot) 173 | validation_images = train_images[:VALIDATION_SIZE] 174 | validation_labels = train_labels[:VALIDATION_SIZE] 175 | train_images = train_images[VALIDATION_SIZE:] 176 | train_labels = train_labels[VALIDATION_SIZE:] 177 | data_sets.train = 
DataSet(train_images, train_labels, dtype=dtype) 178 | data_sets.validation = DataSet(validation_images, validation_labels, 179 | dtype=dtype) 180 | data_sets.test = DataSet(test_images, test_labels, dtype=dtype) 181 | return data_sets --------------------------------------------------------------------------------
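A minimal usage sketch for the archived MNIST loader above, mirroring how the chapter scripts in fdl_examples call it (the "../../data/" path is simply the location those scripts assume, not a requirement of the module):

from fdl_examples.datatools import input_data

# Downloads the four MNIST archives into the given directory on first use, then reuses them.
mnist = input_data.read_data_sets("../../data/", one_hot=True)

# Each DataSet exposes flattened 784-dimensional images plus (here) one-hot labels.
print(mnist.train.num_examples)

# next_batch() returns a (batch_size, 784) image array and the matching labels,
# reshuffling the training set each time an epoch is exhausted.
batch_xs, batch_ys = mnist.train.next_batch(100)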