├── .gitignore ├── README.md ├── ch05_implementing_nn_pytorch ├── Ch05_PyTorch.ipynb └── data │ └── train │ ├── img_0.jpg │ ├── img_1.jpg │ ├── img_2.jpg │ ├── img_3.jpg │ └── labels.npy ├── ch06_beyond_gradient_descent └── Ch06_Beyond_GD.ipynb ├── ch07_cnn └── Ch07_CNN.ipynb ├── ch08_embedding_representation └── Ch08_Embedding.ipynb ├── ch09_models_for_sequence_analysis ├── Ch09_01_POS_Tagger.ipynb ├── Ch09_02_RNN.ipynb ├── Ch09_03_LSTM_Sentiment.ipynb └── Ch09_04_Dissecting_NTN.ipynb ├── ch10_generative_models └── Ch10_Generative.ipynb ├── ch12_memory_augmented_nn └── Ch12_MemoryAugmented.ipynb ├── ch13_deep_reinforcement_learning ├── Ch13_01_RL_Pole_Cart.ipynb └── Ch13_02_RL_DQN_Breakout.ipynb └── first_edition_archive ├── archive ├── README.md ├── ast_edits.py ├── autoencoder_mnist.py ├── autoencoder_tsne.py ├── autoencoder_vs_pca.py ├── cifar10_input.py ├── convnet_cifar.py ├── convnet_cifar_bn.py ├── convnet_mnist.py ├── denoising_autoencoder_mnist.py ├── dnc │ ├── mem_ops.py │ ├── preprocess.py │ ├── test_babi.py │ └── train_babi.py ├── download_tweets.py ├── feed_forward_network-[THEANO] │ ├── feed_forward_network.py │ ├── hidden_layer.py │ ├── mnist_feed_forward_sgd.py │ └── softmax_layer.py ├── feedforward_pos.py ├── imdb_bn_lstm.py ├── imdb_lstm.py ├── imdb_ohlstm.py ├── input_data.py ├── input_word_data.py ├── linear_interpolation.py ├── logistic_network-[THEANO] │ ├── logistic_network.py │ └── mnist_logistic_sgd.py ├── logistic_regression.py ├── logistic_regression.py.ipynb ├── lstm.py ├── multilayer_perceptron.ipynb ├── multilayer_perceptron.py ├── neural_style │ ├── main.py │ ├── stylize.py │ └── vgg.py ├── one_layer_autoencoder.py ├── optimzer_mlp.py ├── random_walk.py ├── read_16M_tweet_data.py ├── read_imdb_data.py ├── read_pos_data.py ├── read_tweet_data.py ├── report.txt ├── requirements.txt ├── seq2seq │ ├── INFO.md │ ├── INTRO.md │ ├── data_utils.py │ ├── extract_data.py │ ├── nmt_lr_plot.png │ ├── nmt_perplexity_plot.png │ ├── old_seq2seq.py │ ├── output.txt │ ├── perplexity_data.txt │ ├── seq2seq.py │ ├── seq2seq_model.py │ ├── tmp_seq2seq_model.py │ └── translate.py ├── skipgram.py ├── text8.zip ├── tf_upgrade.py ├── tsne.png ├── twitter_lstm.py └── word2vec_fast.py └── fdl_examples ├── __init__.py ├── chapter3 ├── __init__.py ├── logistic_regression_updated.py └── multilayer_perceptron_updated.py ├── chapter4 ├── __init__.py ├── frozen_mlp_checkpoint │ ├── checkpoint │ ├── events.out.tfevents.1503341411.Nikhils-MacBook-Pro.local │ ├── model-checkpoint-547800.data-00000-of-00001 │ ├── model-checkpoint-547800.index │ ├── model-checkpoint-547800.meta │ ├── model-checkpoint-548350.data-00000-of-00001 │ ├── model-checkpoint-548350.index │ ├── model-checkpoint-548350.meta │ ├── model-checkpoint-548900.data-00000-of-00001 │ ├── model-checkpoint-548900.index │ ├── model-checkpoint-548900.meta │ ├── model-checkpoint-549450.data-00000-of-00001 │ ├── model-checkpoint-549450.index │ ├── model-checkpoint-549450.meta │ ├── model-checkpoint-550000.data-00000-of-00001 │ ├── model-checkpoint-550000.index │ └── model-checkpoint-550000.meta └── linear_interpolation_updated.py ├── chapter9 ├── dqn.py ├── dqn_plot_final.png └── policy_gradient_cartpole.py └── datatools ├── __init__.py └── input_data.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Data files 2 | *.gz 3 | data/ 4 | !data/README.md 5 | !ch05_implementing_nn_pytorch/data/ 6 | 7 | # babi saved model 8 | dnc/babi-model 9 | 10 | # Python 11 | *.pyc 12 | 13 | # Mac OS X 
custom attribute files
14 | .DS_Store
15 | 
16 | # Logs
17 | *logs*/
18 | 
19 | .ipynb_checkpoints
20 | MNIST
21 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Fundamentals of Deep Learning
2 | 
3 | This repository is the code companion to [Fundamentals of Deep Learning, Second Edition](https://www.amazon.com/Fundamentals-Deep-Learning-Next-Generation-Intelligence/dp/1491925612 "Fundamentals of Deep Learning") by Nithin Buduma, [Nikhil Buduma](https://github.com/darksigma "Nikhil Buduma"), and [Joe Papa](https://github.com/joe-papa "Joe Papa"), with contributions from [Nicholas Locascio](https://github.com/nicholaslocascio "Nicholas Locascio"). Contributions to the text and code have also been made by [Mostafa Samir](https://github.com/Mostafa-Samir "Mostafa Samir"), [Surya Bhupatiraju](https://github.com/suryabhupa "Surya Bhupatiraju"), and [Anish Athalye](https://github.com/anishathalye "Anish Athalye"). All algorithms in the Second Edition are implemented in [PyTorch](https://www.pytorch.org/ "PyTorch"), one of the most popular machine learning frameworks.
4 | 
5 | ## Guide to the repository
6 | 
7 | This repo contains the code from the Second Edition of Fundamentals of Deep Learning, published in 2022. Code from the First Edition is archived in the first_edition_archive folder. Where a chapter of the Second Edition has accompanying code, it lives in the correspondingly named folder. Most examples are provided as Google Colab notebooks; in some cases, .py files are also included so the examples can be run more conveniently outside a notebook.
8 | 
9 | ## Setting up your development environment
10 | You can run the Google Colab notebooks directly from this GitHub repo. [See instructions on using Google Colab with GitHub here](https://colab.research.google.com/github/googlecolab/colabtools/blob/master/notebooks/colab-github-demo.ipynb "Using Google Colab with Github").
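If you prefer to run the notebooks locally rather than in Colab, you will need a Python environment with at least PyTorch installed (individual notebooks may pull in additional libraries). The snippet below is a minimal editorial sketch for sanity-checking such an environment before opening the notebooks; it is not an official setup script from the book.

```python
# Minimal environment check (assumes the torch package is already installed,
# e.g. by following the install selector on pytorch.org).
import torch

print("PyTorch version:", torch.__version__)          # prints the installed version string
print("CUDA available:", torch.cuda.is_available())   # False simply means the notebooks will run on CPU
```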
11 | 12 | -------------------------------------------------------------------------------- /ch05_implementing_nn_pytorch/data/train/img_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/ch05_implementing_nn_pytorch/data/train/img_0.jpg -------------------------------------------------------------------------------- /ch05_implementing_nn_pytorch/data/train/img_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/ch05_implementing_nn_pytorch/data/train/img_1.jpg -------------------------------------------------------------------------------- /ch05_implementing_nn_pytorch/data/train/img_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/ch05_implementing_nn_pytorch/data/train/img_2.jpg -------------------------------------------------------------------------------- /ch05_implementing_nn_pytorch/data/train/img_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/ch05_implementing_nn_pytorch/data/train/img_3.jpg -------------------------------------------------------------------------------- /ch05_implementing_nn_pytorch/data/train/labels.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/ch05_implementing_nn_pytorch/data/train/labels.npy -------------------------------------------------------------------------------- /ch09_models_for_sequence_analysis/Ch09_02_RNN.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Ch09_02_RNN.ipynb","provenance":[{"file_id":"1XL97FXDkJDFMjM4M_FjCslCrdcanzDE8","timestamp":1641846356338}],"authorship_tag":"ABX9TyM1mSoOVpEqGcIzGXAfkEg8"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["# PyTorch Primitives for RNN Models"],"metadata":{"id":"JdVhVjN-llAA"}},{"cell_type":"code","source":["import torch\n","import torch.nn as nn"],"metadata":{"id":"ZWlouOwETInM"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["cell_1 = nn.RNNCell(input_size = 10,\n"," hidden_size = 20, \n"," nonlinearity='tanh')\n","\n","cell_2 = nn.LSTMCell(input_size = 10,\n"," hidden_size = 20)\n","\n","cell_3 = nn.GRUCell(input_size = 10, \n"," hidden_size = 20)"],"metadata":{"id":"IWsgDePImGGp"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Stacking recurrent units\n","cell_1 = nn.LSTMCell(input_size = 10, \n"," hidden_size = 20)\n","cell_2 = nn.LSTMCell(input_size = 20, \n"," hidden_size = 20)\n","\n","full_cell = nn.Sequential(cell_1, cell_2)"],"metadata":{"id":"czzPCIotmPEU"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Run LSTM Cell"],"metadata":{"id":"7ruPWI82TVHf"}},{"cell_type":"code","source":["input = torch.randn(2, 3, 10) # (time_steps, batch, 
input_size)\n","hx_init = torch.randn(3, 20) # hidden state of size: (batch_size, hidden_size)\n","# output of output gate\n","cx_init = torch.randn(3, 20) # cell state of size: (batch_size, hidden_size)\n","# output of write gate\n","output = []\n","\n","# loop over time_steps\n","hx, cx = hx_init, cx_init\n","for t in range(input.size()[0]):\n"," hx, cx = cell_1(input[t], (hx, cx)) # input[t] is size (batch_size, input_size)\n"," hx2, cx2 = cell_2(hx, (hx, cx)) # input[t] is size (batch_size, input_size)\n"," output.append(hx2)\n","output = torch.stack(output, dim=0) # shape is (time_steps, batch_size, input_size)"],"metadata":{"id":"ICdrzWnATYlf"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# multi-layer RNN and LSTM\n","multi_layer_rnn = nn.RNN(input_size = 10, \n"," hidden_size = 20, \n"," num_layers = 2,\n"," nonlinearity = 'tanh')\n","\n","multi_layer_lstm = nn.LSTM(input_size = 10, \n"," hidden_size = 20, \n"," num_layers = 2)"],"metadata":{"id":"noeIWbGtUuX6"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["output2 = []\n","\n","# loop over time_steps\n","hx, cx = hx_init, cx_init\n","for t in range(input.size()[0]):\n"," hx, cx = cell_1(input[t], (hx, cx)) # input[t] is size (batch_size, input_size)\n"," hx2, cx2 = cell_2(hx, (hx, cx)) # input[t] is size (batch_size, input_size)\n"," output2.append(hx2)\n","output2 = torch.stack(output2, dim=0) # shape is (time_steps, batch_size, input_size)"],"metadata":{"id":"CrfO853DUk2j"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["torch.all(output == output2)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"9wmZH08eVN9M","executionInfo":{"status":"ok","timestamp":1642042045904,"user_tz":300,"elapsed":93,"user":{"displayName":"Joe Papa","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"00487850786587503652"}},"outputId":"f0ebbccd-9689-4b14-d1f1-4633e5b70524"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["tensor(True)"]},"metadata":{},"execution_count":19}]},{"cell_type":"code","source":["# multi-layer RNN and LSTM with other settings\n","multi_layer_rnn = nn.RNN(input_size = 10, \n"," hidden_size = 20, \n"," num_layers = 2,\n"," nonlinearity = 'tanh',\n"," batch_first = False,\n"," dropout = 0.5)\n","\n","multi_layer_lstm = nn.LSTM(input_size = 10, \n"," hidden_size = 20, \n"," num_layers = 2,\n"," batch_first = False,\n"," dropout = 0.5)"],"metadata":{"id":"QB1Nwd8dXFNK"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Apply dropouts \n","# If dropout parameter is non-zero, the model introduces \n","# a Dropout layer on # the outputs of each LSTM layer \n","# except the last layer, with dropout probability equal to dropout. 
\n","# Default: 0\n","input_size = 32\n","\n","cell_1 = nn.LSTM(input_size,\n"," hidden_size = 10, \n"," num_layers=2,\n"," dropout = 1.0)"],"metadata":{"id":"HRJSX9fBmPHE"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["rnn = nn.LSTM(input_size = 32, \n"," hidden_size = 20, \n"," num_layers = 1,\n"," batch_first= False)\n","\n","inputs = torch.randn((32, 32, 32))\n","output, states = rnn(inputs)"],"metadata":{"id":"jNQYHSVIRQXZ","executionInfo":{"status":"ok","timestamp":1642046030147,"user_tz":300,"elapsed":130,"user":{"displayName":"Joe Papa","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"00487850786587503652"}}},"execution_count":25,"outputs":[]},{"cell_type":"code","source":["# LSTM in action"],"metadata":{"id":"6FPrCOwxYOwm","executionInfo":{"status":"ok","timestamp":1642046030981,"user_tz":300,"elapsed":109,"user":{"displayName":"Joe Papa","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"00487850786587503652"}}},"execution_count":26,"outputs":[]},{"cell_type":"code","source":["input = torch.randn(5, 3, 10) # (time_steps, batch, input_size)\n","h_0 = torch.randn(2, 3, 20) # (n_layers, batch_size, hidden_size)\n","c_0 = torch.randn(2, 3, 20) # (n_layers, batch_size, hidden_size)\n","\n","rnn = nn.LSTM(10, 20, 2) # (input_size, hidden_size, num_layers)\n","output_n, (hn, cn) = rnn(input, (h_0, c_0))"],"metadata":{"id":"2u7pR2sxYrdD","executionInfo":{"status":"ok","timestamp":1642046048985,"user_tz":300,"elapsed":108,"user":{"displayName":"Joe Papa","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"00487850786587503652"}}},"execution_count":28,"outputs":[]},{"cell_type":"code","source":[""],"metadata":{"id":"CaEGEWJRZGK1"},"execution_count":null,"outputs":[]}]} -------------------------------------------------------------------------------- /ch12_memory_augmented_nn/Ch12_MemoryAugmented.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Ch12_MemoryAugmented.ipynb", 7 | "provenance": [] 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | }, 13 | "language_info": { 14 | "name": "python" 15 | } 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "source": [ 21 | "#Implementing the DNC in PyTorch" 22 | ], 23 | "metadata": { 24 | "id": "AmTCQQ41h1gs" 25 | } 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "source": [ 30 | "# PyTorch Code\n", 31 | "Only the code printed in the book has been converted to PyTorch. This notebook contains that code.\n", 32 | "\n", 33 | "However, the entire code set needed to run the training and testing of bAbI has not yet been converted. 
For reference, the complete codeset can be found at https://github.com/darksigma/Fundamentals-of-Deep-Learning-Book/tree/master/first_edition_archive/archive/dnc\n", 34 | "\n", 35 | "The folder above contains the files referenced in the book:\n", 36 | "- mem_ops.py\n", 37 | "- preprocess.py\n", 38 | "- train_babi.py\n", 39 | "- test_babi.py" 40 | ], 41 | "metadata": { 42 | "id": "2XvSCp2f7foV" 43 | } 44 | }, 45 | { 46 | "cell_type": "code", 47 | "source": [ 48 | "import torch\n", 49 | "def Lt(L, wwt, p, N):\n", 50 | "\n", 51 | " L_t = torch.zeros((N,N), dtype=torch.float32)\n", 52 | " for i in range(N):\n", 53 | " for j in range(N):\n", 54 | " if i == j:\n", 55 | " continue\n", 56 | " mask = torch.zeros((N,N), dtype=torch.float32)\n", 57 | " mask[i,j] = 1.0\n", 58 | " \n", 59 | " link_t = (1 - wwt[i] - wwt[j]) * L[i,j] + \\\n", 60 | " wwt[i] * p[j]\n", 61 | " L_t += mask * link_t\n", 62 | " return L_t" 63 | ], 64 | "metadata": { 65 | "id": "g-Fp-Zyr8lpV" 66 | }, 67 | "execution_count": null, 68 | "outputs": [] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "source": [ 73 | "# sample test code\n", 74 | "N = 10\n", 75 | "L = torch.randn((N,N))\n", 76 | "wwt = torch.randn(N)\n", 77 | "p = torch.randn(N)\n", 78 | "\n", 79 | "L_t = Lt(L, wwt, p, N)" 80 | ], 81 | "metadata": { 82 | "id": "gGxEXcFe8pFd" 83 | }, 84 | "execution_count": null, 85 | "outputs": [] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "source": [ 90 | "def Lt(L, wwt, p, N):\n", 91 | " \"\"\"\n", 92 | " returns the updated link matrix given the previous one along\n", 93 | " with the updated write weightings and the previous precedence\n", 94 | " vector\n", 95 | " \"\"\"\n", 96 | " def pairwise_add(v):\n", 97 | " \"\"\"\n", 98 | " returns the matrix of pairs - adding the elements of v to\n", 99 | " themselves\n", 100 | " \"\"\"\n", 101 | " n = v.shape[0]\n", 102 | " # a NxN matrix of duplicates of u along the columns\n", 103 | " V = v.repeat(1,n) \n", 104 | " return V + V\n", 105 | "\n", 106 | " # expand dimensions of wwt and p to make matmul behave as outer\n", 107 | " # product\n", 108 | " wwt = torch.unsqueeze(wwt, 1)\n", 109 | " p = torch.unsqueeze(p, 0)\n", 110 | "\n", 111 | " I = torch.eye(N, dtype=torch.float32)\n", 112 | " return (((1 - pairwise_add(wwt)) * L +\n", 113 | " torch.matmul(wwt, p)) * (1 - I))" 114 | ], 115 | "metadata": { 116 | "id": "RiUrrSGF_S-V" 117 | }, 118 | "execution_count": null, 119 | "outputs": [] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "source": [ 124 | "# sample test code\n", 125 | "N = 10\n", 126 | "L = torch.randn((N,N))\n", 127 | "wwt = torch.randn(N)\n", 128 | "p = torch.randn(N)\n", 129 | "\n", 130 | "L_t = Lt(L, wwt, p, N)" 131 | ], 132 | "metadata": { 133 | "id": "gRhe00Pn_X-i" 134 | }, 135 | "execution_count": null, 136 | "outputs": [] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "source": [ 141 | "# sample test input\n", 142 | "ut = torch.randn((N))\n", 143 | "\n", 144 | "sorted_ut, free_list = torch.topk(-1*ut, N)\n", 145 | "sorted_ut *= -1" 146 | ], 147 | "metadata": { 148 | "id": "1ZqiwjFz9FEU" 149 | }, 150 | "execution_count": null, 151 | "outputs": [] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "source": [ 156 | "# sample test code\n", 157 | "out_of_location_at = torch.rand(N)\n", 158 | "\n", 159 | "empty_at = torch.empty(N)\n", 160 | "a_t = empty_at.scatter(0, free_list, out_of_location_at)" 161 | ], 162 | "metadata": { 163 | "id": "2VU11cTz9Hv0" 164 | }, 165 | "execution_count": null, 166 | "outputs": [] 167 | } 168 | ] 169 | } 
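A note on the allocation-weighting cells near the end of the notebook above: they demonstrate the `torch.topk` sort and the `scatter` step separately, with random placeholder values standing in for the out-of-location weights. A possible way to combine the two steps into a single function, following the formula used by `at()` in the first edition's `mem_ops.py` (reproduced later in this archive), is sketched below. The function name and the exclusive-cumulative-product workaround are editorial assumptions, not book code; PyTorch's `torch.cumprod` has no `exclusive` flag, so the sketch shifts the sorted usage vector by one position instead.

```python
import torch

def allocation_weighting(ut, N):
    """Hypothetical PyTorch port of at() from mem_ops.py (a sketch, not book code)."""
    # sort usages in ascending order; free_list holds their original indices
    sorted_ut, free_list = torch.topk(-1 * ut, N)
    sorted_ut = -1 * sorted_ut

    # exclusive cumulative product: the first factor is 1 and the last usage drops out
    shifted = torch.cat([torch.ones(1), sorted_ut[:-1]])
    out_of_location_at = (1 - sorted_ut) * torch.cumprod(shifted, dim=0)

    # scatter the weights back to their original (unsorted) memory locations
    return torch.zeros(N).scatter(0, free_list, out_of_location_at)

# smoke test with usages in [0, 1)
a_t = allocation_weighting(torch.rand(10), 10)
```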
-------------------------------------------------------------------------------- /first_edition_archive/archive/README.md: -------------------------------------------------------------------------------- 1 | # Fundamentals of Deep Learning 2 | 3 | This repository is the code companion to my book "Fundamentals of Deep Learning." All algorithms are implemented in [Tensorflow](https://www.tensorflow.org/ "Tensorflow"), Google's new machine intelligence library. 4 | 5 | ## TODO 6 | 7 | ### Networks 8 | 9 | - Logistic Regression (Nikhil) 10 | - Multilayer Perceptron (Nikhil) 11 | - Convolutional Network (Nikhil) 12 | - Neural Style (Anish) 13 | - Autoencoder (Hassan) 14 | - Denoising Autoencoder (Hassan) 15 | - Convolutional Autoencoder (Hassan) 16 | - RNN (Nikhil) 17 | - LSTM Network (Nikhil) 18 | - GRU Network (Nikhil) 19 | - LSTM + Attention (Nikhil) 20 | - RCNN (Nikhil) 21 | - Memory Networks (Nikhil) 22 | - Pointer Networks 23 | - Neural Turing Machines 24 | - Neural Programmer 25 | - DQN 26 | - LSTM-DQN 27 | - Deep Convolutional Inverse Graphics Network 28 | - Highway Networks 29 | - Deep Residual Networks 30 | 31 | ### Embedding 32 | 33 | - Word2Vec (Nikhil) 34 | - Skip-gram/CBoW 35 | - GloVe (Nikhil) 36 | - Skip-thought Vectors (Nikhil) 37 | 38 | ### Optimizers 39 | 40 | - MLP + Momentum 41 | - MLP + RMSProp 42 | - MLP + ADAM 43 | - MLP + FTRL 44 | - MLP + ADADELTA 45 | -------------------------------------------------------------------------------- /first_edition_archive/archive/autoencoder_tsne.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/archive/autoencoder_tsne.py -------------------------------------------------------------------------------- /first_edition_archive/archive/autoencoder_vs_pca.py: -------------------------------------------------------------------------------- 1 | from sklearn import decomposition 2 | from matplotlib import pyplot as plt 3 | import tensorflow as tf 4 | import autoencoder_mnist as ae 5 | import argparse, input_data 6 | import numpy as np 7 | # model-checkpoint-0349-191950 8 | 9 | def scatter(codes, labels): 10 | colors = [ 11 | ('#27ae60', 'o'), 12 | ('#2980b9', 'o'), 13 | ('#8e44ad', 'o'), 14 | ('#f39c12', 'o'), 15 | ('#c0392b', 'o'), 16 | ('#27ae60', 'x'), 17 | ('#2980b9', 'x'), 18 | ('#8e44ad', 'x'), 19 | ('#c0392b', 'x'), 20 | ('#f39c12', 'x'), 21 | ] 22 | for num in xrange(10): 23 | plt.scatter([codes[:,0][i] for i in xrange(len(labels)) if labels[i] == num], 24 | [codes[:,1][i] for i in xrange(len(labels)) if labels[i] == num], 7, 25 | label=str(num), color = colors[num][0], marker=colors[num][1]) 26 | plt.legend() 27 | plt.show() 28 | 29 | if __name__ == '__main__': 30 | 31 | parser = argparse.ArgumentParser(description='Test various optimization strategies') 32 | parser.add_argument('savepath', nargs=1, type=str) 33 | args = parser.parse_args() 34 | 35 | print "\nPULLING UP MNIST DATA" 36 | mnist = input_data.read_data_sets("data/", one_hot=False) 37 | print mnist.test.labels 38 | 39 | # print "\nSTARTING PCA" 40 | # pca = decomposition.PCA(n_components=2) 41 | # pca.fit(mnist.train.images) 42 | # 43 | # print "\nGENERATING PCA CODES AND RECONSTRUCTION" 44 | # pca_codes = pca.transform(mnist.test.images) 45 | # print pca_codes 46 | # 47 | # scatter(pca_codes, mnist.test.labels) 48 | 49 | with tf.Graph().as_default(): 50 | 51 | with 
tf.variable_scope("autoencoder_model"): 52 | 53 | x = tf.placeholder("float", [None, 784]) # mnist data image of shape 28*28=784 54 | phase_train = tf.placeholder(tf.bool) 55 | 56 | code = ae.encoder(x, 2, phase_train) 57 | 58 | output = ae.decoder(code, 2, phase_train) 59 | 60 | cost, train_summary_op = ae.loss(output, x) 61 | 62 | global_step = tf.Variable(0, name='global_step', trainable=False) 63 | 64 | train_op = ae.training(cost, global_step) 65 | 66 | eval_op, in_im_op, out_im_op, val_summary_op = ae.evaluate(output, x) 67 | 68 | saver = tf.train.Saver() 69 | 70 | sess = tf.Session() 71 | 72 | 73 | print "\nSTARTING AUTOENCODER\n", args.savepath[0] 74 | sess = tf.Session() 75 | saver = tf.train.Saver() 76 | saver.restore(sess, args.savepath[0]) 77 | 78 | print "\nGENERATING AE CODES AND RECONSTRUCTION" 79 | ae_codes, ae_reconstruction = sess.run([code, output], feed_dict={x: mnist.test.images * np.random.randint(2, size=(784)), phase_train: True}) 80 | 81 | scatter(ae_codes, mnist.test.labels) 82 | 83 | plt.imshow(ae_reconstruction[0].reshape((28,28)), cmap=plt.cm.gray) 84 | plt.show() 85 | -------------------------------------------------------------------------------- /first_edition_archive/archive/convnet_cifar.py: -------------------------------------------------------------------------------- 1 | import cifar10_input 2 | cifar10_input.maybe_download_and_extract() 3 | 4 | import tensorflow as tf 5 | import numpy as np 6 | import time, os 7 | 8 | # Architecture 9 | n_hidden_1 = 256 10 | n_hidden_2 = 256 11 | 12 | # Parameters 13 | learning_rate = 0.001 14 | training_epochs = 1000 15 | batch_size = 128 16 | display_step = 1 17 | 18 | def inputs(eval_data=True): 19 | data_dir = os.path.join('data/cifar10_data', 'cifar-10-batches-bin') 20 | return cifar10_input.inputs(eval_data=eval_data, data_dir=data_dir, 21 | batch_size=batch_size) 22 | 23 | def distorted_inputs(): 24 | data_dir = os.path.join('data/cifar10_data', 'cifar-10-batches-bin') 25 | return cifar10_input.distorted_inputs(data_dir=data_dir, 26 | batch_size=batch_size) 27 | 28 | def filter_summary(V, weight_shape): 29 | ix = weight_shape[0] 30 | iy = weight_shape[1] 31 | cx, cy = 8, 8 32 | V_T = tf.transpose(V, (3, 0, 1, 2)) 33 | tf.image_summary("filters", V_T, max_images=64) 34 | 35 | def conv2d(input, weight_shape, bias_shape, visualize=False): 36 | incoming = weight_shape[0] * weight_shape[1] * weight_shape[2] 37 | weight_init = tf.random_normal_initializer(stddev=(2.0/incoming)**0.5) 38 | W = tf.get_variable("W", weight_shape, initializer=weight_init) 39 | if visualize: 40 | filter_summary(W, weight_shape) 41 | bias_init = tf.constant_initializer(value=0) 42 | b = tf.get_variable("b", bias_shape, initializer=bias_init) 43 | return tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(input, W, strides=[1, 1, 1, 1], padding='SAME'), b)) 44 | 45 | def max_pool(input, k=2): 46 | return tf.nn.max_pool(input, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME') 47 | 48 | def layer(input, weight_shape, bias_shape): 49 | weight_init = tf.random_normal_initializer(stddev=(2.0/weight_shape[0])**0.5) 50 | bias_init = tf.constant_initializer(value=0) 51 | W = tf.get_variable("W", weight_shape, 52 | initializer=weight_init) 53 | b = tf.get_variable("b", bias_shape, 54 | initializer=bias_init) 55 | return tf.nn.relu(tf.matmul(input, W) + b) 56 | 57 | def inference(x, keep_prob): 58 | 59 | with tf.variable_scope("conv_1"): 60 | conv_1 = conv2d(x, [5, 5, 3, 64], [64], visualize=True) 61 | pool_1 = max_pool(conv_1) 62 | 63 | with 
tf.variable_scope("conv_2"): 64 | conv_2 = conv2d(pool_1, [5, 5, 64, 64], [64]) 65 | pool_2 = max_pool(conv_2) 66 | 67 | with tf.variable_scope("fc_1"): 68 | 69 | dim = 1 70 | for d in pool_2.get_shape()[1:].as_list(): 71 | dim *= d 72 | 73 | pool_2_flat = tf.reshape(pool_2, [-1, dim]) 74 | fc_1 = layer(pool_2_flat, [dim, 384], [384]) 75 | 76 | # apply dropout 77 | fc_1_drop = tf.nn.dropout(fc_1, keep_prob) 78 | 79 | with tf.variable_scope("fc_2"): 80 | 81 | fc_2 = layer(fc_1_drop, [384, 192], [192]) 82 | 83 | # apply dropout 84 | fc_2_drop = tf.nn.dropout(fc_2, keep_prob) 85 | 86 | with tf.variable_scope("output"): 87 | output = layer(fc_2_drop, [192, 10], [10]) 88 | 89 | return output 90 | 91 | 92 | def loss(output, y): 93 | xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(output, tf.cast(y, tf.int64)) 94 | loss = tf.reduce_mean(xentropy) 95 | return loss 96 | 97 | def training(cost, global_step): 98 | tf.scalar_summary("cost", cost) 99 | optimizer = tf.train.AdamOptimizer(learning_rate) 100 | train_op = optimizer.minimize(cost, global_step=global_step) 101 | return train_op 102 | 103 | def evaluate(output, y): 104 | correct_prediction = tf.equal(tf.cast(tf.argmax(output, 1), dtype=tf.int32), y) 105 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 106 | tf.scalar_summary("validation error", (1.0 - accuracy)) 107 | return accuracy 108 | 109 | if __name__ == '__main__': 110 | 111 | 112 | 113 | with tf.device("/gpu:0"): 114 | 115 | with tf.Graph().as_default(): 116 | 117 | with tf.variable_scope("cifar_conv_model"): 118 | 119 | x = tf.placeholder("float", [None, 24, 24, 3]) 120 | y = tf.placeholder("int32", [None]) 121 | keep_prob = tf.placeholder(tf.float32) # dropout probability 122 | 123 | distorted_images, distorted_labels = distorted_inputs() 124 | val_images, val_labels = inputs() 125 | 126 | output = inference(x, keep_prob) 127 | 128 | cost = loss(output, y) 129 | 130 | global_step = tf.Variable(0, name='global_step', trainable=False) 131 | 132 | train_op = training(cost, global_step) 133 | 134 | eval_op = evaluate(output, y) 135 | 136 | summary_op = tf.merge_all_summaries() 137 | 138 | saver = tf.train.Saver() 139 | 140 | sess = tf.Session() 141 | 142 | summary_writer = tf.train.SummaryWriter("conv_cifar_logs/", 143 | graph_def=sess.graph_def) 144 | 145 | 146 | init_op = tf.initialize_all_variables() 147 | 148 | sess.run(init_op) 149 | 150 | tf.train.start_queue_runners(sess=sess) 151 | 152 | # Training cycle 153 | for epoch in range(training_epochs): 154 | 155 | avg_cost = 0. 156 | total_batch = int(cifar10_input.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN/batch_size) 157 | # Loop over all batches 158 | for i in range(total_batch): 159 | # Fit training using batch data 160 | 161 | train_x, train_y = sess.run([distorted_images, distorted_labels]) 162 | 163 | _, new_cost = sess.run([train_op, cost], feed_dict={x: train_x, y: train_y, keep_prob: 0.5}) 164 | # Compute average loss 165 | avg_cost += new_cost/total_batch 166 | # print "Epoch %d, minibatch %d of %d. Cost = %0.4f." 
%(epoch, i, total_batch, new_cost) 167 | 168 | # Display logs per epoch step 169 | if epoch % display_step == 0: 170 | print "Epoch:", '%04d' % (epoch+1), "cost =", "{:.9f}".format(avg_cost) 171 | 172 | val_x, val_y = sess.run([val_images, val_labels]) 173 | 174 | accuracy = sess.run(eval_op, feed_dict={x: val_x, y: val_y, keep_prob: 1}) 175 | 176 | print "Validation Error:", (1 - accuracy) 177 | 178 | summary_str = sess.run(summary_op, feed_dict={x: train_x, y: train_y, keep_prob: 1}) 179 | summary_writer.add_summary(summary_str, sess.run(global_step)) 180 | 181 | saver.save(sess, "conv_cifar_logs/model-checkpoint", global_step=global_step) 182 | 183 | 184 | print "Optimization Finished!" 185 | 186 | val_x, val_y = sess.run([val_images, val_labels]) 187 | accuracy = sess.run(eval_op, feed_dict={x: val_x, y: val_y, keep_prob: 1}) 188 | 189 | print "Test Accuracy:", accuracy 190 | -------------------------------------------------------------------------------- /first_edition_archive/archive/convnet_mnist.py: -------------------------------------------------------------------------------- 1 | import input_data 2 | mnist = input_data.read_data_sets("data/", one_hot=True) 3 | 4 | import tensorflow as tf 5 | import time 6 | 7 | # Architecture 8 | n_hidden_1 = 256 9 | n_hidden_2 = 256 10 | 11 | # Parameters 12 | learning_rate = 0.0001 13 | training_epochs = 1000 14 | batch_size = 100 15 | display_step = 1 16 | 17 | def conv2d(input, weight_shape, bias_shape): 18 | incoming = weight_shape[0] * weight_shape[1] * weight_shape[2] 19 | weight_init = tf.random_normal_initializer(stddev=(2.0/incoming)**0.5) 20 | W = tf.get_variable("W", weight_shape, initializer=weight_init) 21 | bias_init = tf.constant_initializer(value=0) 22 | b = tf.get_variable("b", bias_shape, initializer=bias_init) 23 | return tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(input, W, strides=[1, 1, 1, 1], padding='SAME'), b)) 24 | 25 | def max_pool(input, k=2): 26 | return tf.nn.max_pool(input, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME') 27 | 28 | def layer(input, weight_shape, bias_shape): 29 | weight_init = tf.random_normal_initializer(stddev=(2.0/weight_shape[0])**0.5) 30 | bias_init = tf.constant_initializer(value=0) 31 | W = tf.get_variable("W", weight_shape, 32 | initializer=weight_init) 33 | b = tf.get_variable("b", bias_shape, 34 | initializer=bias_init) 35 | return tf.nn.relu(tf.matmul(input, W) + b) 36 | 37 | 38 | def inference(x, keep_prob): 39 | 40 | x = tf.reshape(x, shape=[-1, 28, 28, 1]) 41 | with tf.variable_scope("conv_1"): 42 | conv_1 = conv2d(x, [5, 5, 1, 32], [32]) 43 | pool_1 = max_pool(conv_1) 44 | 45 | with tf.variable_scope("conv_2"): 46 | conv_2 = conv2d(pool_1, [5, 5, 32, 64], [64]) 47 | pool_2 = max_pool(conv_2) 48 | 49 | with tf.variable_scope("fc"): 50 | pool_2_flat = tf.reshape(pool_2, [-1, 7 * 7 * 64]) 51 | fc_1 = layer(pool_2_flat, [7*7*64, 1024], [1024]) 52 | 53 | # apply dropout 54 | fc_1_drop = tf.nn.dropout(fc_1, keep_prob) 55 | 56 | with tf.variable_scope("output"): 57 | output = layer(fc_1_drop, [1024, 10], [10]) 58 | 59 | return output 60 | 61 | 62 | def loss(output, y): 63 | xentropy = tf.nn.softmax_cross_entropy_with_logits(output, y) 64 | loss = tf.reduce_mean(xentropy) 65 | return loss 66 | 67 | def training(cost, global_step): 68 | tf.scalar_summary("cost", cost) 69 | optimizer = tf.train.AdamOptimizer(learning_rate) 70 | train_op = optimizer.minimize(cost, global_step=global_step) 71 | return train_op 72 | 73 | 74 | def evaluate(output, y): 75 | correct_prediction = 
tf.equal(tf.argmax(output, 1), tf.argmax(y, 1)) 76 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 77 | tf.scalar_summary("validation error", (1.0 - accuracy)) 78 | return accuracy 79 | 80 | if __name__ == '__main__': 81 | 82 | with tf.device("/gpu:0"): 83 | 84 | with tf.Graph().as_default(): 85 | 86 | with tf.variable_scope("mnist_conv_model"): 87 | 88 | x = tf.placeholder("float", [None, 784]) # mnist data image of shape 28*28=784 89 | y = tf.placeholder("float", [None, 10]) # 0-9 digits recognition => 10 classes 90 | keep_prob = tf.placeholder(tf.float32) # dropout probability 91 | 92 | output = inference(x, keep_prob) 93 | 94 | cost = loss(output, y) 95 | 96 | global_step = tf.Variable(0, name='global_step', trainable=False) 97 | 98 | train_op = training(cost, global_step) 99 | 100 | eval_op = evaluate(output, y) 101 | 102 | summary_op = tf.merge_all_summaries() 103 | 104 | saver = tf.train.Saver() 105 | 106 | sess = tf.Session() 107 | 108 | summary_writer = tf.train.SummaryWriter("conv_mnist_logs/", 109 | graph_def=sess.graph_def) 110 | 111 | 112 | init_op = tf.initialize_all_variables() 113 | 114 | sess.run(init_op) 115 | 116 | 117 | # Training cycle 118 | for epoch in range(training_epochs): 119 | 120 | avg_cost = 0. 121 | total_batch = int(mnist.train.num_examples/batch_size) 122 | # Loop over all batches 123 | for i in range(total_batch): 124 | minibatch_x, minibatch_y = mnist.train.next_batch(batch_size) 125 | # Fit training using batch data 126 | sess.run(train_op, feed_dict={x: minibatch_x, y: minibatch_y, keep_prob: 0.5}) 127 | # Compute average loss 128 | avg_cost += sess.run(cost, feed_dict={x: minibatch_x, y: minibatch_y, keep_prob: 0.5})/total_batch 129 | # Display logs per epoch step 130 | if epoch % display_step == 0: 131 | print "Epoch:", '%04d' % (epoch+1), "cost =", "{:.9f}".format(avg_cost) 132 | 133 | accuracy = sess.run(eval_op, feed_dict={x: mnist.validation.images, y: mnist.validation.labels, keep_prob: 1}) 134 | 135 | print "Validation Error:", (1 - accuracy) 136 | 137 | summary_str = sess.run(summary_op, feed_dict={x: minibatch_x, y: minibatch_y, keep_prob: 0.5}) 138 | summary_writer.add_summary(summary_str, sess.run(global_step)) 139 | 140 | saver.save(sess, "conv_mnist_logs/model-checkpoint", global_step=global_step) 141 | 142 | 143 | print "Optimization Finished!" 
144 | 145 | 146 | accuracy = sess.run(eval_op, feed_dict={x: mnist.test.images, y: mnist.test.labels, keep_prob: 1}) 147 | 148 | print "Test Accuracy:", accuracy 149 | -------------------------------------------------------------------------------- /first_edition_archive/archive/dnc/mem_ops.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | def init_memory(N, W, R): 5 | """ 6 | returns the initial values of the memory matrix, usage vector, 7 | precedence vector, link matrix, read weightings, write weightings, 8 | and the read vectors 9 | """ 10 | 11 | M0 = tf.fill([N, W], 1e-6) 12 | u0 = tf.zeros([N]) 13 | p0 = tf.zeros([N]) 14 | L0 = tf.zeros([N, N]) 15 | wr0 = tf.fill([N, R], 1e-6) # initial read weightings 16 | ww0 = tf.fill([N], 1e-6) # initial write weightings 17 | r0 = tf.fill([W, R], 1e-6) # initial read vector 18 | 19 | return M0, u0, p0, L0, wr0, ww0, r0 20 | 21 | 22 | def parse_interface(zeta, N, W, R): 23 | """ 24 | returns the individual components of the interface vector 25 | """ 26 | cursor = 0 # keeps track of how far we parsed into zeta 27 | kr, cursor = tf.reshape(zeta[cursor:cursor + W*R], [W, R]), cursor + W*R 28 | br, cursor = zeta[cursor:cursor + R], cursor + R 29 | kw, cursor = tf.reshape(zeta[cursor: cursor + W], [W, 1]), cursor + W 30 | bw, cursor = zeta[cursor], cursor + 1 31 | e, cursor = zeta[cursor: cursor + W], cursor + W 32 | v, cursor = zeta[cursor: cursor + W], cursor + W 33 | f, cursor = zeta[cursor: cursor + R], cursor + R 34 | ga, cursor = zeta[cursor], cursor + 1 35 | gw, cursor = zeta[cursor], cursor + 1 36 | pi = tf.reshape(zeta[cursor:], [3, R]) 37 | 38 | # transforming the parsed components into their correct values 39 | oneplus = lambda z: 1 + tf.nn.softplus(z) 40 | 41 | e = tf.nn.sigmoid(e) 42 | f = tf.nn.sigmoid(f) 43 | ga = tf.nn.sigmoid(ga) 44 | gw = tf.nn.sigmoid(gw) 45 | br = oneplus(br) 46 | bw = oneplus(bw) 47 | pi = tf.nn.softmax(pi, 0) 48 | 49 | return kr, br, kw, bw, e, v, f, ga, gw, pi 50 | 51 | 52 | def C(M, k, b): 53 | """ 54 | Content-based addressing weightings 55 | """ 56 | M_normalized = tf.nn.l2_normalize(M, 1) 57 | k_normalized = tf.nn.l2_normalize(k, 0) 58 | similarity = tf.matmul(M_normalized, k_normalized) 59 | 60 | return tf.nn.softmax(similarity * b, 0) 61 | 62 | 63 | def ut(u, f, wr, ww): 64 | """ 65 | returns the updated usage vector given the previous one along with 66 | free gates and previous read and write weightings 67 | """ 68 | psi_t = tf.reduce_prod(1 - f * wr, 1) 69 | return (u + ww - u * ww) * psi_t 70 | 71 | 72 | def at(ut, N): 73 | """ 74 | returns the allocation weighting given the updated usage vector 75 | """ 76 | sorted_ut, free_list = tf.nn.top_k(-1 * ut, N) 77 | sorted_ut *= -1 # brings the usages to the original positive values 78 | 79 | # the exclusive argument makes the first element in the cumulative 80 | # product a 1 instead of the first element in the given tensor 81 | sorted_ut_cumprod = tf.cumprod(sorted_ut, exclusive=True) 82 | out_of_location_at = (1 - sorted_ut) * sorted_ut_cumprod 83 | 84 | empty_at_container = tf.TensorArray(tf.float32, N) 85 | full_at_container = empty_at_container.scatter(free_list, out_of_location_at) 86 | 87 | return full_at_container.pack() 88 | 89 | 90 | def wwt(ct, at, gw, ga): 91 | """ 92 | returns the upadted write weightings given allocation and content-based 93 | weightings along with the write and allocation gates 94 | """ 95 | ct = tf.squeeze(ct) 96 | return gw * (ga * 
at + (1 - ga) * ct) 97 | 98 | 99 | def Lt(L, wwt, p, N): 100 | """ 101 | returns the updated link matrix given the previous one along with 102 | the updated write weightings and the previous precedence vector 103 | """ 104 | def pairwise_add(v): 105 | """ 106 | returns the matrix of pairwe-adding the elements of v to themselves 107 | """ 108 | n = v.get_shape().as_list()[0] 109 | V = tf.concat(1, [v] * n) # a NxN matrix of duplicates of u along the columns 110 | return V + V 111 | 112 | # expand dimensions of wwt and p to make matmul behave as outer product 113 | wwt = tf.expand_dims(wwt, 1) 114 | p = tf.expand_dims(p, 0) 115 | 116 | I = tf.constant(np.identity(N, dtype=np.float32)) 117 | return ((1 - pairwise_add(wwt)) * L + tf.matmul(wwt, p)) * (1 - I) 118 | 119 | 120 | def pt(wwt, p): 121 | """ 122 | returns the updated precedence vector given the new write weightings and 123 | the previous precedence vector 124 | """ 125 | return (1 - tf.reduce_sum(wwt)) * p + wwt 126 | 127 | 128 | def Mt(M, wwt, e, v): 129 | """ 130 | returns the updated memory matrix given the previous one, the new write 131 | weightings, and the erase and write vectors 132 | """ 133 | # expand the dims of wwt, e, and v to make matmul 134 | # behave as outer product 135 | wwt = tf.expand_dims(wwt, 1) 136 | e = tf.expand_dims(e, 0) 137 | v = tf.expand_dims(v, 0) 138 | 139 | return M * (1 - tf.matmul(wwt, e)) + tf.matmul(wwt, v) 140 | 141 | 142 | def wrt(wr, Lt, ct, pi): 143 | """ 144 | returns the updated read weightings given the previous ones, the new link 145 | matrix, a content-based weighting, and the read modes 146 | """ 147 | ft = tf.matmul(Lt, wr) 148 | bt = tf.matmul(Lt, wr, transpose_a=True) 149 | 150 | return pi[0] * bt + pi[1] * ct + pi[2] * ft 151 | 152 | 153 | def rt(Mt, wrt): 154 | """ 155 | returns the new read vectors given the new memory matrix and the new read 156 | weightings 157 | """ 158 | return tf.matmul(Mt, wrt, transpose_a=True) 159 | -------------------------------------------------------------------------------- /first_edition_archive/archive/dnc/preprocess.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pickle 3 | import getopt 4 | import urllib2 5 | import tarfile 6 | import numpy as np 7 | from shutil import rmtree 8 | from os import listdir, mkdir 9 | from os.path import join, isfile, isdir, dirname, basename, normpath, realpath, exists, getsize 10 | 11 | def llprint(message): 12 | sys.stdout.write(message) 13 | sys.stdout.flush() 14 | 15 | 16 | def create_dictionary(files_list): 17 | """ 18 | creates a dictionary of unique lexicons in the dataset and their mapping to numbers 19 | 20 | Parameters: 21 | ---------- 22 | files_list: list 23 | the list of files to scan through 24 | 25 | Returns: dict 26 | the constructed dictionary of lexicons 27 | """ 28 | 29 | lexicons_dict = {} 30 | id_counter = 0 31 | 32 | llprint("Creating Dictionary ... 0/%d" % (len(files_list))) 33 | 34 | for indx, filename in enumerate(files_list): 35 | with open(filename, 'r') as fobj: 36 | for line in fobj: 37 | 38 | # first seperate . and ? away from words into seperate lexicons 39 | line = line.replace('.', ' .') 40 | line = line.replace('?', ' ?') 41 | line = line.replace(',', ' ') 42 | 43 | for word in line.split(): 44 | if not word.lower() in lexicons_dict and word.isalpha(): 45 | lexicons_dict[word.lower()] = id_counter 46 | id_counter += 1 47 | 48 | llprint("\rCreating Dictionary ... 
%d/%d" % ((indx + 1), len(files_list))) 49 | 50 | print "\rCreating Dictionary ... Done!" 51 | return lexicons_dict 52 | 53 | 54 | def encode_data(files_list, lexicons_dictionary, length_limit=None): 55 | """ 56 | encodes the dataset into its numeric form given a constructed dictionary 57 | 58 | Parameters: 59 | ---------- 60 | files_list: list 61 | the list of files to scan through 62 | lexicons_dictionary: dict 63 | the mappings of unique lexicons 64 | 65 | Returns: tuple (dict, int) 66 | the data in its numeric form, maximum story length 67 | """ 68 | 69 | files = {} 70 | story_inputs = None 71 | story_outputs = None 72 | stories_lengths = [] 73 | answers_flag = False # a flag to specify when to put data into outputs list 74 | limit = length_limit if not length_limit is None else float("inf") 75 | 76 | llprint("Encoding Data ... 0/%d" % (len(files_list))) 77 | 78 | for indx, filename in enumerate(files_list): 79 | 80 | files[filename] = [] 81 | 82 | with open(filename, 'r') as fobj: 83 | for line in fobj: 84 | 85 | # first seperate . and ? away from words into seperate lexicons 86 | line = line.replace('.', ' .') 87 | line = line.replace('?', ' ?') 88 | line = line.replace(',', ' ') 89 | 90 | answers_flag = False # reset as answers end by end of line 91 | 92 | for i, word in enumerate(line.split()): 93 | 94 | if word == '1' and i == 0: 95 | # beginning of a new story 96 | if not story_inputs is None: 97 | stories_lengths.append(len(story_inputs)) 98 | if len(story_inputs) <= limit: 99 | files[filename].append({ 100 | 'inputs':story_inputs, 101 | 'outputs': story_outputs 102 | }) 103 | story_inputs = [] 104 | story_outputs = [] 105 | 106 | if word.isalpha() or word == '?' or word == '.': 107 | if not answers_flag: 108 | story_inputs.append(lexicons_dictionary[word.lower()]) 109 | else: 110 | story_inputs.append(lexicons_dictionary['-']) 111 | story_outputs.append(lexicons_dictionary[word.lower()]) 112 | 113 | # set the answers_flags if a question mark is encountered 114 | if not answers_flag: 115 | answers_flag = (word == '?') 116 | 117 | llprint("\rEncoding Data ... %d/%d" % (indx + 1, len(files_list))) 118 | 119 | print "\rEncoding Data ... Done!" 
120 | return files, stories_lengths 121 | 122 | 123 | if __name__ == '__main__': 124 | task_dir = dirname(realpath(__file__)) 125 | options,_ = getopt.getopt(sys.argv[1:], '', ['length_limit=']) 126 | data_dir = join(task_dir, "../data/babi-en-10k/") 127 | joint_train = True 128 | length_limit = None 129 | files_list = [] 130 | 131 | if not exists(join(task_dir, 'data')): 132 | mkdir(join(task_dir, 'data')) 133 | 134 | for opt in options: 135 | if opt[0] == '--length_limit': 136 | length_limit = int(opt[1]) 137 | 138 | """if data_dir is None: 139 | raise ValueError("data_dir argument cannot be None")""" 140 | 141 | for entryname in listdir(data_dir): 142 | entry_path = join(data_dir, entryname) 143 | if isfile(entry_path): 144 | files_list.append(entry_path) 145 | 146 | lexicon_dictionary = create_dictionary(files_list) 147 | lexicon_count = len(lexicon_dictionary) 148 | 149 | # append used punctuation to dictionary 150 | lexicon_dictionary['?'] = lexicon_count 151 | lexicon_dictionary['.'] = lexicon_count + 1 152 | lexicon_dictionary['-'] = lexicon_count + 2 153 | 154 | encoded_files, stories_lengths = encode_data(files_list, lexicon_dictionary, length_limit) 155 | 156 | stories_lengths = np.array(stories_lengths) 157 | length_limit = np.max(stories_lengths) if length_limit is None else length_limit 158 | print "Total Number of stories: %d" % (len(stories_lengths)) 159 | print "Number of stories with lengthes > %d: %d (%% %.2f) [discarded]" % (length_limit, np.sum(stories_lengths > length_limit), np.mean(stories_lengths > length_limit) * 100.0) 160 | print "Number of Remaining Stories: %d" % (len(stories_lengths[stories_lengths <= length_limit])) 161 | 162 | processed_data_dir = join(task_dir, 'data', basename(normpath(data_dir))) 163 | train_data_dir = join(processed_data_dir, 'train') 164 | test_data_dir = join(processed_data_dir, 'test') 165 | if exists(processed_data_dir) and isdir(processed_data_dir): 166 | rmtree(processed_data_dir) 167 | 168 | mkdir(processed_data_dir) 169 | mkdir(train_data_dir) 170 | mkdir(test_data_dir) 171 | 172 | llprint("Saving processed data to disk ... 
") 173 | 174 | pickle.dump(lexicon_dictionary, open(join(processed_data_dir, 'lexicon-dict.pkl'), 'wb')) 175 | 176 | joint_train_data = [] 177 | 178 | for filename in encoded_files: 179 | if filename.endswith("test.txt"): 180 | pickle.dump(encoded_files[filename], open(join(test_data_dir, basename(filename) + '.pkl'), 'wb')) 181 | elif filename.endswith("train.txt"): 182 | joint_train_data.extend(encoded_files[filename]) 183 | 184 | pickle.dump(joint_train_data, open(join(train_data_dir, 'train.pkl'), 'wb')) 185 | 186 | llprint("Done!\n") 187 | -------------------------------------------------------------------------------- /first_edition_archive/archive/dnc/train_babi.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import getopt 4 | import shutil 5 | import pickle 6 | import sys 7 | import os 8 | 9 | from mem_ops import * 10 | 11 | def llprint(message): 12 | sys.stdout.write(message) 13 | sys.stdout.flush() 14 | 15 | def load(path): 16 | return pickle.load(open(path, 'rb')) 17 | 18 | def onehot(index, size): 19 | vec = np.zeros(size, dtype=np.float32) 20 | index = int(index) 21 | vec[index] = 1.0 22 | return vec 23 | 24 | def prepare_sample(sample, target_code, word_space_size): 25 | """ 26 | prepares the input/output sequence of a sample story by encoding it 27 | into one-hot vectors and generates the necessary loss weights 28 | """ 29 | input_vec = np.array(sample[0]['inputs'], dtype=np.float32) 30 | output_vec = np.array(sample[0]['inputs'], dtype=np.float32) 31 | seq_len = input_vec.shape[0] 32 | weights_vec = np.zeros(seq_len, dtype=np.float32) 33 | 34 | target_mask = (input_vec == target_code) 35 | output_vec[target_mask] = sample[0]['outputs'] 36 | weights_vec[target_mask] = 1.0 37 | 38 | input_vec = np.array([onehot(code, word_space_size) for code in input_vec]) 39 | output_vec = np.array([onehot(code, word_space_size) for code in output_vec]) 40 | 41 | return ( 42 | np.reshape(input_vec, (-1, word_space_size)), 43 | np.reshape(output_vec, (-1, word_space_size)), 44 | seq_len, 45 | np.reshape(weights_vec, (-1, 1)) 46 | ) 47 | 48 | task_dir = os.path.dirname(os.path.realpath(__file__)) 49 | llprint("Loading Data ... 
") 50 | lexicon_dict = load(os.path.join(task_dir, "data/babi-en-10k/lexicon-dict.pkl")) 51 | data = load(os.path.join(task_dir, "data/babi-en-10k/train/train.pkl")) 52 | llprint("Done!\n") 53 | 54 | # the model parameters 55 | N = 256; W = 64; R = 4 # memory parameters 56 | X = Y = 159 # input/output size 57 | NN = 256 # controller's network output size 58 | zeta_size = R*W + 3*W + 5*R + 3 59 | # training parameters 60 | iterations = 100000 61 | learning_rate = 1e-4 62 | momentum = 0.9 63 | 64 | def network(step_input, state): 65 | """ 66 | defines the recurrent neural network operation 67 | """ 68 | global NN 69 | step_input = tf.expand_dims(step_input, 0) 70 | lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(NN) 71 | 72 | return lstm_cell(step_input, state) 73 | 74 | # START: Computaional Graph 75 | graph = tf.Graph() 76 | with graph.as_default(): 77 | # optimizer 78 | optimizer = tf.train.RMSPropOptimizer(learning_rate, momentum=momentum) 79 | 80 | # placeholders 81 | input_data = tf.placeholder(tf.float32, [None, X]) 82 | target_output = tf.placeholder(tf.float32, [None, Y]) 83 | loss_weights = tf.placeholder(tf.float32, [None, 1]) 84 | sequence_length = tf.placeholder(tf.int32) 85 | 86 | initial_nn_state = tf.nn.rnn_cell.BasicLSTMCell(NN).zero_state(1, tf.float32) 87 | 88 | empty_unpacked_inputs = tf.TensorArray(tf.float32, sequence_length) 89 | unpacked_inputs = empty_unpacked_inputs.unpack(input_data) 90 | outputs_container = tf.TensorArray(tf.float32, sequence_length) # accumelates the step outputs 91 | t = tf.constant(0, dtype=tf.int32) 92 | 93 | def step_op(time, memory_state, controller_state, inputs, outputs): 94 | """ 95 | defines the operation of one step of the sequence 96 | """ 97 | global N, W, R 98 | 99 | step_input = inputs.read(time) 100 | M, u, p, L, wr, ww, r = memory_state 101 | 102 | with tf.variable_scope('controller'): 103 | Xt = tf.concat(0, [step_input, tf.reshape(r, [-1])]) 104 | nn_output, nn_state = network(Xt, controller_state) 105 | std = lambda input_size: np.min(0.01, np.sqrt(2. 
/ input_size)) 106 | W_y = tf.get_variable('W_y', [NN, Y], tf.float32, tf.truncated_normal_initializer(stddev=std(NN))) 107 | W_zeta = tf.get_variable('W_zeta', [NN, zeta_size], tf.float32, tf.truncated_normal_initializer(stddev=std(NN))) 108 | 109 | pre_output = tf.matmul(nn_output, W_y) 110 | zeta = tf.squeeze(tf.matmul(nn_output, W_zeta)) 111 | kr, br, kw, bw, e, v, f, ga, gw, pi = parse_interface(zeta, N, W, R) 112 | 113 | # write head operations 114 | u_t = ut(u, f, wr, ww) 115 | a_t = at(u_t, N) 116 | cw_t = C(M, kw, bw) 117 | ww_t = wwt(cw_t, a_t, gw, ga) 118 | M_t = Mt(M, ww_t, e, v) 119 | L_t = Lt(L, ww_t, p, N) 120 | p_t = pt(ww_t, p) 121 | 122 | # read heads operations 123 | cr_t = C(M_t, kr, br) 124 | wr_t = wrt(wr, L_t, cr_t, pi) 125 | r_t = rt(M_t, wr_t) 126 | 127 | W_r = tf.get_variable('W_r', [W*R, Y], tf.float32, tf.truncated_normal_initializer(stddev=std(W*R))) 128 | flat_rt = tf.reshape(r_t, [-1]) 129 | final_output = pre_output + tf.matmul(tf.expand_dims(flat_rt, 0), W_r) 130 | updated_outputs = outputs.write(time, tf.squeeze(final_output)) 131 | 132 | return time + 1, (M_t, u_t, p_t, L_t, wr_t, ww_t, r_t), nn_state, inputs, updated_outputs 133 | 134 | _, _, _, _, final_outputs = tf.while_loop( 135 | cond = lambda time, *_: time < sequence_length, 136 | body = step_op, 137 | loop_vars=(t, init_memory(N,W,R), initial_nn_state, unpacked_inputs, outputs_container), 138 | parallel_iterations=32, 139 | swap_memory=True 140 | ) 141 | 142 | # pack the individual steps outputs into a single (sequence_length x Y) tensor 143 | packed_output = final_outputs.pack() 144 | 145 | loss = tf.reduce_mean( 146 | loss_weights * tf.nn.softmax_cross_entropy_with_logits(packed_output, target_output) 147 | ) 148 | gradients = optimizer.compute_gradients(loss) 149 | # clipping the gradients value to avoid explosion 150 | for i, (grad, var) in enumerate(gradients): 151 | if grad is not None: 152 | gradients[i] = (tf.clip_by_value(grad, -10, 10), var) 153 | apply_grads = optimizer.apply_gradients(gradients) 154 | # END: Computational Graph 155 | 156 | # Reading command line arguments and adapting parameters 157 | options,_ = getopt.getopt(sys.argv[1:], '', ['iterations=']) 158 | for opt in options: 159 | iterations = int(opt[1]) 160 | 161 | with tf.Session(graph=graph) as session: 162 | 163 | session.run(tf.initialize_all_variables()) 164 | 165 | last_100_losses = [] 166 | print "" 167 | for i in range(iterations): 168 | 169 | llprint("\rIteration %d/%d" % (i, iterations)) 170 | 171 | sample = np.random.choice(data, 1) 172 | input_seq, target_seq, seq_len, weights = prepare_sample(sample, lexicon_dict['-'], 159) 173 | 174 | loss_value,_, = session.run([loss, apply_grads], feed_dict={ 175 | input_data: input_seq, 176 | target_output: target_seq, 177 | sequence_length: seq_len, 178 | loss_weights: weights 179 | }) 180 | 181 | last_100_losses.append(loss_value) 182 | if i % 100 == 0: 183 | print "\n\tAvg. 
Cross-Entropy Loss: %.6f" % (np.mean(last_100_losses)) 184 | last_100_losses = [] 185 | 186 | model_path = os.path.join(task_dir, 'babi-model') 187 | if os.path.exists(model_path): 188 | shutil.rmtree(model_path) 189 | os.mkdir(model_path) 190 | tf.train.Saver().save(session, os.path.join(model_path, 'model.ckpt')) 191 | -------------------------------------------------------------------------------- /first_edition_archive/archive/download_tweets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | import urllib 5 | import re 6 | import json 7 | 8 | import socket 9 | socket.setdefaulttimeout(10) 10 | 11 | from bs4 import BeautifulSoup 12 | 13 | cache = {} 14 | 15 | for line in open(sys.argv[1]): 16 | fields = line.rstrip('\n').split('\t') 17 | sid = fields[0] 18 | uid = fields[1] 19 | 20 | #url = 'http://twitter.com/%s/status/%s' % (uid, sid) 21 | #print url 22 | 23 | tweet = None 24 | text = "Not Available" 25 | if cache.has_key(sid): 26 | text = cache[sid] 27 | else: 28 | try: 29 | f = urllib.urlopen("http://twitter.com/%s/status/%s" % (uid, sid)) 30 | #Thanks to Arturo! 31 | html = f.read().replace("", "") + "" 32 | soup = BeautifulSoup(html) 33 | 34 | jstt = soup.find_all("p", "js-tweet-text") 35 | tweets = list(set([x.get_text() for x in jstt])) 36 | #print len(tweets) 37 | #print tweets 38 | if(len(tweets)) > 1: 39 | continue 40 | 41 | text = tweets[0] 42 | cache[sid] = tweets[0] 43 | 44 | for j in soup.find_all("input", "json-data", id="init-data"): 45 | js = json.loads(j['value']) 46 | if(js.has_key("embedData")): 47 | tweet = js["embedData"]["status"] 48 | text = js["embedData"]["status"]["text"] 49 | cache[sid] = text 50 | break 51 | except Exception: 52 | continue 53 | 54 | if(tweet != None and tweet["id_str"] != sid): 55 | text = "Not Available" 56 | cache[sid] = "Not Available" 57 | text = text.replace('\n', ' ',) 58 | text = re.sub(r'\s+', ' ', text) 59 | #print json.dumps(tweet, indent=2) 60 | print "\t".join(fields + [text]).encode('utf-8') 61 | -------------------------------------------------------------------------------- /first_edition_archive/archive/feed_forward_network-[THEANO]/feed_forward_network.py: -------------------------------------------------------------------------------- 1 | """ 2 | We will use this class to represent a simple feedforward neural 3 | network. Here, we'll use this class to crack the MNIST handwritten 4 | digit dataset problem, but this class has been constructed so 5 | that it can be reappropriated to any use! 6 | 7 | References: 8 | - textbooks: "Pattern Recognition and Machine Learning", Christopher M. Bishop, section 4.3.2 9 | - websites: http://deeplearning.net/tutorial, Lisa Lab 10 | """ 11 | 12 | import numpy as np 13 | import theano.tensor as T 14 | import theano 15 | 16 | from hidden_layer import HiddenLayer as HL 17 | from softmax_layer import SoftmaxLayer as SL 18 | 19 | 20 | 21 | class FeedForwardNetwork(object): 22 | """ 23 | The feed forward neural network is described mostly input 24 | data in the form of a minibatch, a list of hidden layers, 25 | and a softmax layer to make predictions. 26 | """ 27 | 28 | def __init__ (self, random_gen, input, input_dim, output_dim, hidden_layer_sizes): 29 | """ 30 | We first initialize the feed forward network with some important 31 | information. 
32 | 33 | PARAM random_gen : numpy.random.RandomState 34 | A random number generator used to properly initialize the weights 35 | of this neural network 36 | 37 | PARAM input : theano.tensor.TensorType 38 | A symbolic variable that we'll use a minibatch of data 39 | 40 | PARAM input_dim : int 41 | This will represent the number of input neurons in our model (size 42 | of a single training example's input vector) 43 | 44 | PARAM ouptut_dim : int 45 | This will represent the number of neurons in the output layer (i.e. 46 | the number of possible classifications for the input) 47 | 48 | Param hidden_layers : List[int] 49 | This will represent an ordered list of number of neurons in each 50 | hidden layer of our network. The first element corresponds to the 51 | first hidden layer and the last element corresponds to the last. 52 | This list cannot be empty 53 | """ 54 | 55 | # We'll keep track of these sizes internally in case we need them later 56 | self.hidden_layer_sizes = hidden_layer_sizes 57 | 58 | # Now we'll build all of our hidden layers 59 | self.hidden_layers = [] 60 | for i in xrange(len(hidden_layer_sizes)): 61 | if i == 0: 62 | hidden_layer = HL( 63 | input=input, 64 | input_dim=input_dim, 65 | output_dim=hidden_layer_sizes[i], 66 | random_gen=random_gen, 67 | ) 68 | self.hidden_layers.append(hidden_layer) 69 | else: 70 | hidden_layer = HL( 71 | input=self.hidden_layers[i - 1].output, 72 | input_dim=hidden_layer_sizes[i - 1], 73 | output_dim=hidden_layer_sizes[i], 74 | random_gen=random_gen, 75 | ) 76 | 77 | self.softmax_layer = SL( 78 | input=self.hidden_layers[-1].output, 79 | input_dim=hidden_layer_sizes[-1], 80 | output_dim=output_dim 81 | ) 82 | 83 | # Let's grab the output of the softmax layer and use that as our output 84 | self.output = self.softmax_layer.output 85 | 86 | # Now let's look at what our final prediction should be 87 | self.predicted = T.argmax(self.output, axis=1) 88 | 89 | def feed_forward_network_cost(self, y, lambda_l2=0): 90 | """ 91 | Here we express the cost incurred by an example given the correct 92 | distribution 93 | 94 | PARAM y : theano.tensor.TensorType 95 | These are the correct answers, and we compute the cost with 96 | respect to this ground truth (over the entire minibatch). 
This 97 | means that y is of size (minibatch_size, output_dim) 98 | 99 | PARAM lambda : float 100 | This is the L2 regularization parameter that we use to penalize large 101 | values for components of W, thus discouraging potential overfitting 102 | """ 103 | # Calculate the log probabilities of the softmax output 104 | log_probabilities = T.log(self.output) 105 | 106 | # We use these log probabilities to compute the negative log likelihood 107 | negative_log_likelihood = -T.mean(log_probabilities[T.arange(y.shape[0]), y]) 108 | 109 | # Compute the L2 regularization component of the cost function 110 | hl_squared_sum = (self.hidden_layers[0].W ** 2).sum() 111 | for hidden_layer in self.hidden_layers[1:]: 112 | hl_squared_sum += (hidden_layer.W ** 2).sum() 113 | 114 | sl_squared_sum = (self.softmax_layer.W ** 2).sum() 115 | 116 | l2_regularization = lambda_l2 * (hl_squared_sum + sl_squared_sum) 117 | 118 | # Return a symbolic description of the cost function 119 | return negative_log_likelihood + l2_regularization 120 | 121 | def error_rate(self, y): 122 | """ 123 | Here we return the error rate of the model over a set of given labels 124 | (perhaps in a minibatch, in the validation set, or the test set) 125 | 126 | PARAM y : theano.tensor.TensorType 127 | These are the correct answers, and we compute the cost with 128 | respect to this ground truth (over the entire minibatch). This 129 | means that y is of size (minibatch_size, output_dim) 130 | """ 131 | 132 | # Make sure y is of the correct dimension 133 | assert y.ndim == self.predicted.ndim 134 | 135 | # Make sure that y contains values of the correct data type (ints) 136 | assert y.dtype.startswith('int') 137 | 138 | # Return the error rate on the data 139 | return T.mean(T.neq(self.predicted, y)) 140 | 141 | 142 | 143 | 144 | -------------------------------------------------------------------------------- /first_edition_archive/archive/feed_forward_network-[THEANO]/hidden_layer.py: -------------------------------------------------------------------------------- 1 | """ 2 | We will use this class to represent a tanh hidden layer. 3 | This will be a building block for a simplefeedforward neural 4 | network. 5 | 6 | References: 7 | - textbooks: "Pattern Recognition and Machine Learning", Christopher M. Bishop, section 4.3.2 8 | - websites: http://deeplearning.net/tutorial, Lisa Lab 9 | """ 10 | 11 | import numpy as np 12 | import theano.tensor as T 13 | import theano 14 | 15 | class HiddenLayer(object): 16 | """ 17 | The hidden layer class is described by two parameters (which 18 | we will want to learn). The first is a incoming weight matrix. 19 | We'll refer to this weight matrix as W. The second is a bias 20 | vector b. Refer to the text if you want to learn more about how 21 | this layer works. Let's get started! 22 | """ 23 | 24 | def __init__(self, input, input_dim, output_dim, random_gen): 25 | """ 26 | We first initialize the hidden layer object with some important 27 | information. 28 | 29 | PARAM input : theano.tensor.TensorType 30 | A symbolic variable that we'll use to describe incoming data from 31 | the previous layer 32 | 33 | PARAM input_dim : int 34 | This will represent the number of neurons in the previous layer 35 | 36 | PARAM ouptut_dim : int 37 | This will represent the number of neurons in the hidden layer 38 | 39 | PARAM random_gen : numpy.random.RandomState 40 | A random number generator used to properly initialize the weights. 
41 | For a tanh activation function, the literature suggests that the 42 | incoming weights should be sampled from the uniform distribution 43 | [-sqrt(6./(input_dim + output_dim)), sqrt(6./(input_dim + output_dim)] 44 | """ 45 | 46 | # We initialize the weight matrix W of size (input_dim, output_dim) 47 | self.W = theano.shared( 48 | value=np.asarray( 49 | random_gen.uniform( 50 | low=-np.sqrt(6. / (input_dim + output_dim)), 51 | high=np.sqrt(6. / (input_dim + output_dim)), 52 | size=(input_dim, output_dim) 53 | ), 54 | dtype=theano.config.floatX 55 | ), 56 | name='W', 57 | borrow=True 58 | ) 59 | 60 | # We initialize a bias vector for the neurons of the output layer 61 | self.b = theano.shared( 62 | value=np.zeros(output_dim), 63 | name='b', 64 | borrow='True' 65 | ) 66 | 67 | # Symbolic description of the incoming logits 68 | logit = T.dot(input, self.W) + self.b 69 | 70 | # Symbolic description of the outputs of the hidden layer neurons 71 | self.output = T.tanh(logit) 72 | 73 | -------------------------------------------------------------------------------- /first_edition_archive/archive/feed_forward_network-[THEANO]/softmax_layer.py: -------------------------------------------------------------------------------- 1 | """ 2 | We will use this class to represent a simple softmax layer. 3 | This will be a building block for a simplefeedforward neural 4 | network. 5 | 6 | References: 7 | - textbooks: "Pattern Recognition and Machine Learning", Christopher M. Bishop, section 4.3.2 8 | - websites: http://deeplearning.net/tutorial, Lisa Lab 9 | """ 10 | 11 | import numpy as np 12 | import theano.tensor as T 13 | import theano 14 | 15 | class SoftmaxLayer(object): 16 | """ 17 | The softmax layer class is described by two parameters (which 18 | we will want to learn). The first is a incoming weight matrix. 19 | We'll refer to this weight matrix as W. The second is a bias 20 | vector b. Refer to the text if you want to learn more about how 21 | this layer works. Let's get started! 22 | """ 23 | 24 | def __init__(self, input, input_dim, output_dim): 25 | """ 26 | We first initialize the softmax layer object with some important 27 | information. 28 | 29 | PARAM input : theano.tensor.TensorType 30 | A symbolic variable that we'll use to describe incoming data from 31 | the previous layer 32 | 33 | PARAM input_dim : int 34 | This will represent the number of neurons in the previous layer 35 | 36 | PARAM ouptut_dim : int 37 | This will represent the number of neurons in the softmax layer (i.e. 
38 | the number of possible classifications for the input) 39 | """ 40 | 41 | # We initialize the weight matrix W of size (input_dim, output_dim) 42 | self.W = theano.shared( 43 | value=np.zeros((input_dim, output_dim)), 44 | name='W', 45 | borrow=True 46 | ) 47 | 48 | # We initialize a bias vector for the neurons of the output layer 49 | self.b = theano.shared( 50 | value=np.zeros(output_dim), 51 | name='b', 52 | borrow='True' 53 | ) 54 | 55 | # Symbolic description of how to compute class membership probabilities 56 | self.output = T.nnet.softmax(T.dot(input, self.W) + self.b) 57 | 58 | # Symbolic description of the final prediction 59 | self.predicted = T.argmax(self.output, axis=1) 60 | 61 | 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /first_edition_archive/archive/imdb_bn_lstm.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from lstm import BNLSTMCell 3 | import read_imdb_data as data 4 | 5 | training_epochs = 1000 6 | batch_size = 32 7 | display_step = 1 8 | 9 | def embedding_layer(input, weight_shape): 10 | weight_init = tf.random_normal_initializer(stddev=(1.0/weight_shape[0])**0.5) 11 | E = tf.get_variable("E", weight_shape, 12 | initializer=weight_init) 13 | # E_exp = tf.expand_dims(E, 0) 14 | # E_tiled= tf.tile(E_exp, [32, 1, 1]) 15 | # return tf.batch_matmul(input, E_exp) 16 | incoming = tf.cast(input, tf.int32) 17 | embeddings = tf.nn.embedding_lookup(E, incoming) 18 | return embeddings 19 | 20 | def lstm(input, hidden_dim, keep_prob, phase_train): 21 | lstm = BNLSTMCell(hidden_dim, phase_train) 22 | lstm_outputs, state = tf.nn.dynamic_rnn(lstm, input, dtype=tf.float32) 23 | return tf.squeeze(tf.slice(lstm_outputs, [0, tf.shape(lstm_outputs)[1]-1, 0], [tf.shape(lstm_outputs)[0], 1, tf.shape(lstm_outputs)[2]])) 24 | 25 | def layer_batch_norm(x, n_out, phase_train): 26 | beta_init = tf.constant_initializer(value=0.0, dtype=tf.float32) 27 | gamma_init = tf.constant_initializer(value=1.0, dtype=tf.float32) 28 | 29 | beta = tf.get_variable("beta", [n_out], initializer=beta_init) 30 | gamma = tf.get_variable("gamma", [n_out], initializer=gamma_init) 31 | 32 | batch_mean, batch_var = tf.nn.moments(x, [0], name='moments') 33 | ema = tf.train.ExponentialMovingAverage(decay=0.9) 34 | ema_apply_op = ema.apply([batch_mean, batch_var]) 35 | ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var) 36 | def mean_var_with_update(): 37 | with tf.control_dependencies([ema_apply_op]): 38 | return tf.identity(batch_mean), tf.identity(batch_var) 39 | mean, var = tf.cond(phase_train, 40 | mean_var_with_update, 41 | lambda: (ema_mean, ema_var)) 42 | 43 | reshaped_x = tf.reshape(x, [-1, 1, 1, n_out]) 44 | normed = tf.nn.batch_norm_with_global_normalization(reshaped_x, mean, var, 45 | beta, gamma, 1e-3, True) 46 | return tf.reshape(normed, [-1, n_out]) 47 | 48 | def layer(input, weight_shape, bias_shape, phase_train): 49 | weight_init = tf.random_normal_initializer(stddev=(1.0/weight_shape[0])**0.5) 50 | bias_init = tf.constant_initializer(value=0) 51 | W = tf.get_variable("W", weight_shape, 52 | initializer=weight_init) 53 | b = tf.get_variable("b", bias_shape, 54 | initializer=bias_init) 55 | logits = tf.matmul(input, W) + b 56 | return tf.nn.sigmoid(layer_batch_norm(logits, weight_shape[1], phase_train)) 57 | 58 | def inference(input, phase_train): 59 | embedding = embedding_layer(input, [10000, 128]) 60 | lstm_output = lstm(embedding, 128, 0.8, phase_train) 
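# Shape walk-through for the pipeline above and the layer on the next line (a
# descriptive note added for clarity; "batch" is the minibatch dimension):
#   input       -> [batch, 100]       integer word ids (see the placeholder in __main__)
#   embedding   -> [batch, 100, 128]  after the embedding lookup
#   lstm_output -> [batch, 128]       output of the BN-LSTM at the last time step
# The fully connected layer below then maps [batch, 128] -> [batch, 2].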
61 | output = layer(lstm_output, [128, 2], [2], phase_train) 62 | return output 63 | 64 | def loss(output, y): 65 | xentropy = tf.nn.softmax_cross_entropy_with_logits(output, y) 66 | loss = tf.reduce_mean(xentropy) 67 | train_loss_summary_op = tf.scalar_summary("train_cost", loss) 68 | val_loss_summary_op = tf.scalar_summary("val_cost", loss) 69 | return loss, train_loss_summary_op, val_loss_summary_op 70 | 71 | def training(cost, global_step): 72 | optimizer = tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-08, 73 | use_locking=False, name='Adam') 74 | gvs = optimizer.compute_gradients(cost) 75 | capped_gvs = [(None if grad is None else tf.clip_by_value(grad, -10., 10.), var) for grad, var in gvs] 76 | train_op = optimizer.apply_gradients(capped_gvs, global_step=global_step) 77 | return train_op 78 | 79 | def evaluate(output, y): 80 | correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1)) 81 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 82 | accuracy_summary_op = tf.scalar_summary("accuracy", accuracy) 83 | return accuracy, accuracy_summary_op 84 | 85 | if __name__ == '__main__': 86 | 87 | with tf.Graph().as_default(): 88 | with tf.device('/gpu:0'): 89 | x = tf.placeholder("float", [None, 100]) 90 | y = tf.placeholder("float", [None, 2]) 91 | phase_train = tf.placeholder(tf.bool) 92 | 93 | output = inference(x, phase_train) 94 | 95 | cost, train_loss_summary_op, val_loss_summary_op = loss(output, y) 96 | 97 | global_step = tf.Variable(0, name='global_step', trainable=False) 98 | 99 | train_op = training(cost, global_step) 100 | 101 | eval_op, eval_summary_op = evaluate(output, y) 102 | 103 | saver = tf.train.Saver(max_to_keep=100) 104 | 105 | sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)) 106 | 107 | summary_writer = tf.train.SummaryWriter("imdb_bnlstm_logs/", 108 | graph=sess.graph) 109 | 110 | init_op = tf.initialize_all_variables() 111 | 112 | sess.run(init_op) 113 | 114 | for epoch in range(training_epochs): 115 | 116 | avg_cost = 0. 117 | total_batch = int(data.train.num_examples/batch_size) 118 | print "Total of %d minbatches in epoch %d" % (total_batch, epoch) 119 | # Loop over all batches 120 | for i in range(total_batch): 121 | minibatch_x, minibatch_y = data.train.minibatch(batch_size) 122 | # Fit training using batch data 123 | _, new_cost, train_summary = sess.run([train_op, cost, train_loss_summary_op], feed_dict={x: minibatch_x, y: minibatch_y, phase_train: True}) 124 | summary_writer.add_summary(train_summary, sess.run(global_step)) 125 | # Compute average loss 126 | avg_cost += new_cost/total_batch 127 | print "Training cost for batch %d in epoch %d was:" % (i, epoch), new_cost 128 | # Display logs per epoch step 129 | if epoch % display_step == 0: 130 | print "Epoch:", '%04d' % (epoch+1), "cost =", "{:.9f}".format(avg_cost) 131 | val_x, val_y = data.val.minibatch(data.val.num_examples) 132 | val_accuracy, val_summary, val_loss_summary = sess.run([eval_op, eval_summary_op, val_loss_summary_op], feed_dict={x: val_x, y: val_y, phase_train: False}) 133 | summary_writer.add_summary(val_summary, sess.run(global_step)) 134 | summary_writer.add_summary(val_loss_summary, sess.run(global_step)) 135 | print "Validation Accuracy:", val_accuracy 136 | 137 | saver.save(sess, "imdb_bnlstm_logs/model-checkpoint-" + '%04d' % (epoch+1), global_step=global_step) 138 | 139 | 140 | print "Optimization Finished!" 
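# Note on training() above: gradients are clipped element-wise to [-10, 10]
# with tf.clip_by_value before apply_gradients, a common safeguard against
# exploding gradients in recurrent networks. A rough modern equivalent (a
# hypothetical PyTorch sketch, not part of this archive) would be calling
# torch.nn.utils.clip_grad_value_(model.parameters(), clip_value=10.0)
# between loss.backward() and optimizer.step().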
141 | -------------------------------------------------------------------------------- /first_edition_archive/archive/imdb_lstm.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from lstm import LSTMCell 3 | import read_imdb_data as data 4 | 5 | training_epochs = 1000 6 | batch_size = 32 7 | display_step = 1 8 | 9 | def embedding_layer(input, weight_shape): 10 | weight_init = tf.random_normal_initializer(stddev=(1.0/weight_shape[0])**0.5) 11 | E = tf.get_variable("E", weight_shape, 12 | initializer=weight_init) 13 | # E_exp = tf.expand_dims(E, 0) 14 | # E_tiled= tf.tile(E_exp, [32, 1, 1]) 15 | # return tf.batch_matmul(input, E_exp) 16 | incoming = tf.cast(input, tf.int32) 17 | embeddings = tf.nn.embedding_lookup(E, incoming) 18 | return embeddings 19 | 20 | def lstm(input, hidden_dim, keep_prob, phase_train): 21 | lstm = tf.nn.rnn_cell.BasicLSTMCell(hidden_dim) 22 | dropout_lstm = tf.nn.rnn_cell.DropoutWrapper(lstm, input_keep_prob=keep_prob, output_keep_prob=keep_prob) 23 | # stacked_lstm = tf.nn.rnn_cell.MultiRNNCell([dropout_lstm] * 2, state_is_tuple=True) 24 | lstm_outputs, state = tf.nn.dynamic_rnn(dropout_lstm, input, dtype=tf.float32) 25 | #return tf.squeeze(tf.slice(lstm_outputs, [0, tf.shape(lstm_outputs)[1]-1, 0], [tf.shape(lstm_outputs)[0], 1, tf.shape(lstm_outputs)[2]])) 26 | return tf.reduce_max(lstm_outputs, reduction_indices=[1]) 27 | 28 | def layer_batch_norm(x, n_out, phase_train): 29 | beta_init = tf.constant_initializer(value=0.0, dtype=tf.float32) 30 | gamma_init = tf.constant_initializer(value=1.0, dtype=tf.float32) 31 | 32 | beta = tf.get_variable("beta", [n_out], initializer=beta_init) 33 | gamma = tf.get_variable("gamma", [n_out], initializer=gamma_init) 34 | 35 | batch_mean, batch_var = tf.nn.moments(x, [0], name='moments') 36 | ema = tf.train.ExponentialMovingAverage(decay=0.9) 37 | ema_apply_op = ema.apply([batch_mean, batch_var]) 38 | ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var) 39 | def mean_var_with_update(): 40 | with tf.control_dependencies([ema_apply_op]): 41 | return tf.identity(batch_mean), tf.identity(batch_var) 42 | mean, var = tf.cond(phase_train, 43 | mean_var_with_update, 44 | lambda: (ema_mean, ema_var)) 45 | 46 | reshaped_x = tf.reshape(x, [-1, 1, 1, n_out]) 47 | normed = tf.nn.batch_norm_with_global_normalization(reshaped_x, mean, var, 48 | beta, gamma, 1e-3, True) 49 | return tf.reshape(normed, [-1, n_out]) 50 | 51 | def layer(input, weight_shape, bias_shape, phase_train): 52 | weight_init = tf.random_normal_initializer(stddev=(1.0/weight_shape[0])**0.5) 53 | bias_init = tf.constant_initializer(value=0) 54 | W = tf.get_variable("W", weight_shape, 55 | initializer=weight_init) 56 | b = tf.get_variable("b", bias_shape, 57 | initializer=bias_init) 58 | logits = tf.matmul(input, W) + b 59 | return tf.nn.sigmoid(layer_batch_norm(logits, weight_shape[1], phase_train)) 60 | 61 | def inference(input, phase_train): 62 | embedding = embedding_layer(input, [30000, 512]) 63 | lstm_output = lstm(embedding, 512, 0.5, phase_train) 64 | output = layer(lstm_output, [512, 2], [2], phase_train) 65 | return output 66 | 67 | def loss(output, y): 68 | xentropy = tf.nn.softmax_cross_entropy_with_logits(output, y) 69 | loss = tf.reduce_mean(xentropy) 70 | train_loss_summary_op = tf.scalar_summary("train_cost", loss) 71 | val_loss_summary_op = tf.scalar_summary("val_cost", loss) 72 | return loss, train_loss_summary_op, val_loss_summary_op 73 | 74 | def training(cost, global_step): 
75 | optimizer = tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-08, 76 | use_locking=False, name='Adam') 77 | train_op = optimizer.minimize(cost, global_step=global_step) 78 | return train_op 79 | 80 | def evaluate(output, y): 81 | correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1)) 82 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 83 | accuracy_summary_op = tf.scalar_summary("accuracy", accuracy) 84 | return accuracy, accuracy_summary_op 85 | 86 | if __name__ == '__main__': 87 | 88 | with tf.Graph().as_default(): 89 | with tf.device('/gpu:0'): 90 | x = tf.placeholder("float", [None, 500]) 91 | y = tf.placeholder("float", [None, 2]) 92 | phase_train = tf.placeholder(tf.bool) 93 | 94 | output = inference(x, phase_train) 95 | 96 | cost, train_loss_summary_op, val_loss_summary_op = loss(output, y) 97 | 98 | global_step = tf.Variable(0, name='global_step', trainable=False) 99 | 100 | train_op = training(cost, global_step) 101 | 102 | eval_op, eval_summary_op = evaluate(output, y) 103 | 104 | saver = tf.train.Saver(max_to_keep=100) 105 | 106 | sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)) 107 | 108 | summary_writer = tf.train.SummaryWriter("imdb_lstm_logs/", 109 | graph=sess.graph) 110 | 111 | init_op = tf.initialize_all_variables() 112 | 113 | sess.run(init_op) 114 | 115 | for epoch in range(training_epochs): 116 | 117 | avg_cost = 0. 118 | total_batch = int(data.train.num_examples/batch_size) 119 | print "Total of %d minbatches in epoch %d" % (total_batch, epoch) 120 | # Loop over all batches 121 | for i in range(total_batch): 122 | minibatch_x, minibatch_y = data.train.minibatch(batch_size) 123 | # Fit training using batch data 124 | _, new_cost, train_summary = sess.run([train_op, cost, train_loss_summary_op], feed_dict={x: minibatch_x, y: minibatch_y, phase_train: True}) 125 | summary_writer.add_summary(train_summary, sess.run(global_step)) 126 | # Compute average loss 127 | avg_cost += new_cost/total_batch 128 | print "Training cost for batch %d in epoch %d was:" % (i, epoch), new_cost 129 | if i % 100 == 0: 130 | print "Epoch:", '%04d' % (epoch+1), "Minibatch:", '%04d' % (i+1), "cost =", "{:.9f}".format((avg_cost * total_batch)/(i+1)) 131 | val_x, val_y = data.val.minibatch(data.val.num_examples) 132 | val_accuracy, val_summary, val_loss_summary = sess.run([eval_op, eval_summary_op, val_loss_summary_op], feed_dict={x: val_x, y: val_y, phase_train: False}) 133 | summary_writer.add_summary(val_summary, sess.run(global_step)) 134 | summary_writer.add_summary(val_loss_summary, sess.run(global_step)) 135 | print "Validation Accuracy:", val_accuracy 136 | 137 | saver.save(sess, "imdb_lstm_logs/model-checkpoint-" + '%04d' % (epoch+1), global_step=global_step) 138 | # Display logs per epoch step 139 | # if epoch % display_step == 0: 140 | # print "Epoch:", '%04d' % (epoch+1), "cost =", "{:.9f}".format(avg_cost) 141 | # val_x, val_y = data.val.minibatch(data.val.num_examples) 142 | # val_accuracy, val_summary, val_loss_summary = sess.run([eval_op, eval_summary_op, val_loss_summary_op], feed_dict={x: val_x, y: val_y, phase_train: False}) 143 | # summary_writer.add_summary(val_summary, sess.run(global_step)) 144 | # summary_writer.add_summary(val_loss_summary, sess.run(global_step)) 145 | # print "Validation Accuracy:", val_accuracy 146 | # 147 | # saver.save(sess, "imdb_lstm_logs/model-checkpoint-" + '%04d' % (epoch+1), global_step=global_step) 148 | 149 | 150 | print 
"Optimization Finished!" 151 | -------------------------------------------------------------------------------- /first_edition_archive/archive/input_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Functions for downloading and reading MNIST data.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | import gzip 20 | import os 21 | import tensorflow.python.platform 22 | import numpy 23 | from six.moves import urllib 24 | from six.moves import xrange # pylint: disable=redefined-builtin 25 | import tensorflow as tf 26 | SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/' 27 | def maybe_download(filename, work_directory): 28 | """Download the data from Yann's website, unless it's already here.""" 29 | if not os.path.exists(work_directory): 30 | os.mkdir(work_directory) 31 | filepath = os.path.join(work_directory, filename) 32 | if not os.path.exists(filepath): 33 | filepath, _ = urllib.request.urlretrieve(SOURCE_URL + filename, filepath) 34 | statinfo = os.stat(filepath) 35 | print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') 36 | return filepath 37 | def _read32(bytestream): 38 | dt = numpy.dtype(numpy.uint32).newbyteorder('>') 39 | return numpy.frombuffer(bytestream.read(4), dtype=dt)[0] 40 | def extract_images(filename): 41 | """Extract the images into a 4D uint8 numpy array [index, y, x, depth].""" 42 | print('Extracting', filename) 43 | with gzip.open(filename) as bytestream: 44 | magic = _read32(bytestream) 45 | if magic != 2051: 46 | raise ValueError( 47 | 'Invalid magic number %d in MNIST image file: %s' % 48 | (magic, filename)) 49 | num_images = _read32(bytestream) 50 | rows = _read32(bytestream) 51 | cols = _read32(bytestream) 52 | buf = bytestream.read(rows * cols * num_images) 53 | data = numpy.frombuffer(buf, dtype=numpy.uint8) 54 | data = data.reshape(num_images, rows, cols, 1) 55 | return data 56 | def dense_to_one_hot(labels_dense, num_classes=10): 57 | """Convert class labels from scalars to one-hot vectors.""" 58 | num_labels = labels_dense.shape[0] 59 | index_offset = numpy.arange(num_labels) * num_classes 60 | labels_one_hot = numpy.zeros((num_labels, num_classes)) 61 | labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1 62 | return labels_one_hot 63 | def extract_labels(filename, one_hot=False): 64 | """Extract the labels into a 1D uint8 numpy array [index].""" 65 | print('Extracting', filename) 66 | with gzip.open(filename) as bytestream: 67 | magic = _read32(bytestream) 68 | if magic != 2049: 69 | raise ValueError( 70 | 'Invalid magic number %d in MNIST label file: %s' % 71 | (magic, filename)) 72 | num_items = _read32(bytestream) 73 | buf = bytestream.read(num_items) 74 | labels = 
numpy.frombuffer(buf, dtype=numpy.uint8) 75 | if one_hot: 76 | return dense_to_one_hot(labels) 77 | return labels 78 | class DataSet(object): 79 | def __init__(self, images, labels, fake_data=False, one_hot=False, 80 | dtype=tf.float32): 81 | """Construct a DataSet. 82 | one_hot arg is used only if fake_data is true. `dtype` can be either 83 | `uint8` to leave the input as `[0, 255]`, or `float32` to rescale into 84 | `[0, 1]`. 85 | """ 86 | dtype = tf.as_dtype(dtype).base_dtype 87 | if dtype not in (tf.uint8, tf.float32): 88 | raise TypeError('Invalid image dtype %r, expected uint8 or float32' % 89 | dtype) 90 | if fake_data: 91 | self._num_examples = 10000 92 | self.one_hot = one_hot 93 | else: 94 | assert images.shape[0] == labels.shape[0], ( 95 | 'images.shape: %s labels.shape: %s' % (images.shape, 96 | labels.shape)) 97 | self._num_examples = images.shape[0] 98 | # Convert shape from [num examples, rows, columns, depth] 99 | # to [num examples, rows*columns] (assuming depth == 1) 100 | assert images.shape[3] == 1 101 | images = images.reshape(images.shape[0], 102 | images.shape[1] * images.shape[2]) 103 | if dtype == tf.float32: 104 | # Convert from [0, 255] -> [0.0, 1.0]. 105 | images = images.astype(numpy.float32) 106 | images = numpy.multiply(images, 1.0 / 255.0) 107 | self._images = images 108 | self._labels = labels 109 | self._epochs_completed = 0 110 | self._index_in_epoch = 0 111 | @property 112 | def images(self): 113 | return self._images 114 | @property 115 | def labels(self): 116 | return self._labels 117 | @property 118 | def num_examples(self): 119 | return self._num_examples 120 | @property 121 | def epochs_completed(self): 122 | return self._epochs_completed 123 | def next_batch(self, batch_size, fake_data=False): 124 | """Return the next `batch_size` examples from this data set.""" 125 | if fake_data: 126 | fake_image = [1] * 784 127 | if self.one_hot: 128 | fake_label = [1] + [0] * 9 129 | else: 130 | fake_label = 0 131 | return [fake_image for _ in xrange(batch_size)], [ 132 | fake_label for _ in xrange(batch_size)] 133 | start = self._index_in_epoch 134 | self._index_in_epoch += batch_size 135 | if self._index_in_epoch > self._num_examples: 136 | # Finished epoch 137 | self._epochs_completed += 1 138 | # Shuffle the data 139 | perm = numpy.arange(self._num_examples) 140 | numpy.random.shuffle(perm) 141 | self._images = self._images[perm] 142 | self._labels = self._labels[perm] 143 | # Start next epoch 144 | start = 0 145 | self._index_in_epoch = batch_size 146 | assert batch_size <= self._num_examples 147 | end = self._index_in_epoch 148 | return self._images[start:end], self._labels[start:end] 149 | def read_data_sets(train_dir, fake_data=False, one_hot=False, dtype=tf.float32): 150 | class DataSets(object): 151 | pass 152 | data_sets = DataSets() 153 | if fake_data: 154 | def fake(): 155 | return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype) 156 | data_sets.train = fake() 157 | data_sets.validation = fake() 158 | data_sets.test = fake() 159 | return data_sets 160 | TRAIN_IMAGES = 'train-images-idx3-ubyte.gz' 161 | TRAIN_LABELS = 'train-labels-idx1-ubyte.gz' 162 | TEST_IMAGES = 't10k-images-idx3-ubyte.gz' 163 | TEST_LABELS = 't10k-labels-idx1-ubyte.gz' 164 | VALIDATION_SIZE = 5000 165 | local_file = maybe_download(TRAIN_IMAGES, train_dir) 166 | train_images = extract_images(local_file) 167 | local_file = maybe_download(TRAIN_LABELS, train_dir) 168 | train_labels = extract_labels(local_file, one_hot=one_hot) 169 | local_file = 
maybe_download(TEST_IMAGES, train_dir) 170 | test_images = extract_images(local_file) 171 | local_file = maybe_download(TEST_LABELS, train_dir) 172 | test_labels = extract_labels(local_file, one_hot=one_hot) 173 | validation_images = train_images[:VALIDATION_SIZE] 174 | validation_labels = train_labels[:VALIDATION_SIZE] 175 | train_images = train_images[VALIDATION_SIZE:] 176 | train_labels = train_labels[VALIDATION_SIZE:] 177 | data_sets.train = DataSet(train_images, train_labels, dtype=dtype) 178 | data_sets.validation = DataSet(validation_images, validation_labels, 179 | dtype=dtype) 180 | data_sets.test = DataSet(test_images, test_labels, dtype=dtype) 181 | return data_sets -------------------------------------------------------------------------------- /first_edition_archive/archive/input_word_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import collections 21 | import math 22 | import os 23 | import random 24 | import zipfile 25 | 26 | import numpy as np 27 | from six.moves import urllib 28 | from six.moves import xrange # pylint: disable=redefined-builtin 29 | import tensorflow as tf 30 | 31 | # Step 1: Download the data. 32 | url = 'http://mattmahoney.net/dc/' 33 | 34 | def maybe_download(filename, expected_bytes): 35 | """Download a file if not present, and make sure it's the right size.""" 36 | if not os.path.exists(filename): 37 | filename, _ = urllib.request.urlretrieve(url + filename, filename) 38 | statinfo = os.stat(filename) 39 | if statinfo.st_size == expected_bytes: 40 | print('Found and verified', filename) 41 | else: 42 | print(statinfo.st_size) 43 | raise Exception( 44 | 'Failed to verify ' + filename + '. Can you get to it with a browser?') 45 | return filename 46 | 47 | filename = maybe_download('text8.zip', 31344016) 48 | 49 | 50 | # Read the data into a list of strings. 51 | def read_data(filename): 52 | """Extract the first file enclosed in a zip file as a list of words""" 53 | with zipfile.ZipFile(filename) as f: 54 | data = tf.compat.as_str(f.read(f.namelist()[0])).split() 55 | return data 56 | 57 | words = read_data(filename) 58 | data_size = len(words) 59 | print('Data size', data_size) 60 | 61 | # Step 2: Build the dictionary and replace rare words with UNK token. 
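# As a worked example (a hypothetical toy corpus, added for illustration):
# for words = ['the', 'quick', 'quick', 'the', 'the'], build_dataset below
# would return
#   count              -> [['UNK', 0], ('the', 3), ('quick', 2)]
#   dictionary         -> {'UNK': 0, 'the': 1, 'quick': 2}
#   data               -> [1, 2, 2, 1, 1]   (the corpus re-encoded as word ids)
#   reverse_dictionary -> {0: 'UNK', 1: 'the', 2: 'quick'}
# Any word outside the vocabulary_size most frequent words is mapped to UNK (id 0).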
62 | vocabulary_size = 10000 63 | 64 | def build_dataset(words): 65 | count = [['UNK', -1]] 66 | count.extend(collections.Counter(words).most_common(vocabulary_size - 1)) 67 | dictionary = dict() 68 | for word, _ in count: 69 | dictionary[word] = len(dictionary) 70 | data = list() 71 | unk_count = 0 72 | for word in words: 73 | if word in dictionary: 74 | index = dictionary[word] 75 | else: 76 | index = 0 # dictionary['UNK'] 77 | unk_count += 1 78 | data.append(index) 79 | count[0][1] = unk_count 80 | reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys())) 81 | return data, count, dictionary, reverse_dictionary 82 | 83 | data, count, dictionary, reverse_dictionary = build_dataset(words) 84 | del words # Hint to reduce memory. 85 | print('Most common words (+UNK)', count[:5]) 86 | print('Sample data', data[:10], [reverse_dictionary[i] for i in data[:10]]) 87 | 88 | data_index = 0 89 | 90 | 91 | # Step 3: Function to generate a training batch for the skip-gram model. 92 | def generate_batch(batch_size, num_skips, skip_window): 93 | global data_index 94 | assert batch_size % num_skips == 0 95 | assert num_skips <= 2 * skip_window 96 | batch = np.ndarray(shape=(batch_size), dtype=np.int32) 97 | labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32) 98 | span = 2 * skip_window + 1 # [ skip_window target skip_window ] 99 | buffer = collections.deque(maxlen=span) 100 | for _ in range(span): 101 | buffer.append(data[data_index]) 102 | data_index = (data_index + 1) % len(data) 103 | for i in range(batch_size // num_skips): 104 | target = skip_window # target label at the center of the buffer 105 | targets_to_avoid = [ skip_window ] 106 | for j in range(num_skips): 107 | while target in targets_to_avoid: 108 | target = random.randint(0, span - 1) 109 | targets_to_avoid.append(target) 110 | batch[i * num_skips + j] = buffer[skip_window] 111 | labels[i * num_skips + j, 0] = buffer[target] 112 | buffer.append(data[data_index]) 113 | data_index = (data_index + 1) % len(data) 114 | return batch, labels 115 | 116 | batch, labels = generate_batch(batch_size=8, num_skips=2, skip_window=1) 117 | for i in range(8): 118 | print(batch[i], reverse_dictionary[batch[i]], 119 | '->', labels[i, 0], reverse_dictionary[labels[i, 0]]) 120 | 121 | def plot_with_labels(low_dim_embs, labels, filename='tsne.png'): 122 | assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings" 123 | plt.figure(figsize=(18, 18)) #in inches 124 | for i, label in enumerate(labels): 125 | x, y = low_dim_embs[i,:] 126 | plt.scatter(x, y) 127 | plt.annotate(label, 128 | xy=(x, y), 129 | xytext=(5, 2), 130 | textcoords='offset points', 131 | ha='right', 132 | va='bottom') 133 | 134 | plt.savefig(filename) 135 | -------------------------------------------------------------------------------- /first_edition_archive/archive/linear_interpolation.py: -------------------------------------------------------------------------------- 1 | from fdl_examples.datatools import input_data 2 | mnist = input_data.read_data_sets("data/", one_hot=True) 3 | 4 | import tensorflow as tf 5 | import numpy as np 6 | from fdl_examples.chapter3.multilayer_perceptron_updated import inference, loss 7 | 8 | import matplotlib.pyplot as plt 9 | 10 | sess = tf.Session() 11 | 12 | x = tf.placeholder("float", [None, 784]) # mnist data image of shape 28*28=784 13 | y = tf.placeholder("float", [None, 10]) # 0-9 digits recognition => 10 classes 14 | 15 | with tf.variable_scope("mlp_model") as scope: 16 | 17 | output_opt = inference(x) 18 | cost_opt 
= loss(output_opt, y) 19 | 20 | saver = tf.train.Saver() 21 | 22 | scope.reuse_variables() 23 | 24 | var_list_opt = ["hidden_1/W", "hidden_1/b", "hidden_2/W", "hidden_2/b", "output/W", "output/b"] 25 | var_list_opt = [tf.get_variable(v) for v in var_list_opt] 26 | 27 | saver.restore(sess, "model-checkpoint-547800") 28 | 29 | 30 | with tf.variable_scope("mlp_init") as scope: 31 | 32 | output_rand = inference(x) 33 | cost_rand = loss(output_rand, y) 34 | 35 | scope.reuse_variables() 36 | 37 | var_list_rand = ["hidden_1/W", "hidden_1/b", "hidden_2/W", "hidden_2/b", "output/W", "output/b"] 38 | var_list_rand = [tf.get_variable(v) for v in var_list_rand] 39 | 40 | init_op = tf.initialize_variables(var_list_rand) 41 | 42 | sess.run(init_op) 43 | 44 | 45 | feed_dict = { 46 | x: mnist.test.images, 47 | y: mnist.test.labels, 48 | } 49 | 50 | print(sess.run([cost_opt, cost_rand], feed_dict=feed_dict)) 51 | 52 | with tf.variable_scope("mlp_inter") as scope: 53 | 54 | alpha = tf.placeholder("float", [1, 1]) 55 | 56 | h1_W_inter = var_list_opt[0] * (1 - alpha) + var_list_rand[0] * (alpha) 57 | h1_b_inter = var_list_opt[1] * (1 - alpha) + var_list_rand[1] * (alpha) 58 | h2_W_inter = var_list_opt[2] * (1 - alpha) + var_list_rand[2] * (alpha) 59 | h2_b_inter = var_list_opt[3] * (1 - alpha) + var_list_rand[3] * (alpha) 60 | o_W_inter = var_list_opt[4] * (1 - alpha) + var_list_rand[4] * (alpha) 61 | o_b_inter = var_list_opt[5] * (1 - alpha) + var_list_rand[5] * (alpha) 62 | 63 | h1_inter = tf.nn.relu(tf.matmul(x, h1_W_inter) + h1_b_inter) 64 | h2_inter = tf.nn.relu(tf.matmul(h1_inter, h2_W_inter) + h2_b_inter) 65 | o_inter = tf.nn.relu(tf.matmul(h2_inter, o_W_inter) + o_b_inter) 66 | 67 | cost_inter = loss(o_inter, y) 68 | tf.scalar_summary("interpolated_cost", cost_inter) 69 | 70 | 71 | summary_writer = tf.train.SummaryWriter("linear_interp_logs/", 72 | graph_def=sess.graph_def) 73 | summary_op = tf.merge_all_summaries() 74 | results = [] 75 | for a in np.arange(-2, 2, 0.01): 76 | feed_dict = { 77 | x: mnist.test.images, 78 | y: mnist.test.labels, 79 | alpha: [[a]], 80 | } 81 | 82 | cost, summary_str = sess.run([cost_inter, summary_op], feed_dict=feed_dict) 83 | summary_writer.add_summary(summary_str, (a + 2)/0.01) 84 | results.append(cost) 85 | 86 | plt.plot(np.arange(-2, 2, 0.01), results, 'ro') 87 | plt.ylabel('Incurred Error') 88 | plt.xlabel('Alpha') 89 | plt.show() 90 | 91 | 92 | -------------------------------------------------------------------------------- /first_edition_archive/archive/logistic_network-[THEANO]/logistic_network.py: -------------------------------------------------------------------------------- 1 | """ 2 | We will use this class to represent a simple logistic regression 3 | classifier. We'll represent this in Theano as a neural network 4 | with no hidden layers. This is our first attempt at building a 5 | neural network model to solve interesting problems. Here, we'll 6 | use this class to crack the MNIST handwritten digit dataset problem, 7 | but this class has been constructed so that it can be reappropriated 8 | to any use! 9 | 10 | References: 11 | - textbooks: "Pattern Recognition and Machine Learning", Christopher M. Bishop, section 4.3.2 12 | - websites: http://deeplearning.net/tutorial, Lisa Lab 13 | """ 14 | 15 | import numpy as np 16 | import theano.tensor as T 17 | import theano 18 | 19 | class LogisticNetwork(object): 20 | """ 21 | The logistic regression class is described by two parameters (which 22 | we will want to learn). The first is a weight matrix. 
We'll refer to 23 | this weight matrix as W. The second is a bias vector b. Refer to the 24 | text if you want to learn more about how this network works. Let's get 25 | started! 26 | """ 27 | 28 | def __init__(self, input, input_dim, output_dim): 29 | """ 30 | We first initialize the logistic network object with some important 31 | information. 32 | 33 | PARAM input : theano.tensor.TensorType 34 | A symbolic variable that we'll use to represent one minibatch of our 35 | dataset 36 | 37 | PARAM input_dim : int 38 | This will represent the number of input neurons in our model 39 | 40 | PARAM ouptut_dim : int 41 | This will represent the number of neurons in the output layer (i.e. 42 | the number of possible classifications for the input) 43 | """ 44 | 45 | # We initialize the weight matrix W of size (input_dim, output_dim) 46 | self.W = theano.shared( 47 | value=np.zeros((input_dim, output_dim)), 48 | name='W', 49 | borrow=True 50 | ) 51 | 52 | # We initialize a bias vector for the neurons of the output layer 53 | self.b = theano.shared( 54 | value=np.zeros(output_dim), 55 | name='b', 56 | borrow='True' 57 | ) 58 | 59 | # Symbolic description of how to compute class membership probabilities 60 | self.output = T.nnet.softmax(T.dot(input, self.W) + self.b) 61 | 62 | # Symbolic description of the final prediction 63 | self.predicted = T.argmax(self.output, axis=1) 64 | 65 | def logistic_network_cost(self, y, lambda_l2=0): 66 | """ 67 | Here we express the cost incurred by an example given the correct 68 | distribution 69 | 70 | PARAM y : theano.tensor.TensorType 71 | These are the correct answers, and we compute the cost with 72 | respect to this ground truth (over the entire minibatch). This 73 | means that y is of size (minibatch_size, output_dim) 74 | 75 | PARAM lambda : float 76 | This is the L2 regularization parameter that we use to penalize large 77 | values for components of W, thus discouraging potential overfitting 78 | """ 79 | # Calculate the log probabilities of the softmax output 80 | log_probabilities = T.log(self.output) 81 | 82 | # We use these log probabilities to compute the negative log likelihood 83 | negative_log_likelihood = -T.mean(log_probabilities[T.arange(y.shape[0]), y]) 84 | 85 | # Compute the L2 regularization component of the cost function 86 | l2_regularization = lambda_l2 * (self.W ** 2).sum() 87 | 88 | # Return a symbolic description of the cost function 89 | return negative_log_likelihood + l2_regularization 90 | 91 | def error_rate(self, y): 92 | """ 93 | Here we return the error rate of the model over a set of given labels 94 | (perhaps in a minibatch, in the validation set, or the test set) 95 | 96 | PARAM y : theano.tensor.TensorType 97 | These are the correct answers, and we compute the cost with 98 | respect to this ground truth (over the entire minibatch). 
This 99 | means that y is of size (minibatch_size, output_dim) 100 | """ 101 | 102 | # Make sure y is of the correct dimension 103 | assert y.ndim == self.predicted.ndim 104 | 105 | # Make sure that y contains values of the correct data type (ints) 106 | assert y.dtype.startswith('int') 107 | 108 | # Return the error rate on the data 109 | return T.mean(T.neq(self.predicted, y)) 110 | 111 | 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /first_edition_archive/archive/logistic_network-[THEANO]/mnist_logistic_sgd.py: -------------------------------------------------------------------------------- 1 | """ 2 | We'll now use the LogisticNetwork object we built in logistic_network.py in 3 | order to tackle the MNIST dataset challenge. We will use minibatch gradient 4 | descent to train this simplistic network model. 5 | 6 | References: 7 | - textbooks: "Pattern Recognition and Machine Learning", Christopher M. Bishop, section 4.3.2 8 | - websites: http://deeplearning.net/tutorial, Lisa Lab 9 | """ 10 | 11 | __docformat__ = 'restructedtext en' 12 | 13 | import cPickle 14 | import gzip 15 | import os 16 | import time 17 | import urllib 18 | from theano import function, shared, config 19 | import theano.tensor as T 20 | import numpy as np 21 | import logistic_network 22 | 23 | 24 | # Let's start off by defining some constants 25 | # EXPERIMENT!!! Play around the the learning rate! 26 | LEARNING_RATE = 0.2 27 | N_EPOCHS = 1000 28 | DATASET = 'mnist.pkl.gz' 29 | BATCH_SIZE = 600 30 | 31 | # Time to check if we have the data and if we don't, let's download it 32 | print "... LOADING DATA ..." 33 | 34 | data_path = os.path.join( 35 | os.path.split(__file__)[0], 36 | "..", 37 | "data", 38 | DATASET 39 | ) 40 | 41 | if (not os.path.isfile(data_path)): 42 | import urllib 43 | origin = ( 44 | 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz' 45 | ) 46 | print 'Downloading data from %s' % origin 47 | urllib.urlretrieve(origin, data_path) 48 | 49 | # Time to build our models 50 | print "... BUILDING MODEL ..." 51 | 52 | # Load the dataset 53 | data_file = gzip.open(data_path, 'rb') 54 | training_set, validation_set, test_set = cPickle.load(data_file) 55 | data_file.close() 56 | 57 | # Define a quick function to established a shared dataset (for efficiency) 58 | 59 | def shared_dataset(data_xy): 60 | """ 61 | We store the data in a shared variable because it allows Theano to copy it 62 | into GPU memory (if GPU utilization is enabled). By default, if a variable is 63 | not shared, it is moved to GPU at every use. This results in a big performance 64 | hit because that means the data will be copied one minibatch at a time. Instead, 65 | if we use shared variables, we don't have to worry about copying data 66 | repeatedly. 
67 | """ 68 | 69 | data_x, data_y = data_xy 70 | shared_x = shared(np.asarray(data_x, dtype=config.floatX), borrow=True) 71 | shared_y = shared(np.asarray(data_y, dtype='int32'), borrow=True) 72 | return shared_x, shared_y 73 | 74 | # We now instantiate the shared datasets 75 | training_set_x , training_set_y = shared_dataset(training_set) 76 | validation_set_x, validation_set_y = shared_dataset(validation_set) 77 | test_set_x, test_set_y = shared_dataset(test_set) 78 | 79 | # Lets compute the number of minibatches for training, validation, and testing 80 | n_training_batches = training_set_x.get_value(borrow=True).shape[0] / BATCH_SIZE 81 | n_validation_batches = validation_set_x.get_value(borrow=True).shape[0] / BATCH_SIZE 82 | n_test_batches = test_set_x.get_value(borrow=True).shape[0] / BATCH_SIZE 83 | 84 | # Now it's time for us to build the model! 85 | #Let's start of with an index to the minibatch we're using 86 | index = T.lscalar() 87 | 88 | # Generate symbolic variables for the input (a minibatch) 89 | x = T.dmatrix('x') 90 | y = T.ivector('y') 91 | 92 | # Construct the logistic network model 93 | # Keep in mind MNIST image is of size (28, 28) 94 | # Also number of output class is is 10 (digits 0, 1, ..., 9) 95 | model = logistic_network.LogisticNetwork(input=x, input_dim=28*28, output_dim=10) 96 | 97 | # Obtain a symbolic expression for the objective function 98 | # EXPERIMENT!!! Play around with L2 regression parameter! 99 | objective = model.logistic_network_cost(y, lambda_l2=0.0001) 100 | 101 | # Obtain a symbolic expression for the error incurred 102 | error = model.error_rate(y) 103 | 104 | # Compute symbolic gradients of objective with respect to model parameters 105 | dW, db = T.grad(objective, model.W), T.grad(objective, model.b) 106 | 107 | # Compile theano function for training with a minibatch 108 | train_model = function( 109 | inputs=[index], 110 | outputs=objective, 111 | updates=[ 112 | (model.W, model.W - LEARNING_RATE * dW), 113 | (model.b, model.b - LEARNING_RATE * db) 114 | ], 115 | givens={ 116 | x : training_set_x[index * BATCH_SIZE : (index + 1) * BATCH_SIZE], 117 | y : training_set_y[index * BATCH_SIZE : (index + 1) * BATCH_SIZE] 118 | } 119 | ) 120 | 121 | # Compile theano functions for validation and testing 122 | validate_model = function( 123 | inputs=[index], 124 | outputs=error, 125 | givens={ 126 | x : validation_set_x[index * BATCH_SIZE : (index + 1) * BATCH_SIZE], 127 | y : validation_set_y[index * BATCH_SIZE : (index + 1) * BATCH_SIZE] 128 | } 129 | ) 130 | 131 | test_model = function( 132 | inputs=[index], 133 | outputs=error, 134 | givens={ 135 | x : test_set_x[index * BATCH_SIZE : (index + 1) * BATCH_SIZE], 136 | y : test_set_y[index * BATCH_SIZE : (index + 1) * BATCH_SIZE] 137 | } 138 | ) 139 | 140 | 141 | # Let's set up the early stopping parameters (based on the validation set) 142 | 143 | # Must look at this many examples no matter what 144 | patience = 5000 145 | 146 | # Wait this much longer if a new best is found 147 | patience_increase = 2 148 | 149 | # This is when an improvement is significant 150 | improvement_threshold = 0.995 151 | 152 | # We go through this number of minbatches before we check on the validation set 153 | validation_freq = min(n_training_batches, patience / 2) 154 | 155 | # We keep of the best loss on the validation set here 156 | best_loss = np.inf 157 | 158 | # We also keep track of the epoch we are in 159 | epoch = 0 160 | 161 | # A boolean flag that propagates when patience has been exceeded 162 | 
exceeded_patience = False 163 | 164 | # Now we're ready to start training the model 165 | print "... TRAINING MODEL ..." 166 | start_time = time.clock() 167 | while (epoch < N_EPOCHS) and not exceeded_patience: 168 | epoch = epoch + 1 169 | for minibatch_index in xrange(n_training_batches): 170 | minibatch_objective = train_model(minibatch_index) 171 | iteration = (epoch - 1) * n_training_batches + minibatch_index 172 | 173 | if (iteration + 1) % validation_freq == 0: 174 | # Compute loss on validation set 175 | validation_losses = [validate_model(i) for i in xrange(n_validation_batches)] 176 | validation_loss = np.mean(validation_losses) 177 | 178 | print 'epoch %i, minibatch %i/%i, validation error: %f %%' % ( 179 | epoch, 180 | minibatch_index + 1, 181 | n_training_batches, 182 | validation_loss * 100 183 | ) 184 | 185 | if validation_loss < best_loss: 186 | if validation_loss < best_loss * improvement_threshold: 187 | patience = max(patience, iteration * patience_increase) 188 | best_loss = validation_loss 189 | 190 | if patience <= iteration: 191 | exceeded_patience = True 192 | break 193 | end_time = time.clock() 194 | 195 | # Let's compute how well we do on the test set 196 | test_losses = [test_model(i) for i in xrange(n_test_batches)] 197 | test_loss = np.mean(test_losses) 198 | 199 | # Print out the results! 200 | print '\n' 201 | print 'Optimization complete with best validation score of %f %%' % (best_loss * 100) 202 | print 'And with a test score of %f %%' % (test_loss * 100) 203 | print '\n' 204 | print 'The code ran for %d epochs and for a total time of %.1f seconds' % (epoch, end_time - start_time) 205 | print '\n' 206 | 207 | -------------------------------------------------------------------------------- /first_edition_archive/archive/logistic_regression.py: -------------------------------------------------------------------------------- 1 | import input_data 2 | mnist = input_data.read_data_sets("data/", one_hot=True) 3 | 4 | import tensorflow as tf 5 | import time 6 | 7 | 8 | # Parameters 9 | learning_rate = 0.01 10 | training_epochs = 60 11 | batch_size = 100 12 | display_step = 1 13 | 14 | def inference(x): 15 | init = tf.constant_initializer(value=0) 16 | W = tf.get_variable("W", [784, 10], 17 | initializer=init) 18 | b = tf.get_variable("b", [10], 19 | initializer=init) 20 | output = tf.nn.softmax(tf.matmul(x, W) + b) 21 | 22 | w_hist = tf.histogram_summary("weights", W) 23 | b_hist = tf.histogram_summary("biases", b) 24 | y_hist = tf.histogram_summary("output", output) 25 | 26 | return output 27 | 28 | def loss(output, y): 29 | dot_product = y * tf.log(output) 30 | 31 | # Reduction along axis 0 collapses each column into a single 32 | # value, whereas reduction along axis 1 collapses each row 33 | # into a single value. In general, reduction along axis i 34 | # collapses the ith dimension of a tensor to size 1. 
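# For example (a hypothetical 2x3 tensor, added for illustration):
#   t = [[1., 2., 3.],
#        [4., 5., 6.]]
#   reduce_sum(t, 0) -> [5., 7., 9.]   (each column collapsed to one value)
#   reduce_sum(t, 1) -> [6., 15.]      (each row collapsed to one value)
# Here dot_product has shape [batch_size, 10], so reducing along axis 1
# yields one cross-entropy value per example in the minibatch.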
35 | xentropy = -tf.reduce_sum(dot_product, reduction_indices=1) 36 | 37 | loss = tf.reduce_mean(xentropy) 38 | 39 | return loss 40 | 41 | def training(cost, global_step): 42 | 43 | tf.scalar_summary("cost", cost) 44 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 45 | train_op = optimizer.minimize(cost, global_step=global_step) 46 | 47 | return train_op 48 | 49 | 50 | def evaluate(output, y): 51 | correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1)) 52 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 53 | 54 | tf.scalar_summary("validation error", (1.0 - accuracy)) 55 | 56 | return accuracy 57 | 58 | if __name__ == '__main__': 59 | 60 | with tf.Graph().as_default(): 61 | 62 | x = tf.placeholder("float", [None, 784]) # mnist data image of shape 28*28=784 63 | y = tf.placeholder("float", [None, 10]) # 0-9 digits recognition => 10 classes 64 | 65 | 66 | output = inference(x) 67 | 68 | cost = loss(output, y) 69 | 70 | global_step = tf.Variable(0, name='global_step', trainable=False) 71 | 72 | train_op = training(cost, global_step) 73 | 74 | eval_op = evaluate(output, y) 75 | 76 | summary_op = tf.merge_all_summaries() 77 | 78 | saver = tf.train.Saver() 79 | 80 | sess = tf.Session() 81 | 82 | summary_writer = tf.train.SummaryWriter("logistic_logs/", 83 | graph_def=sess.graph_def) 84 | 85 | 86 | init_op = tf.initialize_all_variables() 87 | 88 | sess.run(init_op) 89 | 90 | 91 | # Training cycle 92 | for epoch in range(training_epochs): 93 | 94 | avg_cost = 0. 95 | total_batch = int(mnist.train.num_examples/batch_size) 96 | # Loop over all batches 97 | for i in range(total_batch): 98 | minibatch_x, minibatch_y = mnist.train.next_batch(batch_size) 99 | # Fit training using batch data 100 | sess.run(train_op, feed_dict={x: minibatch_x, y: minibatch_y}) 101 | # Compute average loss 102 | avg_cost += sess.run(cost, feed_dict={x: minibatch_x, y: minibatch_y})/total_batch 103 | # Display logs per epoch step 104 | if epoch % display_step == 0: 105 | print("Epoch:", '%04d' % (epoch+1), "cost =", "{:.9f}".format(avg_cost)) 106 | 107 | accuracy = sess.run(eval_op, feed_dict={x: mnist.validation.images, y: mnist.validation.labels}) 108 | 109 | print("Validation Error:", (1 - accuracy)) 110 | 111 | summary_str = sess.run(summary_op, feed_dict={x: minibatch_x, y: minibatch_y}) 112 | summary_writer.add_summary(summary_str, sess.run(global_step)) 113 | 114 | saver.save(sess, "logistic_logs/model-checkpoint", global_step=global_step) 115 | 116 | 117 | print("Optimization Finished!") 118 | 119 | 120 | accuracy = sess.run(eval_op, feed_dict={x: mnist.test.images, y: mnist.test.labels}) 121 | 122 | print("Test Accuracy:", accuracy) 123 | -------------------------------------------------------------------------------- /first_edition_archive/archive/lstm.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import tensorflow as tf 4 | from tensorflow.python.ops.rnn_cell import RNNCell 5 | 6 | class LSTMCell(RNNCell): 7 | '''Vanilla LSTM implemented with same initializations as BN-LSTM''' 8 | def __init__(self, num_units): 9 | self.num_units = num_units 10 | 11 | @property 12 | def state_size(self): 13 | return (self.num_units, self.num_units) 14 | 15 | @property 16 | def output_size(self): 17 | return self.num_units 18 | 19 | def __call__(self, x, state, scope=None): 20 | with tf.variable_scope(scope or type(self).__name__): 21 | c, h = state 22 | 23 | # Keep W_xh and W_hh separate here as well 
to reuse initialization methods 24 | x_size = x.get_shape().as_list()[1] 25 | print x.get_shape().as_list() 26 | W_xh = tf.get_variable('W_xh', 27 | [x_size, 4 * self.num_units], 28 | initializer=orthogonal_initializer()) 29 | W_hh = tf.get_variable('W_hh', 30 | [self.num_units, 4 * self.num_units], 31 | initializer=bn_lstm_identity_initializer(0.95)) 32 | bias = tf.get_variable('bias', [4 * self.num_units]) 33 | 34 | # hidden = tf.matmul(x, W_xh) + tf.matmul(h, W_hh) + bias 35 | # improve speed by concat. 36 | concat = tf.concat(1, [x, h]) 37 | W_both = tf.concat(0, [W_xh, W_hh]) 38 | hidden = tf.matmul(concat, W_both) + bias 39 | 40 | i, j, f, o = tf.split(1, 4, hidden) 41 | 42 | new_c = c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j) 43 | new_h = tf.tanh(new_c) * tf.sigmoid(o) 44 | 45 | return new_h, (new_c, new_h) 46 | 47 | class BNLSTMCell(RNNCell): 48 | '''Batch normalized LSTM as described in arxiv.org/abs/1603.09025''' 49 | def __init__(self, num_units, training): 50 | self.num_units = num_units 51 | self.training = training 52 | 53 | @property 54 | def state_size(self): 55 | return (self.num_units, self.num_units) 56 | 57 | @property 58 | def output_size(self): 59 | return self.num_units 60 | 61 | def __call__(self, x, state, scope=None): 62 | with tf.variable_scope(scope or type(self).__name__): 63 | c, h = state 64 | 65 | x_size = x.get_shape().as_list()[1] 66 | W_xh = tf.get_variable('W_xh', 67 | [x_size, 4 * self.num_units], 68 | initializer=orthogonal_initializer()) 69 | W_hh = tf.get_variable('W_hh', 70 | [self.num_units, 4 * self.num_units], 71 | initializer=bn_lstm_identity_initializer(0.95)) 72 | bias = tf.get_variable('bias', [4 * self.num_units]) 73 | 74 | xh = tf.matmul(x, W_xh) 75 | hh = tf.matmul(h, W_hh) 76 | 77 | bn_xh = batch_norm(xh, 'xh', self.training) 78 | bn_hh = batch_norm(hh, 'hh', self.training) 79 | 80 | hidden = bn_xh + bn_hh + bias 81 | 82 | i, j, f, o = tf.split(1, 4, hidden) 83 | 84 | new_c = c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j) 85 | bn_new_c = batch_norm(new_c, 'c', self.training) 86 | 87 | new_h = tf.tanh(bn_new_c) * tf.sigmoid(o) 88 | 89 | return new_h, (new_c, new_h) 90 | 91 | def orthogonal(shape): 92 | flat_shape = (shape[0], np.prod(shape[1:])) 93 | a = np.random.normal(0.0, 1.0, flat_shape) 94 | u, _, v = np.linalg.svd(a, full_matrices=False) 95 | q = u if u.shape == flat_shape else v 96 | return q.reshape(shape) 97 | 98 | def bn_lstm_identity_initializer(scale): 99 | def _initializer(shape, dtype=tf.float32, partition_info=None): 100 | '''Ugly cause LSTM params calculated in one matrix multiply''' 101 | size = shape[0] 102 | # gate (j) is identity 103 | t = np.zeros(shape) 104 | t[:, size:size * 2] = np.identity(size) * scale 105 | t[:, :size] = orthogonal([size, size]) 106 | t[:, size * 2:size * 3] = orthogonal([size, size]) 107 | t[:, size * 3:] = orthogonal([size, size]) 108 | return tf.constant(t, dtype) 109 | 110 | return _initializer 111 | 112 | def orthogonal_initializer(): 113 | def _initializer(shape, dtype=tf.float32, partition_info=None): 114 | return tf.constant(orthogonal(shape), dtype) 115 | return _initializer 116 | 117 | def batch_norm(x, name_scope, training, epsilon=1e-3, decay=0.999): 118 | '''Assume 2d [batch, values] tensor''' 119 | 120 | with tf.variable_scope(name_scope): 121 | size = x.get_shape().as_list()[1] 122 | 123 | scale = tf.get_variable('scale', [size], initializer=tf.constant_initializer(0.1)) 124 | offset = tf.get_variable('offset', [size]) 125 | 126 | pop_mean = tf.get_variable('pop_mean', [size], 
initializer=tf.zeros_initializer, trainable=False) 127 | pop_var = tf.get_variable('pop_var', [size], initializer=tf.ones_initializer, trainable=False) 128 | batch_mean, batch_var = tf.nn.moments(x, [0]) 129 | 130 | train_mean_op = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay)) 131 | train_var_op = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay)) 132 | 133 | def batch_statistics(): 134 | with tf.control_dependencies([train_mean_op, train_var_op]): 135 | return tf.nn.batch_normalization(x, batch_mean, batch_var, offset, scale, epsilon) 136 | 137 | def population_statistics(): 138 | return tf.nn.batch_normalization(x, pop_mean, pop_var, offset, scale, epsilon) 139 | 140 | return tf.cond(training, batch_statistics, population_statistics) 141 | -------------------------------------------------------------------------------- /first_edition_archive/archive/multilayer_perceptron.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "'''\n", 12 | "A Multilayer Perceptron implementation example using TensorFlow library.\n", 13 | "This example is using the MNIST database of handwritten digits (http://yann.lecun.com/exdb/mnist/)\n", 14 | "\n", 15 | "Author: Aymeric Damien\n", 16 | "Project: https://github.com/aymericdamien/TensorFlow-Examples/\n", 17 | "'''\n", 18 | "\n", 19 | "# Import MINST data\n", 20 | "import input_data\n", 21 | "mnist = input_data.read_data_sets(\"/tmp/data/\", one_hot=True)\n", 22 | "\n", 23 | "import tensorflow as tf\n", 24 | "\n", 25 | "# Parameters\n", 26 | "learning_rate = 0.001\n", 27 | "training_epochs = 15\n", 28 | "batch_size = 100\n", 29 | "display_step = 1\n", 30 | "\n", 31 | "# Network Parameters\n", 32 | "n_hidden_1 = 256 # 1st layer num features\n", 33 | "n_hidden_2 = 256 # 2nd layer num features\n", 34 | "n_input = 784 # MNIST data input (img shape: 28*28)\n", 35 | "n_classes = 10 # MNIST total classes (0-9 digits)\n", 36 | "\n", 37 | "# tf Graph input\n", 38 | "x = tf.placeholder(\"float\", [None, n_input])\n", 39 | "y = tf.placeholder(\"float\", [None, n_classes])\n", 40 | "\n", 41 | "# Create model\n", 42 | "def multilayer_perceptron(_X, _weights, _biases):\n", 43 | " layer_1 = tf.nn.relu(tf.add(tf.matmul(_X, _weights['h1']), _biases['b1'])) #Hidden layer with RELU activation\n", 44 | " layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, _weights['h2']), _biases['b2'])) #Hidden layer with RELU activation\n", 45 | " return tf.matmul(layer_2, _weights['out']) + _biases['out']\n", 46 | "\n", 47 | "# Store layers weight & bias\n", 48 | "weights = {\n", 49 | " 'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),\n", 50 | " 'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),\n", 51 | " 'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))\n", 52 | "}\n", 53 | "biases = {\n", 54 | " 'b1': tf.Variable(tf.random_normal([n_hidden_1])),\n", 55 | " 'b2': tf.Variable(tf.random_normal([n_hidden_2])),\n", 56 | " 'out': tf.Variable(tf.random_normal([n_classes]))\n", 57 | "}\n", 58 | "\n", 59 | "# Construct model\n", 60 | "pred = multilayer_perceptron(x, weights, biases)\n", 61 | "\n", 62 | "# Define loss and optimizer\n", 63 | "cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y)) # Softmax loss\n", 64 | "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # Adam Optimizer\n", 65 | 
"\n", 66 | "# Initializing the variables\n", 67 | "init = tf.initialize_all_variables()\n", 68 | "\n", 69 | "# Launch the graph\n", 70 | "with tf.Session() as sess:\n", 71 | " sess.run(init)\n", 72 | "\n", 73 | " # Training cycle\n", 74 | " for epoch in range(training_epochs):\n", 75 | " avg_cost = 0.\n", 76 | " total_batch = int(mnist.train.num_examples/batch_size)\n", 77 | " # Loop over all batches\n", 78 | " for i in range(total_batch):\n", 79 | " batch_xs, batch_ys = mnist.train.next_batch(batch_size)\n", 80 | " # Fit training using batch data\n", 81 | " sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys})\n", 82 | " # Compute average loss\n", 83 | " avg_cost += sess.run(cost, feed_dict={x: batch_xs, y: batch_ys})/total_batch\n", 84 | " # Display logs per epoch step\n", 85 | " if epoch % display_step == 0:\n", 86 | " print \"Epoch:\", '%04d' % (epoch+1), \"cost=\", \"{:.9f}\".format(avg_cost)\n", 87 | "\n", 88 | " print \"Optimization Finished!\"\n", 89 | "\n", 90 | " # Test model\n", 91 | " correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))\n", 92 | " # Calculate accuracy\n", 93 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, \"float\"))\n", 94 | " print \"Accuracy:\", accuracy.eval({x: mnist.test.images, y: mnist.test.labels})\n" 95 | ] 96 | } 97 | ], 98 | "metadata": { 99 | "kernelspec": { 100 | "display_name": "Python 2", 101 | "language": "python", 102 | "name": "python2" 103 | }, 104 | "language_info": { 105 | "codemirror_mode": { 106 | "name": "ipython", 107 | "version": 2 108 | }, 109 | "file_extension": ".py", 110 | "mimetype": "text/x-python", 111 | "name": "python", 112 | "nbconvert_exporter": "python", 113 | "pygments_lexer": "ipython2", 114 | "version": "2.7.9" 115 | } 116 | }, 117 | "nbformat": 4, 118 | "nbformat_minor": 0 119 | } 120 | -------------------------------------------------------------------------------- /first_edition_archive/archive/multilayer_perceptron.py: -------------------------------------------------------------------------------- 1 | from fdl_examples.datatools import input_data 2 | mnist = input_data.read_data_sets("data/", one_hot=True) 3 | 4 | import tensorflow as tf 5 | import time, shutil, os 6 | 7 | # Architecture 8 | n_hidden_1 = 256 9 | n_hidden_2 = 256 10 | 11 | # Parameters 12 | learning_rate = 0.01 13 | training_epochs = 1000 14 | batch_size = 100 15 | display_step = 1 16 | 17 | def layer(input, weight_shape, bias_shape): 18 | weight_init = tf.random_normal_initializer(stddev=(2.0/weight_shape[0])**0.5) 19 | bias_init = tf.constant_initializer(value=0) 20 | W = tf.get_variable("W", weight_shape, 21 | initializer=weight_init) 22 | b = tf.get_variable("b", bias_shape, 23 | initializer=bias_init) 24 | return tf.nn.relu(tf.matmul(input, W) + b) 25 | 26 | def inference(x): 27 | with tf.variable_scope("hidden_1"): 28 | hidden_1 = layer(x, [784, n_hidden_1], [n_hidden_1]) 29 | 30 | with tf.variable_scope("hidden_2"): 31 | hidden_2 = layer(hidden_1, [n_hidden_1, n_hidden_2], [n_hidden_2]) 32 | 33 | with tf.variable_scope("output"): 34 | output = layer(hidden_2, [n_hidden_2, 10], [10]) 35 | 36 | return output 37 | 38 | def loss(output, y): 39 | xentropy = tf.nn.softmax_cross_entropy_with_logits(output, y) 40 | loss = tf.reduce_mean(xentropy) 41 | return loss 42 | 43 | def training(cost, global_step): 44 | tf.scalar_summary("cost", cost) 45 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 46 | train_op = optimizer.minimize(cost, global_step=global_step) 47 | return train_op 48 | 49 | 50 | def 
evaluate(output, y): 51 | correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1)) 52 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 53 | tf.scalar_summary("validation error", (1.0 - accuracy)) 54 | return accuracy 55 | 56 | if __name__ == '__main__': 57 | 58 | if os.path.exists("mlp_logs/"): 59 | shutil.rmtree("mlp_logs/") 60 | 61 | with tf.Graph().as_default(): 62 | 63 | with tf.variable_scope("mlp_model"): 64 | 65 | x = tf.placeholder("float", [None, 784]) # mnist data image of shape 28*28=784 66 | y = tf.placeholder("float", [None, 10]) # 0-9 digits recognition => 10 classes 67 | 68 | 69 | output = inference(x) 70 | 71 | cost = loss(output, y) 72 | 73 | global_step = tf.Variable(0, name='global_step', trainable=False) 74 | 75 | train_op = training(cost, global_step) 76 | 77 | eval_op = evaluate(output, y) 78 | 79 | summary_op = tf.merge_all_summaries() 80 | 81 | saver = tf.train.Saver() 82 | 83 | sess = tf.Session() 84 | 85 | summary_writer = tf.train.SummaryWriter("mlp_logs/", 86 | graph_def=sess.graph_def) 87 | 88 | 89 | init_op = tf.initialize_all_variables() 90 | 91 | sess.run(init_op) 92 | 93 | # saver.restore(sess, "mlp_logs/model-checkpoint-66000") 94 | 95 | 96 | # Training cycle 97 | for epoch in range(training_epochs): 98 | 99 | avg_cost = 0. 100 | total_batch = int(mnist.train.num_examples/batch_size) 101 | # Loop over all batches 102 | for i in range(total_batch): 103 | minibatch_x, minibatch_y = mnist.train.next_batch(batch_size) 104 | # Fit training using batch data 105 | sess.run(train_op, feed_dict={x: minibatch_x, y: minibatch_y}) 106 | # Compute average loss 107 | avg_cost += sess.run(cost, feed_dict={x: minibatch_x, y: minibatch_y})/total_batch 108 | # Display logs per epoch step 109 | if epoch % display_step == 0: 110 | print("Epoch:", '%04d' % (epoch+1), "cost =", "{:.9f}".format(avg_cost)) 111 | 112 | accuracy = sess.run(eval_op, feed_dict={x: mnist.validation.images, y: mnist.validation.labels}) 113 | 114 | print("Validation Error:", (1 - accuracy)) 115 | 116 | summary_str = sess.run(summary_op, feed_dict={x: minibatch_x, y: minibatch_y}) 117 | summary_writer.add_summary(summary_str, sess.run(global_step)) 118 | 119 | saver.save(sess, "mlp_logs/model-checkpoint", global_step=global_step) 120 | 121 | 122 | print("Optimization Finished!") 123 | 124 | 125 | accuracy = sess.run(eval_op, feed_dict={x: mnist.test.images, y: mnist.test.labels}) 126 | 127 | print("Test Accuracy:", accuracy) 128 | -------------------------------------------------------------------------------- /first_edition_archive/archive/neural_style/main.py: -------------------------------------------------------------------------------- 1 | ##################################################################################### 2 | # Code borrowed with permissions for reuse with modification from Anish Athalye # 3 | ##################################################################################### 4 | 5 | 6 | from stylize import * 7 | 8 | import numpy as np 9 | import scipy.misc 10 | 11 | import math 12 | from argparse import ArgumentParser 13 | 14 | # default arguments 15 | CONTENT_WEIGHT = 5e0 16 | STYLE_WEIGHT = 1e2 17 | TV_WEIGHT = 1e2 18 | LEARNING_RATE = 1e1 19 | STYLE_SCALE = 1.0 20 | ITERATIONS = 1000 21 | VGG_PATH = 'imagenet-vgg-verydeep-19.mat' 22 | 23 | 24 | def build_parser(): 25 | parser = ArgumentParser() 26 | parser.add_argument('--content', 27 | dest='content', help='content image', 28 | metavar='CONTENT', required=True) 29 | 
parser.add_argument('--styles', 30 | dest='styles', 31 | nargs='+', help='one or more style images', 32 | metavar='STYLE', required=True) 33 | parser.add_argument('--output', 34 | dest='output', help='output path', 35 | metavar='OUTPUT', required=True) 36 | parser.add_argument('--iterations', type=int, 37 | dest='iterations', help='iterations', 38 | metavar='ITERATIONS', default=ITERATIONS) 39 | parser.add_argument('--width', type=int, 40 | dest='width', help='output width', 41 | metavar='WIDTH') 42 | parser.add_argument('--style-scales', type=float, 43 | dest='style_scales', 44 | nargs='+', help='one or more style scales', 45 | metavar='STYLE_SCALE') 46 | parser.add_argument('--network', 47 | dest='network', help='path to network parameters', 48 | metavar='VGG_PATH', default=VGG_PATH) 49 | parser.add_argument('--content-weight', type=float, 50 | dest='content_weight', help='content weight', 51 | metavar='CONTENT_WEIGHT', default=CONTENT_WEIGHT) 52 | parser.add_argument('--style-weight', type=float, 53 | dest='style_weight', help='style weight', 54 | metavar='STYLE_WEIGHT', default=STYLE_WEIGHT) 55 | parser.add_argument('--style-blend-weights', type=float, 56 | dest='style_blend_weights', help='style blending weights', 57 | nargs='+', metavar='STYLE_BLEND_WEIGHT') 58 | parser.add_argument('--tv-weight', type=float, 59 | dest='tv_weight', help='total variation regularization weight', 60 | metavar='TV_WEIGHT', default=TV_WEIGHT) 61 | parser.add_argument('--learning-rate', type=float, 62 | dest='learning_rate', help='learning rate', 63 | metavar='LEARNING_RATE', default=LEARNING_RATE) 64 | parser.add_argument('--initial', 65 | dest='initial', help='initial image', 66 | metavar='INITIAL') 67 | parser.add_argument('--print-iterations', type=int, 68 | dest='print_iterations', help='statistics printing frequency', 69 | metavar='PRINT_ITERATIONS') 70 | parser.add_argument('--checkpoint-iterations', type=int, 71 | dest='checkpoint_iterations', help='checkpoint frequency', 72 | metavar='CHECKPOINT_ITERATIONS') 73 | return parser 74 | 75 | 76 | def main(): 77 | parser = build_parser() 78 | options = parser.parse_args() 79 | 80 | content_image = imread(options.content) 81 | style_images = [imread(style) for style in options.styles] 82 | 83 | width = options.width 84 | if width is not None: 85 | new_shape = (int(math.floor(float(content_image.shape[0]) / 86 | content_image.shape[1] * width)), width) 87 | content_image = scipy.misc.imresize(content_image, new_shape) 88 | target_shape = content_image.shape 89 | for i in range(len(style_images)): 90 | style_scale = STYLE_SCALE 91 | if options.style_scales is not None: 92 | style_scale = options.style_scales[i] 93 | style_images[i] = scipy.misc.imresize(style_images[i], style_scale * 94 | target_shape[1] / style_images[i].shape[1]) 95 | 96 | style_blend_weights = options.style_blend_weights 97 | if style_blend_weights is None: 98 | # default is equal weights 99 | style_blend_weights = [1.0/len(style_images) for _ in style_images] 100 | else: 101 | total_blend_weight = sum(style_blend_weights) 102 | style_blend_weights = [weight/total_blend_weight 103 | for weight in style_blend_weights] 104 | 105 | initial = options.initial 106 | if initial is not None: 107 | initial = scipy.misc.imresize(imread(initial), content_image.shape[:2]) 108 | 109 | image = stylize(options.network, initial, content_image, style_images, 110 | options.iterations, options.content_weight, options.style_weight, 111 | style_blend_weights, options.tv_weight, options.learning_rate, 112 | 
print_iterations=options.print_iterations, 113 | checkpoint_iterations=options.checkpoint_iterations) 114 | imsave(options.output, image) 115 | 116 | 117 | def imread(path): 118 | return scipy.misc.imread(path).astype(np.float) 119 | 120 | 121 | def imsave(path, img): 122 | img = np.clip(img, 0, 255).astype(np.uint8) 123 | scipy.misc.imsave(path, img) 124 | 125 | 126 | if __name__ == '__main__': 127 | main() -------------------------------------------------------------------------------- /first_edition_archive/archive/neural_style/stylize.py: -------------------------------------------------------------------------------- 1 | ##################################################################################### 2 | # Code borrowed with permissions for reuse with modification from Anish Athalye # 3 | ##################################################################################### 4 | 5 | import vgg 6 | 7 | import tensorflow as tf 8 | import numpy as np 9 | 10 | from sys import stderr 11 | 12 | CONTENT_LAYER = 'relu4_2' 13 | STYLE_LAYERS = ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1') 14 | 15 | 16 | def stylize(network, initial, content, styles, iterations, 17 | content_weight, style_weight, style_blend_weights, tv_weight, 18 | learning_rate, print_iterations=None, checkpoint_iterations=None): 19 | shape = (1,) + content.shape 20 | style_shapes = [(1,) + style.shape for style in styles] 21 | content_features = {} 22 | style_features = [{} for _ in styles] 23 | 24 | # compute content features in feedforward mode 25 | g = tf.Graph() 26 | with g.as_default(), g.device('/cpu:0'), tf.Session() as sess: 27 | image = tf.placeholder('float', shape=shape) 28 | net, mean_pixel = vgg.net(network, image) 29 | content_pre = np.array([vgg.preprocess(content, mean_pixel)]) 30 | content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval( 31 | feed_dict={image: content_pre}) 32 | 33 | # compute style features in feedforward mode 34 | for i in range(len(styles)): 35 | g = tf.Graph() 36 | with g.as_default(), g.device('/cpu:0'), tf.Session() as sess: 37 | image = tf.placeholder('float', shape=style_shapes[i]) 38 | net, _ = vgg.net(network, image) 39 | style_pre = np.array([vgg.preprocess(styles[i], mean_pixel)]) 40 | for layer in STYLE_LAYERS: 41 | features = net[layer].eval(feed_dict={image: style_pre}) 42 | features = np.reshape(features, (-1, features.shape[3])) 43 | gram = np.matmul(features.T, features) / features.size 44 | style_features[i][layer] = gram 45 | 46 | # make stylized image using backpropogation 47 | with tf.Graph().as_default(): 48 | if initial is None: 49 | noise = np.random.normal(size=shape, scale=np.std(content) * 0.1) 50 | initial = tf.random_normal(shape) * 0.256 51 | else: 52 | initial = np.array([vgg.preprocess(initial, mean_pixel)]) 53 | initial = initial.astype('float32') 54 | image = tf.Variable(initial) 55 | net, _ = vgg.net(network, image) 56 | 57 | # content loss 58 | content_loss = content_weight * (2 * tf.nn.l2_loss( 59 | net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) / 60 | content_features[CONTENT_LAYER].size) 61 | # style loss 62 | style_loss = 0 63 | for i in range(len(styles)): 64 | style_losses = [] 65 | for style_layer in STYLE_LAYERS: 66 | layer = net[style_layer] 67 | _, height, width, number = map(lambda i: i.value, layer.get_shape()) 68 | size = height * width * number 69 | feats = tf.reshape(layer, (-1, number)) 70 | gram = tf.matmul(tf.transpose(feats), feats) / size 71 | style_gram = style_features[i][style_layer] 72 | style_losses.append(2 * 
tf.nn.l2_loss(gram - style_gram) / style_gram.size) 73 | style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses) 74 | # total variation denoising 75 | tv_y_size = _tensor_size(image[:,1:,:,:]) 76 | tv_x_size = _tensor_size(image[:,:,1:,:]) 77 | tv_loss = tv_weight * 2 * ( 78 | (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) / 79 | tv_y_size) + 80 | (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) / 81 | tv_x_size)) 82 | # overall loss 83 | loss = content_loss + style_loss + tv_loss 84 | 85 | # optimizer setup 86 | train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss) 87 | 88 | def print_progress(i, last=False): 89 | if print_iterations is not None: 90 | if i is not None and i % print_iterations == 0 or last: 91 | print >> stderr, ' content loss: %g' % content_loss.eval() 92 | print >> stderr, ' style loss: %g' % style_loss.eval() 93 | print >> stderr, ' tv loss: %g' % tv_loss.eval() 94 | print >> stderr, ' total loss: %g' % loss.eval() 95 | 96 | # optimization 97 | best_loss = float('inf') 98 | best = None 99 | with tf.Session() as sess: 100 | sess.run(tf.initialize_all_variables()) 101 | for i in range(iterations): 102 | print_progress(i) 103 | print >> stderr, 'Iteration %d/%d' % (i + 1, iterations) 104 | train_step.run() 105 | if (checkpoint_iterations is not None and 106 | i % checkpoint_iterations == 0) or i == iterations - 1: 107 | this_loss = loss.eval() 108 | if this_loss < best_loss: 109 | best_loss = this_loss 110 | best = image.eval() 111 | print_progress(None, i == iterations - 1) 112 | return vgg.unprocess(best.reshape(shape[1:]), mean_pixel) 113 | 114 | 115 | def _tensor_size(tensor): 116 | from operator import mul 117 | return reduce(mul, (d.value for d in tensor.get_shape()), 1) -------------------------------------------------------------------------------- /first_edition_archive/archive/neural_style/vgg.py: -------------------------------------------------------------------------------- 1 | ##################################################################################### 2 | # Code borrowed with permissions for reuse with modification from Anish Athalye # 3 | ##################################################################################### 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | import scipy.io 8 | 9 | 10 | def net(data_path, input_image): 11 | layers = ( 12 | 'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1', 13 | 14 | 'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2', 15 | 16 | 'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 17 | 'relu3_3', 'conv3_4', 'relu3_4', 'pool3', 18 | 19 | 'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 20 | 'relu4_3', 'conv4_4', 'relu4_4', 'pool4', 21 | 22 | 'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 23 | 'relu5_3', 'conv5_4', 'relu5_4' 24 | ) 25 | 26 | data = scipy.io.loadmat(data_path) 27 | mean = data['normalization'][0][0][0] 28 | mean_pixel = np.mean(mean, axis=(0, 1)) 29 | weights = data['layers'][0] 30 | 31 | net = {} 32 | current = input_image 33 | for i, name in enumerate(layers): 34 | kind = name[:4] 35 | if kind == 'conv': 36 | kernels, bias = weights[i][0][0][0][0] 37 | # matconvnet: weights are [width, height, in_channels, out_channels] 38 | # tensorflow: weights are [height, width, in_channels, out_channels] 39 | kernels = np.transpose(kernels, (1, 0, 2, 3)) 40 | bias = bias.reshape(-1) 41 | current = _conv_layer(current, kernels, bias) 42 | elif kind == 'relu': 43 | current = tf.nn.relu(current) 44 | elif kind == 
'pool': 45 | current = _pool_layer(current) 46 | net[name] = current 47 | 48 | assert len(net) == len(layers) 49 | return net, mean_pixel 50 | 51 | 52 | def _conv_layer(input, weights, bias): 53 | conv = tf.nn.conv2d(input, tf.constant(weights), strides=(1, 1, 1, 1), 54 | padding='SAME') 55 | return tf.nn.bias_add(conv, bias) 56 | 57 | 58 | def _pool_layer(input): 59 | return tf.nn.max_pool(input, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1), 60 | padding='SAME') 61 | 62 | 63 | def preprocess(image, mean_pixel): 64 | return image - mean_pixel 65 | 66 | 67 | def unprocess(image, mean_pixel): 68 | return image + mean_pixel -------------------------------------------------------------------------------- /first_edition_archive/archive/one_layer_autoencoder.py: -------------------------------------------------------------------------------- 1 | ''' A one-layer autoencoder using TensorFlow library''' 2 | import tensorflow as tf 3 | import numpy as np 4 | import math 5 | #import pandas as pd 6 | #import sys 7 | 8 | input = np.array([[2.0, 1.0, 1.0, 2.0], 9 | [-2.0, 1.0, -1.0, 2.0], 10 | [0.0, 1.0, 0.0, 2.0], 11 | [0.0, -1.0, 0.0, -2.0], 12 | [0.0, -1.0, 0.0, -2.0]]) 13 | 14 | # Code here for importing data from file 15 | 16 | noisy_input = input + .2 * np.random.random_sample((input.shape)) - .1 17 | output = input 18 | 19 | # Scale to [0,1] 20 | scaled_input_1 = np.divide((noisy_input-noisy_input.min()), (noisy_input.max()-noisy_input.min())) 21 | scaled_output_1 = np.divide((output-output.min()), (output.max()-output.min())) 22 | # Scale to [-1,1] 23 | scaled_input_2 = (scaled_input_1*2)-1 24 | scaled_output_2 = (scaled_output_1*2)-1 25 | 26 | input_data = scaled_input_2 27 | output_data = scaled_output_2 28 | 29 | # Autoencoder with 1 hidden layer 30 | n_samp, n_input = input_data.shape 31 | n_hidden = 2 32 | 33 | x = tf.placeholder("float", [None, n_input]) 34 | # Weights and biases to hidden layer 35 | Wh = tf.Variable(tf.random_uniform((n_input, n_hidden), -1.0 / math.sqrt(n_input), 1.0 / math.sqrt(n_input))) 36 | bh = tf.Variable(tf.zeros([n_hidden])) 37 | h = tf.nn.tanh(tf.matmul(x,Wh) + bh) 38 | # Weights and biases to output layer 39 | Wo = tf.transpose(Wh) # tied weights 40 | bo = tf.Variable(tf.zeros([n_input])) 41 | y = tf.nn.tanh(tf.matmul(h,Wo) + bo) 42 | # Objective functions 43 | y_ = tf.placeholder("float", [None,n_input]) 44 | cross_entropy = -tf.reduce_sum(y_*tf.log(y)) 45 | meansq = tf.reduce_mean(tf.square(y_-y)) 46 | train_step = tf.train.GradientDescentOptimizer(0.05).minimize(meansq) 47 | 48 | init = tf.initialize_all_variables() 49 | sess = tf.Session() 50 | sess.run(init) 51 | 52 | n_rounds = 5000 53 | batch_size = min(50, n_samp) 54 | 55 | for i in range(n_rounds): 56 | sample = np.random.randint(n_samp, size=batch_size) 57 | batch_xs = input_data[sample][:] 58 | batch_ys = output_data[sample][:] 59 | sess.run(train_step, feed_dict={x: batch_xs, y_:batch_ys}) 60 | if i % 100 == 0: 61 | print i, sess.run(cross_entropy, feed_dict={x: batch_xs, y_:batch_ys}), sess.run(meansq, feed_dict={x: batch_xs, y_:batch_ys}) 62 | 63 | print "Target:" 64 | print output_data 65 | print "Final activations:" 66 | print sess.run(y, feed_dict={x: input_data}) 67 | print "Final weights (input => hidden layer)" 68 | print sess.run(Wh) 69 | print "Final biases (input => hidden layer)" 70 | print sess.run(bh) 71 | print "Final biases (hidden layer => output)" 72 | print sess.run(bo) 73 | print "Final activations of hidden layer" 74 | print sess.run(h, feed_dict={x: input_data}) 75 | 
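The one-layer autoencoder above ties its decoder weights to the transpose of its encoder weights (`Wo = tf.transpose(Wh)`). For readers working from the Second Edition's PyTorch notebooks, here is a minimal sketch of the same tied-weight idea, assuming the same toy input, scaling, and mean-squared objective as the script above; it is an illustrative rewrite, not code from the archive.

```python
# Minimal tied-weight autoencoder sketch in PyTorch (illustrative, not archive code).
import torch
import torch.nn as nn
import torch.nn.functional as F

class TiedAutoencoder(nn.Module):
    def __init__(self, n_input=4, n_hidden=2):
        super().__init__()
        bound = 1.0 / n_input ** 0.5
        # Single weight matrix shared by encoder and decoder (tied weights)
        self.W = nn.Parameter(torch.empty(n_input, n_hidden).uniform_(-bound, bound))
        self.bh = nn.Parameter(torch.zeros(n_hidden))
        self.bo = nn.Parameter(torch.zeros(n_input))

    def forward(self, x):
        h = torch.tanh(x @ self.W + self.bh)         # encoder
        return torch.tanh(h @ self.W.t() + self.bo)  # decoder reuses W transposed

if __name__ == "__main__":
    x = torch.tensor([[2.0, 1.0, 1.0, 2.0],
                      [-2.0, 1.0, -1.0, 2.0],
                      [0.0, 1.0, 0.0, 2.0],
                      [0.0, -1.0, 0.0, -2.0],
                      [0.0, -1.0, 0.0, -2.0]])
    noisy = x + 0.2 * torch.rand_like(x) - 0.1                         # same noise model as above
    inp = 2 * (noisy - noisy.min()) / (noisy.max() - noisy.min()) - 1  # scale to [-1, 1]
    target = 2 * (x - x.min()) / (x.max() - x.min()) - 1
    model = TiedAutoencoder()
    opt = torch.optim.SGD(model.parameters(), lr=0.05)
    for step in range(5000):
        opt.zero_grad()
        loss = F.mse_loss(model(inp), target)  # mean squared reconstruction error
        loss.backward()
        opt.step()
    print("final reconstruction loss:", loss.item())
```

Tying the decoder to the transpose of the encoder halves the number of weight parameters and acts as a mild regularizer, which is why the archived script reuses `Wh` rather than learning a separate output matrix.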
-------------------------------------------------------------------------------- /first_edition_archive/archive/optimzer_mlp.py: -------------------------------------------------------------------------------- 1 | import input_data 2 | mnist = input_data.read_data_sets("data/", one_hot=True) 3 | 4 | import tensorflow as tf 5 | import time, argparse 6 | 7 | # Architecture 8 | n_hidden_1 = 256 9 | n_hidden_2 = 256 10 | 11 | # Parameters 12 | training_epochs = 500 13 | batch_size = 100 14 | display_step = 1 15 | 16 | def layer(input, weight_shape, bias_shape): 17 | weight_init = tf.random_normal_initializer(stddev=(2.0/weight_shape[0])**0.5) 18 | bias_init = tf.constant_initializer(value=0) 19 | W = tf.get_variable("W", weight_shape, 20 | initializer=weight_init) 21 | b = tf.get_variable("b", bias_shape, 22 | initializer=bias_init) 23 | return tf.nn.relu(tf.matmul(input, W) + b) 24 | 25 | def inference(x): 26 | with tf.variable_scope("hidden_1"): 27 | hidden_1 = layer(x, [784, n_hidden_1], [n_hidden_1]) 28 | 29 | with tf.variable_scope("hidden_2"): 30 | hidden_2 = layer(hidden_1, [n_hidden_1, n_hidden_2], [n_hidden_2]) 31 | 32 | with tf.variable_scope("output"): 33 | output = layer(hidden_2, [n_hidden_2, 10], [10]) 34 | 35 | return output 36 | 37 | def loss(output, y): 38 | xentropy = tf.nn.softmax_cross_entropy_with_logits(output, y) 39 | loss = tf.reduce_mean(xentropy) 40 | return loss 41 | 42 | def training(cost, global_step, optimizer): 43 | tf.scalar_summary("cost", cost) 44 | train_op = None 45 | print optimizer 46 | if optimizer == "sgd": 47 | learning_rate = 0.01 48 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 49 | train_op = optimizer.minimize(cost, global_step=global_step) 50 | if optimizer == "momentum": 51 | learning_rate = 0.01 52 | momentum = 0.9 53 | optimizer = tf.train.MomentumOptimizer(learning_rate, momentum) 54 | train_op = optimizer.minimize(cost, global_step=global_step) 55 | return train_op 56 | 57 | 58 | def evaluate(output, y): 59 | correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1)) 60 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 61 | tf.scalar_summary("validation error", (1.0 - accuracy)) 62 | return accuracy 63 | 64 | if __name__ == '__main__': 65 | 66 | parser = argparse.ArgumentParser(description='Test various optimization strategies') 67 | parser.add_argument('optimizer', nargs=1, type=str) 68 | args = parser.parse_args() 69 | 70 | with tf.Graph().as_default(): 71 | 72 | with tf.variable_scope("mlp_model"): 73 | 74 | x = tf.placeholder("float", [None, 784]) # mnist data image of shape 28*28=784 75 | y = tf.placeholder("float", [None, 10]) # 0-9 digits recognition => 10 classes 76 | 77 | 78 | output = inference(x) 79 | 80 | cost = loss(output, y) 81 | 82 | global_step = tf.Variable(0, name='global_step', trainable=False) 83 | 84 | train_op = training(cost, global_step, args.optimizer[0]) 85 | 86 | eval_op = evaluate(output, y) 87 | 88 | summary_op = tf.merge_all_summaries() 89 | 90 | saver = tf.train.Saver() 91 | 92 | sess = tf.Session() 93 | 94 | summary_writer = tf.train.SummaryWriter("mlp_logs_%s/" % args.optimizer[0], 95 | graph_def=sess.graph_def) 96 | 97 | 98 | init_op = tf.initialize_all_variables() 99 | 100 | sess.run(init_op) 101 | 102 | # saver.restore(sess, "mlp_logs/model-checkpoint-66000") 103 | 104 | 105 | # Training cycle 106 | for epoch in range(training_epochs): 107 | 108 | avg_cost = 0. 
109 | total_batch = int(mnist.train.num_examples/batch_size) 110 | # Loop over all batches 111 | for i in range(total_batch): 112 | minibatch_x, minibatch_y = mnist.train.next_batch(batch_size) 113 | # Fit training using batch data 114 | sess.run(train_op, feed_dict={x: minibatch_x, y: minibatch_y}) 115 | # Compute average loss 116 | avg_cost += sess.run(cost, feed_dict={x: minibatch_x, y: minibatch_y})/total_batch 117 | # Display logs per epoch step 118 | if epoch % display_step == 0: 119 | print "Epoch:", '%04d' % (epoch+1), "cost =", "{:.9f}".format(avg_cost) 120 | 121 | accuracy = sess.run(eval_op, feed_dict={x: mnist.validation.images, y: mnist.validation.labels}) 122 | 123 | print "Validation Error:", (1 - accuracy) 124 | 125 | summary_str = sess.run(summary_op, feed_dict={x: minibatch_x, y: minibatch_y}) 126 | summary_writer.add_summary(summary_str, sess.run(global_step)) 127 | 128 | saver.save(sess, "mlp_logs_%s/model-checkpoint" % args.optimizer[0], global_step=global_step) 129 | 130 | 131 | print "Optimization Finished!" 132 | 133 | 134 | accuracy = sess.run(eval_op, feed_dict={x: mnist.test.images, y: mnist.test.labels}) 135 | 136 | print "Test Accuracy:", accuracy 137 | -------------------------------------------------------------------------------- /first_edition_archive/archive/random_walk.py: -------------------------------------------------------------------------------- 1 | import random 2 | import matplotlib.pyplot as plt 3 | import matplotlib.gridspec as gridspec 4 | 5 | step_range = 10 6 | momentum_range = [0.1, 0.5, 0.9, 0.99] 7 | 8 | step_choices = range(-1 * step_range, step_range + 1) 9 | rand_walk = [random.choice(step_choices) for x in xrange(100)] 10 | 11 | 12 | x = range(len(rand_walk)) 13 | zeros = [0 for i in x] 14 | 15 | import numpy as np 16 | yrange = 1.5 * np.max(rand_walk) 17 | 18 | fig = plt.figure(1) 19 | gs = gridspec.GridSpec(3, 4) 20 | ax = plt.subplot(gs[0, 1:3]) 21 | ax.set_title("No Momentum") 22 | plt.xlabel("steps") 23 | plt.plot(x, rand_walk, 'b', x, zeros, 'k') 24 | plt.ylim((-yrange, yrange)) 25 | 26 | 27 | momentum = momentum_range[0] 28 | momentum_rand_walk = [random.choice(step_choices)] 29 | 30 | for i in xrange(len(rand_walk) - 1): 31 | prev = momentum_rand_walk[-1] 32 | momentum_rand_walk.append(momentum * prev + (1 - momentum) * random.choice(step_choices)) 33 | 34 | ax = plt.subplot(gs[1,:2]) 35 | ax.set_title("Momentum = %s" % momentum_range[0]) 36 | plt.plot(x, momentum_rand_walk, 'r', x, zeros, 'k') 37 | plt.ylim((-yrange, yrange)) 38 | 39 | momentum = momentum_range[1] 40 | momentum_rand_walk = [random.choice(step_choices)] 41 | 42 | for i in xrange(len(rand_walk) - 1): 43 | prev = momentum_rand_walk[-1] 44 | momentum_rand_walk.append(momentum * prev + (1 - momentum) * random.choice(step_choices)) 45 | 46 | ax = plt.subplot(gs[1,2:]) 47 | ax.set_title("Momentum = %s" % momentum_range[1]) 48 | plt.plot(x, momentum_rand_walk, 'r', x, zeros, 'k') 49 | plt.ylim((-yrange, yrange)) 50 | 51 | momentum = momentum_range[2] 52 | momentum_rand_walk = [random.choice(step_choices)] 53 | 54 | for i in xrange(len(rand_walk) - 1): 55 | prev = momentum_rand_walk[-1] 56 | momentum_rand_walk.append(momentum * prev + (1 - momentum) * random.choice(step_choices)) 57 | 58 | ax = plt.subplot(gs[2,:2]) 59 | ax.set_title("Momentum = %s" % momentum_range[2]) 60 | plt.plot(x, momentum_rand_walk, 'r', x, zeros, 'k') 61 | plt.ylim((-yrange, yrange)) 62 | 63 | momentum = momentum_range[3] 64 | momentum_rand_walk = [random.choice(step_choices)] 65 | 66 | for i 
in xrange(len(rand_walk) - 1): 67 | prev = momentum_rand_walk[-1] 68 | momentum_rand_walk.append(momentum * prev + (1 - momentum) * random.choice(step_choices)) 69 | 70 | ax = plt.subplot(gs[2,2:]) 71 | ax.set_title("Momentum = %s" % momentum_range[3]) 72 | plt.plot(x, momentum_rand_walk, 'r', x, zeros, 'k') 73 | plt.ylim((-yrange, yrange)) 74 | 75 | fig.tight_layout() 76 | 77 | plt.show() 78 | -------------------------------------------------------------------------------- /first_edition_archive/archive/read_16M_tweet_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cPickle as pickle 3 | import csv, leveldb, os 4 | 5 | char_2_index = {} 6 | index_2_char = {} 7 | 8 | sentiment_2_index = {} 9 | index_2_sentiment = {} 10 | 11 | POSITIVE = 'positive' 12 | NEGATIVE = 'negative' 13 | 14 | train_dataset_raw = [] 15 | val_dataset_raw = [] 16 | 17 | db = None 18 | train_minibatches = 0 19 | 20 | if not os.path.isdir("data/twitter/tweetdb"): 21 | 22 | db = leveldb.LevelDB("data/twitter/tweetdb") 23 | 24 | 25 | with open('data/twitter/training.1600000.processed.noemoticon.csv', 'rb') as f: 26 | reader = csv.reader(f) 27 | train_dataset_raw = list(reader) 28 | np.random.shuffle(train_dataset_raw) 29 | 30 | with open('data/twitter/testdata.manual.2009.06.14.csv', 'rb') as f: 31 | reader = csv.reader(f) 32 | val_dataset_raw = list(reader) 33 | np.random.shuffle(val_dataset_raw) 34 | 35 | counter_c = 0 36 | counter_s = 0 37 | max_row = 0 38 | for row in train_dataset_raw + val_dataset_raw: 39 | # print row[0], row[5] 40 | max_row = max(len(row[5]), max_row) 41 | if row[0] not in sentiment_2_index: 42 | sentiment_2_index[row[0]] = counter_s 43 | index_2_sentiment[counter_s] = row[0] 44 | counter_s += 1 45 | for char in row[5]: 46 | if char not in char_2_index: 47 | char_2_index[char] = counter_c 48 | index_2_char[counter_c] = char 49 | counter_c += 1 50 | 51 | print "Dataset has max length %d" % max_row 52 | 53 | print index_2_char 54 | 55 | print index_2_sentiment 56 | 57 | train_minibatches = 0 58 | inputs = [] 59 | tags = [] 60 | for row in train_dataset_raw: 61 | if len(row[5]) > 200: 62 | continue 63 | # print row[1] 64 | if row[0] == '4': 65 | tags.append(1) 66 | elif row[0] == '0': 67 | tags.append(0) 68 | else: 69 | print "ERROR ON:", row 70 | continue 71 | 72 | print row[0], row[5] 73 | 74 | cur_input = [] 75 | for char in row[5]: 76 | cur_input.append(char_2_index[char]) 77 | cur_input = np.eye(len(char_2_index.keys()))[cur_input] 78 | init_len = len(cur_input) 79 | if 200 - init_len > 0: 80 | zero = np.zeros((200 - init_len, len(char_2_index.keys()))) 81 | cur_input = np.concatenate((cur_input, zero)) 82 | inputs.append(cur_input) 83 | 84 | print len(tags) 85 | 86 | if len(inputs) == 256: 87 | print "FINISH MINIBATCH %d, INSERT INTO DB" % train_minibatches 88 | inputs = np.array(inputs, dtype=np.float32) 89 | tags = np.eye(2, dtype=np.float32)[tags] 90 | db.Put("train_inputs_" + str(train_minibatches), inputs) 91 | db.Put("train_tags_" + str(train_minibatches), tags) 92 | train_minibatches += 1 93 | 94 | inputs = [] 95 | tags = [] 96 | 97 | db.Put("n_minibatches", pickle.dumps(train_minibatches)) 98 | 99 | inputs = [] 100 | tags = [] 101 | 102 | for row in val_dataset_raw: 103 | if len(row[5]) > 200: 104 | continue 105 | # print row[1] 106 | if row[0] == '4': 107 | tags.append(1) 108 | elif row[0] == '0': 109 | tags.append(0) 110 | else: 111 | print "ERROR ON:", row 112 | continue 113 | cur_input = [] 114 | for char in 
row[5]: 115 | cur_input.append(char_2_index[char]) 116 | cur_input = np.eye(len(char_2_index.keys()))[cur_input] 117 | init_len = len(cur_input) 118 | if 200 - init_len > 0: 119 | zero = np.zeros((200 - init_len, len(char_2_index.keys()))) 120 | cur_input = np.concatenate((cur_input, zero)) 121 | inputs.append(cur_input) 122 | 123 | if len(inputs) == 256: 124 | inputs = np.array(inputs, dtype=np.float32) 125 | tags = np.eye(2, dtype=np.float32)[tags] 126 | db.Put("val_inputs_0", inputs) 127 | db.Put("val_tags_0", tags) 128 | break 129 | else: 130 | db = leveldb.LevelDB("data/twitter/tweetdb") 131 | train_minibatches = pickle.loads(db.Get("n_minibatches")) 132 | 133 | 134 | 135 | 136 | 137 | class TweetDataset: 138 | def __init__(self, db, max_minibatch, prefix): 139 | self.ptr = 0 140 | self.prefix = prefix 141 | self.max_minibatch = max_minibatch 142 | 143 | def minibatch(self): 144 | inputs, tags = np.fromstring(db.Get(self.prefix + "_inputs_" + str(self.ptr)), dtype=np.float32).reshape((-1, 200, 194)), np.fromstring(db.Get(self.prefix + "_tags_" + str(self.ptr)), dtype=np.float32).reshape((-1, 2)) 145 | self.ptr = (self.ptr + 1) % self.max_minibatch 146 | return inputs, tags 147 | 148 | 149 | print "Start train dataset loading" 150 | 151 | train = TweetDataset(db, train_minibatches, "train") 152 | 153 | print "Start val dataset loading" 154 | 155 | val = TweetDataset(db, 1, "val") 156 | 157 | print "Finish dataset loading" 158 | -------------------------------------------------------------------------------- /first_edition_archive/archive/read_imdb_data.py: -------------------------------------------------------------------------------- 1 | import tflearn 2 | from tflearn.data_utils import to_categorical, pad_sequences 3 | from tflearn.datasets import imdb 4 | import numpy as np 5 | 6 | # IMDB Dataset loading 7 | train, test, _ = imdb.load_data(path='data/imdb.pkl', n_words=30000, 8 | valid_portion=0.1) 9 | trainX, trainY = train 10 | testX, testY = test 11 | 12 | # Data preprocessing 13 | # Sequence padding 14 | trainX = pad_sequences(trainX, maxlen=500, value=0.) 15 | testX = pad_sequences(testX, maxlen=500, value=0.) 
16 | # Converting labels to binary vectors 17 | trainY = to_categorical(trainY, nb_classes=2) 18 | testY = to_categorical(testY, nb_classes=2) 19 | 20 | 21 | 22 | class IMDBDataset(): 23 | def __init__(self, X, Y): 24 | self.num_examples = len(X) 25 | self.inputs = X 26 | self.tags = Y 27 | self.ptr = 0 28 | 29 | 30 | def minibatch(self, size): 31 | ret = None 32 | if self.ptr + size < len(self.inputs): 33 | ret = self.inputs[self.ptr:self.ptr+size], self.tags[self.ptr:self.ptr+size] 34 | else: 35 | ret = np.concatenate((self.inputs[self.ptr:], self.inputs[:size-len(self.inputs[self.ptr:])])), np.concatenate((self.tags[self.ptr:], self.tags[:size-len(self.tags[self.ptr:])])) 36 | self.ptr = (self.ptr + size) % len(self.inputs) 37 | 38 | return ret 39 | # return np.eye(10000)[ret[0]], ret[1] 40 | 41 | 42 | train = IMDBDataset(trainX, trainY) 43 | val = IMDBDataset(testX, testY) 44 | -------------------------------------------------------------------------------- /first_edition_archive/archive/read_pos_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import gensim, leveldb, os, re 3 | 4 | db = None 5 | 6 | tags_to_index = {} 7 | index_to_tags = {} 8 | train_dataset_raw = {} 9 | train_dataset = [] 10 | test_dataset_raw = {} 11 | test_dataset = [] 12 | dataset_vocab = {} 13 | 14 | print "LOADING PRETRAINED WORD2VEC MODEL... " 15 | if not os.path.isdir("data/word2vecdb"): 16 | model = gensim.models.Word2Vec.load_word2vec_format('/Users/nikhilbuduma/Downloads/GoogleNews-vectors-negative300.bin', binary=True) 17 | db = leveldb.LevelDB("data/word2vecdb") 18 | 19 | try: 20 | os.remove("data/pos_data/pos.train.processed.txt") 21 | except OSError: 22 | pass 23 | 24 | try: 25 | os.remove("data/pos_data/pos.test.processed.txt") 26 | except OSError: 27 | pass 28 | 29 | with open("data/pos_data/pos.train.txt") as f: 30 | train_dataset_raw = f.readlines() 31 | train_dataset_raw = [element.split() for element in train_dataset_raw if len(element.split()) > 0] 32 | 33 | counter = 0 34 | while counter < len(train_dataset_raw): 35 | pair = train_dataset_raw[counter] 36 | if counter < len(train_dataset_raw) - 1: 37 | next_pair = train_dataset_raw[counter + 1] 38 | if (pair[0] + "_" + next_pair[0] in model) and (pair[1] == next_pair[1]): 39 | train_dataset.append([pair[0] + "_" + next_pair[0], pair[1]]) 40 | counter += 2 41 | continue 42 | 43 | word = re.sub("\d", "#", pair[0]) 44 | word = re.sub("-", "_", word) 45 | 46 | if word in model: 47 | train_dataset.append([word, pair[1]]) 48 | counter += 1 49 | continue 50 | 51 | if "_" in word: 52 | subwords = word.split("_") 53 | for subword in subwords: 54 | if not (subword.isspace() or len(subword) == 0): 55 | train_dataset.append([subword, pair[1]]) 56 | counter += 1 57 | continue 58 | 59 | train_dataset.append([word, pair[1]]) 60 | counter += 1 61 | 62 | with open('data/pos_data/pos.train.processed.txt', 'w') as train_file: 63 | for item in train_dataset: 64 | train_file.write("%s\n" % (item[0] + " " + item[1])) 65 | 66 | 67 | with open("data/pos_data/pos.test.txt") as f: 68 | test_dataset_raw = f.readlines() 69 | test_dataset_raw = [element.split() for element in test_dataset_raw if len(element.split()) > 0] 70 | 71 | counter = 0 72 | while counter < len(test_dataset_raw): 73 | pair = test_dataset_raw[counter] 74 | if counter < len(test_dataset_raw) - 1: 75 | next_pair = test_dataset_raw[counter + 1] 76 | if (pair[0] + "_" + next_pair[0] in model) and (pair[1] == next_pair[1]): 77 | 
test_dataset.append([pair[0] + "_" + next_pair[0], pair[1]]) 78 | counter += 2 79 | continue 80 | 81 | word = re.sub("\d", "#", pair[0]) 82 | word = re.sub("-", "_", word) 83 | 84 | if word in model: 85 | test_dataset.append([word, pair[1]]) 86 | counter += 1 87 | continue 88 | 89 | if "_" in word: 90 | subwords = word.split("_") 91 | for subword in subwords: 92 | if not (subword.isspace() or len(subword) == 0): 93 | test_dataset.append([subword, pair[1]]) 94 | counter += 1 95 | continue 96 | 97 | test_dataset.append([word, pair[1]]) 98 | counter += 1 99 | 100 | with open('data/pos_data/pos.test.processed.txt', 'w') as test_file: 101 | for item in test_dataset: 102 | test_file.write("%s\n" % (item[0] + " " + item[1])) 103 | 104 | counter = 0 105 | for pair in train_dataset + test_dataset: 106 | dataset_vocab[pair[0]] = 1 107 | if pair[1] not in tags_to_index: 108 | tags_to_index[pair[1]] = counter 109 | index_to_tags[counter] = pair[1] 110 | counter += 1 111 | 112 | nonmodel_cache = {} 113 | 114 | counter = 1 115 | total = len(dataset_vocab.keys()) 116 | for word in dataset_vocab: 117 | if counter % 100 == 0: 118 | print "Inserted %d words out of %d total" % (counter, total) 119 | if word in model: 120 | db.Put(word, model[word]) 121 | elif word in nonmodel_cache: 122 | db.Put(word, nonmodel_cache[word]) 123 | else: 124 | print word 125 | nonmodel_cache[word] = np.random.uniform(-0.25, 0.25, 300).astype(np.float32) 126 | db.Put(word, nonmodel_cache[word]) 127 | counter += 1 128 | else: 129 | db = leveldb.LevelDB("data/word2vecdb") 130 | 131 | with open("data/pos_data/pos.train.processed.txt") as f: 132 | train_dataset = f.readlines() 133 | train_dataset = [element.split() for element in train_dataset if len(element.split()) > 0] 134 | 135 | with open("data/pos_data/pos.test.processed.txt") as f: 136 | test_dataset = f.readlines() 137 | test_dataset = [element.split() for element in test_dataset if len(element.split()) > 0] 138 | 139 | counter = 0 140 | for pair in train_dataset + test_dataset: 141 | dataset_vocab[pair[0]] = 1 142 | if pair[1] not in tags_to_index: 143 | tags_to_index[pair[1]] = counter 144 | index_to_tags[counter] = pair[1] 145 | counter += 1 146 | 147 | 148 | 149 | 150 | class POSDataset(): 151 | def __init__(self, db, dataset, tags_to_index, get_all=False): 152 | self.db = db 153 | self.inputs = [] 154 | self.tags = [] 155 | self.ptr = 0 156 | self.n = 0 157 | self.get_all = get_all 158 | 159 | for pair in dataset: 160 | self.inputs.append(np.fromstring(db.Get(pair[0]), dtype=np.float32)) 161 | self.tags.append(tags_to_index[pair[1]]) 162 | 163 | self.inputs = np.array(self.inputs, dtype=np.float32) 164 | self.tags = np.eye(len(tags_to_index.keys()))[self.tags] 165 | 166 | def prepare_n_gram(self, n): 167 | self.n = n 168 | 169 | def minibatch(self, size): 170 | batch_inputs = [] 171 | batch_tags = [] 172 | if self.get_all: 173 | counter = 0 174 | while counter < len(self.inputs) - self.n + 1: 175 | batch_inputs.append(self.inputs[counter:counter+self.n].flatten()) 176 | batch_tags.append(self.tags[counter + self.n - 1]) 177 | counter += 1 178 | elif self.ptr + size < len(self.inputs) - self.n: 179 | counter = self.ptr 180 | while counter < self.ptr + size: 181 | batch_inputs.append(self.inputs[counter:counter+self.n].flatten()) 182 | batch_tags.append(self.tags[counter + self.n - 1]) 183 | counter += 1 184 | else: 185 | counter = self.ptr 186 | while counter < len(self.inputs) - self.n + 1: 187 | batch_inputs.append(self.inputs[counter:counter+self.n].flatten()) 188 | 
batch_tags.append(self.tags[counter + self.n - 1]) 189 | counter += 1 190 | 191 | counter2 = 0 192 | while counter2 < size - counter + self.ptr: 193 | batch_inputs.append(self.inputs[counter2:counter2+self.n].flatten()) 194 | batch_tags.append(self.tags[counter2 + self.n - 1]) 195 | counter2 += 1 196 | 197 | self.ptr = (self.ptr + size) % (len(self.inputs) - self.n) 198 | return np.array(batch_inputs, dtype=np.float32), np.array(batch_tags) 199 | 200 | 201 | 202 | train = POSDataset(db, train_dataset, tags_to_index) 203 | test = POSDataset(db, test_dataset, tags_to_index, get_all=True) 204 | -------------------------------------------------------------------------------- /first_edition_archive/archive/read_tweet_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cPickle as pickle 3 | import csv, leveldb, os 4 | 5 | char_2_index = {} 6 | index_2_char = {} 7 | 8 | sentiment_2_index = {} 9 | index_2_sentiment = {} 10 | 11 | POSITIVE = 'positive' 12 | NEGATIVE = 'negative' 13 | 14 | dataset_original = [] 15 | train_dataset_raw = [] 16 | val_dataset_raw = [] 17 | 18 | db = None 19 | train_minibatches = 0 20 | 21 | if not os.path.isdir("data/twitter/tweetdb"): 22 | 23 | db = leveldb.LevelDB("data/twitter/tweetdb") 24 | 25 | 26 | with open('data/twitter/airlines_tweets.csv', 'rb') as f: 27 | reader = csv.reader(f) 28 | dataset_original = list(reader) 29 | dataset_original = dataset_original[1:] 30 | np.random.shuffle(dataset_original) 31 | 32 | 33 | counter_c = 0 34 | counter_s = 0 35 | max_row = 0 36 | 37 | val_neg = [] 38 | val_pos = [] 39 | 40 | for row in dataset_original: 41 | print row[1], row[10] 42 | if (row[1] == "positive" or row[1] == "negative") and len(row[10])<200 : 43 | if row[1] == "positive" and len(val_pos) < 128: 44 | val_pos.append(row) 45 | elif row[1] == "negative" and len(val_neg) < 128: 46 | val_neg.append(row) 47 | else: 48 | train_dataset_raw.append(row) 49 | max_row = max(len(row[10]), max_row) 50 | if row[1] not in sentiment_2_index: 51 | sentiment_2_index[row[1]] = counter_s 52 | index_2_sentiment[counter_s] = row[1] 53 | counter_s += 1 54 | for char in row[10]: 55 | if char not in char_2_index: 56 | char_2_index[char] = counter_c 57 | index_2_char[counter_c] = char 58 | counter_c += 1 59 | 60 | 61 | val_dataset_raw = val_neg + val_pos 62 | 63 | np.random.shuffle(train_dataset_raw) 64 | np.random.shuffle(val_dataset_raw) 65 | 66 | 67 | print "Dataset has max length %d" % max_row 68 | 69 | print index_2_char 70 | 71 | print index_2_sentiment 72 | 73 | train_minibatches = 0 74 | inputs = [] 75 | tags = [] 76 | for row in train_dataset_raw: 77 | if len(row[10]) > 200: 78 | continue 79 | # print row[1] 80 | if row[1] == 'positive': 81 | tags.append(1) 82 | elif row[1] == 'negative': 83 | tags.append(0) 84 | else: 85 | print "ERROR ON:", row 86 | continue 87 | 88 | print row[1], row[10] 89 | 90 | cur_input = [] 91 | for char in row[10]: 92 | cur_input.append(char_2_index[char]) 93 | cur_input = np.eye(len(char_2_index.keys()))[cur_input] 94 | init_len = len(cur_input) 95 | if 200 - init_len > 0: 96 | zero = np.zeros((200 - init_len, len(char_2_index.keys()))) 97 | cur_input = np.concatenate((cur_input, zero)) 98 | inputs.append(cur_input) 99 | 100 | print len(tags) 101 | 102 | if len(inputs) == 256: 103 | print "FINISH MINIBATCH %d, INSERT INTO DB" % train_minibatches 104 | inputs = np.array(inputs, dtype=np.float32) 105 | tags = np.eye(2, dtype=np.float32)[tags] 106 | db.Put("train_inputs_" + 
str(train_minibatches), inputs) 107 | db.Put("train_tags_" + str(train_minibatches), tags) 108 | train_minibatches += 1 109 | 110 | inputs = [] 111 | tags = [] 112 | 113 | db.Put("n_minibatches", pickle.dumps(train_minibatches)) 114 | 115 | inputs = [] 116 | tags = [] 117 | 118 | for row in val_dataset_raw: 119 | if len(row[10]) > 200: 120 | continue 121 | # print row[1] 122 | if row[1] == 'positive': 123 | tags.append(1) 124 | elif row[1] == 'negative': 125 | tags.append(0) 126 | else: 127 | print "ERROR ON:", row 128 | continue 129 | cur_input = [] 130 | for char in row[10]: 131 | cur_input.append(char_2_index[char]) 132 | cur_input = np.eye(len(char_2_index.keys()))[cur_input] 133 | init_len = len(cur_input) 134 | if 200 - init_len > 0: 135 | zero = np.zeros((200 - init_len, len(char_2_index.keys()))) 136 | cur_input = np.concatenate((cur_input, zero)) 137 | inputs.append(cur_input) 138 | 139 | if len(inputs) == 256: 140 | inputs = np.array(inputs, dtype=np.float32) 141 | tags = np.eye(2, dtype=np.float32)[tags] 142 | db.Put("val_inputs_0", inputs) 143 | db.Put("val_tags_0", tags) 144 | break 145 | else: 146 | db = leveldb.LevelDB("data/twitter/tweetdb") 147 | train_minibatches = pickle.loads(db.Get("n_minibatches")) 148 | 149 | 150 | 151 | 152 | 153 | class TweetDataset: 154 | def __init__(self, db, max_minibatch, prefix): 155 | self.ptr = 0 156 | self.prefix = prefix 157 | self.max_minibatch = max_minibatch 158 | 159 | def minibatch(self): 160 | inputs, tags = np.fromstring(db.Get(self.prefix + "_inputs_" + str(self.ptr)), dtype=np.float32).reshape((-1, 200, 155)), np.fromstring(db.Get(self.prefix + "_tags_" + str(self.ptr)), dtype=np.float32).reshape((-1, 2)) 161 | self.ptr = (self.ptr + 1) % self.max_minibatch 162 | return inputs, tags 163 | 164 | 165 | print "Start train dataset loading" 166 | 167 | train = TweetDataset(db, train_minibatches, "train") 168 | 169 | print "Start val dataset loading" 170 | 171 | val = TweetDataset(db, 1, "val") 172 | 173 | print "Finish dataset loading" 174 | -------------------------------------------------------------------------------- /first_edition_archive/archive/report.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 2 | Processing file 'linear_interpolation.py' 3 | outputting to 'linear_interpolation_updated.py' 4 | -------------------------------------------------------------------------------- 5 | 6 | 'linear_interpolation.py' Line 40 7 | -------------------------------------------------------------------------------- 8 | 9 | Renamed function 'tf.initialize_variables' to 'tf.variables_initializer' 10 | 11 | Old: init_op = tf.initialize_variables(var_list_rand) 12 | ~~~~~~~~~~~~~~~~~~~~~~~ 13 | New: init_op = tf.variables_initializer(var_list_rand) 14 | ~~~~~~~~~~~~~~~~~~~~~~~~ 15 | 16 | 'linear_interpolation.py' Line 73 17 | -------------------------------------------------------------------------------- 18 | 19 | Renamed function 'tf.merge_all_summaries' to 'tf.summary.merge_all' 20 | 21 | Old: summary_op = tf.merge_all_summaries() 22 | ~~~~~~~~~~~~~~~~~~~~~~ 23 | New: summary_op = tf.summary.merge_all() 24 | ~~~~~~~~~~~~~~~~~~~~ 25 | 26 | 'linear_interpolation.py' Line 68 27 | -------------------------------------------------------------------------------- 28 | 29 | Renamed function 'tf.scalar_summary' to 'tf.summary.scalar' 30 | 31 | Old: tf.scalar_summary("interpolated_cost", cost_inter) 32 | ~~~~~~~~~~~~~~~~~ 33 | 
New: tf.summary.scalar("interpolated_cost", cost_inter) 34 | ~~~~~~~~~~~~~~~~~ 35 | 36 | 'linear_interpolation.py' Line 71 37 | -------------------------------------------------------------------------------- 38 | 39 | Renamed function 'tf.train.SummaryWriter' to 'tf.summary.FileWriter' 40 | 41 | Old: summary_writer = tf.train.SummaryWriter("linear_interp_logs/", 42 | ~~~~~~~~~~~~~~~~~~~~~~ 43 | New: summary_writer = tf.summary.FileWriter("linear_interp_logs/", 44 | ~~~~~~~~~~~~~~~~~~~~~ 45 | 46 | 47 | -------------------------------------------------------------------------------- /first_edition_archive/archive/requirements.txt: -------------------------------------------------------------------------------- 1 | appnope==0.1.0 2 | bleach==1.5.0 3 | cycler==0.10.0 4 | decorator==4.1.2 5 | Django==1.11.4 6 | h5py==2.7.0 7 | html5lib==0.9999999 8 | image==1.5.11 9 | ipython==6.1.0 10 | ipython-genutils==0.2.0 11 | jedi==0.10.2 12 | Markdown==2.6.9 13 | matplotlib==2.0.2 14 | numpy==1.13.1 15 | olefile==0.44 16 | pexpect==4.2.1 17 | pickleshare==0.7.4 18 | Pillow==4.2.1 19 | prompt-toolkit==1.0.15 20 | protobuf==3.4.0 21 | ptyprocess==0.5.2 22 | Pygments==2.2.0 23 | pyparsing==2.2.0 24 | python-dateutil==2.6.1 25 | pytz==2017.2 26 | simplegeneric==0.8.1 27 | six==1.10.0 28 | tensorflow==1.3.0 29 | tensorflow-tensorboard==0.1.4 30 | traitlets==4.3.2 31 | wcwidth==0.1.7 32 | Werkzeug==0.12.2 33 | -------------------------------------------------------------------------------- /first_edition_archive/archive/seq2seq/INTRO.md: -------------------------------------------------------------------------------- 1 | Sequence-to-sequence, or Seq2Seq, problems have become a mainstay of modern deep learning. These are problems where the input is a non-empty variable-length sequence of tokens or data points, and our output is similarly a non-empty variable-length sequence of tokens. Many interesting and relevant problems can be phrased as sequence-to-sequence problems. For example, the canonical example of such a problem is machine translation. Here, we are tasked with designing a system that is able to translate phrases or sentences in one language to another automatically. In this case, the input sequence is a sequence of words in our source language, while the output sequence is a sequence of words in our target language. 2 | 3 | More generally, we can imagine different scenarios where we want to translate from one representation of data to another. For instance, part-of-speech tagging can be solved as a sequence-to-sequence problem, where the input sequence is a sequence of tokens, and the output sequence is the label for each token, such as a noun, verb, preposition, or something else. Other tasks can be considered as special cases of sequence-to-sequence tasks, such as sentiment analysis, where our output sequence is simply a single token denoting some metric of sentiment. 4 | 5 | However, sequence-to-sequence tasks are not restricted to language-related tasks. Arithmetic operations can be considered sequence-to-sequence problems, where the input sequence can be an array of numbers and the output is their sum, for instance. Some simple algorithmic examples include inputting an array of integers, and outputting the sorted array. Even more complex problems, such as the traveling salesman problem, can be thought of as sequence-to-sequence problems.
In this case, the input is a list of cities and their coordinates on a map, and the output is the ordering of the cities that minimizes the total distance traveled if we were to visit each city in the specified order. Finally, we need not have explicitly sequential data in order to use the techniques that are used to solve sequence-to-sequence problems. As long as we process our data or input sequentially, it is possible to use these techniques to tackle these problems. For example, we can process images by sequentially considering pixels or adjacent regions of the image, and then label smaller parts of the image. 6 | 7 | There are many more problems that can be cast, albeit sometimes inefficiently, as sequence-to-sequence problems. We encourage the reader to think of other problems that can be treated as sequence-to-sequence problems and to try using the techniques discussed in the following sections. 8 | 9 | With these problems in mind, it is now important to consider classes of models that would be useful in tackling these problems. Recurrent neural networks, such as LSTMs, are well equipped for handling sequential data, such as text or time-series data. They are amenable to learning from variable-length sequences in the input, output, or both, making them suitable to apply to any of the problems described above. Moreover, as we've seen, LSTMs are designed to handle longer-term dependencies between the inputs, and this is especially important for some of these problems. With these preliminaries in place, we can introduce specific architectures used by researchers to solve these problems. 10 | 11 | The most common approach to solving sequence-to-sequence problems is to design an encoder-decoder network. With this approach, we use the first part of the network, or the encoder network, to compress the input sequence into a fixed-size embedding vector. We use the second part of the network, or the decoder network, to decode this embedding vector into the output sequence. A high-level diagram of this process can be seen in Figure X.
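To make the encoder-decoder description above concrete, here is a minimal sketch in PyTorch, the framework used by the Second Edition notebooks. It is an illustration rather than the model used elsewhere in this archive: the vocabulary sizes, embedding and hidden dimensions, and the random batch at the end are assumptions chosen only to show the flow of tensors.

```python
# Minimal encoder-decoder (seq2seq) sketch in PyTorch; all sizes are illustrative assumptions.
import torch
import torch.nn as nn
import torch.nn.functional as F

class Seq2Seq(nn.Module):
    def __init__(self, src_vocab=1000, tgt_vocab=1000, embed_dim=64, hidden_dim=128):
        super().__init__()
        self.src_embed = nn.Embedding(src_vocab, embed_dim)
        self.tgt_embed = nn.Embedding(tgt_vocab, embed_dim)
        self.encoder = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.decoder = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.out = nn.Linear(hidden_dim, tgt_vocab)

    def forward(self, src_tokens, tgt_tokens):
        # Encoder: compress the entire source sequence into a fixed-size state (h, c).
        _, state = self.encoder(self.src_embed(src_tokens))
        # Decoder: unroll from that state over the target tokens (teacher forcing).
        dec_out, _ = self.decoder(self.tgt_embed(tgt_tokens), state)
        return self.out(dec_out)  # logits over the target vocabulary at every step

if __name__ == "__main__":
    model = Seq2Seq()
    src = torch.randint(0, 1000, (8, 12))     # batch of 8 source sequences, length 12
    tgt = torch.randint(0, 1000, (8, 16))     # batch of 8 target sequences, length 16
    dec_in, labels = tgt[:, :-1], tgt[:, 1:]  # decoder input is the target shifted right
    logits = model(src, dec_in)               # shape: (8, 15, 1000)
    loss = F.cross_entropy(logits.reshape(-1, 1000), labels.reshape(-1))
    print(logits.shape, loss.item())
```

The state tuple handed from the encoder to the decoder plays the role of the fixed-size embedding vector described in the paragraph above.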
12 | -------------------------------------------------------------------------------- /first_edition_archive/archive/seq2seq/extract_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | def main(): 4 | fname = "perplexity_data.txt" 5 | content = [] 6 | with open(fname) as f: 7 | content = f.readlines() 8 | 9 | steps, lr, st, perpl = [], [], [], [] 10 | for line in content: 11 | if "global step" in line: 12 | line_arr = line.split(" ") 13 | steps.append(int(line_arr[2])) 14 | lr.append(float(line_arr[5])) 15 | st.append(float(line_arr[7])) 16 | perpl.append(float(line_arr[9])) 17 | 18 | # for i in range(len(steps)): 19 | # print lr[i] 20 | # for i in range(len(steps)): 21 | # print st[i] 22 | for i in range(len(steps)): 23 | print perpl[i] 24 | 25 | main() 26 | 27 | -------------------------------------------------------------------------------- /first_edition_archive/archive/seq2seq/nmt_lr_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/archive/seq2seq/nmt_lr_plot.png -------------------------------------------------------------------------------- /first_edition_archive/archive/seq2seq/nmt_perplexity_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/archive/seq2seq/nmt_perplexity_plot.png -------------------------------------------------------------------------------- /first_edition_archive/archive/seq2seq/output.txt: -------------------------------------------------------------------------------- 1 | Preparing WMT data in /hdfs/mscog/t-subhup/seq2seq/data 2 | Extracting tar file /hdfs/mscog/t-subhup/seq2seq/data/training-giga-fren.tar 3 | -------------------------------------------------------------------------------- /first_edition_archive/archive/skipgram.py: -------------------------------------------------------------------------------- 1 | import input_word_data as data 2 | import numpy as np 3 | import tensorflow as tf 4 | from sklearn.manifold import TSNE 5 | import matplotlib.pyplot as plt 6 | 7 | # TRAINING PARAMETERS 8 | batch_size = 32 # Number of training examples per batch 9 | embedding_size = 128 # Dimension of embedding vectors 10 | skip_window = 5 # Window size for context to the left and right of target 11 | num_skips = 4 # How many times to reuse target to generate a label for context. 
12 | batches_per_epoch = data.data_size*num_skips/batch_size # Number of batches per epoch of training 13 | training_epochs = 5 # Number of epochs to utilize for training 14 | neg_size = 64 # Number of negative samples to use for NCE 15 | display_step = 2000 # Frequency with which to print statistics 16 | val_step = 10000 # Frequency with which to perform validation 17 | learning_rate = 0.1 # Learning rate for SGD 18 | 19 | print "Epochs: %d, Batches per epoch: %d, Examples per batch: %d" % (training_epochs, batches_per_epoch, batch_size) 20 | 21 | # NEAREST NEIGHBORS VALIDATION PARAMETERS 22 | val_size = 20 23 | val_dist_span = 500 24 | val_examples = np.random.choice(val_dist_span, val_size, replace=False) 25 | top_match = 8 26 | plot_num = 500 27 | 28 | 29 | def embedding_layer(x, embedding_shape): 30 | with tf.variable_scope("embedding"): 31 | embedding_init = tf.random_uniform(embedding_shape, -1.0, 1.0) 32 | embedding_matrix = tf.get_variable("E", initializer=embedding_init) 33 | return tf.nn.embedding_lookup(embedding_matrix, x), embedding_matrix 34 | 35 | def noise_contrastive_loss(embedding_lookup, weight_shape, bias_shape, y): 36 | with tf.variable_scope("nce"): 37 | nce_weight_init = tf.truncated_normal(weight_shape, stddev=1.0/(weight_shape[1])**0.5) 38 | nce_bias_init = tf.zeros(bias_shape) 39 | nce_W = tf.get_variable("W", initializer=nce_weight_init) 40 | nce_b = tf.get_variable("b", initializer=nce_bias_init) 41 | 42 | total_loss = tf.nn.nce_loss(nce_W, nce_b, embedding_lookup, y, neg_size, data.vocabulary_size) 43 | return tf.reduce_mean(total_loss) 44 | 45 | def training(cost, global_step): 46 | with tf.variable_scope("training"): 47 | summary_op = tf.scalar_summary("cost", cost) 48 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 49 | train_op = optimizer.minimize(cost, global_step=global_step) 50 | return train_op, summary_op 51 | 52 | def validation(embedding_matrix, x_val): 53 | norm = tf.reduce_sum(embedding_matrix**2, 1, keep_dims=True)**0.5 54 | normalized = embedding_matrix/norm 55 | val_embeddings = tf.nn.embedding_lookup(normalized, x_val) 56 | cosine_similarity = tf.matmul(val_embeddings, normalized, transpose_b=True) 57 | return normalized, cosine_similarity 58 | 59 | if __name__ == '__main__': 60 | 61 | with tf.Graph().as_default(): 62 | 63 | with tf.variable_scope("skipgram_model"): 64 | 65 | x = tf.placeholder(tf.int32, shape=[batch_size]) 66 | y = tf.placeholder(tf.int32, [batch_size, 1]) 67 | val = tf.constant(val_examples, dtype=tf.int32) 68 | global_step = tf.Variable(0, name='global_step', trainable=False) 69 | 70 | e_lookup, e_matrix = embedding_layer(x, [data.vocabulary_size, embedding_size]) 71 | 72 | cost = noise_contrastive_loss(e_lookup, [data.vocabulary_size, embedding_size], [data.vocabulary_size], y) 73 | 74 | train_op, summary_op = training(cost, global_step) 75 | 76 | val_op = validation(e_matrix, val) 77 | 78 | sess = tf.Session() 79 | 80 | train_writer = tf.train.SummaryWriter("skipgram_logs/", graph=sess.graph) 81 | 82 | init_op = tf.initialize_all_variables() 83 | 84 | sess.run(init_op) 85 | 86 | step = 0 87 | avg_cost = 0 88 | 89 | for epoch in xrange(training_epochs): 90 | for minibatch in xrange(batches_per_epoch): 91 | 92 | step +=1 93 | 94 | minibatch_x, minibatch_y = data.generate_batch(batch_size, num_skips, skip_window) 95 | feed_dict = {x : minibatch_x, y : minibatch_y} 96 | 97 | _, new_cost, train_summary = sess.run([train_op, cost, summary_op], feed_dict=feed_dict) 98 | train_writer.add_summary(train_summary, 
sess.run(global_step)) 99 | # Compute average loss 100 | avg_cost += new_cost/display_step 101 | 102 | if step % display_step == 0: 103 | print "Elapsed:", str(step), "batches. Cost =", "{:.9f}".format(avg_cost) 104 | avg_cost = 0 105 | 106 | if step % val_step == 0: 107 | _, similarity = sess.run(val_op) 108 | for i in xrange(val_size): 109 | val_word = data.reverse_dictionary[val_examples[i]] 110 | neighbors = (-similarity[i, :]).argsort()[1:top_match+1] 111 | print_str = "Nearest neighbor of %s:" % val_word 112 | for k in xrange(top_match): 113 | print_str += " %s," % data.reverse_dictionary[neighbors[k]] 114 | print print_str[:-1] 115 | 116 | final_embeddings, _ = sess.run(val_op) 117 | 118 | 119 | tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000) 120 | plot_embeddings = np.asfarray(final_embeddings[:plot_num,:], dtype='float') 121 | low_dim_embs = tsne.fit_transform(plot_embeddings) 122 | labels = [data.reverse_dictionary[i] for i in xrange(plot_num)] 123 | data.plot_with_labels(low_dim_embs, labels) 124 | -------------------------------------------------------------------------------- /first_edition_archive/archive/text8.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/archive/text8.zip -------------------------------------------------------------------------------- /first_edition_archive/archive/tsne.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/archive/tsne.png -------------------------------------------------------------------------------- /first_edition_archive/archive/twitter_lstm.py: -------------------------------------------------------------------------------- 1 | import time 2 | import uuid 3 | import os 4 | import numpy as np 5 | import tensorflow as tf 6 | from tensorflow.python import control_flow_ops 7 | from tensorflow.python.ops.rnn import dynamic_rnn 8 | from tensorflow.python.ops.rnn_cell import MultiRNNCell 9 | from lstm import LSTMCell, BNLSTMCell, orthogonal_initializer 10 | import read_tweet_data as data 11 | from sklearn.metrics import confusion_matrix 12 | 13 | batch_size = 256 14 | hidden_size = 32 15 | 16 | def layer_batch_norm(x, n_out, phase_train): 17 | beta_init = tf.constant_initializer(value=0.0, dtype=tf.float32) 18 | gamma_init = tf.constant_initializer(value=1.0, dtype=tf.float32) 19 | 20 | beta = tf.get_variable("beta", [n_out], initializer=beta_init) 21 | gamma = tf.get_variable("gamma", [n_out], initializer=gamma_init) 22 | 23 | batch_mean, batch_var = tf.nn.moments(x, [0], name='moments') 24 | ema = tf.train.ExponentialMovingAverage(decay=0.9) 25 | ema_apply_op = ema.apply([batch_mean, batch_var]) 26 | ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var) 27 | def mean_var_with_update(): 28 | with tf.control_dependencies([ema_apply_op]): 29 | return tf.identity(batch_mean), tf.identity(batch_var) 30 | mean, var = control_flow_ops.cond(phase_train, 31 | mean_var_with_update, 32 | lambda: (ema_mean, ema_var)) 33 | 34 | reshaped_x = tf.reshape(x, [-1, 1, 1, n_out]) 35 | normed = tf.nn.batch_norm_with_global_normalization(reshaped_x, mean, var, 36 | beta, gamma, 1e-3, True) 37 | return tf.reshape(normed, [-1, n_out]) 38 | 39 | def layer(input, weight_shape,
bias_shape, phase_train): 40 | weight_init = tf.random_normal_initializer(stddev=(1.0/weight_shape[0])**0.5) 41 | bias_init = tf.constant_initializer(value=0) 42 | W = tf.get_variable("W", weight_shape, 43 | initializer=weight_init) 44 | b = tf.get_variable("b", bias_shape, 45 | initializer=bias_init) 46 | logits = tf.matmul(input, W) + b 47 | return tf.nn.sigmoid(layer_batch_norm(logits, weight_shape[1], phase_train)) 48 | 49 | with tf.device('/gpu:0'): 50 | x_inp = tf.placeholder(tf.float32, [None, 200, 155]) 51 | training = tf.placeholder(tf.bool) 52 | 53 | lstm = BNLSTMCell(hidden_size, training) 54 | 55 | #c, h 56 | initialState = ( 57 | tf.random_normal([batch_size, hidden_size], stddev=0.1), 58 | tf.random_normal([batch_size, hidden_size], stddev=0.1)) 59 | 60 | outputs, state = dynamic_rnn(lstm, x_inp, initial_state=initialState) 61 | 62 | _, final_hidden = state 63 | 64 | intermediary = layer(final_hidden, [hidden_size, 2], [2], training) 65 | 66 | y = tf.nn.softmax(intermediary) 67 | 68 | y_ = tf.placeholder(tf.float32, [None, 2]) 69 | 70 | cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1])) 71 | 72 | optimizer = tf.train.AdamOptimizer() 73 | gvs = optimizer.compute_gradients(cross_entropy) 74 | capped_gvs = [(None if grad is None else tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs] 75 | train_step = optimizer.apply_gradients(capped_gvs) 76 | 77 | correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1)) 78 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 79 | 80 | # Summaries 81 | a_summary = tf.scalar_summary("accuracy", accuracy) 82 | tr_acc = tf.scalar_summary("train_accuracy", accuracy) 83 | xe_summary = tf.scalar_summary("xe_loss", cross_entropy) 84 | val_summary_op = tf.scalar_summary("val_loss", cross_entropy) 85 | for (grad, var), (capped_grad, _) in zip(gvs, capped_gvs): 86 | if grad is not None: 87 | tf.histogram_summary('grad/{}'.format(var.name), capped_grad) 88 | tf.histogram_summary('capped_fraction/{}'.format(var.name), 89 | tf.nn.zero_fraction(grad - capped_grad)) 90 | tf.histogram_summary('weight/{}'.format(var.name), var) 91 | 92 | init = tf.initialize_all_variables() 93 | 94 | sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)) 95 | sess.run(init) 96 | 97 | logdir = 'airline_logs/' 98 | print('logging to ' + logdir) 99 | writer = tf.train.SummaryWriter(logdir, sess.graph) 100 | 101 | current_time = time.time() 102 | print("Using population statistics (training: False) at test time gives worse results than batch statistics") 103 | 104 | for i in range(100000): 105 | t_batch_xs, t_batch_ys = data.train.minibatch() 106 | loss, xe_str, _, train_preds, train_acc = sess.run([cross_entropy, xe_summary, train_step, y, tr_acc], feed_dict={x_inp: t_batch_xs, y_: t_batch_ys, training: True}) 107 | step_time = time.time() - current_time 108 | writer.add_summary(xe_str, i) 109 | writer.add_summary(train_acc, i) 110 | current_time = time.time() 111 | if i % 100 == 0: 112 | batch_xs, batch_ys = data.val.minibatch() 113 | a_str, val_summary, preds = sess.run([a_summary, val_summary_op, y], feed_dict={x_inp: batch_xs, y_: batch_ys, training: False}) 114 | 115 | 116 | print train_preds[:10], t_batch_ys[:10] 117 | 118 | 119 | cnf_matrix = confusion_matrix(np.argmax(train_preds, axis=1), np.argmax(t_batch_ys, axis=1)) 120 | print "Traning Confusion Matrix:", cnf_matrix.tolist() 121 | 122 | 123 | print preds[:10], batch_ys[:10] 124 | 125 | 126 | cnf_matrix = 
confusion_matrix(np.argmax(preds, axis=1), np.argmax(batch_ys, axis=1)) 127 | print "Validation Confusion Matrix:", cnf_matrix.tolist() 128 | writer.add_summary(a_str, i) 129 | writer.add_summary(val_summary, i) 130 | print(loss, step_time) 131 | -------------------------------------------------------------------------------- /first_edition_archive/archive/word2vec_fast.py: -------------------------------------------------------------------------------- 1 | import dbm, os 2 | import cPickle as pickle 3 | from gensim.models import Word2Vec 4 | import numpy as np 5 | 6 | def save_model(model, directory): 7 | model.init_sims() # making sure syn0norm is initialised 8 | if not os.path.exists(directory): 9 | os.makedirs(directory) 10 | # Saving indexes as DBM'ed dictionary 11 | word_to_index = dbm.open(os.path.join(directory, 'word_to_index'), 'n') 12 | index_to_word = dbm.open(os.path.join(directory, 'index_to_word'), 'n') 13 | for key in model.vocab.keys(): 14 | word_to_index[key.encode('utf8')] = pickle.dumps(model.vocab[key]) 15 | index_to_word[str(model.vocab[key].index)] = key.encode('utf8') 16 | word_to_index.close() 17 | index_to_word.close() 18 | # Memory-mapping normalised word vectors 19 | syn0norm_m = np.memmap(os.path.join(directory, 'syn0norm.dat'), dtype='float32', mode='w+', shape=model.syn0norm.shape) 20 | syn0norm_m[:] = model.syn0norm[:] 21 | syn0norm_m.flush() 22 | # And pickling model object, witout data 23 | vocab, syn0norm, syn0, index2word = model.vocab, model.syn0norm, model.syn0, model.index2word 24 | model.vocab, model.syn0norm, model.syn0, model.index2word = None, None, None, None 25 | model_f = open(os.path.join(directory, 'model.pickle'), 'w') 26 | pickle.dump(model, model_f) 27 | model_f.close() 28 | model.vocab, model.syn0norm, model.syn0, model.index2word = vocab, syn0norm, syn0, index2word 29 | 30 | def load_model(directory): 31 | model = pickle.load(open(os.path.join(directory, 'model.pickle'))) 32 | model.vocab = DBMPickledDict(os.path.join(directory, 'word_to_index')) 33 | model.index2word = DBMPickledDict(os.path.join(directory, 'index_to_word')) 34 | model.syn0norm = np.memmap(os.path.join(directory, 'syn0norm.dat'), dtype='float32', mode='r', shape=(len(model.vocab.keys()), model.layer1_size)) 35 | model.syn0 = model.syn0norm 36 | return model 37 | 38 | 39 | class DBMPickledDict(dict): 40 | def __init__(self, dbm_file): 41 | self._dbm = dbm.open(dbm_file, 'r') 42 | def __setitem__(self, key, value): 43 | raise Exception("Read-only vocabulary") 44 | def __delitem__(self, key): 45 | raise Exception("Read-only vocabulary") 46 | def __iter__(self): 47 | return iter(self._dbm.keys()) 48 | def __len__(self): 49 | return len(self._dbm) 50 | def __contains__(self, key): 51 | if isinstance(key, int): 52 | key = str(key) 53 | return key in self._dbm 54 | def __getitem__(self, key): 55 | if isinstance(key, int): 56 | key = str(key) 57 | return self._dbm[key] 58 | else: 59 | return pickle.loads(self._dbm[key]) 60 | def keys(self): 61 | return self._dbm.keys() 62 | def values(self): 63 | return [self._dbm[key] for key in self._dbm.keys()] 64 | def itervalues(self): 65 | return (self._dbm[key] for key in self._dbm.keys()) 66 | -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/__init__.py -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter3/__init__.py -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter3/logistic_regression_updated.py: -------------------------------------------------------------------------------- 1 | from fdl_examples.datatools import input_data 2 | mnist = input_data.read_data_sets("../../data/", one_hot=True) 3 | 4 | import tensorflow as tf 5 | import time, shutil, os 6 | # inference() and loss() for the logistic regression model are defined below 7 | 8 | # Parameters 9 | learning_rate = 0.01 10 | training_epochs = 60 11 | batch_size = 100 12 | display_step = 1 13 | 14 | def inference(x): 15 | init = tf.constant_initializer(value=0) 16 | W = tf.get_variable("W", [784, 10], 17 | initializer=init) 18 | b = tf.get_variable("b", [10], 19 | initializer=init) 20 | output = tf.nn.softmax(tf.matmul(x, W) + b) 21 | 22 | w_hist = tf.summary.histogram("weights", W) 23 | b_hist = tf.summary.histogram("biases", b) 24 | y_hist = tf.summary.histogram("output", output) 25 | 26 | return output 27 | 28 | def loss(output, y): 29 | dot_product = y * tf.log(output) 30 | 31 | # Reduction along axis 0 collapses each column into a single 32 | # value, whereas reduction along axis 1 collapses each row 33 | # into a single value. In general, reduction along axis i 34 | # collapses the ith dimension of a tensor to size 1. 35 | xentropy = -tf.reduce_sum(dot_product, axis=1) 36 | 37 | loss = tf.reduce_mean(xentropy) 38 | 39 | return loss 40 | 41 | def training(cost, global_step): 42 | 43 | tf.summary.scalar("cost", cost) 44 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 45 | train_op = optimizer.minimize(cost, global_step=global_step) 46 | 47 | return train_op 48 | 49 | 50 | def evaluate(output, y): 51 | correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1)) 52 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 53 | 54 | tf.summary.scalar("validation error", (1.0 - accuracy)) 55 | 56 | return accuracy 57 | 58 | if __name__ == '__main__': 59 | if os.path.exists("logistic_logs/"): 60 | shutil.rmtree("logistic_logs/") 61 | 62 | with tf.Graph().as_default(): 63 | 64 | x = tf.placeholder("float", [None, 784]) # mnist data image of shape 28*28=784 65 | y = tf.placeholder("float", [None, 10]) # 0-9 digits recognition => 10 classes 66 | 67 | 68 | output = inference(x) 69 | 70 | cost = loss(output, y) 71 | 72 | global_step = tf.Variable(0, name='global_step', trainable=False) 73 | 74 | train_op = training(cost, global_step) 75 | 76 | eval_op = evaluate(output, y) 77 | 78 | summary_op = tf.summary.merge_all() 79 | 80 | saver = tf.train.Saver() 81 | 82 | sess = tf.Session() 83 | 84 | summary_writer = tf.summary.FileWriter("logistic_logs/", 85 | graph_def=sess.graph_def) 86 | 87 | 88 | init_op = tf.global_variables_initializer() 89 | 90 | sess.run(init_op) 91 | 92 | 93 | # Training cycle 94 | for epoch in range(training_epochs): 95 | 96 | avg_cost = 0.
97 | total_batch = int(mnist.train.num_examples/batch_size) 98 | # Loop over all batches 99 | for i in range(total_batch): 100 | minibatch_x, minibatch_y = mnist.train.next_batch(batch_size) 101 | # Fit training using batch data 102 | sess.run(train_op, feed_dict={x: minibatch_x, y: minibatch_y}) 103 | # Compute average loss 104 | avg_cost += sess.run(cost, feed_dict={x: minibatch_x, y: minibatch_y})/total_batch 105 | # Display logs per epoch step 106 | if epoch % display_step == 0: 107 | print("Epoch:", '%04d' % (epoch+1), "cost =", "{:.9f}".format(avg_cost)) 108 | 109 | accuracy = sess.run(eval_op, feed_dict={x: mnist.validation.images, y: mnist.validation.labels}) 110 | 111 | print("Validation Error:", (1 - accuracy)) 112 | 113 | summary_str = sess.run(summary_op, feed_dict={x: minibatch_x, y: minibatch_y}) 114 | summary_writer.add_summary(summary_str, sess.run(global_step)) 115 | 116 | saver.save(sess, "logistic_logs/model-checkpoint", global_step=global_step) 117 | 118 | 119 | print("Optimization Finished!") 120 | 121 | 122 | accuracy = sess.run(eval_op, feed_dict={x: mnist.test.images, y: mnist.test.labels}) 123 | 124 | print("Test Accuracy:", accuracy) 125 | -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter3/multilayer_perceptron_updated.py: -------------------------------------------------------------------------------- 1 | from fdl_examples.datatools import input_data 2 | mnist = input_data.read_data_sets("../../data/", one_hot=True) 3 | 4 | import tensorflow as tf 5 | import time, shutil, os 6 | 7 | # Architecture 8 | n_hidden_1 = 256 9 | n_hidden_2 = 256 10 | 11 | # Parameters 12 | learning_rate = 0.01 13 | training_epochs = 1000 14 | batch_size = 100 15 | display_step = 1 16 | 17 | def layer(input, weight_shape, bias_shape): 18 | weight_init = tf.random_normal_initializer(stddev=(2.0/weight_shape[0])**0.5) 19 | bias_init = tf.constant_initializer(value=0) 20 | W = tf.get_variable("W", weight_shape, 21 | initializer=weight_init) 22 | b = tf.get_variable("b", bias_shape, 23 | initializer=bias_init) 24 | return tf.nn.relu(tf.matmul(input, W) + b) 25 | 26 | def inference(x): 27 | with tf.variable_scope("hidden_1"): 28 | hidden_1 = layer(x, [784, n_hidden_1], [n_hidden_1]) 29 | 30 | with tf.variable_scope("hidden_2"): 31 | hidden_2 = layer(hidden_1, [n_hidden_1, n_hidden_2], [n_hidden_2]) 32 | 33 | with tf.variable_scope("output"): 34 | output = layer(hidden_2, [n_hidden_2, 10], [10]) 35 | 36 | return output 37 | 38 | def loss(output, y): 39 | xentropy = tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=y) 40 | loss = tf.reduce_mean(xentropy) 41 | return loss 42 | 43 | def training(cost, global_step): 44 | tf.summary.scalar("cost", cost) 45 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 46 | train_op = optimizer.minimize(cost, global_step=global_step) 47 | return train_op 48 | 49 | 50 | def evaluate(output, y): 51 | correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1)) 52 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 53 | tf.summary.scalar("validation", accuracy) 54 | return accuracy 55 | 56 | if __name__ == '__main__': 57 | 58 | if os.path.exists("mlp_logs/"): 59 | shutil.rmtree("mlp_logs/") 60 | 61 | with tf.Graph().as_default(): 62 | 63 | with tf.variable_scope("mlp_model"): 64 | 65 | x = tf.placeholder("float", [None, 784]) # mnist data image of shape 28*28=784 66 | y = tf.placeholder("float", [None, 10]) # 0-9 digits recognition => 10 
classes 67 | 68 | 69 | output = inference(x) 70 | 71 | cost = loss(output, y) 72 | 73 | global_step = tf.Variable(0, name='global_step', trainable=False) 74 | 75 | train_op = training(cost, global_step) 76 | 77 | eval_op = evaluate(output, y) 78 | 79 | summary_op = tf.summary.merge_all() 80 | 81 | saver = tf.train.Saver() 82 | 83 | sess = tf.Session() 84 | 85 | summary_writer = tf.summary.FileWriter("mlp_logs/", 86 | graph_def=sess.graph_def) 87 | 88 | 89 | init_op = tf.global_variables_initializer() 90 | 91 | sess.run(init_op) 92 | 93 | # saver.restore(sess, "mlp_logs/model-checkpoint-66000") 94 | 95 | 96 | # Training cycle 97 | for epoch in range(training_epochs): 98 | 99 | avg_cost = 0. 100 | total_batch = int(mnist.train.num_examples/batch_size) 101 | # Loop over all batches 102 | for i in range(total_batch): 103 | minibatch_x, minibatch_y = mnist.train.next_batch(batch_size) 104 | # Fit training using batch data 105 | sess.run(train_op, feed_dict={x: minibatch_x, y: minibatch_y}) 106 | # Compute average loss 107 | avg_cost += sess.run(cost, feed_dict={x: minibatch_x, y: minibatch_y})/total_batch 108 | # Display logs per epoch step 109 | if epoch % display_step == 0: 110 | print("Epoch:", '%04d' % (epoch+1), "cost =", "{:.9f}".format(avg_cost)) 111 | 112 | accuracy = sess.run(eval_op, feed_dict={x: mnist.validation.images, y: mnist.validation.labels}) 113 | 114 | print("Validation Error:", (1 - accuracy)) 115 | 116 | summary_str = sess.run(summary_op, feed_dict={x: minibatch_x, y: minibatch_y}) 117 | summary_writer.add_summary(summary_str, sess.run(global_step)) 118 | 119 | saver.save(sess, "mlp_logs/model-checkpoint", global_step=global_step) 120 | 121 | 122 | print("Optimization Finished!") 123 | 124 | 125 | accuracy = sess.run(eval_op, feed_dict={x: mnist.test.images, y: mnist.test.labels}) 126 | 127 | print("Test Accuracy:", accuracy) 128 | -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/__init__.py -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "model-checkpoint-550000" 2 | all_model_checkpoint_paths: "model-checkpoint-547800" 3 | all_model_checkpoint_paths: "model-checkpoint-548350" 4 | all_model_checkpoint_paths: "model-checkpoint-548900" 5 | all_model_checkpoint_paths: "model-checkpoint-549450" 6 | all_model_checkpoint_paths: "model-checkpoint-550000" 7 | -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/events.out.tfevents.1503341411.Nikhils-MacBook-Pro.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/events.out.tfevents.1503341411.Nikhils-MacBook-Pro.local -------------------------------------------------------------------------------- 
/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-547800.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-547800.data-00000-of-00001 -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-547800.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-547800.index -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-547800.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-547800.meta -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548350.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548350.data-00000-of-00001 -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548350.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548350.index -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548350.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548350.meta -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548900.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548900.data-00000-of-00001 -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548900.index: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548900.index -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548900.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-548900.meta -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-549450.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-549450.data-00000-of-00001 -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-549450.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-549450.index -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-549450.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-549450.meta -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-550000.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-550000.data-00000-of-00001 -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-550000.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-550000.index -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-550000.meta: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter4/frozen_mlp_checkpoint/model-checkpoint-550000.meta -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter4/linear_interpolation_updated.py: -------------------------------------------------------------------------------- 1 | from fdl_examples.datatools import input_data 2 | mnist = input_data.read_data_sets("../../data/", one_hot=True) 3 | 4 | import tensorflow as tf 5 | import numpy as np 6 | from fdl_examples.chapter3.multilayer_perceptron_updated import inference, loss 7 | 8 | import matplotlib.pyplot as plt 9 | 10 | sess = tf.Session() 11 | 12 | x = tf.placeholder("float", [None, 784]) # mnist data image of shape 28*28=784 13 | y = tf.placeholder("float", [None, 10]) # 0-9 digits recognition => 10 classes 14 | 15 | 16 | saver = tf.train.import_meta_graph('frozen_mlp_checkpoint/model-checkpoint-547800.meta') 17 | saver.restore(sess, 'frozen_mlp_checkpoint/model-checkpoint-547800') 18 | 19 | var_list_opt = [None, None, None, None, None, None] 20 | name_2_index = { 21 | "mlp_model/hidden_1/W:0" : 0, 22 | "mlp_model/hidden_1/b:0" : 1, 23 | "mlp_model/hidden_2/W:0" : 2, 24 | "mlp_model/hidden_2/b:0" : 3, 25 | "mlp_model/output/W:0" : 4, 26 | "mlp_model/output/b:0" : 5 27 | } 28 | 29 | for v in tf.trainable_variables(): 30 | if v.name in name_2_index: 31 | index = name_2_index[v.name] 32 | var_list_opt[index] = v 33 | h1_opt = tf.nn.relu(tf.matmul(x, var_list_opt[0]) + var_list_opt[1]) # forward pass through the restored (trained) weights 34 | h2_opt = tf.nn.relu(tf.matmul(h1_opt, var_list_opt[2]) + var_list_opt[3]) 35 | cost_opt = loss(tf.nn.relu(tf.matmul(h2_opt, var_list_opt[4]) + var_list_opt[5]), y) 36 | with tf.variable_scope("mlp_init") as scope: 37 | 38 | output_rand = inference(x) 39 | cost_rand = loss(output_rand, y) 40 | 41 | scope.reuse_variables() 42 | 43 | var_list_rand = ["hidden_1/W", "hidden_1/b", "hidden_2/W", "hidden_2/b", "output/W", "output/b"] 44 | var_list_rand = [tf.get_variable(v) for v in var_list_rand] 45 | 46 | init_op = tf.variables_initializer(var_list_rand) 47 | 48 | sess.run(init_op) 49 | 50 | 51 | feed_dict = { 52 | x: mnist.test.images, 53 | y: mnist.test.labels, 54 | } 55 | 56 | print(sess.run([cost_opt, cost_rand], feed_dict=feed_dict)) 57 | 58 | with tf.variable_scope("mlp_inter") as scope: 59 | 60 | alpha = tf.placeholder("float", [1, 1]) 61 | 62 | h1_W_inter = var_list_opt[0] * (1 - alpha) + var_list_rand[0] * (alpha) 63 | h1_b_inter = var_list_opt[1] * (1 - alpha) + var_list_rand[1] * (alpha) 64 | h2_W_inter = var_list_opt[2] * (1 - alpha) + var_list_rand[2] * (alpha) 65 | h2_b_inter = var_list_opt[3] * (1 - alpha) + var_list_rand[3] * (alpha) 66 | o_W_inter = var_list_opt[4] * (1 - alpha) + var_list_rand[4] * (alpha) 67 | o_b_inter = var_list_opt[5] * (1 - alpha) + var_list_rand[5] * (alpha) 68 | 69 | h1_inter = tf.nn.relu(tf.matmul(x, h1_W_inter) + h1_b_inter) 70 | h2_inter = tf.nn.relu(tf.matmul(h1_inter, h2_W_inter) + h2_b_inter) 71 | o_inter = tf.nn.relu(tf.matmul(h2_inter, o_W_inter) + o_b_inter) 72 | 73 | cost_inter = loss(o_inter, y) 74 | tf.summary.scalar("interpolated_cost", cost_inter) 75 | 76 | 77 | summary_writer = tf.summary.FileWriter("linear_interp_logs/", 78 | graph_def=sess.graph_def) 79 | summary_op = tf.summary.merge_all() 80 | results = [] 81 | for a in np.arange(-2, 2, 0.01): 82 | feed_dict = { 83 | x: mnist.test.images, 84 | y: mnist.test.labels, 85 | alpha: [[a]], 86 | } 87 | 88 | cost, summary_str = sess.run([cost_inter, summary_op], feed_dict=feed_dict) 89 | summary_writer.add_summary(summary_str, (a + 2)/0.01) 90 | results.append(cost) 91 | 92 | plt.plot(np.arange(-2, 2,
0.01), results, 'ro') 93 | plt.ylabel('Incurred Error') 94 | plt.xlabel('Alpha') 95 | plt.show() 96 | 97 | 98 | -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/chapter9/dqn_plot_final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/chapter9/dqn_plot_final.png -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/datatools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darksigma/Fundamentals-of-Deep-Learning-Book/4cf31ac897f280512ae76d1f777fd473ba278ae8/first_edition_archive/fdl_examples/datatools/__init__.py -------------------------------------------------------------------------------- /first_edition_archive/fdl_examples/datatools/input_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Functions for downloading and reading MNIST data.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | import gzip 20 | import os 21 | import tensorflow.python.platform 22 | import numpy 23 | from six.moves import urllib 24 | from six.moves import xrange # pylint: disable=redefined-builtin 25 | import tensorflow as tf 26 | SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/' 27 | def maybe_download(filename, work_directory): 28 | """Download the data from Yann's website, unless it's already here.""" 29 | if not os.path.exists(work_directory): 30 | os.mkdir(work_directory) 31 | filepath = os.path.join(work_directory, filename) 32 | if not os.path.exists(filepath): 33 | filepath, _ = urllib.request.urlretrieve(SOURCE_URL + filename, filepath) 34 | statinfo = os.stat(filepath) 35 | print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') 36 | return filepath 37 | def _read32(bytestream): 38 | dt = numpy.dtype(numpy.uint32).newbyteorder('>') 39 | return numpy.frombuffer(bytestream.read(4), dtype=dt)[0] 40 | def extract_images(filename): 41 | """Extract the images into a 4D uint8 numpy array [index, y, x, depth].""" 42 | print('Extracting', filename) 43 | with gzip.open(filename) as bytestream: 44 | magic = _read32(bytestream) 45 | if magic != 2051: 46 | raise ValueError( 47 | 'Invalid magic number %d in MNIST image file: %s' % 48 | (magic, filename)) 49 | num_images = _read32(bytestream) 50 | rows = _read32(bytestream) 51 | cols = _read32(bytestream) 52 | buf = bytestream.read(rows * cols * num_images) 53 | data = numpy.frombuffer(buf, dtype=numpy.uint8) 54 | data = data.reshape(num_images, rows, cols, 1) 55 | return data 56 | def dense_to_one_hot(labels_dense, num_classes=10): 57 | """Convert class labels from scalars to one-hot vectors.""" 58 | num_labels = labels_dense.shape[0] 59 | index_offset = numpy.arange(num_labels) * num_classes 60 | labels_one_hot = numpy.zeros((num_labels, num_classes)) 61 | labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1 62 | return labels_one_hot 63 | def extract_labels(filename, one_hot=False): 64 | """Extract the labels into a 1D uint8 numpy array [index].""" 65 | print('Extracting', filename) 66 | with gzip.open(filename) as bytestream: 67 | magic = _read32(bytestream) 68 | if magic != 2049: 69 | raise ValueError( 70 | 'Invalid magic number %d in MNIST label file: %s' % 71 | (magic, filename)) 72 | num_items = _read32(bytestream) 73 | buf = bytestream.read(num_items) 74 | labels = numpy.frombuffer(buf, dtype=numpy.uint8) 75 | if one_hot: 76 | return dense_to_one_hot(labels) 77 | return labels 78 | class DataSet(object): 79 | def __init__(self, images, labels, fake_data=False, one_hot=False, 80 | dtype=tf.float32): 81 | """Construct a DataSet. 82 | one_hot arg is used only if fake_data is true. `dtype` can be either 83 | `uint8` to leave the input as `[0, 255]`, or `float32` to rescale into 84 | `[0, 1]`. 
85 | """ 86 | dtype = tf.as_dtype(dtype).base_dtype 87 | if dtype not in (tf.uint8, tf.float32): 88 | raise TypeError('Invalid image dtype %r, expected uint8 or float32' % 89 | dtype) 90 | if fake_data: 91 | self._num_examples = 10000 92 | self.one_hot = one_hot 93 | else: 94 | assert images.shape[0] == labels.shape[0], ( 95 | 'images.shape: %s labels.shape: %s' % (images.shape, 96 | labels.shape)) 97 | self._num_examples = images.shape[0] 98 | # Convert shape from [num examples, rows, columns, depth] 99 | # to [num examples, rows*columns] (assuming depth == 1) 100 | assert images.shape[3] == 1 101 | images = images.reshape(images.shape[0], 102 | images.shape[1] * images.shape[2]) 103 | if dtype == tf.float32: 104 | # Convert from [0, 255] -> [0.0, 1.0]. 105 | images = images.astype(numpy.float32) 106 | images = numpy.multiply(images, 1.0 / 255.0) 107 | self._images = images 108 | self._labels = labels 109 | self._epochs_completed = 0 110 | self._index_in_epoch = 0 111 | @property 112 | def images(self): 113 | return self._images 114 | @property 115 | def labels(self): 116 | return self._labels 117 | @property 118 | def num_examples(self): 119 | return self._num_examples 120 | @property 121 | def epochs_completed(self): 122 | return self._epochs_completed 123 | def next_batch(self, batch_size, fake_data=False): 124 | """Return the next `batch_size` examples from this data set.""" 125 | if fake_data: 126 | fake_image = [1] * 784 127 | if self.one_hot: 128 | fake_label = [1] + [0] * 9 129 | else: 130 | fake_label = 0 131 | return [fake_image for _ in xrange(batch_size)], [ 132 | fake_label for _ in xrange(batch_size)] 133 | start = self._index_in_epoch 134 | self._index_in_epoch += batch_size 135 | if self._index_in_epoch > self._num_examples: 136 | # Finished epoch 137 | self._epochs_completed += 1 138 | # Shuffle the data 139 | perm = numpy.arange(self._num_examples) 140 | numpy.random.shuffle(perm) 141 | self._images = self._images[perm] 142 | self._labels = self._labels[perm] 143 | # Start next epoch 144 | start = 0 145 | self._index_in_epoch = batch_size 146 | assert batch_size <= self._num_examples 147 | end = self._index_in_epoch 148 | return self._images[start:end], self._labels[start:end] 149 | def read_data_sets(train_dir, fake_data=False, one_hot=False, dtype=tf.float32): 150 | class DataSets(object): 151 | pass 152 | data_sets = DataSets() 153 | if fake_data: 154 | def fake(): 155 | return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype) 156 | data_sets.train = fake() 157 | data_sets.validation = fake() 158 | data_sets.test = fake() 159 | return data_sets 160 | TRAIN_IMAGES = 'train-images-idx3-ubyte.gz' 161 | TRAIN_LABELS = 'train-labels-idx1-ubyte.gz' 162 | TEST_IMAGES = 't10k-images-idx3-ubyte.gz' 163 | TEST_LABELS = 't10k-labels-idx1-ubyte.gz' 164 | VALIDATION_SIZE = 5000 165 | local_file = maybe_download(TRAIN_IMAGES, train_dir) 166 | train_images = extract_images(local_file) 167 | local_file = maybe_download(TRAIN_LABELS, train_dir) 168 | train_labels = extract_labels(local_file, one_hot=one_hot) 169 | local_file = maybe_download(TEST_IMAGES, train_dir) 170 | test_images = extract_images(local_file) 171 | local_file = maybe_download(TEST_LABELS, train_dir) 172 | test_labels = extract_labels(local_file, one_hot=one_hot) 173 | validation_images = train_images[:VALIDATION_SIZE] 174 | validation_labels = train_labels[:VALIDATION_SIZE] 175 | train_images = train_images[VALIDATION_SIZE:] 176 | train_labels = train_labels[VALIDATION_SIZE:] 177 | data_sets.train = 
DataSet(train_images, train_labels, dtype=dtype) 178 | data_sets.validation = DataSet(validation_images, validation_labels, 179 | dtype=dtype) 180 | data_sets.test = DataSet(test_images, test_labels, dtype=dtype) 181 | return data_sets --------------------------------------------------------------------------------
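A minimal usage sketch for the archived MNIST loader above, mirroring how the chapter scripts in fdl_examples call it (the "../../data/" path is simply the location those scripts assume, not a requirement of the module):

from fdl_examples.datatools import input_data

# Downloads the four MNIST archives into the given directory on first use, then reuses them.
mnist = input_data.read_data_sets("../../data/", one_hot=True)

# Each DataSet exposes flattened 784-dimensional images plus (here) one-hot labels.
print(mnist.train.num_examples)

# next_batch() returns a (batch_size, 784) image array and the matching labels,
# reshuffling the training set each time an epoch is exhausted.
batch_xs, batch_ys = mnist.train.next_batch(100)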