├── .gitignore
├── .ipynb_checkpoints
    ├── DAE_opendeep-checkpoint.ipynb
    ├── DAE_theano-checkpoint.ipynb
    ├── DAE_theano_with_comments-checkpoint.ipynb
    ├── MLP_opendeep-checkpoint.ipynb
    ├── MLP_theano-checkpoint.ipynb
    ├── MLP_theano_with_comments-checkpoint.ipynb
    └── RNN-GSN_opendeep-checkpoint.ipynb
├── DAE_opendeep.ipynb
├── DAE_theano.ipynb
├── DAE_theano_with_comments.ipynb
├── LICENSE
├── MLP_opendeep.ipynb
├── MLP_theano.ipynb
├── MLP_theano_with_comments.ipynb
├── README.md
├── RNN-GSN_opendeep.ipynb
└── utils.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | data/
 2 | outputs/
 3 | datasets/
 4 | 
 5 | *.pkl
 6 | *.png
 7 | *.jpg
 8 | 
 9 | # Byte-compiled / optimized / DLL files
10 | __pycache__/
11 | *.py[cod]
12 | 
13 | # C extensions
14 | *.so
15 | 
16 | # Distribution / packaging
17 | .Python
18 | env/
19 | build/
20 | develop-eggs/
21 | dist/
22 | downloads/
23 | eggs/
24 | .eggs/
25 | lib/
26 | lib64/
27 | parts/
28 | sdist/
29 | var/
30 | *.egg-info/
31 | .installed.cfg
32 | *.egg
33 | 
34 | # PyInstaller
35 | #  Usually these files are written by a python script from a template
36 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest
38 | *.spec
39 | 
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 | 
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *,cover
53 | 
54 | # Translations
55 | *.mo
56 | *.pot
57 | 
58 | # Django stuff:
59 | *.log
60 | 
61 | # Sphinx documentation
62 | docs/_build/
63 | 
64 | # PyBuilder
65 | target/
66 | 


--------------------------------------------------------------------------------
/.ipynb_checkpoints/DAE_opendeep-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "# imports and logger!\n",
 12 |     "from opendeep.log import config_root_logger\n",
 13 |     "from opendeep.models import GSN\n",
 14 |     "from opendeep.optimization import SGD\n",
 15 |     "from opendeep.data import MNIST\n",
 16 |     "from opendeep.utils.misc import closest_to_square_factors\n",
 17 |     "from PIL import Image as pil_img\n",
 18 |     "from opendeep.utils.image import tile_raster_images\n",
 19 |     "import numpy\n",
 20 |     "\n",
 21 |     "config_root_logger()"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": null,
 27 |    "metadata": {
 28 |     "collapsed": false
 29 |    },
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "# A denoising autoencoder (DAE) is a special case of a generative stochastic network (GSN).\n",
 33 |     "# GSNs can have multiple denoising layers that interact with each other both above and below.\n",
 34 |     "dae = GSN(\n",
 35 |     "    input_size=28*28,\n",
 36 |     "    hidden_size=1000,\n",
 37 |     "    visible_activation='sigmoid',\n",
 38 |     "    hidden_activation='tanh',\n",
 39 |     "    layers=1,\n",
 40 |     "    walkbacks=3,\n",
 41 |     "    input_noise='salt_and_pepper',\n",
 42 |     "    input_noise_level=0.3\n",
 43 |     ")\n"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": null,
 49 |    "metadata": {
 50 |     "collapsed": false
 51 |    },
 52 |    "outputs": [],
 53 |    "source": [
 54 |     "# Create the MNIST data object\n",
 55 |     "mnist = MNIST(concat_train_valid=True)\n",
 56 |     "\n",
 57 |     "# Create the optimizer object\n",
 58 |     "optimizer = SGD(dataset=mnist,\n",
 59 |     "                epochs=40, \n",
 60 |     "                batch_size=100, \n",
 61 |     "                learning_rate=.25,\n",
 62 |     "                lr_decay='exponential',\n",
 63 |     "                lr_decay_factor=.995,\n",
 64 |     "                momentum=.5,\n",
 65 |     "                nesterov_momentum=False)\n"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": null,
 71 |    "metadata": {
 72 |     "collapsed": false
 73 |    },
 74 |    "outputs": [],
 75 |    "source": [
 76 |     "# Train the model with the optimizer on the mnist dataset!\n",
 77 |     "dae.train(optimizer)"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": null,
 83 |    "metadata": {
 84 |     "collapsed": false
 85 |    },
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "# Run some numbers to see the output\n",
 89 |     "n_examples = 100\n",
 90 |     "xs_test = mnist.test_inputs[:n_examples]\n",
 91 |     "noisy_xs_test = dae.f_noise(xs_test)\n",
 92 |     "reconstructed = dae.run(noisy_xs_test)\n",
 93 |     "# Concatenate stuff\n",
 94 |     "stacked = numpy.vstack(\n",
 95 |     "    [numpy.vstack([xs_test[i * 10: (i + 1) * 10],\n",
 96 |     "                   noisy_xs_test[i * 10: (i + 1) * 10],\n",
 97 |     "                   reconstructed[i * 10: (i + 1) * 10]])\n",
 98 |     "     for i in range(10)])\n",
 99 |     "number_reconstruction = pil_img.fromarray(\n",
100 |     "    tile_raster_images(stacked, (dae.image_height, dae.image_width), (10, 30), (1, 1))\n",
101 |     ")\n",
102 |     "\n",
103 |     "number_reconstruction.save(\"dae_opendeep_test.png\")\n",
104 |     "\n",
105 |     "# Construct image from the weight matrix\n",
106 |     "image = pil_img.fromarray(\n",
107 |     "    tile_raster_images(\n",
108 |     "        X=dae.weights_list[0].get_value(borrow=True).T,\n",
109 |     "        img_shape=(28, 28),\n",
110 |     "        tile_shape=closest_to_square_factors(dae.layer_sizes[1]),\n",
111 |     "        tile_spacing=(1, 1)\n",
112 |     "    )\n",
113 |     ")\n",
114 |     "image.save(\"dae_opendeep_filters.png\")\n",
115 |     "print \"Done!\""
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": null,
121 |    "metadata": {
122 |     "collapsed": true
123 |    },
124 |    "outputs": [],
125 |    "source": []
126 |   }
127 |  ],
128 |  "metadata": {
129 |   "kernelspec": {
130 |    "display_name": "Python 2",
131 |    "language": "python",
132 |    "name": "python2"
133 |   },
134 |   "language_info": {
135 |    "codemirror_mode": {
136 |     "name": "ipython",
137 |     "version": 2
138 |    },
139 |    "file_extension": ".py",
140 |    "mimetype": "text/x-python",
141 |    "name": "python",
142 |    "nbconvert_exporter": "python",
143 |    "pygments_lexer": "ipython2",
144 |    "version": "2.7.6"
145 |   }
146 |  },
147 |  "nbformat": 4,
148 |  "nbformat_minor": 0
149 | }
150 | 


--------------------------------------------------------------------------------
/.ipynb_checkpoints/DAE_theano-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "# This tutorial is meant to be done after MLP_theano_with_comments.ipynb\n",
 12 |     "\n",
 13 |     "# We are working with MNIST again, this time no labels are necessary - \n",
 14 |     "# the denoising autoencoder (DAE) is an unsupervised model that tries to reconstruct the original input.\n",
 15 |     "\n",
 16 |     "# All imports up here this time\n",
 17 |     "import pickle\n",
 18 |     "import numpy\n",
 19 |     "import numpy.random as rng\n",
 20 |     "import theano\n",
 21 |     "import theano.tensor as T\n",
 22 |     "import theano.sandbox.rng_mrg as RNG_MRG\n",
 23 |     "from utils import tile_raster_images\n",
 24 |     "from PIL import Image as pil_img\n",
 25 |     "from IPython.display import Image\n",
 26 |     "\n",
 27 |     "# Load our data \n",
 28 |     "# Download and unzip pickled version from here: http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz\n",
 29 |     "(train_x, _), (valid_x, _), (test_x, _) = pickle.load(open('data/mnist.pkl', 'r'))\n",
 30 |     "print \"Shapes:\"\n",
 31 |     "print train_x.shape\n",
 32 |     "print valid_x.shape\n",
 33 |     "print test_x.shape"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": null,
 39 |    "metadata": {
 40 |     "collapsed": false
 41 |    },
 42 |    "outputs": [],
 43 |    "source": [
 44 |     "# We can specify any hyperparameters to play with up here:\n",
 45 |     "input_size = 784  # 28x28 images\n",
 46 |     "hidden_size = 1000\n",
 47 |     "w_mean = 0.0\n",
 48 |     "w_std = 0.05\n",
 49 |     "w_interval = numpy.sqrt(6. / (input_size + hidden_size))\n",
 50 |     "noise = 0.3\n",
 51 |     "walkbacks = 3\n",
 52 |     "learning_rate = 0.25\n",
 53 |     "lr_decay = .985\n",
 54 |     "batch_size = 100\n",
 55 |     "epochs = 200\n",
 56 |     "check_frequency = 10\n",
 57 |     "\n",
 58 |     "# To make the organization better, let's define all the variables and parameters here.\n",
 59 |     "x = T.matrix('x')\n",
 60 |     "# W_x = numpy.asarray(rng.normal(loc=w_mean, scale=w_std, size=(input_size, hidden_size)), dtype=theano.config.floatX)\n",
 61 |     "W_x = numpy.asarray(rng.uniform(low=-w_interval, high=w_interval, size=(input_size, hidden_size)), dtype=theano.config.floatX)\n",
 62 |     "W_x = theano.shared(W_x, \"W_x\")\n",
 63 |     "\n",
 64 |     "b_x = numpy.zeros((input_size,), dtype=theano.config.floatX)\n",
 65 |     "b_h = numpy.zeros((hidden_size,), dtype=theano.config.floatX)\n",
 66 |     "b_x = theano.shared(b_x, \"b_x\")\n",
 67 |     "b_h = theano.shared(b_h, \"b_h\")\n"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": null,
 73 |    "metadata": {
 74 |     "collapsed": false
 75 |    },
 76 |    "outputs": [],
 77 |    "source": [
 78 |     "# Now for the most important part of a denoising autoencoder - making the input noisy!\n",
 79 |     "# Noise acts as regularization so the autoencoder doesn't just memorize the training set.\n",
 80 |     "# This makes it more effective for test data by reducing overfitting.\n",
 81 |     "noise_switch = theano.shared(1, \"noise_switch\")\n",
 82 |     "\n",
 83 |     "theano_rng = RNG_MRG.MRG_RandomStreams(1)\n",
 84 |     "def salt_and_pepper(variable):\n",
 85 |     "    mask = theano_rng.binomial(size=variable.shape, n=1, p=(1-noise), dtype=theano.config.floatX)\n",
 86 |     "    saltpepper = theano_rng.binomial(size=variable.shape, n=1, p=0.5, dtype=theano.config.floatX)\n",
 87 |     "    ones = T.eq(mask, 0) * saltpepper\n",
 88 |     "    noisy = variable*mask + ones\n",
 89 |     "    return T.switch(noise_switch,\n",
 90 |     "                    noisy,\n",
 91 |     "                    variable)\n"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": null,
 97 |    "metadata": {
 98 |     "collapsed": false
 99 |    },
100 |    "outputs": [],
101 |    "source": [
102 |     "# Now we are ready to create the computation graph!\n",
103 |     "# Remember it is noisy_x -> hiddens -> x -> hiddens -> x .....\n",
104 |     "\n",
105 |     "inputs=[x]\n",
106 |     "for walkback in range(walkbacks):\n",
107 |     "    noisy_x = salt_and_pepper(inputs[-1])\n",
108 |     "\n",
109 |     "    h = T.tanh(\n",
110 |     "        T.dot(noisy_x, W_x) + b_h\n",
111 |     "    )\n",
112 |     "\n",
113 |     "    reconstruction = T.nnet.sigmoid(\n",
114 |     "        T.dot(h, W_x.T) + b_x\n",
115 |     "    )\n",
116 |     "\n",
117 |     "    inputs.append(reconstruction)\n",
118 |     "    \n",
119 |     "reconstructions = inputs[1:]\n"
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "code",
124 |    "execution_count": null,
125 |    "metadata": {
126 |     "collapsed": false
127 |    },
128 |    "outputs": [],
129 |    "source": [
130 |     "# The output of our computation graph is the last reconstructed input in the Gibbs chain.\n",
131 |     "output = reconstructions[-1]\n",
132 |     "\n",
133 |     "# Our cost function is now the reconstruction error between all reconstructions and the original input.\n",
134 |     "cost = numpy.sum([T.mean(T.nnet.binary_crossentropy(recon, x)) for recon in reconstructions])\n"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "code",
139 |    "execution_count": null,
140 |    "metadata": {
141 |     "collapsed": false
142 |    },
143 |    "outputs": [],
144 |    "source": [
145 |     "parameters = [W_x, b_h, b_x]\n",
146 |     "gradients = T.grad(cost, parameters)\n",
147 |     "\n",
148 |     "lr = theano.shared(numpy.asarray(learning_rate, dtype='float32'), 'lr')\n",
149 |     "train_updates = [(param, param - lr*gradient) for param, gradient in zip(parameters, gradients)]\n"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "code",
154 |    "execution_count": null,
155 |    "metadata": {
156 |     "collapsed": false
157 |    },
158 |    "outputs": [],
159 |    "source": [
160 |     "# Compile our training and testing function like before!\n",
161 |     "# Train function updates the parameters and returns the total train cost to monitor.\n",
162 |     "f_train = theano.function(\n",
163 |     "    inputs=[x], \n",
164 |     "    outputs=cost, \n",
165 |     "    updates=train_updates, \n",
166 |     "    allow_input_downcast=True\n",
167 |     ")\n",
168 |     "\n",
169 |     "# Our test function returns the final reconstruction; no scan is used here, so it takes no explicit updates.\n",
170 |     "f_test = theano.function(\n",
171 |     "    inputs=[x], \n",
172 |     "    outputs=output,\n",
173 |     "    allow_input_downcast=True\n",
174 |     ")\n"
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "code",
179 |    "execution_count": null,
180 |    "metadata": {
181 |     "collapsed": false
182 |    },
183 |    "outputs": [],
184 |    "source": [
185 |     "# That's it! Now perform SGD like before.\n",
186 |     "# Main training loop\n",
187 |     "\n",
188 |     "train_batches = len(train_x) / batch_size\n",
189 |     "\n",
190 |     "try:\n",
191 |     "    for epoch in range(epochs):\n",
192 |     "        print epoch+1,\":\",\n",
193 |     "\n",
194 |     "        # Don't forget to turn on our noise switch for training! Just set the shared variable to 1 (True)\n",
195 |     "        noise_switch.set_value(1.)\n",
196 |     "\n",
197 |     "        train_costs = []\n",
198 |     "        for i in range(train_batches):\n",
199 |     "            batch_x = train_x[i*batch_size:(i+1)*batch_size]\n",
200 |     "\n",
201 |     "            costs = f_train(batch_x)\n",
202 |     "\n",
203 |     "            train_costs.append(costs)\n",
204 |     "        print \"cost:\", numpy.mean(train_costs),\n",
205 |     "        \n",
206 |     "        old_lr = lr.get_value()\n",
207 |     "        print \"\\tlearning rate:\", old_lr\n",
208 |     "        new_lr = numpy.asarray(old_lr * lr_decay, dtype='float32')\n",
209 |     "        lr.set_value(new_lr)\n",
210 |     "\n",
211 |     "        if (epoch+1) % check_frequency == 0:\n",
212 |     "            print \"Saving images...\"\n",
213 |     "            train_recons = f_test(train_x[:25])\n",
214 |     "            train_stacked = numpy.vstack(\n",
215 |     "                [numpy.vstack([\n",
216 |     "                        train_x[i*5:(i+1)*5],\n",
217 |     "                        train_recons[i*5:(i+1)*5]\n",
218 |     "                    ])\n",
219 |     "                 for i in range(5)]\n",
220 |     "            )\n",
221 |     "            train_image = pil_img.fromarray(\n",
222 |     "                tile_raster_images(train_stacked, (28, 28), (5, 10), (1, 1))\n",
223 |     "            )\n",
224 |     "            train_image.save(\"dae_train_%d.png\"%(epoch+1))\n",
225 |     "            \n",
226 |     "            # For validation, let's run a few images through and see the reconstruction \n",
227 |     "            # (with the noise from training still added)\n",
228 |     "            valid_recons = f_test(valid_x[:25])\n",
229 |     "            # Use the tile_raster_images helper function to rearrange the matrix into a 5x10 image of digits\n",
230 |     "            # (Two 5x5 images next to each other - the first the inputs, the second the reconstructions.)\n",
231 |     "            valid_stacked = numpy.vstack(\n",
232 |     "                [numpy.vstack([\n",
233 |     "                        valid_x[i*5:(i+1)*5],\n",
234 |     "                        valid_recons[i*5:(i+1)*5]\n",
235 |     "                    ])\n",
236 |     "                 for i in range(5)]\n",
237 |     "            )\n",
238 |     "            valid_image = pil_img.fromarray(\n",
239 |     "                # helper from utils.py\n",
240 |     "                tile_raster_images(valid_stacked, (28, 28), (5, 10), (1, 1))\n",
241 |     "            )\n",
242 |     "            valid_image.save(\"dae_valid_%d.png\"%(epoch+1))\n",
243 |     "\n",
244 |     "            # Now do the same for test, but don't add any noise\n",
245 |     "            # This means set the noise switch to 0. (False)\n",
246 |     "            noise_switch.set_value(0.)\n",
247 |     "\n",
248 |     "            test_recons = f_test(test_x[:25])\n",
249 |     "            test_stacked = numpy.vstack(\n",
250 |     "                [numpy.vstack([\n",
251 |     "                        test_x[i*5:(i+1)*5],\n",
252 |     "                        test_recons[i*5:(i+1)*5]\n",
253 |     "                    ])\n",
254 |     "                 for i in range(5)]\n",
255 |     "            )\n",
256 |     "            test_image = pil_img.fromarray(\n",
257 |     "                tile_raster_images(test_stacked, (28, 28), (5, 10), (1, 1))\n",
258 |     "            )\n",
259 |     "            test_image.save(\"dae_test_%d.png\"%(epoch+1))\n",
260 |     "            \n",
261 |     "            weight_filters = pil_img.fromarray(\n",
262 |     "                tile_raster_images(\n",
263 |     "                    W_x.get_value(borrow=True).T,\n",
264 |     "                    img_shape=(28, 28),\n",
265 |     "                    tile_shape=(25, 40),\n",
266 |     "                    tile_spacing=(1, 1)\n",
267 |     "                )\n",
268 |     "            )\n",
269 |     "            weight_filters.save(\"dae_filters_%d.png\"%(epoch+1))\n",
270 |     "except KeyboardInterrupt:\n",
271 |     "    pass        "
272 |    ]
273 |   },
274 |   {
275 |    "cell_type": "code",
276 |    "execution_count": null,
277 |    "metadata": {
278 |     "collapsed": true
279 |    },
280 |    "outputs": [],
281 |    "source": []
282 |   }
283 |  ],
284 |  "metadata": {
285 |   "kernelspec": {
286 |    "display_name": "Python 2",
287 |    "language": "python",
288 |    "name": "python2"
289 |   },
290 |   "language_info": {
291 |    "codemirror_mode": {
292 |     "name": "ipython",
293 |     "version": 2
294 |    },
295 |    "file_extension": ".py",
296 |    "mimetype": "text/x-python",
297 |    "name": "python",
298 |    "nbconvert_exporter": "python",
299 |    "pygments_lexer": "ipython2",
300 |    "version": "2.7.6"
301 |   }
302 |  },
303 |  "nbformat": 4,
304 |  "nbformat_minor": 0
305 | }
306 | 


--------------------------------------------------------------------------------
/.ipynb_checkpoints/DAE_theano_with_comments-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "# This tutorial is meant to be done after MLP_theano_with_comments.ipynb\n",
 12 |     "\n",
 13 |     "# We are working with MNIST again, this time no labels are necessary - \n",
 14 |     "# the denoising autoencoder (DAE) is an unsupervised model that tries to reconstruct the original input.\n",
 15 |     "\n",
 16 |     "# All imports up here this time\n",
 17 |     "import pickle\n",
 18 |     "import numpy\n",
 19 |     "import numpy.random as rng\n",
 20 |     "import theano\n",
 21 |     "import theano.tensor as T\n",
 22 |     "import theano.sandbox.rng_mrg as RNG_MRG\n",
 23 |     "from utils import tile_raster_images\n",
 24 |     "from PIL import Image as pil_img\n",
 25 |     "from IPython.display import Image\n",
 26 |     "\n",
 27 |     "# Load our data \n",
 28 |     "# Download and unzip pickled version from here: http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz\n",
 29 |     "(train_x, _), (valid_x, _), (test_x, _) = pickle.load(open('data/mnist.pkl', 'r'))\n",
 30 |     "print \"Shapes:\"\n",
 31 |     "print train_x.shape\n",
 32 |     "print valid_x.shape\n",
 33 |     "print test_x.shape"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": null,
 39 |    "metadata": {
 40 |     "collapsed": false
 41 |    },
 42 |    "outputs": [],
 43 |    "source": [
 44 |     "# The DAE data flow looks like this: input -> input_(add noise) -> hiddens -> input\n",
 45 |     "# This can be repeated by sampling from the reconstructed input. Repeating like that is\n",
 46 |     "# a pseudo Gibbs sampling process. We can define how many times we want to repeat (known as walkbacks).\n",
 47 |     "\n",
 48 |     "# We can specify any hyperparameters to play with up here:\n",
 49 |     "input_size = 784  # 28x28 images\n",
 50 |     "hidden_size = 1000\n",
 51 |     "w_mean = 0.0\n",
 52 |     "w_std = 0.05\n",
 53 |     "noise = 0.3\n",
 54 |     "walkbacks = 3\n",
 55 |     "learning_rate = 0.1\n",
 56 |     "batch_size = 100\n",
 57 |     "epochs = 100\n",
 58 |     "check_frequency = 10\n",
 59 |     "\n",
 60 |     "# To make the organization better, let's define all the variables and parameters here.\n",
 61 |     "# Just like with the MLP, we need a symbolic matrix to input the images\n",
 62 |     "x = T.matrix('x')\n",
 63 |     "# Next, we need the weights matrix W_x. This will be used to go both from input -> hidden and\n",
 64 |     "# hidden -> input (by using its transpose). This is called tied weights.\n",
 65 |     "# Again, initialization has a lot of literature, but we are just going to stick with Gaussian at the moment.\n",
 66 |     "W_x = numpy.asarray(rng.normal(loc=w_mean, scale=w_std, size=(input_size, hidden_size)), dtype=theano.config.floatX)\n",
 67 |     "# (Don't forget to make parameters into shared variables so they can be updated!)\n",
 68 |     "W_x = theano.shared(W_x, \"W_x\")\n",
 69 |     "# Because we are outputting back into the input space, we also need a bias vector for both the input\n",
 70 |     "# and hidden layers.\n",
 71 |     "b_x = numpy.zeros((input_size,), dtype=theano.config.floatX)\n",
 72 |     "b_h = numpy.zeros((hidden_size,), dtype=theano.config.floatX)\n",
 73 |     "b_x = theano.shared(b_x, \"b_x\")\n",
 74 |     "b_h = theano.shared(b_h, \"b_h\")\n"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": null,
 80 |    "metadata": {
 81 |     "collapsed": false
 82 |    },
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "# Now for the most important part of a denoising autoencoder - making the input noisy!\n",
 86 |     "# Noise acts as regularization so the autoencoder doesn't just memorize the training set.\n",
 87 |     "# This makes it more effective for test data by reducing overfitting.\n",
 88 |     "\n",
 89 |     "# We deal with adding noise during training but not testing in Theano with a switch variable!\n",
 90 |     "# Switches can be turned on or off to direct data flow in the computation graph - \n",
 91 |     "# so we turn on for train and off for test!\n",
 92 |     "# You guessed it - we need a shared variable to represent the switch condition so we can change it at runtime.\n",
 93 |     "noise_switch = theano.shared(1, \"noise_switch\")\n",
 94 |     "\n",
 95 |     "# One important thing to note - the type of noise has to correspond to the type of input.\n",
 96 |     "# i.e. we can't add real-valued noise when the input is expected to be binary\n",
 97 |     "# So for these binary inputs, we will add salt-and-pepper masking noise!\n",
 98 |     "# This is a function so we can keep adding it during the computation chain when we alternate sampling\n",
 99 |     "# from input and reconstructing from hiddens.\n",
100 |     "# Theano random number generator\n",
101 |     "theano_rng = RNG_MRG.MRG_RandomStreams(1)\n",
102 |     "def salt_and_pepper(variable):\n",
103 |     "    mask = theano_rng.binomial(size=variable.shape, n=1, p=(1-noise), dtype=theano.config.floatX)\n",
104 |     "    saltpepper = theano_rng.binomial(size=variable.shape, n=1, p=0.5, dtype=theano.config.floatX)\n",
105 |     "    ones = T.eq(mask, 0) * saltpepper\n",
106 |     "    # Randomly set some bits to 0 or 1 with equal probability.\n",
107 |     "    noisy = variable*mask + ones\n",
108 |     "    return T.switch(noise_switch,\n",
109 |     "                    # true condition\n",
110 |     "                    noisy,\n",
111 |     "                    # false condition\n",
112 |     "                    variable)\n"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": null,
118 |    "metadata": {
119 |     "collapsed": false
120 |    },
121 |    "outputs": [],
122 |    "source": [
123 |     "# Now we are ready to create the computation graph!\n",
124 |     "# Remember it is noisy_x -> hiddens -> x -> hiddens -> x .....\n",
125 |     "\n",
126 |     "inputs=[x]\n",
127 |     "for walkback in range(walkbacks):\n",
128 |     "    # First, we want to corrupt the input x\n",
129 |     "    noisy_x = salt_and_pepper(inputs[-1])\n",
130 |     "    # Now calculate the hiddens\n",
131 |     "    h = T.tanh(\n",
132 |     "        T.dot(noisy_x, W_x) + b_h\n",
133 |     "    )\n",
134 |     "    # From the hiddens, reconstruct x.\n",
135 |     "    # We have to use an appropriate activation function for the type of inputs!\n",
136 |     "    # In our case with MNIST images, it is binary so sigmoid works.\n",
137 |     "    reconstruction = T.nnet.sigmoid(\n",
138 |     "        T.dot(h, W_x.T) + b_x\n",
139 |     "    )\n",
140 |     "    # That is all for an autoencoder!\n",
141 |     "    inputs.append(reconstruction)\n",
142 |     "    \n",
143 |     "# Remove the original input from our reconstructions list\n",
144 |     "reconstructions = inputs[1:]\n"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": null,
150 |    "metadata": {
151 |     "collapsed": false
152 |    },
153 |    "outputs": [],
154 |    "source": [
155 |     "# The output of our computation graph is the last reconstructed input in the Gibbs chain.\n",
156 |     "output = reconstructions[-1]\n",
157 |     "\n",
158 |     "# Our cost function is now the reconstruction error between all reconstructions and the original input.\n",
159 |     "# Again, because our input space is binary, using mean binary cross-entropy is a good analog for \n",
160 |     "# reconstruction error.\n",
161 |     "# For real-valued inputs, we could use mean square error.\n",
162 |     "cost = numpy.sum([T.mean(T.nnet.binary_crossentropy(recon, x)) for recon in reconstructions])\n"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": null,
168 |    "metadata": {
169 |     "collapsed": false
170 |    },
171 |    "outputs": [],
172 |    "source": [
173 |     "# Just like with the MLP, compute gradient updates for the parameters to use with training.\n",
174 |     "parameters = [W_x, b_h, b_x]\n",
175 |     "# Automagic differentiation! (Still love it)\n",
176 |     "gradients = T.grad(cost, parameters)\n",
177 |     "\n",
178 |     "# Update the parameters for stochastic gradient descent!\n",
179 |     "train_updates = [(param, param - learning_rate*gradient) for param, gradient in zip(parameters, gradients)]\n"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "code",
184 |    "execution_count": null,
185 |    "metadata": {
186 |     "collapsed": false
187 |    },
188 |    "outputs": [],
189 |    "source": [
190 |     "# Compile our training and testing function like before!\n",
191 |     "# Train function updates the parameters and returns the total train cost to monitor.\n",
192 |     "f_train = theano.function(\n",
193 |     "    inputs=[x], \n",
194 |     "    outputs=cost, \n",
195 |     "    updates=train_updates, \n",
196 |     "    allow_input_downcast=True\n",
197 |     ")\n",
198 |     "\n",
199 |     "# Our test function returns the final reconstruction; no scan is used here, so it takes no explicit updates.\n",
200 |     "f_test = theano.function(\n",
201 |     "    inputs=[x], \n",
202 |     "    outputs=output,\n",
203 |     "    allow_input_downcast=True\n",
204 |     ")\n"
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "code",
209 |    "execution_count": null,
210 |    "metadata": {
211 |     "collapsed": false
212 |    },
213 |    "outputs": [],
214 |    "source": [
215 |     "# That's it! Now perform SGD like before.\n",
216 |     "# Main training loop\n",
217 |     "\n",
218 |     "train_batches = len(train_x) / batch_size\n",
219 |     "\n",
220 |     "try:\n",
221 |     "    for epoch in range(epochs):\n",
222 |     "        print epoch+1, \":\",\n",
223 |     "\n",
224 |     "        # Don't forget to turn on our noise switch for training! Just set the shared variable to 1 (True)\n",
225 |     "        noise_switch.set_value(1.)\n",
226 |     "\n",
227 |     "        train_costs = []\n",
228 |     "        for i in range(train_batches):\n",
229 |     "            batch_x = train_x[i*batch_size:(i+1)*batch_size]\n",
230 |     "\n",
231 |     "            costs = f_train(batch_x)\n",
232 |     "\n",
233 |     "            train_costs.append(costs)\n",
234 |     "        print \"cost:\", numpy.mean(train_costs)\n",
235 |     "\n",
236 |     "        if (epoch+1) % check_frequency == 0:\n",
237 |     "            print \"Saving images...\"\n",
238 |     "            # For validation, let's run a few images through and see the reconstruction \n",
239 |     "            # (with the noise from training still added)\n",
240 |     "            valid_recons = f_test(valid_x[:25])\n",
241 |     "            # Use the tile_raster_images helper function to rearrange the matrix into a 5x10 image of digits\n",
242 |     "            # (Two 5x5 images next to each other - the first the inputs, the second the reconstructions.)\n",
243 |     "            valid_stacked = numpy.vstack(\n",
244 |     "                [numpy.vstack([\n",
245 |     "                        valid_x[i*5:(i+1)*5],\n",
246 |     "                        valid_recons[i*5:(i+1)*5]\n",
247 |     "                    ])\n",
248 |     "                 for i in range(5)]\n",
249 |     "            )\n",
250 |     "            valid_image = pil_img.fromarray(\n",
251 |     "                # helper from utils.py\n",
252 |     "                tile_raster_images(valid_stacked, (28, 28), (5, 10), (1, 1))\n",
253 |     "            )\n",
254 |     "            valid_image.save(\"dae_valid_%d.png\"%(epoch+1))\n",
255 |     "\n",
256 |     "            # Now do the same for test, but don't add any noise\n",
257 |     "            # This means set the noise switch to 0. (False)\n",
258 |     "            noise_switch.set_value(0.)\n",
259 |     "\n",
260 |     "            test_recons = f_test(test_x[:25])\n",
261 |     "            test_stacked = numpy.vstack(\n",
262 |     "                [numpy.vstack([\n",
263 |     "                        test_x[i*5:(i+1)*5],\n",
264 |     "                        test_recons[i*5:(i+1)*5]\n",
265 |     "                    ])\n",
266 |     "                 for i in range(5)]\n",
267 |     "            )\n",
268 |     "            test_image = pil_img.fromarray(\n",
269 |     "                tile_raster_images(test_stacked, (28, 28), (5, 10), (1, 1))\n",
270 |     "            )\n",
271 |     "            test_image.save(\"dae_test_%d.png\"%(epoch+1))\n",
272 |     "except KeyboardInterrupt:\n",
273 |     "    pass\n",
274 |     "        \n",
275 |     "# Let's finally save an image of the filters the DAE learned - this is simply the transpose of the weights!\n",
276 |     "weight_filters = pil_img.fromarray(\n",
277 |     "    tile_raster_images(\n",
278 |     "        W_x.get_value(borrow=True).T,\n",
279 |     "        img_shape=(28, 28),\n",
280 |     "        tile_shape=(25, 40),\n",
281 |     "        tile_spacing=(1, 1)\n",
282 |     "    )\n",
283 |     ")\n",
284 |     "print \"Saving filters...\"\n",
285 |     "weight_filters.save(\"dae_filters.png\")\n",
286 |     "print \"Done!\"\n",
287 |     "        "
288 |    ]
289 |   },
290 |   {
291 |    "cell_type": "code",
292 |    "execution_count": null,
293 |    "metadata": {
294 |     "collapsed": true
295 |    },
296 |    "outputs": [],
297 |    "source": []
298 |   }
299 |  ],
300 |  "metadata": {
301 |   "kernelspec": {
302 |    "display_name": "Python 2",
303 |    "language": "python",
304 |    "name": "python2"
305 |   },
306 |   "language_info": {
307 |    "codemirror_mode": {
308 |     "name": "ipython",
309 |     "version": 2
310 |    },
311 |    "file_extension": ".py",
312 |    "mimetype": "text/x-python",
313 |    "name": "python",
314 |    "nbconvert_exporter": "python",
315 |    "pygments_lexer": "ipython2",
316 |    "version": "2.7.6"
317 |   }
318 |  },
319 |  "nbformat": 4,
320 |  "nbformat_minor": 0
321 | }
322 | 


--------------------------------------------------------------------------------
/.ipynb_checkpoints/MLP_opendeep-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "# imports and logger!\n",
 12 |     "from opendeep.log import config_root_logger\n",
 13 |     "from opendeep.models import Prototype, Dense, SoftmaxLayer\n",
 14 |     "from opendeep.optimization import SGD\n",
 15 |     "from opendeep.data import MNIST\n",
 16 |     "from opendeep.monitor import Monitor, FileService\n",
 17 |     "\n",
 18 |     "config_root_logger()"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": null,
 24 |    "metadata": {
 25 |     "collapsed": false
 26 |    },
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "# Create the MLP with two hidden layers and one classification layer\n",
 30 |     "mlp = Prototype()\n",
 31 |     "mlp.add(\n",
 32 |     "    Dense(input_size=28*28, output_size=1000, activation='tanh', noise='dropout', noise_level=0.3)\n",
 33 |     ")\n",
 34 |     "mlp.add(\n",
 35 |     "    Dense(output_size=1000, activation='tanh', noise='dropout', noise_level=0.3)\n",
 36 |     ")\n",
 37 |     "mlp.add(\n",
 38 |     "    SoftmaxLayer(output_size=10)\n",
 39 |     ")"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "code",
 44 |    "execution_count": null,
 45 |    "metadata": {
 46 |     "collapsed": false
 47 |    },
 48 |    "outputs": [],
 49 |    "source": [
 50 |     "# Create the MNIST data object\n",
 51 |     "mnist = MNIST(concat_train_valid=True)\n",
 52 |     "\n",
 53 |     "# Create the optimizer object\n",
 54 |     "optimizer = SGD(model=mlp, \n",
 55 |     "                dataset=mnist, \n",
 56 |     "                epochs=100, \n",
 57 |     "                batch_size=500, \n",
 58 |     "                learning_rate=.01, \n",
 59 |     "                momentum=.9,\n",
 60 |     "                nesterov_momentum=True)\n",
 61 |     "\n",
 62 |     "# Make a monitor to watch the train and test prediction errors\n",
 63 |     "errorMonitor = Monitor('error', mlp.get_monitors()['softmax_error'], train=True, test=True)"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": null,
 69 |    "metadata": {
 70 |     "collapsed": false
 71 |    },
 72 |    "outputs": [],
 73 |    "source": [
 74 |     "# Train the model with the optimizer!\n",
 75 |     "optimizer.train(monitor_channels=[errorMonitor])"
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "code",
 80 |    "execution_count": null,
 81 |    "metadata": {
 82 |     "collapsed": false
 83 |    },
 84 |    "outputs": [],
 85 |    "source": [
 86 |     "# Make some predictions on test data!\n",
 87 |     "test_data, test_labels = mnist.test_inputs, mnist.test_targets\n",
 88 |     "\n",
 89 |     "n=30\n",
 90 |     "predictions = mlp.run(test_data)\n",
 91 |     "labels = test_labels.astype('int32')\n",
 92 |     "\n",
 93 |     "print \"Predictions:\", predictions[:n]\n",
 94 |     "print \"Correct:    \", labels[:n]\n",
 95 |     "print \"Accuracy:   \", sum((predictions==labels) * 1./len(labels))*100, \"%\""
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": null,
101 |    "metadata": {
102 |     "collapsed": true
103 |    },
104 |    "outputs": [],
105 |    "source": []
106 |   }
107 |  ],
108 |  "metadata": {
109 |   "kernelspec": {
110 |    "display_name": "Python 2",
111 |    "language": "python",
112 |    "name": "python2"
113 |   },
114 |   "language_info": {
115 |    "codemirror_mode": {
116 |     "name": "ipython",
117 |     "version": 2
118 |    },
119 |    "file_extension": ".py",
120 |    "mimetype": "text/x-python",
121 |    "name": "python",
122 |    "nbconvert_exporter": "python",
123 |    "pygments_lexer": "ipython2",
124 |    "version": "2.7.6"
125 |   }
126 |  },
127 |  "nbformat": 4,
128 |  "nbformat_minor": 0
129 | }
130 | 


--------------------------------------------------------------------------------
/.ipynb_checkpoints/MLP_theano-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "from IPython.display import Image\n",
 12 |     "import pickle\n",
 13 |     "\n",
 14 |     "# Download and unzip pickled version from here: http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz\n",
 15 |     "(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = pickle.load(open('data/mnist.pkl', 'r'))\n",
 16 |     "print \"Shapes:\"\n",
 17 |     "print train_x.shape, train_y.shape\n",
 18 |     "print valid_x.shape, valid_y.shape\n",
 19 |     "print test_x.shape, test_y.shape\n",
 20 |     "\n",
 21 |     "print \"--------------\"\n",
 22 |     "print \"Example input:\"\n",
 23 |     "print train_x[0]\n",
 24 |     "print \"Example label:\"\n",
 25 |     "print train_y[0]\n"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": null,
 31 |    "metadata": {
 32 |     "collapsed": false
 33 |    },
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "# Show example images - using tile_raster_images helper function from OpenDeep to get 28x28 image from 784 array.\n",
 37 |     "from utils import tile_raster_images\n",
 38 |     "from PIL import Image as pil_img\n",
 39 |     "\n",
 40 |     "input_images = train_x[:25]\n",
 41 |     "im = pil_img.fromarray(\n",
 42 |     "    tile_raster_images(input_images, \n",
 43 |     "                       img_shape=(28, 28), \n",
 44 |     "                       tile_shape=(1, 25),\n",
 45 |     "                       tile_spacing=(1, 1))\n",
 46 |     ")\n",
 47 |     "im.save(\"some_mnist_numbers.png\")\n",
 48 |     "Image(filename=\"some_mnist_numbers.png\")\n"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": null,
 54 |    "metadata": {
 55 |     "collapsed": false
 56 |    },
 57 |    "outputs": [],
 58 |    "source": [
 59 |     "# Your basic Theano imports.\n",
 60 |     "import theano\n",
 61 |     "import theano.tensor as T\n",
 62 |     "\n",
 63 |     "x = T.matrix('x')\n"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": null,
 69 |    "metadata": {
 70 |     "collapsed": false
 71 |    },
 72 |    "outputs": [],
 73 |    "source": [
 74 |     "# Compute the hidden layer from the input\n",
 75 |     "import numpy\n",
 76 |     "import numpy.random as rng\n",
 77 |     "\n",
 78 |     "i = numpy.sqrt(6. / (784+500))\n",
 79 |     "# W_x = numpy.asarray(rng.normal(loc=0.0, scale=.05, size=(28*28, 500)), dtype=theano.config.floatX)\n",
 80 |     "W_x = numpy.asarray(rng.uniform(low=-i, high=i, size=(28*28, 500)), dtype=theano.config.floatX)\n",
 81 |     "b_h = numpy.zeros(shape=(500,), dtype=theano.config.floatX)\n",
 82 |     "\n",
 83 |     "W_x = theano.shared(W_x, name=\"W_x\")\n",
 84 |     "b_h = theano.shared(b_h, name=\"b_h\")\n",
 85 |     "\n",
 86 |     "h = T.tanh(\n",
 87 |     "    T.dot(x, W_x) + b_h\n",
 88 |     ")\n"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": null,
 94 |    "metadata": {
 95 |     "collapsed": false
 96 |    },
 97 |    "outputs": [],
 98 |    "source": [
 99 |     "# Compute the output class probabilities from the hidden layer\n",
100 |     "i = numpy.sqrt(6. / (510))\n",
101 |     "# W_h = numpy.asarray(rng.normal(loc=0.0, scale=.05, size=(500, 10)), dtype=theano.config.floatX)\n",
102 |     "W_h = numpy.asarray(rng.uniform(low=-i, high=i, size=(500, 10)), dtype=theano.config.floatX)\n",
103 |     "b_y = numpy.zeros(shape=(10,), dtype=\"float32\")\n",
104 |     "\n",
105 |     "W_h = theano.shared(W_h, name=\"W_h\")\n",
106 |     "b_y = theano.shared(b_y, name=\"b_y\")\n",
107 |     "\n",
108 |     "y = T.nnet.softmax(\n",
109 |     "    T.dot(h, W_h) + b_y\n",
110 |     ")\n",
111 |     "\n",
112 |     "# The actual predicted label\n",
113 |     "y_hat = T.argmax(y, axis=1)\n"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": null,
119 |    "metadata": {
120 |     "collapsed": false
121 |    },
122 |    "outputs": [],
123 |    "source": [
124 |     "# Find cost compared to correct labels\n",
125 |     "correct_labels = T.ivector(\"labels\")\n",
126 |     "\n",
127 |     "log_likelihood = T.log(y)[T.arange(correct_labels.shape[0]), correct_labels]\n",
128 |     "cost = -T.mean(log_likelihood)\n"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": null,
134 |    "metadata": {
135 |     "collapsed": false
136 |    },
137 |    "outputs": [],
138 |    "source": [
139 |     "# Compute gradient updates for the parameters\n",
140 |     "parameters = [W_x, b_h, W_h, b_y]\n",
141 |     "gradients = T.grad(cost, parameters)\n",
142 |     "\n",
143 |     "learning_rate = 0.01\n",
144 |     "train_updates = [(param, param - learning_rate*gradient) for param, gradient in zip(parameters, gradients)]\n"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": null,
150 |    "metadata": {
151 |     "collapsed": false
152 |    },
153 |    "outputs": [],
154 |    "source": [
155 |     "# Compile function for training (changes parameters via updates) and testing (no updates)\n",
156 |     "f_train = theano.function(\n",
157 |     "    inputs=[x, correct_labels], \n",
158 |     "    outputs=cost, \n",
159 |     "    updates=train_updates, \n",
160 |     "    allow_input_downcast=True\n",
161 |     ")\n",
162 |     "\n",
163 |     "f_test = theano.function(\n",
164 |     "    inputs=[x], \n",
165 |     "    outputs=y_hat, \n",
166 |     "    allow_input_downcast=True\n",
167 |     ")\n"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "code",
172 |    "execution_count": null,
173 |    "metadata": {
174 |     "collapsed": false
175 |    },
176 |    "outputs": [],
177 |    "source": [
178 |     "# Main training loop\n",
179 |     "batch_size = 100\n",
180 |     "epochs = 300\n",
181 |     "check_frequency = 3\n",
182 |     "\n",
183 |     "train_batches = len(train_x) / batch_size\n",
184 |     "valid_batches = len(valid_x) / batch_size\n",
185 |     "test_batches = len(test_x) / batch_size\n",
186 |     "\n",
187 |     "for epoch in range(epochs):\n",
188 |     "    print epoch+1, \":\",\n",
189 |     "    \n",
190 |     "    train_costs = []\n",
191 |     "    train_accuracy = []\n",
192 |     "    for i in range(train_batches):\n",
193 |     "        batch_x = train_x[i*batch_size:(i+1)*batch_size]\n",
194 |     "        batch_labels = train_y[i*batch_size:(i+1)*batch_size]\n",
195 |     "\n",
196 |     "        costs = f_train(batch_x, batch_labels)\n",
197 |     "        preds = f_test(batch_x)\n",
198 |     "        acc = sum(preds==batch_labels)/float(len(batch_labels))\n",
199 |     "        \n",
200 |     "        train_costs.append(costs)\n",
201 |     "        train_accuracy.append(acc)\n",
202 |     "    print \"cost:\", numpy.mean(train_costs), \"\\ttrain:\", str(numpy.mean(train_accuracy)*100)+\"%\",\n",
203 |     "    \n",
204 |     "    valid_accuracy = []\n",
205 |     "    for i in range(valid_batches):\n",
206 |     "        batch_x = valid_x[i*batch_size:(i+1)*batch_size]\n",
207 |     "        batch_labels = valid_y[i*batch_size:(i+1)*batch_size]\n",
208 |     "        \n",
209 |     "        preds = f_test(batch_x)\n",
210 |     "        acc = sum(preds==batch_labels)/float(len(batch_labels))\n",
211 |     "        \n",
212 |     "        valid_accuracy.append(acc)\n",
213 |     "    print \"\\tvalid:\", str(numpy.mean(valid_accuracy)*100)+\"%\",\n",
214 |     "    \n",
215 |     "    test_accuracy = []\n",
216 |     "    for i in range(test_batches):\n",
217 |     "        batch_x = test_x[i*batch_size:(i+1)*batch_size]\n",
218 |     "        batch_labels = test_y[i*batch_size:(i+1)*batch_size]\n",
219 |     "        \n",
220 |     "        preds = f_test(batch_x)\n",
221 |     "        acc = sum(preds==batch_labels)/float(len(batch_labels))\n",
222 |     "        \n",
223 |     "        test_accuracy.append(acc)\n",
224 |     "    print \"\\ttest:\", str(numpy.mean(test_accuracy)*100)+\"%\"\n",
225 |     "    \n",
226 |     "    if (epoch+1) % check_frequency == 0:\n",
227 |     "        print 'saving filters...'\n",
228 |     "        weight_filters = pil_img.fromarray(\n",
229 |     "                tile_raster_images(\n",
230 |     "                    W_x.get_value(borrow=True).T,\n",
231 |     "                    img_shape=(28, 28),\n",
232 |     "                    tile_shape=(20, 25),\n",
233 |     "                    tile_spacing=(1, 1)\n",
234 |     "                )\n",
235 |     "            )\n",
236 |     "        weight_filters.save(\"mlp_filters_%d.png\"%(epoch+1))"
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "code",
241 |    "execution_count": null,
242 |    "metadata": {
243 |     "collapsed": true
244 |    },
245 |    "outputs": [],
246 |    "source": []
247 |   }
248 |  ],
249 |  "metadata": {
250 |   "kernelspec": {
251 |    "display_name": "Python 2",
252 |    "language": "python",
253 |    "name": "python2"
254 |   },
255 |   "language_info": {
256 |    "codemirror_mode": {
257 |     "name": "ipython",
258 |     "version": 2
259 |    },
260 |    "file_extension": ".py",
261 |    "mimetype": "text/x-python",
262 |    "name": "python",
263 |    "nbconvert_exporter": "python",
264 |    "pygments_lexer": "ipython2",
265 |    "version": "2.7.6"
266 |   }
267 |  },
268 |  "nbformat": 4,
269 |  "nbformat_minor": 0
270 | }
271 | 


--------------------------------------------------------------------------------
/.ipynb_checkpoints/MLP_theano_with_comments-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "# This tutorial covers your simplest neural network: a multilayer perceptron (MLP)\n",
 12 |     "# Also known as a feedforward neural network.\n",
 13 |     "# We will learn to classify MNIST handwritten digit images into their correct label (0-9).\n",
 14 |     "\n",
 15 |     "from IPython.display import Image\n",
 16 |     "# First, let's load our data and take a look!\n",
 17 |     "import pickle\n",
 18 |     "\n",
 19 |     "# Load our data \n",
 20 |     "# Download and unzip pickled version from here: http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz\n",
 21 |     "(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = pickle.load(open('data/mnist.pkl', 'r'))\n",
 22 |     "print \"Shapes:\"\n",
 23 |     "print train_x.shape, train_y.shape\n",
 24 |     "print valid_x.shape, valid_y.shape\n",
 25 |     "print test_x.shape, test_y.shape\n",
 26 |     "\n",
 27 |     "print \"--------------\"\n",
 28 |     "print \"Example input:\"\n",
 29 |     "print train_x[0]\n",
 30 |     "print \"Example label:\"\n",
 31 |     "print train_y[0]\n"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": null,
 37 |    "metadata": {
 38 |     "collapsed": false
 39 |    },
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "# Show example images - using tile_raster_images helper function from OpenDeep to get 28x28 image from 784 array.\n",
 43 |     "from utils import tile_raster_images\n",
 44 |     "from PIL import Image as pil_img\n",
 45 |     "\n",
 46 |     "input_images = train_x[:25]\n",
 47 |     "im = pil_img.fromarray(\n",
 48 |     "    tile_raster_images(input_images, \n",
 49 |     "                       img_shape=(28, 28), \n",
 50 |     "                       tile_shape=(1, 25),\n",
 51 |     "                       tile_spacing=(1, 1))\n",
 52 |     ")\n",
 53 |     "im.save(\"some_mnist_numbers.png\")\n",
 54 |     "Image(filename=\"some_mnist_numbers.png\")\n"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": null,
 60 |    "metadata": {
 61 |     "collapsed": false
 62 |    },
 63 |    "outputs": [],
 64 |    "source": [
 65 |     "# Cool, now we know a little about the input data, let's design the MLP to work with it!\n",
 66 |     "# An MLP looks like this: input -> hiddens -> output classification\n",
 67 |     "# Each stage is just a matrix multiplication with a nonlinear function applied after.\n",
 68 |     "\n",
 69 |     "# Your basic Theano imports.\n",
 70 |     "import theano\n",
 71 |     "import theano.tensor as T\n",
 72 |     "\n",
 73 |     "# Inputs are matrices where rows are examples and columns are pixels - so create a symbolic Theano matrix.\n",
 74 |     "x = T.matrix('x')\n"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": null,
 80 |    "metadata": {
 81 |     "collapsed": false
 82 |    },
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "# Now let's start building the equation for our MLP!\n",
 86 |     "\n",
 87 |     "# The first transformation is the input x -> hidden layer h.\n",
 88 |     "# We defined this transformation with h = tanh(x.dot(W_x) + b_h)\n",
 89 |     "# where the learnable model parameters are W_x and b_h.\n",
 90 |     "\n",
 91 |     "# Therefore, we will need a weights matrix W_x and a bias vector b_h.\n",
 92 |     "# W_x has shape (input_size, hidden_size) and b_h has shape (hidden_size,).\n",
 93 |     "# Initialization is important in deep learning; we want something random so the model doesn't get stuck early.\n",
 94 |     "# There are many papers on this subject, but for now we will just use a normal distribution with mean=0 and std=0.05.\n",
 95 |     "# Another good option for tanh layers is to use a uniform distribution with interval +- sqrt(6/sum(shape)).\n",
 96 |     "# These are hyperparameters to play with.\n",
 97 |     "# Bias starting as zero is fine.\n",
 98 |     "import numpy.random as rng\n",
 99 |     "W_x = numpy.asarray(rng.normal(loc=0.0, scale=.05, size=(28*28, 500)), dtype=theano.config.floatX)\n",
100 |     "b_h = numpy.zeros(shape=(500,), dtype=theano.config.floatX)\n",
101 |     "\n",
102 |     "# To update a variable used in an equation (for example, while learning), \n",
103 |     "# Theano needs it to be in a special wrapper called a shared variable.\n",
104 |     "# These are the model parameters for our first hidden layer!\n",
105 |     "W_x = theano.shared(W_x, name=\"W_x\")\n",
106 |     "b_h = theano.shared(b_h, name=\"b_h\")\n",
107 |     "\n",
108 |     "# Now, we can finally write the equation to give our symbolic hidden layer h!\n",
109 |     "h = T.tanh(\n",
110 |     "    T.dot(x, W_x) + b_h\n",
111 |     ")\n",
112 |     "\n",
113 |     "# Side note - if we used softmax instead of tanh for the activation, this would be performing logistic regression!\n"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": null,
119 |    "metadata": {
120 |     "collapsed": false
121 |    },
122 |    "outputs": [],
123 |    "source": [
124 |     "# We have the hidden layer h, let's put that softmax layer on top for classification output y!\n",
125 |     "\n",
126 |     "# Same deal as before, the transformation is defined as:\n",
127 |     "# y = softmax(h.dot(W_h) + b_y)\n",
128 |     "# where the learnable parameters are W_h and b_y.\n",
129 |     "# W_h has shape (hidden_size, output_size) and b_y has shape (output_size,).\n",
130 |     "\n",
131 |     "# We will use the same random initialization strategy as before.\n",
132 |     "W_h = numpy.asarray(rng.normal(loc=0.0, scale=.05, size=(500, 10)), dtype=theano.config.floatX)\n",
133 |     "b_y = numpy.zeros(shape=(10,), dtype=theano.config.floatX)\n",
134 |     "# Don't forget to make them shared variables!\n",
135 |     "W_h = theano.shared(W_h, name=\"W_h\")\n",
136 |     "b_y = theano.shared(b_y, name=\"b_y\")\n",
137 |     "\n",
138 |     "# Now write the equation for the output!\n",
139 |     "y = T.nnet.softmax(\n",
140 |     "    T.dot(h, W_h) + b_y\n",
141 |     ")\n",
142 |     "\n",
143 |     "# The output (due to softmax) is a vector of class probabilities.\n",
144 |     "# To get the output class 'guess' from the model, just take the index of the highest probability!\n",
145 |     "y_hat = T.argmax(y, axis=1)\n",
146 |     "\n",
147 |     "# That's everything! Just four model parameters and one input variable.\n"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "code",
152 |    "execution_count": null,
153 |    "metadata": {
154 |     "collapsed": false
155 |    },
156 |    "outputs": [],
157 |    "source": [
158 |     "# The variable y_hat represents the output of running our model, but we need a cost function to use for training.\n",
159 |     "# For a softmax (probability) output, we want to maximize the likelihood of P(Y=y|X).\n",
160 |     "# This means we want to minimize the negative log-likelihood cost! (For a primer, see the Machine Learning course on Coursera.)\n",
161 |     "\n",
162 |     "# Cost functions always need the truth outputs to compare against (this is supervised learning).\n",
163 |     "# From before, we saw the labels were a vector of ints - so let's make a symbolic variable for this!\n",
164 |     "correct_labels = T.ivector(\"labels\")  # integer vector\n",
165 |     "\n",
166 |     "# Now we can compare our output probability from y with the true labels.\n",
167 |     "# Because the labels are integers, we can use them (together with the row indices) as an index to pick out,\n",
168 |     "# for each example, the log-probability our model assigned to the correct label.\n",
169 |     "log_likelihood = T.log(y)[T.arange(correct_labels.shape[0]), correct_labels]\n",
170 |     "# We use mean instead of sum to be less dependent on batch size (better for flexibility)\n",
171 |     "cost = -T.mean(log_likelihood)\n"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "code",
176 |    "execution_count": null,
177 |    "metadata": {
178 |     "collapsed": false
179 |    },
180 |    "outputs": [],
181 |    "source": [
182 |     "# The easiest way to train neural nets is with Stochastic Gradient Descent.\n",
183 |     "# This takes each example, calculates the gradient, and changes the model parameters a small amount\n",
184 |     "# in the opposite direction of the gradient (to decrease the cost).\n",
185 |     "\n",
186 |     "# Fancier add-ons to stochastic gradient descent will reduce the learning rate over time, add a momentum\n",
187 |     "# factor to the parameters, etc.\n",
188 |     "\n",
189 |     "# Before we can start training, we need to know what the gradients are.\n",
190 |     "# Luckily we don't have to do any math! Theano has symbolic auto-differentiation which means it can\n",
191 |     "# calculate the gradients of a cost with respect to the parameters for arbitrary equations.\n",
192 |     "parameters = [W_x, b_h, W_h, b_y]\n",
193 |     "gradients = T.grad(cost, parameters)\n",
194 |     "# Now gradients contains the list of derivatives: [d_cost/d_W_x, d_cost/d_b_h, d_cost/d_W_h, d_cost/d_b_y]\n",
195 |     "\n",
196 |     "# One last thing we need to do before training is to use these gradients to update the parameters!\n",
197 |     "# Remember how parameters are shared variables? Well, Theano uses something called updates\n",
198 |     "# which are just pairs of (shared_variable, new_variable_expression) to change its value.\n",
199 |     "# So, let's create these updates to show how we change the parameter values during training with gradients!\n",
200 |     "# We use a learning rate to make small steps over time.\n",
201 |     "learning_rate = 0.01\n",
202 |     "train_updates = [(param, param - learning_rate*gradient) for param, gradient in zip(parameters, gradients)]\n"
203 |    ]
204 |   },
205 |   {
206 |    "cell_type": "code",
207 |    "execution_count": null,
208 |    "metadata": {
209 |     "collapsed": false
210 |    },
211 |    "outputs": [],
212 |    "source": [
213 |     "# Now we can create a Theano function that takes in real inputs and trains our model.\n",
214 |     "f_train = theano.function(inputs=[x, correct_labels], outputs=cost, updates=train_updates, allow_input_downcast=True)\n",
215 |     "\n",
216 |     "# For testing purposes, we don't want to use updates to change the parameters - so create a separate function!\n",
217 |     "# We also care more about the output guesses, so let's return those instead of the cost.\n",
218 |     "# error = sum(T.neq(y_hat, correct_labels))/float(y_hat.shape[0])\n",
219 |     "f_test = theano.function(inputs=[x], outputs=y_hat, allow_input_downcast=True)\n"
220 |    ]
221 |   },
222 |   {
223 |    "cell_type": "code",
224 |    "execution_count": null,
225 |    "metadata": {
226 |     "collapsed": false
227 |    },
228 |    "outputs": [],
229 |    "source": [
230 |     "# Our training can begin!\n",
231 |     "# The two hyperparameters we have for this part are minibatch size (how many examples to process in parallel)\n",
232 |     "# and the total number of passes over all examples (epochs).\n",
233 |     "batch_size = 100\n",
234 |     "epochs = 30\n",
235 |     "\n",
236 |     "# Given our batch size, compute how many batches we can fit into each data set\n",
237 |     "train_batches = len(train_x) / batch_size\n",
238 |     "valid_batches = len(valid_x) / batch_size\n",
239 |     "test_batches = len(test_x) / batch_size\n",
240 |     "\n",
241 |     "# Our main training loop!\n",
242 |     "for epoch in range(epochs):\n",
243 |     "    print epoch+1, \":\",\n",
244 |     "    \n",
245 |     "    train_costs = []\n",
246 |     "    train_accuracy = []\n",
247 |     "    for i in range(train_batches):\n",
248 |     "        # Grab our minibatch of examples from the whole train set.\n",
249 |     "        batch_x = train_x[i*batch_size:(i+1)*batch_size]\n",
250 |     "        batch_labels = train_y[i*batch_size:(i+1)*batch_size]\n",
251 |     "        # Compute the costs from the train function (which also updates the parameters)\n",
252 |     "        costs = f_train(batch_x, batch_labels)\n",
253 |     "        # Compute the predictions from the test function (which does not update parameters)\n",
254 |     "        preds = f_test(batch_x)\n",
255 |     "        # Compute the accuracy of our predictions against the correct batch labels\n",
256 |     "        acc = sum(preds==batch_labels)/float(len(batch_labels))\n",
257 |     "        \n",
258 |     "        train_costs.append(costs)\n",
259 |     "        train_accuracy.append(acc)\n",
260 |     "    # Show the mean cost and accuracy across minibatches (the entire train set!)\n",
261 |     "    print \"cost:\", numpy.mean(train_costs), \"\\ttrain:\", str(numpy.mean(train_accuracy)*100)+\"%\",\n",
262 |     "    \n",
263 |     "    valid_accuracy = []\n",
264 |     "    for i in range(valid_batches):\n",
265 |     "        batch_x = valid_x[i*batch_size:(i+1)*batch_size]\n",
266 |     "        batch_labels = valid_y[i*batch_size:(i+1)*batch_size]\n",
267 |     "        \n",
268 |     "        preds = f_test(batch_x)\n",
269 |     "        acc = sum(preds==batch_labels)/float(len(batch_labels))\n",
270 |     "        \n",
271 |     "        valid_accuracy.append(acc)\n",
272 |     "    print \"\\tvalid:\", str(numpy.mean(valid_accuracy)*100)+\"%\",\n",
273 |     "    \n",
274 |     "    test_accuracy = []\n",
275 |     "    for i in range(test_batches):\n",
276 |     "        batch_x = test_x[i*batch_size:(i+1)*batch_size]\n",
277 |     "        batch_labels = test_y[i*batch_size:(i+1)*batch_size]\n",
278 |     "        \n",
279 |     "        preds = f_test(batch_x)\n",
280 |     "        acc = sum(preds==batch_labels)/float(len(batch_labels))\n",
281 |     "        \n",
282 |     "        test_accuracy.append(acc)\n",
283 |     "    print \"\\ttest:\", str(numpy.mean(test_accuracy)*100)+\"%\""
284 |    ]
285 |   },
286 |   {
287 |    "cell_type": "code",
288 |    "execution_count": null,
289 |    "metadata": {
290 |     "collapsed": true
291 |    },
292 |    "outputs": [],
293 |    "source": []
294 |   }
295 |  ],
296 |  "metadata": {
297 |   "kernelspec": {
298 |    "display_name": "Python 2",
299 |    "language": "python",
300 |    "name": "python2"
301 |   },
302 |   "language_info": {
303 |    "codemirror_mode": {
304 |     "name": "ipython",
305 |     "version": 2
306 |    },
307 |    "file_extension": ".py",
308 |    "mimetype": "text/x-python",
309 |    "name": "python",
310 |    "nbconvert_exporter": "python",
311 |    "pygments_lexer": "ipython2",
312 |    "version": "2.7.6"
313 |   }
314 |  },
315 |  "nbformat": 4,
316 |  "nbformat_minor": 0
317 | }
318 | 


--------------------------------------------------------------------------------
/.ipynb_checkpoints/RNN-GSN_opendeep-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "# Imports!\n",
 12 |     "# standard libraries\n",
 13 |     "import logging\n",
 14 |     "import math\n",
 15 |     "# third party\n",
 16 |     "import theano\n",
 17 |     "# internal references\n",
 18 |     "from opendeep.data import MNIST\n",
 19 |     "from opendeep.log import config_root_logger\n",
 20 |     "from opendeep.models import Model, RNN, GSN\n",
 21 |     "from opendeep.optimization import RMSProp\n",
 22 |     "\n",
 23 |     "config_root_logger()\n"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": null,
 29 |    "metadata": {
 30 |     "collapsed": true
 31 |    },
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "# Let's define a new model combining the RNN and GSNs.\n",
 35 |     "class RNN_GSN(Model):\n",
 36 |     "    def __init__(self):\n",
 37 |     "        super(RNN_GSN, self).__init__()\n",
 38 |     "\n",
 39 |     "        gsn_hiddens = 500\n",
 40 |     "        gsn_layers = 2\n",
 41 |     "\n",
 42 |     "        # RNN that takes in images (3D sequences) and outputs gsn hiddens (3D sequence of them)\n",
 43 |     "        self.rnn = RNN(\n",
 44 |     "            input_size=28 * 28,\n",
 45 |     "            hidden_size=100,\n",
 46 |     "            # needs to output hidden units for odd layers of GSN\n",
 47 |     "            output_size=gsn_hiddens * (math.ceil(gsn_layers/2.)),\n",
 48 |     "            layers=1,\n",
 49 |     "            activation='tanh',\n",
 50 |     "            hidden_activation='relu',\n",
 51 |     "            weights_init='uniform', weights_interval='montreal',\n",
 52 |     "            r_weights_init='identity'\n",
 53 |     "        )\n",
 54 |     "\n",
 55 |     "        # Create the GSN that will encode the input space\n",
 56 |     "        gsn = GSN(\n",
 57 |     "            input_size=28 * 28,\n",
 58 |     "            hidden_size=gsn_hiddens,\n",
 59 |     "            layers=gsn_layers,\n",
 60 |     "            walkbacks=4,\n",
 61 |     "            visible_activation='sigmoid',\n",
 62 |     "            hidden_activation='tanh',\n",
 63 |     "            image_height=28,\n",
 64 |     "            image_width=28\n",
 65 |     "        )\n",
 66 |     "        # grab the input arguments\n",
 67 |     "        gsn_args = gsn.args.copy()\n",
 68 |     "        # grab the parameters it initialized\n",
 69 |     "        gsn_params = gsn.get_params()\n",
 70 |     "\n",
 71 |     "        # Now hook the two up! RNN should output hiddens for GSN into a 3D tensor (1 set for each timestep)\n",
 72 |     "        # Therefore, we need to use scan to create the GSN reconstruction for each timestep given the hiddens\n",
 73 |     "        def step(hiddens, x):\n",
 74 |     "            gsn = GSN(\n",
 75 |     "                inputs_hook=(28*28, x),\n",
 76 |     "                hiddens_hook=(gsn_hiddens, hiddens),\n",
 77 |     "                params_hook=(gsn_params),\n",
 78 |     "                **gsn_args\n",
 79 |     "            )\n",
 80 |     "            # return the reconstruction and cost!\n",
 81 |     "            return gsn.get_outputs(), gsn.get_train_cost()\n",
 82 |     "\n",
 83 |     "        (outputs, costs), scan_updates = theano.scan(\n",
 84 |     "            fn=lambda h, x: step(h, x),\n",
 85 |     "            sequences=[self.rnn.output, self.rnn.input],\n",
 86 |     "            outputs_info=[None, None]\n",
 87 |     "        )\n",
 88 |     "\n",
 89 |     "        self.outputs = outputs\n",
 90 |     "\n",
 91 |     "        self.updates = dict()\n",
 92 |     "        self.updates.update(self.rnn.get_updates())\n",
 93 |     "        self.updates.update(scan_updates)\n",
 94 |     "\n",
 95 |     "        self.cost = costs.sum()\n",
 96 |     "        self.params = gsn_params + self.rnn.get_params()\n",
 97 |     "\n",
 98 |     "    # Model functions necessary for training\n",
 99 |     "    def get_inputs(self):\n",
100 |     "        return self.rnn.get_inputs()\n",
101 |     "    def get_params(self):\n",
102 |     "        return self.params\n",
103 |     "    def get_train_cost(self):\n",
104 |     "        return self.cost\n",
105 |     "    def get_updates(self):\n",
106 |     "        return self.updates\n",
107 |     "    def get_outputs(self):\n",
108 |     "        return self.outputs\n",
109 |     "    "
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": null,
115 |    "metadata": {
116 |     "collapsed": false
117 |    },
118 |    "outputs": [],
119 |    "source": [
120 |     "# Now we can instantiate and train the model!\n",
121 |     "rnn_gsn = RNN_GSN()\n",
122 |     "\n",
123 |     "# data! (needs to be 3d for rnn).\n",
124 |     "mnist = MNIST(sequence_number=1, seq_3d=True, seq_length=30)\n",
125 |     "\n",
126 |     "# optimizer!\n",
127 |     "optimizer = RMSProp(\n",
128 |     "    model=rnn_gsn,\n",
129 |     "    dataset=mnist,\n",
130 |     "    epochs=500,\n",
131 |     "    batch_size=50,\n",
132 |     "    save_freq=10,\n",
133 |     "    stop_patience=30,\n",
134 |     "    stop_threshold=.9995,\n",
135 |     "    learning_rate=1e-6,\n",
136 |     "    decay=.95,\n",
137 |     "    max_scaling=1e5,\n",
138 |     "    grad_clip=5.,\n",
139 |     "    hard_clip=False\n",
140 |     ")\n",
141 |     "# train!\n",
142 |     "optimizer.train()\n"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "code",
147 |    "execution_count": null,
148 |    "metadata": {
149 |     "collapsed": true
150 |    },
151 |    "outputs": [],
152 |    "source": []
153 |   }
154 |  ],
155 |  "metadata": {
156 |   "kernelspec": {
157 |    "display_name": "Python 2",
158 |    "language": "python",
159 |    "name": "python2"
160 |   },
161 |   "language_info": {
162 |    "codemirror_mode": {
163 |     "name": "ipython",
164 |     "version": 2
165 |    },
166 |    "file_extension": ".py",
167 |    "mimetype": "text/x-python",
168 |    "name": "python",
169 |    "nbconvert_exporter": "python",
170 |    "pygments_lexer": "ipython2",
171 |    "version": "2.7.6"
172 |   }
173 |  },
174 |  "nbformat": 4,
175 |  "nbformat_minor": 0
176 | }
177 | 


--------------------------------------------------------------------------------
/DAE_opendeep.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "# imports and logger!\n",
 12 |     "from opendeep.log import config_root_logger\n",
 13 |     "from opendeep.models import GSN\n",
 14 |     "from opendeep.optimization import SGD\n",
 15 |     "from opendeep.data import MNIST\n",
 16 |     "from opendeep.utils.misc import closest_to_square_factors\n",
 17 |     "from PIL import Image as pil_img\n",
 18 |     "from opendeep.utils.image import tile_raster_images\n",
 19 |     "import numpy\n",
 20 |     "\n",
 21 |     "config_root_logger()"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": null,
 27 |    "metadata": {
 28 |     "collapsed": false
 29 |    },
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "# A denoising autoencoder (DAE) is a special case of a generative stochastic network (GSN).\n",
 33 |     "# GSNs can have multiple denoising layers that interact with each other both above and below.\n",
 34 |     "dae = GSN(\n",
 35 |     "    input_size=28*28,\n",
 36 |     "    hidden_size=1000,\n",
 37 |     "    visible_activation='sigmoid',\n",
 38 |     "    hidden_activation='tanh',\n",
 39 |     "    layers=1,\n",
 40 |     "    walkbacks=3,\n",
 41 |     "    input_noise='salt_and_pepper',\n",
 42 |     "    input_noise_level=0.3\n",
 43 |     ")\n"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": null,
 49 |    "metadata": {
 50 |     "collapsed": false
 51 |    },
 52 |    "outputs": [],
 53 |    "source": [
 54 |     "# Create the MNIST data object\n",
 55 |     "mnist = MNIST(concat_train_valid=True)\n",
 56 |     "\n",
 57 |     "# Create the optimizer object\n",
 58 |     "optimizer = SGD(dataset=mnist,\n",
 59 |     "                epochs=40, \n",
 60 |     "                batch_size=100, \n",
 61 |     "                learning_rate=.25,\n",
 62 |     "                lr_decay='exponential',\n",
 63 |     "                lr_decay_factor=.995,\n",
 64 |     "                momentum=.5,\n",
 65 |     "                nesterov_momentum=False)\n"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": null,
 71 |    "metadata": {
 72 |     "collapsed": false
 73 |    },
 74 |    "outputs": [],
 75 |    "source": [
 76 |     "# Train the model with the optimizer on the mnist dataset!\n",
 77 |     "dae.train(optimizer)"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": null,
 83 |    "metadata": {
 84 |     "collapsed": false
 85 |    },
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "# Run some numbers to see the output\n",
 89 |     "n_examples = 100\n",
 90 |     "xs_test = mnist.test_inputs[:n_examples]\n",
 91 |     "noisy_xs_test = dae.f_noise(xs_test)\n",
 92 |     "reconstructed = dae.run(noisy_xs_test)\n",
 93 |     "# Concatenate stuff\n",
 94 |     "stacked = numpy.vstack(\n",
 95 |     "    [numpy.vstack([xs_test[i * 10: (i + 1) * 10],\n",
 96 |     "                   noisy_xs_test[i * 10: (i + 1) * 10],\n",
 97 |     "                   reconstructed[i * 10: (i + 1) * 10]])\n",
 98 |     "     for i in range(10)])\n",
 99 |     "number_reconstruction = pil_img.fromarray(\n",
100 |     "    tile_raster_images(stacked, (dae.image_height, dae.image_width), (10, 30), (1, 1))\n",
101 |     ")\n",
102 |     "\n",
103 |     "number_reconstruction.save(\"dae_opendeep_test.png\")\n",
104 |     "\n",
105 |     "# Construct image from the weight matrix\n",
106 |     "image = pil_img.fromarray(\n",
107 |     "    tile_raster_images(\n",
108 |     "        X=dae.weights_list[0].get_value(borrow=True).T,\n",
109 |     "        img_shape=(28, 28),\n",
110 |     "        tile_shape=closest_to_square_factors(dae.layer_sizes[1]),\n",
111 |     "        tile_spacing=(1, 1)\n",
112 |     "    )\n",
113 |     ")\n",
114 |     "image.save(\"dae_opendeep_filters.png\")\n",
115 |     "print \"Done!\""
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": null,
121 |    "metadata": {
122 |     "collapsed": true
123 |    },
124 |    "outputs": [],
125 |    "source": []
126 |   }
127 |  ],
128 |  "metadata": {
129 |   "kernelspec": {
130 |    "display_name": "Python 2",
131 |    "language": "python",
132 |    "name": "python2"
133 |   },
134 |   "language_info": {
135 |    "codemirror_mode": {
136 |     "name": "ipython",
137 |     "version": 2
138 |    },
139 |    "file_extension": ".py",
140 |    "mimetype": "text/x-python",
141 |    "name": "python",
142 |    "nbconvert_exporter": "python",
143 |    "pygments_lexer": "ipython2",
144 |    "version": "2.7.6"
145 |   }
146 |  },
147 |  "nbformat": 4,
148 |  "nbformat_minor": 0
149 | }
150 | 


--------------------------------------------------------------------------------
/DAE_theano.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "# This tutorial is meant to be done after MLP_theano_with_comments.ipynb\n",
 12 |     "\n",
 13 |     "# We are working with MNIST again, this time no labels are necessary - \n",
 14 |     "# the denoising autoencoder (DAE) is an unsupervised model that tries to reconstruct the original input.\n",
 15 |     "\n",
 16 |     "# All imports up here this time\n",
 17 |     "import pickle\n",
 18 |     "import numpy\n",
 19 |     "import numpy.random as rng\n",
 20 |     "import theano\n",
 21 |     "import theano.tensor as T\n",
 22 |     "import theano.sandbox.rng_mrg as RNG_MRG\n",
 23 |     "from utils import tile_raster_images\n",
 24 |     "from PIL import Image as pil_img\n",
 25 |     "from IPython.display import Image\n",
 26 |     "\n",
 27 |     "# Load our data \n",
 28 |     "# Download and unzip pickled version from here: http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz\n",
 29 |     "(train_x, _), (valid_x, _), (test_x, _) = pickle.load(open('data/mnist.pkl', 'r'))\n",
 30 |     "print \"Shapes:\"\n",
 31 |     "print train_x.shape\n",
 32 |     "print valid_x.shape\n",
 33 |     "print test_x.shape"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": null,
 39 |    "metadata": {
 40 |     "collapsed": false
 41 |    },
 42 |    "outputs": [],
 43 |    "source": [
 44 |     "# We can specify any hyperparameters to play with up here:\n",
 45 |     "input_size = 784  # 28x28 images\n",
 46 |     "hidden_size = 1000\n",
 47 |     "w_mean = 0.0\n",
 48 |     "w_std = 0.05\n",
 49 |     "w_interval = numpy.sqrt(6. / (input_size + hidden_size))\n",
 50 |     "noise = 0.3\n",
 51 |     "walkbacks = 3\n",
 52 |     "learning_rate = 0.25\n",
 53 |     "lr_decay = .985\n",
 54 |     "batch_size = 100\n",
 55 |     "epochs = 200\n",
 56 |     "check_frequency = 10\n",
 57 |     "\n",
 58 |     "# To make the organization better, let's define all the variables and parameters here.\n",
 59 |     "x = T.matrix('x')\n",
 60 |     "# W_x = numpy.asarray(rng.normal(loc=w_mean, scale=w_std, size=(input_size, hidden_size)), dtype=theano.config.floatX)\n",
 61 |     "W_x = numpy.asarray(rng.uniform(low=-w_interval, high=w_interval, size=(input_size, hidden_size)), dtype=theano.config.floatX)\n",
 62 |     "W_x = theano.shared(W_x, \"W_x\")\n",
 63 |     "\n",
 64 |     "b_x = numpy.zeros((input_size,), dtype=theano.config.floatX)\n",
 65 |     "b_h = numpy.zeros((hidden_size,), dtype=theano.config.floatX)\n",
 66 |     "b_x = theano.shared(b_x, \"b_x\")\n",
 67 |     "b_h = theano.shared(b_h, \"b_h\")\n"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": null,
 73 |    "metadata": {
 74 |     "collapsed": false
 75 |    },
 76 |    "outputs": [],
 77 |    "source": [
 78 |     "# Now for the most important part of a denoising autoencoder - making the input noisy!\n",
 79 |     "# Noise acts as regularization so the autoencoder doesn't just memorize the training set.\n",
 80 |     "# This makes it more effective for test data by reducing overfitting.\n",
 81 |     "noise_switch = theano.shared(1, \"noise_switch\")\n",
 82 |     "\n",
 83 |     "theano_rng = RNG_MRG.MRG_RandomStreams(1)\n",
 84 |     "def salt_and_pepper(variable):\n",
 85 |     "    mask = theano_rng.binomial(size=variable.shape, n=1, p=(1-noise), dtype=theano.config.floatX)\n",
 86 |     "    saltpepper = theano_rng.binomial(size=variable.shape, n=1, p=0.5, dtype=theano.config.floatX)\n",
 87 |     "    ones = T.eq(mask, 0) * saltpepper\n",
 88 |     "    noisy = variable*mask + ones\n",
 89 |     "    return T.switch(noise_switch,\n",
 90 |     "                    noisy,\n",
 91 |     "                    variable)\n"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": null,
 97 |    "metadata": {
 98 |     "collapsed": false
 99 |    },
100 |    "outputs": [],
101 |    "source": [
102 |     "# Now we are ready to create the computation graph!\n",
103 |     "# Remember it is noisy_x -> hiddens -> x -> hiddens -> x .....\n",
104 |     "\n",
105 |     "inputs=[x]\n",
106 |     "for walkback in range(walkbacks):\n",
107 |     "    noisy_x = salt_and_pepper(inputs[-1])\n",
108 |     "\n",
109 |     "    h = T.tanh(\n",
110 |     "        T.dot(noisy_x, W_x) + b_h\n",
111 |     "    )\n",
112 |     "\n",
113 |     "    reconstruction = T.nnet.sigmoid(\n",
114 |     "        T.dot(h, W_x.T) + b_x\n",
115 |     "    )\n",
116 |     "\n",
117 |     "    inputs.append(reconstruction)\n",
118 |     "    \n",
119 |     "reconstructions = inputs[1:]\n"
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "code",
124 |    "execution_count": null,
125 |    "metadata": {
126 |     "collapsed": false
127 |    },
128 |    "outputs": [],
129 |    "source": [
130 |     "# The output of our computation graph is the last reconstructed input in the Gibbs chain.\n",
131 |     "output = reconstructions[-1]\n",
132 |     "\n",
133 |     "# Our cost function is now the reconstruction error between all reconstructions and the original input.\n",
134 |     "cost = numpy.sum([T.mean(T.nnet.binary_crossentropy(recon, x)) for recon in reconstructions])\n"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "code",
139 |    "execution_count": null,
140 |    "metadata": {
141 |     "collapsed": false
142 |    },
143 |    "outputs": [],
144 |    "source": [
145 |     "parameters = [W_x, b_h, b_x]\n",
146 |     "gradients = T.grad(cost, parameters)\n",
147 |     "\n",
148 |     "lr = theano.shared(numpy.asarray(learning_rate, dtype='float32'), 'lr')\n",
149 |     "train_updates = [(param, param - lr*gradient) for param, gradient in zip(parameters, gradients)]\n"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "code",
154 |    "execution_count": null,
155 |    "metadata": {
156 |     "collapsed": false
157 |    },
158 |    "outputs": [],
159 |    "source": [
160 |     "# Compile our training and testing function like before!\n",
161 |     "# Train function updates the parameters and returns the total train cost to monitor.\n",
162 |     "f_train = theano.function(\n",
163 |     "    inputs=[x], \n",
164 |     "    outputs=cost, \n",
165 |     "    updates=train_updates, \n",
166 |     "    allow_input_downcast=True\n",
167 |     ")\n",
168 |     "\n",
169 |     "# Our test function returns the final reconstruction; it passes no updates, so the parameters are left unchanged.\n",
170 |     "f_test = theano.function(\n",
171 |     "    inputs=[x], \n",
172 |     "    outputs=output,\n",
173 |     "    allow_input_downcast=True\n",
174 |     ")\n"
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "code",
179 |    "execution_count": null,
180 |    "metadata": {
181 |     "collapsed": false
182 |    },
183 |    "outputs": [],
184 |    "source": [
185 |     "# That's it! Now perform SGD like before.\n",
186 |     "# Main training loop\n",
187 |     "\n",
188 |     "train_batches = len(train_x) / batch_size\n",
189 |     "\n",
190 |     "try:\n",
191 |     "    for epoch in range(epochs):\n",
192 |     "        print epoch+1,\":\",\n",
193 |     "\n",
194 |     "        # Don't forget to turn on our noise switch for training! Just set the shared variable to 1 (True)\n",
195 |     "        noise_switch.set_value(1.)\n",
196 |     "\n",
197 |     "        train_costs = []\n",
198 |     "        for i in range(train_batches):\n",
199 |     "            batch_x = train_x[i*batch_size:(i+1)*batch_size]\n",
200 |     "\n",
201 |     "            costs = f_train(batch_x)\n",
202 |     "\n",
203 |     "            train_costs.append(costs)\n",
204 |     "        print \"cost:\", numpy.mean(train_costs),\n",
205 |     "        \n",
206 |     "        old_lr = lr.get_value()\n",
207 |     "        print \"\\tlearning rate:\", old_lr\n",
208 |     "        new_lr = numpy.asarray(old_lr * lr_decay, dtype='float32')\n",
209 |     "        lr.set_value(new_lr)\n",
210 |     "\n",
211 |     "        if (epoch+1) % check_frequency == 0:\n",
212 |     "            print \"Saving images...\"\n",
213 |     "            train_recons = f_test(train_x[:25])\n",
214 |     "            train_stacked = numpy.vstack(\n",
215 |     "                [numpy.vstack([\n",
216 |     "                        train_x[i*5:(i+1)*5],\n",
217 |     "                        train_recons[i*5:(i+1)*5]\n",
218 |     "                    ])\n",
219 |     "                 for i in range(5)]\n",
220 |     "            )\n",
221 |     "            train_image = pil_img.fromarray(\n",
222 |     "                tile_raster_images(train_stacked, (28, 28), (5, 10), (1, 1))\n",
223 |     "            )\n",
224 |     "            train_image.save(\"dae_train_%d.png\"%(epoch+1))\n",
225 |     "            \n",
226 |     "            # For validation, let's run a few images through and see the reconstruction \n",
227 |     "            # (with the noise from training still added)\n",
228 |     "            valid_recons = f_test(valid_x[:25])\n",
229 |     "            # Use the tile_raster_images helper function to rearrange the matrix into a 5x10 image of digits\n",
230 |     "            # (Two 5x5 images next to each other - the first the inputs, the second the reconstructions.)\n",
231 |     "            valid_stacked = numpy.vstack(\n",
232 |     "                [numpy.vstack([\n",
233 |     "                        valid_x[i*5:(i+1)*5],\n",
234 |     "                        valid_recons[i*5:(i+1)*5]\n",
235 |     "                    ])\n",
236 |     "                 for i in range(5)]\n",
237 |     "            )\n",
238 |     "            valid_image = pil_img.fromarray(\n",
239 |     "                # helper from utils.py\n",
240 |     "                tile_raster_images(valid_stacked, (28, 28), (5, 10), (1, 1))\n",
241 |     "            )\n",
242 |     "            valid_image.save(\"dae_valid_%d.png\"%(epoch+1))\n",
243 |     "\n",
244 |     "            # Now do the same for test, but don't add any noise\n",
245 |     "            # This means set the noise switches to 0. (False)\n",
246 |     "            noise_switch.set_value(0.)\n",
247 |     "\n",
248 |     "            test_recons = f_test(test_x[:25])\n",
249 |     "            test_stacked = numpy.vstack(\n",
250 |     "                [numpy.vstack([\n",
251 |     "                        test_x[i*5:(i+1)*5],\n",
252 |     "                        test_recons[i*5:(i+1)*5]\n",
253 |     "                    ])\n",
254 |     "                 for i in range(5)]\n",
255 |     "            )\n",
256 |     "            test_image = pil_img.fromarray(\n",
257 |     "                tile_raster_images(test_stacked, (28, 28), (5, 10), (1, 1))\n",
258 |     "            )\n",
259 |     "            test_image.save(\"dae_test_%d.png\"%(epoch+1))\n",
260 |     "            \n",
261 |     "            weight_filters = pil_img.fromarray(\n",
262 |     "                tile_raster_images(\n",
263 |     "                    W_x.get_value(borrow=True).T,\n",
264 |     "                    img_shape=(28, 28),\n",
265 |     "                    tile_shape=(25, 40),\n",
266 |     "                    tile_spacing=(1, 1)\n",
267 |     "                )\n",
268 |     "            )\n",
269 |     "            weight_filters.save(\"dae_filters_%d.png\"%(epoch+1))\n",
270 |     "except KeyboardInterrupt:\n",
271 |     "    pass        "
272 |    ]
273 |   },
274 |   {
275 |    "cell_type": "code",
276 |    "execution_count": null,
277 |    "metadata": {
278 |     "collapsed": true
279 |    },
280 |    "outputs": [],
281 |    "source": []
282 |   }
283 |  ],
284 |  "metadata": {
285 |   "kernelspec": {
286 |    "display_name": "Python 2",
287 |    "language": "python",
288 |    "name": "python2"
289 |   },
290 |   "language_info": {
291 |    "codemirror_mode": {
292 |     "name": "ipython",
293 |     "version": 2
294 |    },
295 |    "file_extension": ".py",
296 |    "mimetype": "text/x-python",
297 |    "name": "python",
298 |    "nbconvert_exporter": "python",
299 |    "pygments_lexer": "ipython2",
300 |    "version": "2.7.6"
301 |   }
302 |  },
303 |  "nbformat": 4,
304 |  "nbformat_minor": 0
305 | }
306 | 


--------------------------------------------------------------------------------
/DAE_theano_with_comments.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "# This tutorial is meant to be done after MLP_theano_with_comments.ipynb\n",
 12 |     "\n",
 13 |     "# We are working with MNIST again, this time no labels are necessary - \n",
 14 |     "# the denoising autoencoder (DAE) is an unsupervised model that tries to reconstruct the original input.\n",
 15 |     "\n",
 16 |     "# All imports up here this time\n",
 17 |     "import pickle\n",
 18 |     "import numpy\n",
 19 |     "import numpy.random as rng\n",
 20 |     "import theano\n",
 21 |     "import theano.tensor as T\n",
 22 |     "import theano.sandbox.rng_mrg as RNG_MRG\n",
 23 |     "from utils import tile_raster_images\n",
 24 |     "from PIL import Image as pil_img\n",
 25 |     "from IPython.display import Image\n",
 26 |     "\n",
 27 |     "# Load our data \n",
 28 |     "# Download and unzip pickled version from here: http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz\n",
 29 |     "(train_x, _), (valid_x, _), (test_x, _) = pickle.load(open('data/mnist.pkl', 'r'))\n",
 30 |     "print \"Shapes:\"\n",
 31 |     "print train_x.shape\n",
 32 |     "print valid_x.shape\n",
 33 |     "print test_x.shape"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": null,
 39 |    "metadata": {
 40 |     "collapsed": false
 41 |    },
 42 |    "outputs": [],
 43 |    "source": [
 44 |     "# The DAE data flow looks like this: input -> input_(add noise) -> hiddens -> input\n",
 45 |     "# This can be repeated by sampling from the reconstructed input. Repeating like that is\n",
 46 |     "# a pseudo Gibbs sampling process. We can define how many times we want to repeat (known as walkbacks).\n",
 47 |     "\n",
 48 |     "# We can specify any hyperparameters to play with up here:\n",
 49 |     "input_size = 784  # 28x28 images\n",
 50 |     "hidden_size = 1000\n",
 51 |     "w_mean = 0.0\n",
 52 |     "w_std = 0.05\n",
 53 |     "noise = 0.3\n",
 54 |     "walkbacks = 3\n",
 55 |     "learning_rate = 0.1\n",
 56 |     "batch_size = 100\n",
 57 |     "epochs = 100\n",
 58 |     "check_frequency = 10\n",
 59 |     "\n",
 60 |     "# To make the organization better, let's define all the variables and parameters here.\n",
 61 |     "# Just like with the MLP, we need a symbolic matrix to input the images\n",
 62 |     "x = T.matrix('x')\n",
 63 |     "# Next, we need the weights matrix W_x. This will be used to go both from input -> hidden and\n",
 64 |     "# hidden -> input (by using its transpose). This is called tied weights.\n",
 65 |     "# Again, initialization has a lot of literature, but we are just going to stick with Gaussian at the moment.\n",
 66 |     "W_x = numpy.asarray(rng.normal(loc=w_mean, scale=w_std, size=(input_size, hidden_size)), dtype=theano.config.floatX)\n",
 67 |     "# (Don't forget to make parameters into shared variables so they can be updated!)\n",
 68 |     "W_x = theano.shared(W_x, \"W_x\")\n",
 69 |     "# Because we are outputting back into the input space, we also need a bias vector for both the input\n",
 70 |     "# and hidden layers.\n",
 71 |     "b_x = numpy.zeros((input_size,), dtype=theano.config.floatX)\n",
 72 |     "b_h = numpy.zeros((hidden_size,), dtype=theano.config.floatX)\n",
 73 |     "b_x = theano.shared(b_x, \"b_x\")\n",
 74 |     "b_h = theano.shared(b_h, \"b_h\")\n"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": null,
 80 |    "metadata": {
 81 |     "collapsed": false
 82 |    },
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "# Now for the most important part of a denoising autoencoder - making the input noisy!\n",
 86 |     "# Noise acts as regularization so the autoencoder doesn't just memorize the training set.\n",
 87 |     "# This makes it more effective for test data by reducing overfitting.\n",
 88 |     "\n",
 89 |     "# We deal with adding noise during training but not testing in Theano with a switch variable!\n",
 90 |     "# Switches can be turned on or off to direct data flow in the computation graph - \n",
 91 |     "# so we turn on for train and off for test!\n",
 92 |     "# You guessed it - we need a shared variable to represent the switch condition so we can change it at runtime.\n",
 93 |     "noise_switch = theano.shared(1, \"noise_switch\")\n",
 94 |     "\n",
 95 |     "# One important thing to note - the type of noise has to correspond to the type of input.\n",
 96 |     "# i.e. we can't add real-value noise when the input is expected to be binary\n",
 97 |     "# So for these binary inputs, we will add salt-and-pepper masking noise!\n",
 98 |     "# This is a function so we can keep adding it during the computation chain when we alternate sampling\n",
 99 |     "# from input and reconstructing from hiddens.\n",
100 |     "# Theano random number generator\n",
101 |     "theano_rng = RNG_MRG.MRG_RandomStreams(1)\n",
102 |     "def salt_and_pepper(variable):\n",
103 |     "    mask = theano_rng.binomial(size=variable.shape, n=1, p=(1-noise), dtype=theano.config.floatX)\n",
104 |     "    saltpepper = theano_rng.binomial(size=variable.shape, n=1, p=0.5, dtype=theano.config.floatX)\n",
105 |     "    ones = T.eq(mask, 0) * saltpepper\n",
106 |     "    # Randomly set some bits to 0 or 1 with equal probability.\n",
107 |     "    noisy = variable*mask + ones\n",
108 |     "    return T.switch(noise_switch,\n",
109 |     "                    # true condition\n",
110 |     "                    noisy,\n",
111 |     "                    # false condition\n",
112 |     "                    variable)\n"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": null,
118 |    "metadata": {
119 |     "collapsed": false
120 |    },
121 |    "outputs": [],
122 |    "source": [
123 |     "# Now we are ready to create the computation graph!\n",
124 |     "# Remember it is noisy_x -> hiddens -> x -> hiddens -> x .....\n",
125 |     "\n",
126 |     "inputs=[x]\n",
127 |     "for walkback in range(walkbacks):\n",
128 |     "    # First, we want to corrupt the input x\n",
129 |     "    noisy_x = salt_and_pepper(inputs[-1])\n",
130 |     "    # Now calculate the hiddens\n",
131 |     "    h = T.tanh(\n",
132 |     "        T.dot(noisy_x, W_x) + b_h\n",
133 |     "    )\n",
134 |     "    # From the hiddens, reconstruct x.\n",
135 |     "    # We have to use an appropriate activation function for the type of inputs!\n",
136 |     "    # In our case with MNIST images, it is binary so sigmoid works.\n",
137 |     "    reconstruction = T.nnet.sigmoid(\n",
138 |     "        T.dot(h, W_x.T) + b_x\n",
139 |     "    )\n",
140 |     "    # That is all for an autoencoder!\n",
141 |     "    inputs.append(reconstruction)\n",
142 |     "    \n",
143 |     "# Remove the original input from our reconstructions list\n",
144 |     "reconstructions = inputs[1:]\n"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": null,
150 |    "metadata": {
151 |     "collapsed": false
152 |    },
153 |    "outputs": [],
154 |    "source": [
155 |     "# The output of our computation graph is the last reconstructed input in the Gibbs chain.\n",
156 |     "output = reconstructions[-1]\n",
157 |     "\n",
158 |     "# Our cost function is now the reconstruction error between all reconstructions and the original input.\n",
159 |     "# Again, because our input space is binary, using mean binary cross-entropy is a good analog for \n",
160 |     "# reconstruction error.\n",
161 |     "# For real-valued inputs, we could use mean square error.\n",
162 |     "cost = numpy.sum([T.mean(T.nnet.binary_crossentropy(recon, x)) for recon in reconstructions])\n"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": null,
168 |    "metadata": {
169 |     "collapsed": false
170 |    },
171 |    "outputs": [],
172 |    "source": [
173 |     "# Just like with the MLP, compute gradient updates for the parameters to use with training.\n",
174 |     "parameters = [W_x, b_h, b_x]\n",
175 |     "# Automagic differentiation! (Still love it)\n",
176 |     "gradients = T.grad(cost, parameters)\n",
177 |     "\n",
178 |     "# Update the parameters for stochastic gradient descent!\n",
179 |     "train_updates = [(param, param - learning_rate*gradient) for param, gradient in zip(parameters, gradients)]\n"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "code",
184 |    "execution_count": null,
185 |    "metadata": {
186 |     "collapsed": false
187 |    },
188 |    "outputs": [],
189 |    "source": [
190 |     "# Compile our training and testing function like before!\n",
191 |     "# Train function updates the parameters and returns the total train cost to monitor.\n",
192 |     "f_train = theano.function(\n",
193 |     "    inputs=[x], \n",
194 |     "    outputs=cost, \n",
195 |     "    updates=train_updates, \n",
196 |     "    allow_input_downcast=True\n",
197 |     ")\n",
198 |     "\n",
199 |     "# Our test function returns the final reconstruction; unlike f_train, it applies no parameter updates.\n",
200 |     "f_test = theano.function(\n",
201 |     "    inputs=[x], \n",
202 |     "    outputs=output,\n",
203 |     "    allow_input_downcast=True\n",
204 |     ")\n"
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "code",
209 |    "execution_count": null,
210 |    "metadata": {
211 |     "collapsed": false
212 |    },
213 |    "outputs": [],
214 |    "source": [
215 |     "# That's it! Now perform SGD like before.\n",
216 |     "# Main training loop\n",
217 |     "\n",
218 |     "train_batches = len(train_x) / batch_size\n",
219 |     "\n",
220 |     "try:\n",
221 |     "    for epoch in range(epochs):\n",
222 |     "        print epoch+1, \":\",\n",
223 |     "\n",
224 |     "        # Don't forget to turn on our noise switch for training! Just set the shared variable to 1 (True)\n",
225 |     "        noise_switch.set_value(1.)\n",
226 |     "\n",
227 |     "        train_costs = []\n",
228 |     "        for i in range(train_batches):\n",
229 |     "            batch_x = train_x[i*batch_size:(i+1)*batch_size]\n",
230 |     "\n",
231 |     "            costs = f_train(batch_x)\n",
232 |     "\n",
233 |     "            train_costs.append(costs)\n",
234 |     "        print \"cost:\", numpy.mean(train_costs)\n",
235 |     "\n",
236 |     "        if (epoch+1) % check_frequency == 0:\n",
237 |     "            print \"Saving images...\"\n",
238 |     "            # For validation, let's run a few images through and see the reconstruction \n",
239 |     "            # (with the noise from training still added)\n",
240 |     "            valid_recons = f_test(valid_x[:25])\n",
241 |     "            # Use the tile_raster_images helper function to rearrange the matrix into a 5x10 image of digits\n",
242 |     "            # (Two 5x5 images next to each other - the first the inputs, the second the reconstructions.)\n",
243 |     "            valid_stacked = numpy.vstack(\n",
244 |     "                [numpy.vstack([\n",
245 |     "                        valid_x[i*5:(i+1)*5],\n",
246 |     "                        valid_recons[i*5:(i+1)*5]\n",
247 |     "                    ])\n",
248 |     "                 for i in range(5)]\n",
249 |     "            )\n",
250 |     "            valid_image = pil_img.fromarray(\n",
251 |     "                # helper from utils.py\n",
252 |     "                tile_raster_images(valid_stacked, (28, 28), (5, 10), (1, 1))\n",
253 |     "            )\n",
254 |     "            valid_image.save(\"dae_valid_%d.png\"%(epoch+1))\n",
255 |     "\n",
256 |     "            # Now do the same for test, but don't add any noise\n",
257 |     "            # This means setting the noise switch to 0. (False)\n",
258 |     "            noise_switch.set_value(0.)\n",
259 |     "\n",
260 |     "            test_recons = f_test(test_x[:25])\n",
261 |     "            test_stacked = numpy.vstack(\n",
262 |     "                [numpy.vstack([\n",
263 |     "                        test_x[i*5:(i+1)*5],\n",
264 |     "                        test_recons[i*5:(i+1)*5]\n",
265 |     "                    ])\n",
266 |     "                 for i in range(5)]\n",
267 |     "            )\n",
268 |     "            test_image = pil_img.fromarray(\n",
269 |     "                tile_raster_images(test_stacked, (28, 28), (5, 10), (1, 1))\n",
270 |     "            )\n",
271 |     "            test_image.save(\"dae_test_%d.png\"%(epoch+1))\n",
272 |     "except KeyboardInterrupt:\n",
273 |     "    pass\n",
274 |     "        \n",
275 |     "# Let's finally save an image of the filters the DAE learned - this is simply the transpose of the weights!\n",
276 |     "weight_filters = pil_img.fromarray(\n",
277 |     "    tile_raster_images(\n",
278 |     "        W_x.get_value(borrow=True).T,\n",
279 |     "        img_shape=(28, 28),\n",
280 |     "        tile_shape=(25, 40),\n",
281 |     "        tile_spacing=(1, 1)\n",
282 |     "    )\n",
283 |     ")\n",
284 |     "print \"Saving filters...\"\n",
285 |     "weight_filters.save(\"dae_filters.png\")\n",
286 |     "print \"Done!\"\n",
287 |     "        "
288 |    ]
289 |   },
290 |   {
291 |    "cell_type": "code",
292 |    "execution_count": null,
293 |    "metadata": {
294 |     "collapsed": true
295 |    },
296 |    "outputs": [],
297 |    "source": []
298 |   }
299 |  ],
300 |  "metadata": {
301 |   "kernelspec": {
302 |    "display_name": "Python 2",
303 |    "language": "python",
304 |    "name": "python2"
305 |   },
306 |   "language_info": {
307 |    "codemirror_mode": {
308 |     "name": "ipython",
309 |     "version": 2
310 |    },
311 |    "file_extension": ".py",
312 |    "mimetype": "text/x-python",
313 |    "name": "python",
314 |    "nbconvert_exporter": "python",
315 |    "pygments_lexer": "ipython2",
316 |    "version": "2.7.6"
317 |   }
318 |  },
319 |  "nbformat": 4,
320 |  "nbformat_minor": 0
321 | }
322 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Markus Beissinger
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/MLP_opendeep.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "# imports and logger!\n",
 12 |     "from opendeep.log import config_root_logger\n",
 13 |     "from opendeep.models import Prototype, Dense, SoftmaxLayer\n",
 14 |     "from opendeep.optimization import SGD\n",
 15 |     "from opendeep.data import MNIST\n",
 16 |     "from opendeep.monitor import Monitor, FileService\n",
 17 |     "\n",
 18 |     "config_root_logger()"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": null,
 24 |    "metadata": {
 25 |     "collapsed": false
 26 |    },
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "# Create the MLP with two hidden layers and one classification layer\n",
 30 |     "mlp = Prototype()\n",
 31 |     "mlp.add(\n",
 32 |     "    Dense(input_size=28*28, output_size=1000, activation='tanh', noise='dropout', noise_level=0.3)\n",
 33 |     ")\n",
 34 |     "mlp.add(\n",
 35 |     "    Dense(output_size=1000, activation='tanh', noise='dropout', noise_level=0.3)\n",
 36 |     ")\n",
 37 |     "mlp.add(\n",
 38 |     "    SoftmaxLayer(output_size=10)\n",
 39 |     ")"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "code",
 44 |    "execution_count": null,
 45 |    "metadata": {
 46 |     "collapsed": false
 47 |    },
 48 |    "outputs": [],
 49 |    "source": [
 50 |     "# Create the MNIST data object\n",
 51 |     "mnist = MNIST(concat_train_valid=True)\n",
 52 |     "\n",
 53 |     "# Create the optimizer object\n",
 54 |     "optimizer = SGD(model=mlp, \n",
 55 |     "                dataset=mnist, \n",
 56 |     "                epochs=100, \n",
 57 |     "                batch_size=500, \n",
 58 |     "                learning_rate=.01, \n",
 59 |     "                momentum=.9,\n",
 60 |     "                nesterov_momentum=True)\n",
 61 |     "\n",
 62 |     "# Make a monitor to watch the train and test prediction errors\n",
 63 |     "errorMonitor = Monitor('error', mlp.get_monitors()['softmax_error'], train=True, test=True)"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": null,
 69 |    "metadata": {
 70 |     "collapsed": false
 71 |    },
 72 |    "outputs": [],
 73 |    "source": [
 74 |     "# Train the model with the optimizer!\n",
 75 |     "optimizer.train(monitor_channels=[errorMonitor])"
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "code",
 80 |    "execution_count": null,
 81 |    "metadata": {
 82 |     "collapsed": false
 83 |    },
 84 |    "outputs": [],
 85 |    "source": [
 86 |     "# Make some predictions on test data!\n",
 87 |     "test_data, test_labels = mnist.test_inputs, mnist.test_targets\n",
 88 |     "\n",
 89 |     "n=30\n",
 90 |     "predictions = mlp.run(test_data)\n",
 91 |     "labels = test_labels.astype('int32')\n",
 92 |     "\n",
 93 |     "print \"Predictions:\", predictions[:n]\n",
 94 |     "print \"Correct:    \", labels[:n]\n",
 95 |     "print \"Accuracy:   \", sum((predictions==labels) * 1./len(labels))*100, \"%\""
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": null,
101 |    "metadata": {
102 |     "collapsed": true
103 |    },
104 |    "outputs": [],
105 |    "source": []
106 |   }
107 |  ],
108 |  "metadata": {
109 |   "kernelspec": {
110 |    "display_name": "Python 2",
111 |    "language": "python",
112 |    "name": "python2"
113 |   },
114 |   "language_info": {
115 |    "codemirror_mode": {
116 |     "name": "ipython",
117 |     "version": 2
118 |    },
119 |    "file_extension": ".py",
120 |    "mimetype": "text/x-python",
121 |    "name": "python",
122 |    "nbconvert_exporter": "python",
123 |    "pygments_lexer": "ipython2",
124 |    "version": "2.7.6"
125 |   }
126 |  },
127 |  "nbformat": 4,
128 |  "nbformat_minor": 0
129 | }
130 | 


--------------------------------------------------------------------------------
/MLP_theano.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 41,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [
 10 |     {
 11 |      "name": "stdout",
 12 |      "output_type": "stream",
 13 |      "text": [
 14 |       " Shapes:\n",
 15 |       "(50000, 784) (50000,)\n",
 16 |       "(10000, 784) (10000,)\n",
 17 |       "(10000, 784) (10000,)\n",
 18 |       "--------------\n",
 19 |       "Example input:\n",
 20 |       "[ 0.          0.          0.          0.          0.          0.          0.\n",
 21 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 22 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 23 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 24 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 25 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 26 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 27 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 28 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 29 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 30 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 31 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 32 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 33 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 34 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 35 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 36 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 37 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 38 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 39 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 40 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 41 |       "  0.          0.          0.          0.          0.          0.01171875\n",
 42 |       "  0.0703125   0.0703125   0.0703125   0.4921875   0.53125     0.68359375\n",
 43 |       "  0.1015625   0.6484375   0.99609375  0.96484375  0.49609375  0.          0.\n",
 44 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 45 |       "  0.          0.          0.          0.1171875   0.140625    0.3671875\n",
 46 |       "  0.6015625   0.6640625   0.98828125  0.98828125  0.98828125  0.98828125\n",
 47 |       "  0.98828125  0.87890625  0.671875    0.98828125  0.9453125   0.76171875\n",
 48 |       "  0.25        0.          0.          0.          0.          0.          0.\n",
 49 |       "  0.          0.          0.          0.          0.          0.19140625\n",
 50 |       "  0.9296875   0.98828125  0.98828125  0.98828125  0.98828125  0.98828125\n",
 51 |       "  0.98828125  0.98828125  0.98828125  0.98046875  0.36328125  0.3203125\n",
 52 |       "  0.3203125   0.21875     0.15234375  0.          0.          0.          0.\n",
 53 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 54 |       "  0.          0.0703125   0.85546875  0.98828125  0.98828125  0.98828125\n",
 55 |       "  0.98828125  0.98828125  0.7734375   0.7109375   0.96484375  0.94140625\n",
 56 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 57 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 58 |       "  0.          0.          0.          0.          0.3125      0.609375\n",
 59 |       "  0.41796875  0.98828125  0.98828125  0.80078125  0.04296875  0.\n",
 60 |       "  0.16796875  0.6015625   0.          0.          0.          0.          0.\n",
 61 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 62 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 63 |       "  0.0546875   0.00390625  0.6015625   0.98828125  0.3515625   0.          0.\n",
 64 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 65 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 66 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 67 |       "  0.          0.          0.54296875  0.98828125  0.7421875   0.0078125   0.\n",
 68 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 69 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 70 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 71 |       "  0.          0.          0.04296875  0.7421875   0.98828125  0.2734375   0.\n",
 72 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 73 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 74 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 75 |       "  0.          0.          0.          0.13671875  0.94140625  0.87890625\n",
 76 |       "  0.625       0.421875    0.00390625  0.          0.          0.          0.\n",
 77 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 78 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 79 |       "  0.          0.          0.          0.          0.          0.31640625\n",
 80 |       "  0.9375      0.98828125  0.98828125  0.46484375  0.09765625  0.          0.\n",
 81 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 82 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 83 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 84 |       "  0.17578125  0.7265625   0.98828125  0.98828125  0.5859375   0.10546875\n",
 85 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 86 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 87 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 88 |       "  0.          0.          0.0625      0.36328125  0.984375    0.98828125\n",
 89 |       "  0.73046875  0.          0.          0.          0.          0.          0.\n",
 90 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 91 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 92 |       "  0.          0.          0.          0.          0.          0.97265625\n",
 93 |       "  0.98828125  0.97265625  0.25        0.          0.          0.          0.\n",
 94 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 95 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 96 |       "  0.          0.          0.          0.1796875   0.5078125   0.71484375\n",
 97 |       "  0.98828125  0.98828125  0.80859375  0.0078125   0.          0.          0.\n",
 98 |       "  0.          0.          0.          0.          0.          0.          0.\n",
 99 |       "  0.          0.          0.          0.          0.          0.          0.\n",
100 |       "  0.          0.          0.15234375  0.578125    0.89453125  0.98828125\n",
101 |       "  0.98828125  0.98828125  0.9765625   0.7109375   0.          0.          0.\n",
102 |       "  0.          0.          0.          0.          0.          0.          0.\n",
103 |       "  0.          0.          0.          0.          0.          0.          0.\n",
104 |       "  0.          0.09375     0.4453125   0.86328125  0.98828125  0.98828125\n",
105 |       "  0.98828125  0.98828125  0.78515625  0.3046875   0.          0.          0.\n",
106 |       "  0.          0.          0.          0.          0.          0.          0.\n",
107 |       "  0.          0.          0.          0.          0.          0.          0.\n",
108 |       "  0.08984375  0.2578125   0.83203125  0.98828125  0.98828125  0.98828125\n",
109 |       "  0.98828125  0.7734375   0.31640625  0.0078125   0.          0.          0.\n",
110 |       "  0.          0.          0.          0.          0.          0.          0.\n",
111 |       "  0.          0.          0.          0.          0.          0.          0.0703125\n",
112 |       "  0.66796875  0.85546875  0.98828125  0.98828125  0.98828125  0.98828125\n",
113 |       "  0.76171875  0.3125      0.03515625  0.          0.          0.          0.\n",
114 |       "  0.          0.          0.          0.          0.          0.          0.\n",
115 |       "  0.          0.          0.          0.          0.          0.21484375\n",
116 |       "  0.671875    0.8828125   0.98828125  0.98828125  0.98828125  0.98828125\n",
117 |       "  0.953125    0.51953125  0.04296875  0.          0.          0.          0.\n",
118 |       "  0.          0.          0.          0.          0.          0.          0.\n",
119 |       "  0.          0.          0.          0.          0.          0.          0.\n",
120 |       "  0.53125     0.98828125  0.98828125  0.98828125  0.828125    0.52734375\n",
121 |       "  0.515625    0.0625      0.          0.          0.          0.          0.\n",
122 |       "  0.          0.          0.          0.          0.          0.          0.\n",
123 |       "  0.          0.          0.          0.          0.          0.          0.\n",
124 |       "  0.          0.          0.          0.          0.          0.          0.\n",
125 |       "  0.          0.          0.          0.          0.          0.          0.\n",
126 |       "  0.          0.          0.          0.          0.          0.          0.\n",
127 |       "  0.          0.          0.          0.          0.          0.          0.\n",
128 |       "  0.          0.          0.          0.          0.          0.          0.\n",
129 |       "  0.          0.          0.          0.          0.          0.          0.\n",
130 |       "  0.          0.          0.          0.          0.          0.          0.\n",
131 |       "  0.          0.          0.          0.          0.          0.          0.\n",
132 |       "  0.          0.          0.          0.          0.          0.          0.\n",
133 |       "  0.          0.          0.          0.          0.          0.          0.\n",
134 |       "  0.          0.          0.          0.          0.          0.          0.\n",
135 |       "  0.          0.          0.          0.        ]\n",
136 |       "Example label:\n",
137 |       "5\n"
138 |      ]
139 |     }
140 |    ],
141 |    "source": [
142 |     "from IPython.display import Image\n",
143 |     "import pickle\n",
144 |     "\n",
145 |     "# Download and unzip pickled version from here: http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz\n",
146 |     "(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = pickle.load(open('data/mnist.pkl', 'r'))\n",
147 |     "print \"Shapes:\"\n",
148 |     "print train_x.shape, train_y.shape\n",
149 |     "print valid_x.shape, valid_y.shape\n",
150 |     "print test_x.shape, test_y.shape\n",
151 |     "\n",
152 |     "print \"--------------\"\n",
153 |     "print \"Example input:\"\n",
154 |     "print train_x[0]\n",
155 |     "print \"Example label:\"\n",
156 |     "print train_y[0]\n"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "code",
161 |    "execution_count": 42,
162 |    "metadata": {
163 |     "collapsed": false
164 |    },
165 |    "outputs": [
166 |     {
167 |      "data": {
168 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAtQAAAAcCAAAAABkYnfcAAAQtklEQVR4nO1beVxV5dZ+EgQHUJFS\nLBw+FUcKmhyupab3NthgJlamxrW+zMzUrlfDm+ZshqalmUJq95opNpioOXyCQ0maESY4oCiiIIMT\ng4Cw13rPvn+cI3DOXu9RKX99f/D8tfdae717nX2e/b5reDdQgxrUoAY1qEENbgRBpzL+bBdqUIM/\nFIvy6Ls/2wdn/E9Mefs/24cbxpr0rtW0vG8lr7z3D/Xl9+AjW3KLW3qDuHhXSaeJaWsmTvT642/V\nZC/TwcZ//Li/A90LTy5oVE3boPgAvbIX23pWc1w9EswTtWVNLT8/vynvr2/2pa30PUEdcomILrod\nu09OO53q3er9Fp+AVyeJLGp5kelRnVVQp5E2Zmb+Vvqpnj333sCdF5Quc5G8XmSapmn2uQHja6j/\nyksfRpd8Hx0dHT3jfu1VQbHE/+ytUa4tvOsmblgFQ79ILi/f08D9RfUOZLWU5P1KF9S9iXv5BFS9\neixN9tBdGZ5ocOIYrdotItT7sqJ5uWkK7jZvMyxqHTMzZ3zNhT/0sl7x4Fmmy7nUrSpNHn7W+Zp3\nNulIHX7F4IdvwG9ntPr0IBF9LKnqrdeSulPk6TPEREREK3yten91rul1b/1+aeEgF1HjXNM0TfOy\n9l2y4gNVCTo0qZV8VTciflEzQt1M9eqN368S/t/xxU2bitQR/SXN7rmn+cvqqI+galu89babudtM\nGlfl7CGiNpoLw+MMgw1DXmNbLCxXX+pv4nOOr74iau42zfW1LNLQS+yAMXTAgK5WatbtcZqYfh5I\nPKmKdMoqp4tuW3ZIF4a9Z8ik7rL4EPO45xd3EXTtl14hPp1MOeKgC7WkjiWia6Smv1j1/kqFaNys\nxC76P4tsZLGZYZrmh+4tW7Sflpe30n6cppQ6HxcXF7ckLlEp1U80CDrF/Ix2uHg1SasDgH9E/If5\nsEX8y4U5fkD7QpoiWgVHbduWzjxrvW2HEPZ479onTAbX4DfyLLOzUzMpv8ovGKghdcNe6VcMI/mw\nhtTDC375+xTWBvIeE5gzOkoaz9Wm6ToDAfA7wczMCd+XFsgjrrLzJDyO11SRpjmT+k71H41DfS8Y\nyS28rfLnc4jjDhHxWouqwdLLRHSsRVuiHsKIDXdqST2a6NzMWTNnxmlJbdOT+uFtfgDw4oXUe6zK\ng2ayaZqttcYA+i65rJj5qP2s9ROtWzviS5/TSrkGNHbMoI136gd8TvtQAfQctc4gIip3nZD/6vif\npqt00fAtZi75dyazGiJoI0vdhDzdfmIiopVVZTOJLlWEV/UTiKQXsf9mgw3DGBYukrp2ROmshriX\ntY/iIWZ+QtQsMkVSo/9no5gT66FTlGh23yXm+Lc5855nbFXpd8qZ1FvUZNmfHpmGMcwq9uhWSPG9\nPet/TzzeogwnIkoNhIbUzU4yvSuvYh6BgU0BwPcM0ddCQO6vVDfZTwDHuAcApNieFZRhSaZpmuJs\nYcdn+5VSBUuGC+/vYKVKH5BsEkrS2upHRKC6KqddAbvOnCkg/pmIiDJclE+k2mfOu1W2NOdOLeYV\nc29HSC7nCq56ZW/R++OfTDlLH19DR6vGoa8Q0eZGjpNQEkk9hJltzPxyOLPwx43g0QDeydbdt+Wv\nzNvrSJrXSjSkhi+ieLBuwJBLRBvr94u4HeCiygLI3cXOpE5QmsJKNHOcIA4n2uILDCHKuN2i3EyU\ntqY58JRMakxmojd1DgMAwoqIFgpyf6X0hr9SXwAhhXKQG3DINM2vdbaNo9SFA8+1ay6oai8tUSpU\nMnqGaY67VDDQpkZI8r7pRETUrnG73qeJXFno7cib2in1umAcqdIDgDbr1JU3BO3kIjdFrr30PYC2\n54uqrmS1JhPRtdi/2XEp/Bhy
wShKyzWMogHiTO2XF+MBtEjTkjqZ+fJfJcXwMjNRQ2ogkuM1yUHQ\nas49ONB+zLS6Qv6OciJ1k2wVKNr7s3H+Eat4JtPHvgCOEgkhZbOp3e8AgFc1pMb1SP1CHBFJ01TD\ny2qBzmiGkXI7UG8N7fUUtEM+UKZpjtMZL+SP6suaR1YoVTZCmBTRcB7TBAAYM2+eaBqo1GhJvp2I\nit98AMASojR/jUueKWq+IO6Swv+u1yyWL4yTjH7YqhkNAHbQCABtz2c5xQkNjhElOaLzUBJI3Z8N\nYwfCDWM0JFJ7HE3xA/CDitTdVzHPcpX59Hhl6SXTfKuNltT14vlvosIrlvIfbeyYTZh+qFCsVO9U\nvW6VOtZQsm+ZyIaQrkzh0u/qAN5PF/M02SMAwHIdqW3Mbkj9UkopEf0irlexWlIH5pT2BLCMzgjK\nDkcN09TG1HWnnXr6GYm2AB40lFJXn5DeE5+dNu4KvD3uFLNNiic1pP5bIdEpe8IQSyQR144kkdRe\n0ZzZP51ZfF0eMjoA6NVJHjCOB3l3ioxPDnYWLyeiEAC1R4/eI5A6PJ+LdnRE+C+jPVB/r5XUL3BP\nAIONwiaa3/Ghjbdbnm7HVNPMjwrybKUlNVoXZHwu0aQbUWWJ2ZnUlTVb30GxV9VL4rivG7zVWi5t\nmEPfAWiznyimnsajtyImRfxMe6zVGrsnmpm65eTdu3czEV16TV7XtaQOPkELAIwvI2nFH3DVNE3T\nNBeJtrN5jYbSwDx7Ue/nycEW1ZPE6W0R8i1R4RH+SYg1NaTeRrSnDwA0GpzvOJLgdVRNleTzmVlx\nlLiyLj3khfCLqnSUOGIO7dtPNNBV/CoRjUT38VOJiCjFtaRy0uAIAG2aAEC8ldRbjngCTXOtk7ED\nn2SpJGuICu/g4BYAWpnm3zWGeDafeaI1K0ngKr01G/9YcbxShQG4J3T8wiUFRXkbC0gsx/TPN3YL\n798dRM3vmLi3gMl4SvSm7gMbmW3MmZpqg47UwaeulfQ2yIaIlQsKHuFs432TvAL2l68Q7caUmqY2\nprappzW3A7pvznPUqufd4azxGU2Z0xG0mnO/CO3JRyRS20RSP5cYby+2RxD9pi+7t1OqK+Df512X\nKu1bzKw2Bok25QNRO32gz/Mlj0naw8XEVGhNlr+wl4TtTS92qSeH5HGVs53WRFG9C/ju5U80lcQH\ns1iNlFUA4J9trtcqg7czL3FdA58sobGVZ0yLK46X8MWkpCRW5fkJC166yzO3XBqzJTOvFOQNs4mJ\n6MxZEnMDzwfPUlHmukKi7H/KHVAtqdMrHq5cAUKsypfEQ4g4lWifxiMATwwePKxAQ+r9fEbMY+xo\nfu/j0ayUUjud85bHiaagSSzlL/bqfDR/sWCpi6kdeKqUrkrJHgDAq/VwpQ4uT8pQBZ87KWp9pZTa\nKBt1Uv3R9VMAi3aL+q4DmT63ikPtfQFHfyDaSdf5pGFUngnhRx8VjEdPq+O6/udSpQ430+gAIMEc\no1c2HErs2nQIo3MVs7fXHN5eJReauGHDhg0bhttrHq+pNGnITw3DEPuMXc5z6gcdm+4iKRao/TTR\n5L/A7yAR0fNio9zGvE78ES3+dX/nzp07LyDSkHqcSOrnjdLs3iFxREzGWW0t+rZpZpq1sdylNvym\nckEHnRUA4KV9Sik1wUk2kQjYS9QT3YjETDFQKXf7C5joNUFcp8WAuQcOpCillJGe/t79rZzVXzEz\nx8oj9lEd4NMYQEeWLwhmEqZ4B6lTj82fYiF1vFGV1EKiGFxy5OzVApuOmWMNZnecRoI2pgYAlHGZ\nS488jCrq914zKEPbJI5RcwVpyEnD0NbAADxsk7IVzzlEmxri9gNcOu0boq2PhIZarmEiclMyRgMt\nqZ9TxcI6H39yOICOPxIT6fsd3qZ51DUSDUg8PwTwZ+7uxh0AHruUUs6NgDn8LUKyeCyCTvFY0S
hQ\nKTcVv9k2ZuuUUWfuEaWUys8sVyrKWp5r9i/FBz7jffKQfZSjce6rIfWLNtaQOu+XFx1HFlKPqzhp\nn2ecsISjw3avefpMnCYpCUzh8k9klQMJStjY4cDd07cwJ7kU9sLoI8dRyGr6Rj9wjNjSyDOMHzV1\nLgDAo0zWBKDW+1TwRiM8sI+O9YbvY6sKqPLFqsAnJJehr2GQltTPqBLhXxkTCAAPX6ZBHTvqm8SR\npvlPV1l28ZsAZvI2N71lAMB8pZRzm2kOfYOQs7TiTN7+O+XH5JbUtbcwjbIWYrep0o0f9+lxF46p\nNGHUoawifIaynDigr81B6qfEIA3oz3HCJijfFbs+tefBoUSU0aiqLr5K7619KmcLzVrgozKxNwW0\nOcIszZd2BHXpcvedYafC7mi5Stpr0G5xFjOXf+8iHsQZ9oO3L7Gbjq2G1GwYur06jgsEUo+kwhf8\nHl93habYJ8UXN22yNt1Gi6T27Oeo4g0v1JIaR9QSjabBYj4uyf1j7ZWdZgVCSS+iWCmVqtI1LYuA\nKY61sdYOpcofctJ1I+r2ej4R5+p8DVRKGwvVfY14lVBGtZ0KBQCPuVdyBJ96XeYnvVueYHlXSMVM\n7blZLvN02JhjqX04IZSIyGku3snsmPXrf8t8QoxGe/F0zXj9mFnY7QAAXh0/LDbNsoumaeblm9Ya\nd9O3TzIz77fk8GFU9nFIYFhsBqevcbcVO8YmdMJX2sSeaCXEmTqbihOPEdG7cjHPgePMNtf/+6Et\nFAgAfkMuExX11lguLNBV3yIoW5wXV5vHerXF/YOTTHOetfY9fnVubt6mdrK3TX9T9vynyVyl1G/O\nyvsK7amVmCMCcJso+qwlektqmKlfPQB4b1Sl0qo8S+2E55t5SpNfBmSNBADPzw6LHZ0GGfQPna92\ntMokouVVd5f2uWAYO+PHPBa/c69RFCHv5Dx3VregD2WOayRqmq41TdM8l7U9MjIyMjLStWDa5JHD\nzMwJz1qfURgRZR0loh91r5IdMSrcIgvJ5NJ52vItALwhkTqJiCh2fBv3227XE7ErqQ8SLZo9e/bs\nA0y04zmd5cJ8Te+0xUljmqjonmCa6ZsLTVMdcRdMSVirVEgdoM57BUrZCl2Tvn5xTLR8rD4XrJ2i\nJXUHolRRcUyt2DA9/DD9FCppZ3K8ZxhfkHf5ABhVMtI39OXU3+StRcvoC62vDnTJIiKn3kPPCwYb\nhmGwsUOY+QDg/nKpnQ8AOM2sWRreNk1zU2/N5wF+X51gZv6hv9SAu+snIibK/Uh3UwdihE1ovQwW\nSyKVCLaxldQ+QxdENNH4WonHZVI7SkvnlulfpoVqgKw4LhWrAADz37B3Xtx/JSHhf5VSifHxiUop\nVXgzXxg4cEBpyhTto0kqbAPAjNKysrKvxDozsIxjdjHLzQEAwKgS5vzp8vPvW1yk3yZ7DQ/kVO3X\nAcCd09gwDCNrraZo5/1ruq4D1+m8LlBCq0Nbh2tUXb4+w8x8ZZZm2ICpxDTf3U4yAECMrTqkxnGq\n7udlLZKtpA5dbi8sJX1sbd5V4lypZtfuJNKwHfCeMOFL08y/76bdbPXltc8Eyj+Qto1fD9HK8l2Z\nHauJtBVqdxjLrC5MEzcQXBctLxZrn1AVDP51hiuZXk4xkodpQmNgJGv/r1eu8sQbda8S7zNzypyZ\n4r6Nm0C4MFM33X1dUodTnLvC3M3Ca8R5+nrEdb5sWfvbrf280RleL0SN37MnKuqF0GqZt/xJXpg7\nbaQl2q/n3KHRhKKd46rlCuosppjqWV4HR5L0oWbG+dBbcs9bCN+ttE638tTADebSyWpx+vfgDf7x\nFnx3DCCnWmvO/1/4LnLbQ6mBBn2kDbu3Fg9mTqvmV8A1qEENfgf+C/434fbpcuVtAAAAAElFTkSu\nQmCC\n",
169 |       "text/plain": [
170 |        "<IPython.core.display.Image object>"
171 |       ]
172 |      },
173 |      "execution_count": 42,
174 |      "metadata": {},
175 |      "output_type": "execute_result"
176 |     }
177 |    ],
178 |    "source": [
179 |     "# Show example images - using the tile_raster_images helper function from OpenDeep to turn each 784-element array into a 28x28 image.\n",
180 |     "from utils import tile_raster_images\n",
181 |     "from PIL import Image as pil_img\n",
182 |     "\n",
183 |     "input_images = train_x[:25]\n",
184 |     "im = pil_img.fromarray(\n",
185 |     "    tile_raster_images(input_images, \n",
186 |     "                       img_shape=(28, 28), \n",
187 |     "                       tile_shape=(1, 25),\n",
188 |     "                       tile_spacing=(1, 1))\n",
189 |     ")\n",
190 |     "im.save(\"some_mnist_numbers.png\")\n",
191 |     "Image(filename=\"some_mnist_numbers.png\")\n"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": 43,
197 |    "metadata": {
198 |     "collapsed": false
199 |    },
200 |    "outputs": [],
201 |    "source": [
202 |     "# Your basic Theano imports.\n",
203 |     "import theano\n",
204 |     "import theano.tensor as T\n",
205 |     "\n",
206 |     "x = T.matrix('x')\n"
207 |    ]
208 |   },
209 |   {
210 |    "cell_type": "code",
211 |    "execution_count": 44,
212 |    "metadata": {
213 |     "collapsed": false
214 |    },
215 |    "outputs": [],
216 |    "source": [
217 |     "# Compute the hidden layer from the input\n",
218 |     "import numpy\n",
219 |     "import numpy.random as rng\n",
220 |     "\n",
221 |     "i = numpy.sqrt(6. / (784+500))\n",
222 |     "# W_x = numpy.asarray(rng.normal(loc=0.0, scale=.05, size=(28*28, 500)), dtype=theano.config.floatX)\n",
223 |     "W_x = numpy.asarray(rng.uniform(low=-i, high=i, size=(28*28, 500)), dtype=theano.config.floatX)\n",
224 |     "b_h = numpy.zeros(shape=(500,), dtype=theano.config.floatX)\n",
225 |     "\n",
226 |     "W_x = theano.shared(W_x, name=\"W_x\")\n",
227 |     "b_h = theano.shared(b_h, name=\"b_h\")\n",
228 |     "\n",
229 |     "h = T.tanh(\n",
230 |     "    T.dot(x, W_x) + b_h\n",
231 |     ")\n"
232 |    ]
233 |   },
234 |   {
235 |    "cell_type": "code",
236 |    "execution_count": 45,
237 |    "metadata": {
238 |     "collapsed": false
239 |    },
240 |    "outputs": [],
241 |    "source": [
242 |     "# Compute the output class probabilities from the hidden layer\n",
243 |     "i = numpy.sqrt(6. / (500+10))  # uniform init bound sqrt(6 / (fan_in + fan_out)) for the 500x10 weights\n",
244 |     "# W_h = numpy.asarray(rng.normal(loc=0.0, scale=.05, size=(500, 10)), dtype=theano.config.floatX)\n",
245 |     "W_h = numpy.asarray(rng.uniform(low=-i, high=i, size=(500, 10)), dtype=theano.config.floatX)\n",
246 |     "b_y = numpy.zeros(shape=(10,), dtype=theano.config.floatX)  # floatX (not hard-coded float32), matching W_h and b_h\n",
247 |     "\n",
248 |     "W_h = theano.shared(W_h, name=\"W_h\")\n",
249 |     "b_y = theano.shared(b_y, name=\"b_y\")\n",
250 |     "\n",
251 |     "y = T.nnet.softmax(\n",
252 |     "    T.dot(h, W_h) + b_y\n",
253 |     ")\n",
254 |     "\n",
255 |     "# The actual predicted label\n",
256 |     "y_hat = T.argmax(y, axis=1)\n"
257 |    ]
258 |   },
259 |   {
260 |    "cell_type": "code",
261 |    "execution_count": 46,
262 |    "metadata": {
263 |     "collapsed": false
264 |    },
265 |    "outputs": [],
266 |    "source": [
267 |     "# Cost: mean negative log-likelihood of the correct labels\n",
268 |     "correct_labels = T.ivector(\"labels\")\n",
269 |     "\n",
270 |     "log_likelihood = T.log(y)[T.arange(correct_labels.shape[0]), correct_labels]\n",
271 |     "cost = -T.mean(log_likelihood)\n"
272 |    ]
273 |   },
274 |   {
275 |    "cell_type": "code",
276 |    "execution_count": 47,
277 |    "metadata": {
278 |     "collapsed": false
279 |    },
280 |    "outputs": [],
281 |    "source": [
282 |     "# Compute gradient updates for the parameters\n",
283 |     "parameters = [W_x, b_h, W_h, b_y]\n",
284 |     "gradients = T.grad(cost, parameters)\n",
285 |     "\n",
286 |     "learning_rate = 0.01\n",
287 |     "train_updates = [(param, param - learning_rate*gradient) for param, gradient in zip(parameters, gradients)]\n"
288 |    ]
289 |   },
290 |   {
291 |    "cell_type": "code",
292 |    "execution_count": 48,
293 |    "metadata": {
294 |     "collapsed": false
295 |    },
296 |    "outputs": [],
297 |    "source": [
298 |     "# Compile functions for training (applies the parameter updates) and testing (no updates)\n",
299 |     "f_train = theano.function(\n",
300 |     "    inputs=[x, correct_labels], \n",
301 |     "    outputs=cost, \n",
302 |     "    updates=train_updates, \n",
303 |     "    allow_input_downcast=True\n",
304 |     ")\n",
305 |     "\n",
306 |     "f_test = theano.function(\n",
307 |     "    inputs=[x], \n",
308 |     "    outputs=y_hat, \n",
309 |     "    allow_input_downcast=True\n",
310 |     ")\n"
311 |    ]
312 |   },
313 |   {
314 |    "cell_type": "code",
315 |    "execution_count": 49,
316 |    "metadata": {
317 |     "collapsed": false
318 |    },
319 |    "outputs": [
320 |     {
321 |      "name": "stdout",
322 |      "output_type": "stream",
323 |      "text": [
324 |       "1 : cost: 0.910093 \ttrain: 79.788% \tvalid: 88.05% \ttest: 87.49%\n",
325 |       "2 : cost: 0.492869 \ttrain: 87.558% \tvalid: 89.6% \ttest: 88.99%\n",
326 |       "3 : cost: 0.419445 \ttrain: 88.848% \tvalid: 90.26% \ttest: 89.9%\n",
327 |       "saving filters...\n",
328 |       "4 : cost: 0.38414 \ttrain: 89.536% \tvalid: 90.67% \ttest: 90.37%\n",
329 |       "5 : cost: 0.362308 \ttrain: 89.99% \tvalid: 91.1% \ttest: 90.69%\n",
330 |       "6 : cost: 0.346996 \ttrain: 90.424% \tvalid: 91.34% \ttest: 90.95%\n",
331 |       "saving filters...\n",
332 |       "7 : cost: 0.335397 \ttrain: 90.724% \tvalid: 91.51% \ttest: 91.15%\n",
333 |       "8 : cost: 0.326137 \ttrain: 90.982% \tvalid: 91.76% \ttest: 91.42%\n",
334 |       "9 : cost: 0.318454 \ttrain: 91.17% \tvalid: 91.89% \ttest: 91.65%\n",
335 |       "saving filters...\n",
336 |       "10 : cost: 0.311887 \ttrain: 91.354% \tvalid: 92.02% \ttest: 91.8%\n",
337 |       "11 : cost: 0.306141 \ttrain: 91.554% \tvalid: 92.08% \ttest: 91.98%\n",
338 |       "12 : cost: 0.301014 \ttrain: 91.684% \tvalid: 92.2% \ttest: 92.04%\n",
339 |       "saving filters...\n",
340 |       "13 : cost: 0.296363 \ttrain: 91.804% \tvalid: 92.22% \ttest: 92.08%\n",
341 |       "14 : cost: 0.292087 \ttrain: 91.922% \tvalid: 92.22% \ttest: 92.17%\n",
342 |       "15 : cost: 0.288107 \ttrain: 92.034% \tvalid: 92.31% \ttest: 92.23%\n",
343 |       "saving filters...\n",
344 |       "16 : cost: 0.284364 \ttrain: 92.126% \tvalid: 92.35% \ttest: 92.32%\n",
345 |       "17 : cost: 0.280813 \ttrain: 92.228% \tvalid: 92.4% \ttest: 92.39%\n",
346 |       "18 : cost: 0.277417 \ttrain: 92.332% \tvalid: 92.45% \ttest: 92.44%\n",
347 |       "saving filters...\n",
348 |       "19 : cost: 0.274148 \ttrain: 92.442% \tvalid: 92.52% \ttest: 92.48%\n",
349 |       "20 : cost: 0.270981 \ttrain: 92.53% \tvalid: 92.6% \ttest: 92.57%\n",
350 |       "21 : cost: 0.267898 \ttrain: 92.618% \tvalid: 92.64% \ttest: 92.67%\n",
351 |       "saving filters...\n",
352 |       "22 : cost: 0.264884 \ttrain: 92.734% \tvalid: 92.73% \ttest: 92.72%\n",
353 |       "23 : cost: 0.261926 \ttrain: 92.814% \tvalid: 92.85% \ttest: 92.8%\n",
354 |       "24 : cost: 0.259013 \ttrain: 92.89% \tvalid: 92.93% \ttest: 92.93%\n",
355 |       "saving filters...\n",
356 |       "25 : cost: 0.256139 \ttrain: 92.984% \tvalid: 93.05% \ttest: 92.95%\n",
357 |       "26 : cost: 0.253295 \ttrain: 93.068% \tvalid: 93.16% \ttest: 92.98%\n",
358 |       "27 : cost: 0.250478 \ttrain: 93.148% \tvalid: 93.22% \ttest: 93.08%\n",
359 |       "saving filters...\n",
360 |       "28 : cost: 0.247683 \ttrain: 93.214% \tvalid: 93.29% \ttest: 93.14%\n",
361 |       "29 : cost: 0.244908 \ttrain: 93.292% \tvalid: 93.42% \ttest: 93.14%\n",
362 |       "30 : cost: 0.24215 \ttrain: 93.374% \tvalid: 93.53% \ttest: 93.21%\n",
363 |       "saving filters...\n",
364 |       "31 : cost: 0.239409 \ttrain: 93.462% \tvalid: 93.61% \ttest: 93.26%\n",
365 |       "32 : cost: 0.236685 \ttrain: 93.538% \tvalid: 93.72% \ttest: 93.36%\n",
366 |       "33 : cost: 0.233977 \ttrain: 93.642% \tvalid: 93.79% \ttest: 93.39%\n",
367 |       "saving filters...\n",
368 |       "34 : cost: 0.231286 \ttrain: 93.712% \tvalid: 93.86% \ttest: 93.43%\n",
369 |       "35 : cost: 0.228612 \ttrain: 93.782% \tvalid: 93.94% \ttest: 93.47%\n",
370 |       "36 : cost: 0.225956 \ttrain: 93.858% \tvalid: 94.06% \ttest: 93.55%\n",
371 |       "saving filters...\n",
372 |       "37 : cost: 0.223321 \ttrain: 93.916% \tvalid: 94.19% \ttest: 93.62%\n",
373 |       "38 : cost: 0.220706 \ttrain: 94.0% \tvalid: 94.35% \ttest: 93.72%\n",
374 |       "39 : cost: 0.218114 \ttrain: 94.068% \tvalid: 94.43% \ttest: 93.84%\n",
375 |       "saving filters...\n",
376 |       "40 : cost: 0.215546 \ttrain: 94.152% \tvalid: 94.48% \ttest: 93.89%\n",
377 |       "41 : cost: 0.213002 \ttrain: 94.232% \tvalid: 94.54% \ttest: 93.96%\n",
378 |       "42 : cost: 0.210484 \ttrain: 94.286% \tvalid: 94.61% \ttest: 94.05%\n",
379 |       "saving filters...\n",
380 |       "43 : cost: 0.207995 \ttrain: 94.368% \tvalid: 94.69% \ttest: 94.09%\n",
381 |       "44 : cost: 0.205533 \ttrain: 94.46% \tvalid: 94.75% \ttest: 94.15%\n",
382 |       "45 : cost: 0.203102 \ttrain: 94.514% \tvalid: 94.81% \ttest: 94.22%\n",
383 |       "saving filters...\n",
384 |       "46 : cost: 0.2007 \ttrain: 94.576% \tvalid: 94.91% \ttest: 94.29%\n",
385 |       "47 : cost: 0.19833 \ttrain: 94.64% \tvalid: 94.94% \ttest: 94.38%\n",
386 |       "48 : cost: 0.195992 \ttrain: 94.71% \tvalid: 94.99% \ttest: 94.45%\n",
387 |       "saving filters...\n",
388 |       "49 : cost: 0.193687 \ttrain: 94.764% \tvalid: 95.02% \ttest: 94.49%\n",
389 |       "50 : cost: 0.191414 \ttrain: 94.826% \tvalid: 95.06% \ttest: 94.53%\n",
390 |       "51 : cost: 0.189174 \ttrain: 94.872% \tvalid: 95.13% \ttest: 94.56%\n",
391 |       "saving filters...\n",
392 |       "52 : cost: 0.186968 \ttrain: 94.924% \tvalid: 95.18% \ttest: 94.61%\n",
393 |       "53 : cost: 0.184796 \ttrain: 94.982% \tvalid: 95.18% \ttest: 94.65%\n",
394 |       "54 : cost: 0.182657 \ttrain: 95.036% \tvalid: 95.24% \ttest: 94.68%\n",
395 |       "saving filters...\n",
396 |       "55 : cost: 0.180553 \ttrain: 95.088% \tvalid: 95.3% \ttest: 94.68%\n",
397 |       "56 : cost: 0.178481 \ttrain: 95.134% \tvalid: 95.34% \ttest: 94.78%\n",
398 |       "57 : cost: 0.176444 \ttrain: 95.194% \tvalid: 95.38% \ttest: 94.83%\n",
399 |       "saving filters...\n",
400 |       "58 : cost: 0.174439 \ttrain: 95.24% \tvalid: 95.41% \ttest: 94.9%\n",
401 |       "59 : cost: 0.172468 \ttrain: 95.288% \tvalid: 95.4% \ttest: 94.98%\n",
402 |       "60 : cost: 0.170529 \ttrain: 95.358% \tvalid: 95.44% \ttest: 95.01%\n",
403 |       "saving filters...\n",
404 |       "61 : cost: 0.168623 \ttrain: 95.422% \tvalid: 95.5% \ttest: 95.02%\n",
405 |       "62 : cost: 0.166749 \ttrain: 95.47% \tvalid: 95.54% \ttest: 95.05%\n",
406 |       "63 : cost: 0.164906 \ttrain: 95.518% \tvalid: 95.62% \ttest: 95.07%\n",
407 |       "saving filters...\n",
408 |       "64 : cost: 0.163095 \ttrain: 95.568% \tvalid: 95.67% \ttest: 95.13%\n",
409 |       "65 : cost: 0.161314 \ttrain: 95.612% \tvalid: 95.71% \ttest: 95.19%\n",
410 |       "66 : cost: 0.159564 \ttrain: 95.666% \tvalid: 95.72% \ttest: 95.21%\n",
411 |       "saving filters...\n",
412 |       "67 : cost: 0.157843 \ttrain: 95.706% \tvalid: 95.76% \ttest: 95.27%\n",
413 |       "68 : cost: 0.156151 \ttrain: 95.756% \tvalid: 95.83% \ttest: 95.31%\n",
414 |       "69 : cost: 0.154488 \ttrain: 95.806% \tvalid: 95.87% \ttest: 95.31%\n",
415 |       "saving filters...\n",
416 |       "70 : cost: 0.152853 \ttrain: 95.854% \tvalid: 95.88% \ttest: 95.36%\n",
417 |       "71 : cost: 0.151246 \ttrain: 95.904% \tvalid: 95.94% \ttest: 95.38%\n",
418 |       "72 : cost: 0.149666 \ttrain: 95.964% \tvalid: 95.96% \ttest: 95.46%\n",
419 |       "saving filters...\n",
420 |       "73 : cost: 0.148112 \ttrain: 96.016% \tvalid: 95.99% \ttest: 95.5%\n",
421 |       "74 : cost: 0.146585 \ttrain: 96.072% \tvalid: 96.01% \ttest: 95.56%\n",
422 |       "75 : cost: 0.145083 \ttrain: 96.138% \tvalid: 96.05% \ttest: 95.6%\n",
423 |       "saving filters...\n",
424 |       "76 : cost: 0.143606 \ttrain: 96.18% \tvalid: 96.09% \ttest: 95.64%\n",
425 |       "77 : cost: 0.142153 \ttrain: 96.224% \tvalid: 96.1% \ttest: 95.68%\n",
426 |       "78 : cost: 0.140724 \ttrain: 96.254% \tvalid: 96.12% \ttest: 95.74%\n",
427 |       "saving filters...\n",
428 |       "79 : cost: 0.139319 \ttrain: 96.306% \tvalid: 96.14% \ttest: 95.77%\n",
429 |       "80 : cost: 0.137937 \ttrain: 96.348% \tvalid: 96.15% \ttest: 95.77%\n",
430 |       "81 : cost: 0.136577 \ttrain: 96.384% \tvalid: 96.16% \ttest: 95.81%\n",
431 |       "saving filters...\n",
432 |       "82 : cost: 0.13524 \ttrain: 96.42% \tvalid: 96.22% \ttest: 95.86%\n",
433 |       "83 : cost: 0.133923 \ttrain: 96.468% \tvalid: 96.24% \ttest: 95.87%\n",
434 |       "84 : cost: 0.132628 \ttrain: 96.502% \tvalid: 96.3% \ttest: 95.93%\n",
435 |       "saving filters...\n",
436 |       "85 : cost: 0.131354 \ttrain: 96.544% \tvalid: 96.33% \ttest: 95.96%\n",
437 |       "86 : cost: 0.130099 \ttrain: 96.58% \tvalid: 96.36% \ttest: 96.03%\n",
438 |       "87 : cost: 0.128864 \ttrain: 96.608% \tvalid: 96.38% \ttest: 96.07%\n",
439 |       "saving filters...\n",
440 |       "88 : cost: 0.127649 \ttrain: 96.658% \tvalid: 96.42% \ttest: 96.09%\n",
441 |       "89 : cost: 0.126453 \ttrain: 96.688% \tvalid: 96.43% \ttest: 96.1%\n",
442 |       "90 : cost: 0.125275 \ttrain: 96.71% \tvalid: 96.44% \ttest: 96.14%\n",
443 |       "saving filters...\n",
444 |       "91 : cost: 0.124115 \ttrain: 96.742% \tvalid: 96.44% \ttest: 96.16%\n",
445 |       "92 : cost: 0.122972 \ttrain: 96.79% \tvalid: 96.5% \ttest: 96.18%\n",
446 |       "93 : cost: 0.121848 \ttrain: 96.822% \tvalid: 96.51% \ttest: 96.18%\n",
447 |       "saving filters...\n",
448 |       "94 : cost: 0.120739 \ttrain: 96.854% \tvalid: 96.52% \ttest: 96.2%\n",
449 |       "95 : cost: 0.119648 \ttrain: 96.892% \tvalid: 96.53% \ttest: 96.22%\n",
450 |       "96 : cost: 0.118573 \ttrain: 96.93% \tvalid: 96.56% \ttest: 96.26%\n",
451 |       "saving filters...\n",
452 |       "97 : cost: 0.117514 \ttrain: 96.954% \tvalid: 96.58% \ttest: 96.29%\n",
453 |       "98 : cost: 0.11647 \ttrain: 96.97% \tvalid: 96.58% \ttest: 96.3%\n",
454 |       "99 : cost: 0.115442 \ttrain: 96.994% \tvalid: 96.6% \ttest: 96.33%\n",
455 |       "saving filters...\n",
456 |       "100 : cost: 0.114428 \ttrain: 97.022% \tvalid: 96.61% \ttest: 96.33%\n",
457 |       "101 : cost: 0.113429 \ttrain: 97.062% \tvalid: 96.62% \ttest: 96.33%\n",
458 |       "102 : cost: 0.112444 \ttrain: 97.092% \tvalid: 96.61% \ttest: 96.36%\n",
459 |       "saving filters...\n",
460 |       "103 : cost: 0.111474 \ttrain: 97.136% \tvalid: 96.64% \ttest: 96.37%\n",
461 |       "104 : cost: 0.110517 \ttrain: 97.154% \tvalid: 96.69% \ttest: 96.39%\n",
462 |       "105 : cost: 0.109573 \ttrain: 97.18% \tvalid: 96.72% \ttest: 96.4%\n",
463 |       "saving filters...\n",
464 |       "106 : cost: 0.108643 \ttrain: 97.208% \tvalid: 96.73% \ttest: 96.44%\n",
465 |       "107 : cost: 0.107725 \ttrain: 97.246% \tvalid: 96.74% \ttest: 96.45%\n",
466 |       "108 : cost: 0.10682 \ttrain: 97.27% \tvalid: 96.74% \ttest: 96.48%\n",
467 |       "saving filters...\n",
468 |       "109 : cost: 0.105928 \ttrain: 97.288% \tvalid: 96.76% \ttest: 96.5%\n",
469 |       "110 : cost: 0.105048 \ttrain: 97.322% \tvalid: 96.78% \ttest: 96.53%\n",
470 |       "111 : cost: 0.104179 \ttrain: 97.338% \tvalid: 96.8% \ttest: 96.56%\n",
471 |       "saving filters...\n",
472 |       "112 : cost: 0.103322 \ttrain: 97.364% \tvalid: 96.8% \ttest: 96.59%\n",
473 |       "113 : cost: 0.102477 \ttrain: 97.386% \tvalid: 96.8% \ttest: 96.63%\n",
474 |       "114 : cost: 0.101643 \ttrain: 97.41% \tvalid: 96.81% \ttest: 96.64%\n",
475 |       "saving filters...\n",
476 |       "115 : cost: 0.10082 \ttrain: 97.438% \tvalid: 96.82% \ttest: 96.65%\n",
477 |       "116 : cost: 0.100007 \ttrain: 97.46% \tvalid: 96.84% \ttest: 96.66%\n",
478 |       "117 : cost: 0.0992054 \ttrain: 97.476% \tvalid: 96.85% \ttest: 96.67%\n",
479 |       "saving filters...\n",
480 |       "118 : cost: 0.0984139 \ttrain: 97.498% \tvalid: 96.87% \ttest: 96.68%\n",
481 |       "119 : cost: 0.0976325 \ttrain: 97.532% \tvalid: 96.89% \ttest: 96.68%\n",
482 |       "120 : cost: 0.0968612 \ttrain: 97.546% \tvalid: 96.89% \ttest: 96.69%\n",
483 |       "saving filters...\n",
484 |       "121 : cost: 0.0960996 \ttrain: 97.558% \tvalid: 96.9% \ttest: 96.7%\n",
485 |       "122 : cost: 0.0953476 \ttrain: 97.592% \tvalid: 96.91% \ttest: 96.7%\n",
486 |       "123 : cost: 0.094605 \ttrain: 97.612% \tvalid: 96.92% \ttest: 96.72%\n",
487 |       "saving filters...\n",
488 |       "124 : cost: 0.0938718 \ttrain: 97.63% \tvalid: 96.94% \ttest: 96.74%\n",
489 |       "125 : cost: 0.0931474 \ttrain: 97.652% \tvalid: 96.95% \ttest: 96.74%\n",
490 |       "126 : cost: 0.0924321 \ttrain: 97.668% \tvalid: 96.98% \ttest: 96.74%\n",
491 |       "saving filters...\n",
492 |       "127 : cost: 0.0917255 \ttrain: 97.684% \tvalid: 96.99% \ttest: 96.74%\n",
493 |       "128 : cost: 0.0910275 \ttrain: 97.706% \tvalid: 97.0% \ttest: 96.77%\n",
494 |       "129 : cost: 0.0903379 \ttrain: 97.724% \tvalid: 97.01% \ttest: 96.79%\n",
495 |       "saving filters...\n",
496 |       "130 : cost: 0.0896565 \ttrain: 97.736% \tvalid: 97.02% \ttest: 96.79%\n",
497 |       "131 : cost: 0.0889833 \ttrain: 97.756% \tvalid: 97.02% \ttest: 96.79%\n",
498 |       "132 : cost: 0.0883181 \ttrain: 97.772% \tvalid: 97.03% \ttest: 96.8%\n",
499 |       "saving filters...\n",
500 |       "133 : cost: 0.0876607 \ttrain: 97.786% \tvalid: 97.07% \ttest: 96.8%\n",
501 |       "134 : cost: 0.087011 \ttrain: 97.804% \tvalid: 97.08% \ttest: 96.81%\n",
502 |       "135 : cost: 0.0863689 \ttrain: 97.812% \tvalid: 97.09% \ttest: 96.82%\n",
503 |       "saving filters...\n",
504 |       "136 : cost: 0.0857343 \ttrain: 97.824% \tvalid: 97.12% \ttest: 96.83%\n",
505 |       "137 : cost: 0.085107 \ttrain: 97.842% \tvalid: 97.12% \ttest: 96.86%\n",
506 |       "138 : cost: 0.0844868 \ttrain: 97.86% \tvalid: 97.13% \ttest: 96.88%\n",
507 |       "saving filters...\n",
508 |       "139 : cost: 0.0838737 \ttrain: 97.884% \tvalid: 97.12% \ttest: 96.9%\n",
509 |       "140 : cost: 0.0832676 \ttrain: 97.898% \tvalid: 97.11% \ttest: 96.91%\n",
510 |       "141 : cost: 0.0826682 \ttrain: 97.906% \tvalid: 97.11% \ttest: 96.94%\n",
511 |       "saving filters...\n",
512 |       "142 : cost: 0.0820756 \ttrain: 97.916% \tvalid: 97.12% \ttest: 96.93%\n",
513 |       "143 : cost: 0.0814896 \ttrain: 97.93% \tvalid: 97.14% \ttest: 96.93%\n",
514 |       "144 : cost: 0.0809101 \ttrain: 97.948% \tvalid: 97.15% \ttest: 96.96%\n",
515 |       "saving filters...\n",
516 |       "145 : cost: 0.080337 \ttrain: 97.966% \tvalid: 97.15% \ttest: 96.97%\n",
517 |       "146 : cost: 0.0797701 \ttrain: 97.99% \tvalid: 97.16% \ttest: 96.97%\n",
518 |       "147 : cost: 0.0792095 \ttrain: 98.012% \tvalid: 97.18% \ttest: 96.98%\n",
519 |       "saving filters...\n",
520 |       "148 : cost: 0.0786549 \ttrain: 98.03% \tvalid: 97.19% \ttest: 96.98%\n",
521 |       "149 : cost: 0.0781063 \ttrain: 98.058% \tvalid: 97.21% \ttest: 97.0%\n",
522 |       "150 : cost: 0.0775635 \ttrain: 98.074% \tvalid: 97.21% \ttest: 97.01%\n",
523 |       "saving filters...\n",
524 |       "151 : cost: 0.0770266 \ttrain: 98.082% \tvalid: 97.21% \ttest: 97.02%\n",
525 |       "152 : cost: 0.0764954 \ttrain: 98.096% \tvalid: 97.21% \ttest: 97.02%\n",
526 |       "153 : cost: 0.0759698 \ttrain: 98.12% \tvalid: 97.22% \ttest: 97.02%\n",
527 |       "saving filters...\n",
528 |       "154 : cost: 0.0754497 \ttrain: 98.134% \tvalid: 97.22% \ttest: 97.03%\n",
529 |       "155 : cost: 0.0749351 \ttrain: 98.144% \tvalid: 97.22% \ttest: 97.04%\n",
530 |       "156 : cost: 0.0744257 \ttrain: 98.166% \tvalid: 97.24% \ttest: 97.06%\n",
531 |       "saving filters...\n",
532 |       "157 : cost: 0.0739218 \ttrain: 98.184% \tvalid: 97.24% \ttest: 97.06%\n",
533 |       "158 : cost: 0.0734229 \ttrain: 98.208% \tvalid: 97.27% \ttest: 97.07%\n",
534 |       "159 : cost: 0.0729292 \ttrain: 98.224% \tvalid: 97.28% \ttest: 97.07%\n",
535 |       "saving filters...\n",
536 |       "160 : cost: 0.0724405 \ttrain: 98.234% \tvalid: 97.29% \ttest: 97.08%\n",
537 |       "161 : cost: 0.0719567 \ttrain: 98.246% \tvalid: 97.3% \ttest: 97.1%\n",
538 |       "162 : cost: 0.0714778 \ttrain: 98.262% \tvalid: 97.29% \ttest: 97.12%\n",
539 |       "saving filters...\n",
540 |       "163 : cost: 0.0710037 \ttrain: 98.27% \tvalid: 97.3% \ttest: 97.12%\n",
541 |       "164 : cost: 0.0705344 \ttrain: 98.284% \tvalid: 97.32% \ttest: 97.13%\n",
542 |       "165 : cost: 0.0700698 \ttrain: 98.292% \tvalid: 97.31% \ttest: 97.13%\n",
543 |       "saving filters...\n",
544 |       "166 : cost: 0.0696097 \ttrain: 98.306% \tvalid: 97.3% \ttest: 97.14%\n",
545 |       "167 : cost: 0.0691542 \ttrain: 98.314% \tvalid: 97.29% \ttest: 97.15%\n",
546 |       "168 : cost: 0.0687031 \ttrain: 98.328% \tvalid: 97.31% \ttest: 97.16%\n",
547 |       "saving filters...\n",
548 |       "169 : cost: 0.0682565 \ttrain: 98.344% \tvalid: 97.32% \ttest: 97.16%\n",
549 |       "170 : cost: 0.0678142 \ttrain: 98.344% \tvalid: 97.33% \ttest: 97.17%\n",
550 |       "171 : cost: 0.0673761 \ttrain: 98.348% \tvalid: 97.34% \ttest: 97.17%\n",
551 |       "saving filters...\n",
552 |       "172 : cost: 0.0669422 \ttrain: 98.366% \tvalid: 97.34% \ttest: 97.17%\n",
553 |       "173 : cost: 0.0665125 \ttrain: 98.382% \tvalid: 97.33% \ttest: 97.17%\n",
554 |       "174 : cost: 0.0660869 \ttrain: 98.39% \tvalid: 97.33% \ttest: 97.18%\n",
555 |       "saving filters...\n",
556 |       "175 : cost: 0.0656654 \ttrain: 98.406% \tvalid: 97.33% \ttest: 97.18%\n",
557 |       "176 : cost: 0.0652478 \ttrain: 98.418% \tvalid: 97.33% \ttest: 97.16%\n",
558 |       "177 : cost: 0.0648342 \ttrain: 98.43% \tvalid: 97.33% \ttest: 97.17%\n",
559 |       "saving filters...\n",
560 |       "178 : cost: 0.0644244 \ttrain: 98.442% \tvalid: 97.35% \ttest: 97.17%\n",
561 |       "179 : cost: 0.0640184 \ttrain: 98.458% \tvalid: 97.35% \ttest: 97.17%\n",
562 |       "180 : cost: 0.0636162 \ttrain: 98.466% \tvalid: 97.34% \ttest: 97.18%\n",
563 |       "saving filters...\n",
564 |       "181 : cost: 0.0632178 \ttrain: 98.48% \tvalid: 97.34% \ttest: 97.18%\n",
565 |       "182 : cost: 0.0628229 \ttrain: 98.494% \tvalid: 97.37% \ttest: 97.19%\n",
566 |       "183 : cost: 0.0624317 \ttrain: 98.508% \tvalid: 97.37% \ttest: 97.2%\n",
567 |       "saving filters...\n",
568 |       "184 : cost: 0.0620441 \ttrain: 98.53% \tvalid: 97.37% \ttest: 97.21%\n",
569 |       "185 : cost: 0.06166 \ttrain: 98.538% \tvalid: 97.37% \ttest: 97.23%\n",
570 |       "186 : cost: 0.0612793 \ttrain: 98.55% \tvalid: 97.36% \ttest: 97.24%\n",
571 |       "saving filters...\n",
572 |       "187 : cost: 0.0609021 \ttrain: 98.568% \tvalid: 97.36% \ttest: 97.24%\n",
573 |       "188 : cost: 0.0605283 \ttrain: 98.582% \tvalid: 97.36% \ttest: 97.26%\n",
574 |       "189 : cost: 0.0601578 \ttrain: 98.6% \tvalid: 97.37% \ttest: 97.27%\n",
575 |       "saving filters...\n",
576 |       "190 : cost: 0.0597905 \ttrain: 98.606% \tvalid: 97.37% \ttest: 97.28%\n",
577 |       "191 : cost: 0.0594266 \ttrain: 98.62% \tvalid: 97.38% \ttest: 97.29%\n",
578 |       "192 : cost: 0.0590659 \ttrain: 98.624% \tvalid: 97.38% \ttest: 97.3%\n",
579 |       "saving filters...\n",
580 |       "193 : cost: 0.0587082 \ttrain: 98.628% \tvalid: 97.38% \ttest: 97.3%\n",
581 |       "194 : cost: 0.0583538 \ttrain: 98.632% \tvalid: 97.38% \ttest: 97.3%\n",
582 |       "195 : cost: 0.0580024 \ttrain: 98.634% \tvalid: 97.39% \ttest: 97.31%\n",
583 |       "saving filters...\n",
584 |       "196 : cost: 0.057654 \ttrain: 98.64% \tvalid: 97.38% \ttest: 97.3%\n",
585 |       "197 : cost: 0.0573087 \ttrain: 98.644% \tvalid: 97.39% \ttest: 97.31%\n",
586 |       "198 : cost: 0.0569663 \ttrain: 98.658% \tvalid: 97.38% \ttest: 97.33%\n",
587 |       "saving filters...\n",
588 |       "199 : cost: 0.0566269 \ttrain: 98.666% \tvalid: 97.38% \ttest: 97.33%\n",
589 |       "200 : cost: 0.0562904 \ttrain: 98.674% \tvalid: 97.39% \ttest: 97.34%\n",
590 |       "201 : cost: 0.0559567 \ttrain: 98.688% \tvalid: 97.39% \ttest: 97.34%\n",
591 |       "saving filters...\n",
592 |       "202 : cost: 0.0556259 \ttrain: 98.704% \tvalid: 97.39% \ttest: 97.35%\n",
593 |       "203 : cost: 0.0552979 \ttrain: 98.72% \tvalid: 97.4% \ttest: 97.35%\n",
594 |       "204 : cost: 0.0549725 \ttrain: 98.738% \tvalid: 97.41% \ttest: 97.36%\n",
595 |       "saving filters...\n",
596 |       "205 : cost: 0.05465 \ttrain: 98.742% \tvalid: 97.41% \ttest: 97.36%\n",
597 |       "206 : cost: 0.0543301 \ttrain: 98.744% \tvalid: 97.41% \ttest: 97.37%\n",
598 |       "207 : cost: 0.0540128 \ttrain: 98.756% \tvalid: 97.41% \ttest: 97.37%\n",
599 |       "saving filters...\n",
600 |       "208 : cost: 0.0536982 \ttrain: 98.762% \tvalid: 97.42% \ttest: 97.38%\n",
601 |       "209 : cost: 0.0533862 \ttrain: 98.768% \tvalid: 97.43% \ttest: 97.4%\n",
602 |       "210 : cost: 0.0530767 \ttrain: 98.778% \tvalid: 97.44% \ttest: 97.41%\n",
603 |       "saving filters...\n",
604 |       "211 : cost: 0.0527698 \ttrain: 98.786% \tvalid: 97.46% \ttest: 97.41%\n",
605 |       "212 : cost: 0.0524655 \ttrain: 98.792% \tvalid: 97.46% \ttest: 97.41%\n",
606 |       "213 : cost: 0.0521635 \ttrain: 98.804% \tvalid: 97.46% \ttest: 97.41%\n",
607 |       "saving filters...\n",
608 |       "214 : cost: 0.051864 \ttrain: 98.814% \tvalid: 97.46% \ttest: 97.41%\n",
609 |       "215 : cost: 0.0515669 \ttrain: 98.816% \tvalid: 97.46% \ttest: 97.41%\n",
610 |       "216 : cost: 0.0512722 \ttrain: 98.824% \tvalid: 97.46% \ttest: 97.41%\n",
611 |       "saving filters...\n",
612 |       "217 : cost: 0.0509799 \ttrain: 98.838% \tvalid: 97.46% \ttest: 97.4%\n",
613 |       "218 : cost: 0.0506899 \ttrain: 98.842% \tvalid: 97.45% \ttest: 97.4%\n",
614 |       "219 : cost: 0.0504022 \ttrain: 98.852% \tvalid: 97.46% \ttest: 97.41%\n",
615 |       "saving filters...\n",
616 |       "220 : cost: 0.0501168 \ttrain: 98.86% \tvalid: 97.46% \ttest: 97.43%\n",
617 |       "221 : cost: 0.0498336 \ttrain: 98.876% \tvalid: 97.47% \ttest: 97.43%\n",
618 |       "222 : cost: 0.0495527 \ttrain: 98.888% \tvalid: 97.48% \ttest: 97.43%\n",
619 |       "saving filters...\n",
620 |       "223 : cost: 0.0492739 \ttrain: 98.894% \tvalid: 97.49% \ttest: 97.44%\n",
621 |       "224 : cost: 0.0489974 \ttrain: 98.904% \tvalid: 97.5% \ttest: 97.44%\n",
622 |       "225 : cost: 0.048723 \ttrain: 98.914% \tvalid: 97.51% \ttest: 97.44%\n",
623 |       "saving filters...\n",
624 |       "226 : cost: 0.0484507 \ttrain: 98.926% \tvalid: 97.52% \ttest: 97.45%\n",
625 |       "227 : cost: 0.0481805 \ttrain: 98.942% \tvalid: 97.52% \ttest: 97.45%\n",
626 |       "228 : cost: 0.0479125 \ttrain: 98.946% \tvalid: 97.53% \ttest: 97.45%\n",
627 |       "saving filters...\n",
628 |       "229 : cost: 0.0476465 \ttrain: 98.956% \tvalid: 97.54% \ttest: 97.47%\n",
629 |       "230 : cost: 0.0473825 \ttrain: 98.97% \tvalid: 97.54% \ttest: 97.47%\n",
630 |       "231 : cost: 0.0471206 \ttrain: 98.98% \tvalid: 97.54% \ttest: 97.48%\n",
631 |       "saving filters...\n",
632 |       "232 : cost: 0.0468606 \ttrain: 98.99% \tvalid: 97.54% \ttest: 97.5%\n",
633 |       "233 : cost: 0.0466026 \ttrain: 98.996% \tvalid: 97.54% \ttest: 97.5%\n",
634 |       "234 : cost: 0.0463466 \ttrain: 99.004% \tvalid: 97.54% \ttest: 97.51%\n",
635 |       "saving filters...\n",
636 |       "235 : cost: 0.0460925 \ttrain: 99.014% \tvalid: 97.53% \ttest: 97.51%\n",
637 |       "236 : cost: 0.0458403 \ttrain: 99.02% \tvalid: 97.52% \ttest: 97.52%\n",
638 |       "237 : cost: 0.0455901 \ttrain: 99.026% \tvalid: 97.52% \ttest: 97.52%\n",
639 |       "saving filters...\n",
640 |       "238 : cost: 0.0453416 \ttrain: 99.032% \tvalid: 97.52% \ttest: 97.52%\n",
641 |       "239 : cost: 0.0450951 \ttrain: 99.04% \tvalid: 97.52% \ttest: 97.52%\n",
642 |       "240 : cost: 0.0448504 \ttrain: 99.048% \tvalid: 97.52% \ttest: 97.52%\n",
643 |       "saving filters...\n",
644 |       "241 : cost: 0.0446075 \ttrain: 99.06% \tvalid: 97.52% \ttest: 97.52%\n",
645 |       "242 : cost: 0.0443663 \ttrain: 99.064% \tvalid: 97.52% \ttest: 97.53%\n",
646 |       "243 : cost: 0.044127 \ttrain: 99.066% \tvalid: 97.52% \ttest: 97.54%\n",
647 |       "saving filters...\n",
648 |       "244 : cost: 0.0438895 \ttrain: 99.072% \tvalid: 97.53% \ttest: 97.54%\n",
649 |       "245 : cost: 0.0436537 \ttrain: 99.082% \tvalid: 97.54% \ttest: 97.54%\n",
650 |       "246 : cost: 0.0434195 \ttrain: 99.088% \tvalid: 97.53% \ttest: 97.54%\n",
651 |       "saving filters...\n",
652 |       "247 : cost: 0.0431871 \ttrain: 99.094% \tvalid: 97.54% \ttest: 97.54%\n",
653 |       "248 : cost: 0.0429565 \ttrain: 99.1% \tvalid: 97.55% \ttest: 97.54%\n",
654 |       "249 : cost: 0.0427274 \ttrain: 99.108% \tvalid: 97.55% \ttest: 97.54%\n",
655 |       "saving filters...\n",
656 |       "250 : cost: 0.0425001 \ttrain: 99.11% \tvalid: 97.56% \ttest: 97.55%\n",
657 |       "251 : cost: 0.0422743 \ttrain: 99.112% \tvalid: 97.56% \ttest: 97.55%\n",
658 |       "252 : cost: 0.0420502 \ttrain: 99.114% \tvalid: 97.56% \ttest: 97.56%\n",
659 |       "saving filters...\n",
660 |       "253 : cost: 0.0418277 \ttrain: 99.122% \tvalid: 97.56% \ttest: 97.57%\n",
661 |       "254 : cost: 0.0416068 \ttrain: 99.132% \tvalid: 97.56% \ttest: 97.57%\n",
662 |       "255 : cost: 0.0413875 \ttrain: 99.138% \tvalid: 97.58% \ttest: 97.58%\n",
663 |       "saving filters...\n",
664 |       "256 : cost: 0.0411697 \ttrain: 99.146% \tvalid: 97.59% \ttest: 97.58%\n",
665 |       "257 : cost: 0.0409535 \ttrain: 99.154% \tvalid: 97.59% \ttest: 97.61%\n",
666 |       "258 : cost: 0.0407388 \ttrain: 99.162% \tvalid: 97.6% \ttest: 97.61%\n",
667 |       "saving filters...\n",
668 |       "259 : cost: 0.0405257 \ttrain: 99.166% \tvalid: 97.6% \ttest: 97.6%\n",
669 |       "260 : cost: 0.040314 \ttrain: 99.17% \tvalid: 97.6% \ttest: 97.6%\n",
670 |       "261 : cost: 0.0401038 \ttrain: 99.174% \tvalid: 97.61% \ttest: 97.6%\n",
671 |       "saving filters...\n",
672 |       "262 : cost: 0.0398951 \ttrain: 99.184% \tvalid: 97.61% \ttest: 97.6%\n",
673 |       "263 : cost: 0.0396879 \ttrain: 99.19% \tvalid: 97.61% \ttest: 97.6%\n",
674 |       "264 : cost: 0.0394821 \ttrain: 99.194% \tvalid: 97.61% \ttest: 97.6%\n",
675 |       "saving filters...\n",
676 |       "265 : cost: 0.0392777 \ttrain: 99.204% \tvalid: 97.62% \ttest: 97.6%\n",
677 |       "266 : cost: 0.0390748 \ttrain: 99.206% \tvalid: 97.62% \ttest: 97.61%\n",
678 |       "267 : cost: 0.0388732 \ttrain: 99.214% \tvalid: 97.62% \ttest: 97.61%\n",
679 |       "saving filters...\n",
680 |       "268 : cost: 0.0386731 \ttrain: 99.22% \tvalid: 97.62% \ttest: 97.61%\n",
681 |       "269 : cost: 0.0384743 \ttrain: 99.23% \tvalid: 97.64% \ttest: 97.61%\n",
682 |       "270 : cost: 0.0382769 \ttrain: 99.248% \tvalid: 97.64% \ttest: 97.61%\n",
683 |       "saving filters...\n",
684 |       "271 : cost: 0.0380808 \ttrain: 99.25% \tvalid: 97.64% \ttest: 97.61%\n",
685 |       "272 : cost: 0.0378862 \ttrain: 99.252% \tvalid: 97.63% \ttest: 97.62%\n",
686 |       "273 : cost: 0.0376928 \ttrain: 99.272% \tvalid: 97.63% \ttest: 97.62%\n",
687 |       "saving filters...\n",
688 |       "274 : cost: 0.0375007 \ttrain: 99.274% \tvalid: 97.64% \ttest: 97.63%\n",
689 |       "275 : cost: 0.03731 \ttrain: 99.278% \tvalid: 97.64% \ttest: 97.63%\n",
690 |       "276 : cost: 0.0371205 \ttrain: 99.28% \tvalid: 97.64% \ttest: 97.63%\n",
691 |       "saving filters...\n",
692 |       "277 : cost: 0.0369324 \ttrain: 99.29% \tvalid: 97.65% \ttest: 97.63%\n",
693 |       "278 : cost: 0.0367454 \ttrain: 99.294% \tvalid: 97.65% \ttest: 97.63%\n",
694 |       "279 : cost: 0.0365598 \ttrain: 99.3% \tvalid: 97.65% \ttest: 97.64%\n",
695 |       "saving filters...\n",
696 |       "280 : cost: 0.0363754 \ttrain: 99.306% \tvalid: 97.65% \ttest: 97.64%\n",
697 |       "281 : cost: 0.0361922 \ttrain: 99.31% \tvalid: 97.64% \ttest: 97.65%\n",
698 |       "282 : cost: 0.0360103 \ttrain: 99.316% \tvalid: 97.65% \ttest: 97.65%\n",
699 |       "saving filters...\n",
700 |       "283 : cost: 0.0358296 \ttrain: 99.32% \tvalid: 97.65% \ttest: 97.65%\n",
701 |       "284 : cost: 0.0356501 \ttrain: 99.322% \tvalid: 97.65% \ttest: 97.65%\n",
702 |       "285 : cost: 0.0354718 \ttrain: 99.324% \tvalid: 97.65% \ttest: 97.65%\n",
703 |       "saving filters...\n",
704 |       "286 : cost: 0.0352947 \ttrain: 99.334% \tvalid: 97.65% \ttest: 97.66%\n",
705 |       "287 : cost: 0.0351187 \ttrain: 99.334% \tvalid: 97.66% \ttest: 97.66%\n",
706 |       "288 : cost: 0.0349439 \ttrain: 99.338% \tvalid: 97.66% \ttest: 97.66%\n",
707 |       "saving filters...\n",
708 |       "289 : cost: 0.0347703 \ttrain: 99.34% \tvalid: 97.66% \ttest: 97.68%\n",
709 |       "290 : cost: 0.0345977 \ttrain: 99.342% \tvalid: 97.66% \ttest: 97.69%\n",
710 |       "291 : cost: 0.0344264 \ttrain: 99.346% \tvalid: 97.66% \ttest: 97.69%\n",
711 |       "saving filters...\n",
712 |       "292 : cost: 0.0342562 \ttrain: 99.356% \tvalid: 97.66% \ttest: 97.69%\n",
713 |       "293 : cost: 0.0340871 \ttrain: 99.358% \tvalid: 97.66% \ttest: 97.69%\n",
714 |       "294 : cost: 0.033919 \ttrain: 99.362% \tvalid: 97.66% \ttest: 97.72%\n",
715 |       "saving filters...\n",
716 |       "295 : cost: 0.0337521 \ttrain: 99.368% \tvalid: 97.67% \ttest: 97.72%\n",
717 |       "296 : cost: 0.0335863 \ttrain: 99.37% \tvalid: 97.67% \ttest: 97.73%\n",
718 |       "297 : cost: 0.0334215 \ttrain: 99.378% \tvalid: 97.67% \ttest: 97.73%\n",
719 |       "saving filters...\n",
720 |       "298 : cost: 0.0332579 \ttrain: 99.388% \tvalid: 97.67% \ttest: 97.74%\n",
721 |       "299 : cost: 0.0330952 \ttrain: 99.39% \tvalid: 97.67% \ttest: 97.74%\n",
722 |       "300 : cost: 0.0329337 \ttrain: 99.392% \tvalid: 97.68% \ttest: 97.75%\n",
723 |       "saving filters...\n"
724 |      ]
725 |     }
726 |    ],
727 |    "source": [
728 |     "# Main training loop\n",
729 |     "batch_size = 100\n",
730 |     "epochs = 300\n",
731 |     "check_frequency = 3\n",
732 |     "\n",
733 |     "train_batches = len(train_x) / batch_size\n",
734 |     "valid_batches = len(valid_x) / batch_size\n",
735 |     "test_batches = len(test_x) / batch_size\n",
736 |     "\n",
737 |     "for epoch in range(epochs):\n",
738 |     "    print epoch+1, \":\",\n",
739 |     "    \n",
740 |     "    train_costs = []\n",
741 |     "    train_accuracy = []\n",
742 |     "    for i in range(train_batches):\n",
743 |     "        batch_x = train_x[i*batch_size:(i+1)*batch_size]\n",
744 |     "        batch_labels = train_y[i*batch_size:(i+1)*batch_size]\n",
745 |     "\n",
746 |     "        costs = f_train(batch_x, batch_labels)\n",
747 |     "        preds = f_test(batch_x)\n",
748 |     "        acc = sum(preds==batch_labels)/float(len(batch_labels))\n",
749 |     "        \n",
750 |     "        train_costs.append(costs)\n",
751 |     "        train_accuracy.append(acc)\n",
752 |     "    print \"cost:\", numpy.mean(train_costs), \"\\ttrain:\", str(numpy.mean(train_accuracy)*100)+\"%\",\n",
753 |     "    \n",
754 |     "    valid_accuracy = []\n",
755 |     "    for i in range(valid_batches):\n",
756 |     "        batch_x = valid_x[i*batch_size:(i+1)*batch_size]\n",
757 |     "        batch_labels = valid_y[i*batch_size:(i+1)*batch_size]\n",
758 |     "        \n",
759 |     "        preds = f_test(batch_x)\n",
760 |     "        acc = sum(preds==batch_labels)/float(len(batch_labels))\n",
761 |     "        \n",
762 |     "        valid_accuracy.append(acc)\n",
763 |     "    print \"\\tvalid:\", str(numpy.mean(valid_accuracy)*100)+\"%\",\n",
764 |     "    \n",
765 |     "    test_accuracy = []\n",
766 |     "    for i in range(test_batches):\n",
767 |     "        batch_x = test_x[i*batch_size:(i+1)*batch_size]\n",
768 |     "        batch_labels = test_y[i*batch_size:(i+1)*batch_size]\n",
769 |     "        \n",
770 |     "        preds = f_test(batch_x)\n",
771 |     "        acc = sum(preds==batch_labels)/float(len(batch_labels))\n",
772 |     "        \n",
773 |     "        test_accuracy.append(acc)\n",
774 |     "    print \"\\ttest:\", str(numpy.mean(test_accuracy)*100)+\"%\"\n",
775 |     "    \n",
776 |     "    if (epoch+1) % check_frequency == 0:\n",
777 |     "        print 'saving filters...'\n",
778 |     "        weight_filters = pil_img.fromarray(\n",
779 |     "                tile_raster_images(\n",
780 |     "                    W_x.get_value(borrow=True).T,\n",
781 |     "                    img_shape=(28, 28),\n",
782 |     "                    tile_shape=(20, 25),\n",
783 |     "                    tile_spacing=(1, 1)\n",
784 |     "                )\n",
785 |     "            )\n",
786 |     "        weight_filters.save(\"mlp_filters_%d.png\"%(epoch+1))"
787 |    ]
788 |   },
789 |   {
790 |    "cell_type": "code",
791 |    "execution_count": null,
792 |    "metadata": {
793 |     "collapsed": true
794 |    },
795 |    "outputs": [],
796 |    "source": []
797 |   }
798 |  ],
799 |  "metadata": {
800 |   "kernelspec": {
801 |    "display_name": "Python 2",
802 |    "language": "python",
803 |    "name": "python2"
804 |   },
805 |   "language_info": {
806 |    "codemirror_mode": {
807 |     "name": "ipython",
808 |     "version": 2
809 |    },
810 |    "file_extension": ".py",
811 |    "mimetype": "text/x-python",
812 |    "name": "python",
813 |    "nbconvert_exporter": "python",
814 |    "pygments_lexer": "ipython2",
815 |    "version": "2.7.6"
816 |   }
817 |  },
818 |  "nbformat": 4,
819 |  "nbformat_minor": 0
820 | }
821 | 


--------------------------------------------------------------------------------
/MLP_theano_with_comments.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "# This tutorial covers your simplest neural network: a multilayer perceptron (MLP)\n",
 12 |     "# Also known as feedforward neural network.\n",
 13 |     "# We will learn to classify MNIST handwritten digit images into their correct label (0-9).\n",
 14 |     "\n",
 15 |     "from IPython.display import Image\n",
 16 |     "# First, let's load our data and take a look!\n",
 17 |     "import pickle\n",
 18 |     "\n",
 19 |     "# Load our data \n",
 20 |     "# Download and unzip pickled version from here: http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz\n",
 21 |     "(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = pickle.load(open('data/mnist.pkl', 'r'))\n",
 22 |     "print \"Shapes:\"\n",
 23 |     "print train_x.shape, train_y.shape\n",
 24 |     "print valid_x.shape, valid_y.shape\n",
 25 |     "print test_x.shape, test_y.shape\n",
 26 |     "\n",
 27 |     "print \"--------------\"\n",
 28 |     "print \"Example input:\"\n",
 29 |     "print train_x[0]\n",
 30 |     "print \"Example label:\"\n",
 31 |     "print train_y[0]\n"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": null,
 37 |    "metadata": {
 38 |     "collapsed": false
 39 |    },
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "# Show example images - using tile_raster_images helper function from OpenDeep to get 28x28 image from 784 array.\n",
 43 |     "from utils import tile_raster_images\n",
 44 |     "from PIL import Image as pil_img\n",
 45 |     "\n",
 46 |     "input_images = train_x[:25]\n",
 47 |     "im = pil_img.fromarray(\n",
 48 |     "    tile_raster_images(input_images, \n",
 49 |     "                       img_shape=(28, 28), \n",
 50 |     "                       tile_shape=(1, 25),\n",
 51 |     "                       tile_spacing=(1, 1))\n",
 52 |     ")\n",
 53 |     "im.save(\"some_mnist_numbers.png\")\n",
 54 |     "Image(filename=\"some_mnist_numbers.png\")\n"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": null,
 60 |    "metadata": {
 61 |     "collapsed": false
 62 |    },
 63 |    "outputs": [],
 64 |    "source": [
 65 |     "# Cool, now we know a little about the input data, let's design the MLP to work with it!\n",
 66 |     "# An MLP looks like this: input -> hiddens -> output classification\n",
 67 |     "# Each stage is just a matrix multiplication with a nonlinear function applied after.\n",
 68 |     "\n",
 69 |     "# Your basic Theano imports.\n",
 70 |     "import theano\n",
 71 |     "import theano.tensor as T\n",
 72 |     "\n",
 73 |     "# Inputs are matrices where rows are examples and columns are pixels - so create a symbolic Theano matrix.\n",
 74 |     "x = T.matrix('x')\n"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": null,
 80 |    "metadata": {
 81 |     "collapsed": false
 82 |    },
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "# Now let's start building the equation for our MLP!\n",
 86 |     "\n",
 87 |     "# The first transformation is the input x -> hidden layer h.\n",
 88 |     "# We defined this transformation with h = tanh(x.dot(W_x) + b_h)\n",
 89 |     "# where the learnable model parameters are W_x and b_h.\n",
 90 |     "\n",
 91 |     "# Therefore, we will need a weights matrix W_x and a bias vector b_h.\n",
 92 |     "# W_x has shape (input_size, hidden_size) and b_h has shape (hidden_size,).\n",
 93 |     "# Initialization is important in deep learning; we want something random so the model doesn't get stuck early.\n",
 94 |     "# There are many papers on this subject, but for now we will just use a normal distribution with mean=0 and std=0.05.\n",
 95 |     "# Another good option for tanh layers is to use a uniform distribution with interval +- sqrt(6/sum(shape)).\n",
 96 |     "# These are hyperparameters to play with.\n",
 97 |     "# Bias starting as zero is fine.\n",
 98 |     "import numpy.random as rng\n",
 99 |     "W_x = numpy.asarray(rng.normal(loc=0.0, scale=.05, size=(28*28, 500)), dtype=theano.config.floatX)\n",
100 |     "b_h = numpy.zeros(shape=(500,), dtype=theano.config.floatX)\n",
101 |     "\n",
102 |     "# To update a variable used in an equation (for example, while learning), \n",
103 |     "# Theano needs it to be in a special wrapper called a shared variable.\n",
104 |     "# These are the model parameters for our first hidden layer!\n",
105 |     "W_x = theano.shared(W_x, name=\"W_x\")\n",
106 |     "b_h = theano.shared(b_h, name=\"b_h\")\n",
107 |     "\n",
108 |     "# Now, we can finally write the equation to give our symbolic hidden layer h!\n",
109 |     "h = T.tanh(\n",
110 |     "    T.dot(x, W_x) + b_h\n",
111 |     ")\n",
112 |     "\n",
113 |     "# Side note - if we used softmax instead of tanh for the activation, this would be performing logistic regression!\n"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": null,
119 |    "metadata": {
120 |     "collapsed": false
121 |    },
122 |    "outputs": [],
123 |    "source": [
124 |     "# We have the hidden layer h, let's put that softmax layer on top for classification output y!\n",
125 |     "\n",
126 |     "# Same deal as before, the transformation is defined as:\n",
127 |     "# y = softmax(h.dot(W_h) + b_y)\n",
128 |     "# where the learnable parameters are W_h and b_y.\n",
129 |     "# W_h has shape (hidden_size, output_size) and b_y has shape (output_size,).\n",
130 |     "\n",
131 |     "# We will use the same random initialization strategy as before.\n",
132 |     "W_h = numpy.asarray(rng.normal(loc=0.0, scale=.05, size=(500, 10)), dtype=theano.config.floatX)\n",
133 |     "b_y = numpy.zeros(shape=(10,), dtype=theano.config.floatX)\n",
134 |     "# Don't forget to make them shared variables!\n",
135 |     "W_h = theano.shared(W_h, name=\"W_h\")\n",
136 |     "b_y = theano.shared(b_y, name=\"b_y\")\n",
137 |     "\n",
138 |     "# Now write the equation for the output!\n",
139 |     "y = T.nnet.softmax(\n",
140 |     "    T.dot(h, W_h) + b_y\n",
141 |     ")\n",
142 |     "\n",
143 |     "# The output (due to softmax) is a vector of class probabilities.\n",
144 |     "# To get the output class 'guess' from the model, just take the index of the highest probability!\n",
145 |     "y_hat = T.argmax(y, axis=1)\n",
146 |     "\n",
147 |     "# That's everything! Just four model parameters and one input variable.\n"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "code",
152 |    "execution_count": null,
153 |    "metadata": {
154 |     "collapsed": false
155 |    },
156 |    "outputs": [],
157 |    "source": [
158 |     "# The variable y_hat represents the output of running our model, but we need a cost function to use for training.\n",
159 |     "# For a softmax (probability) output, we want to maximize the likelihood of P(Y=y|X).\n",
160 |     "# This means we want to minimize the negative log-likelihood cost! (For a primer, see machine learning Coursera.)\n",
161 |     "\n",
162 |     "# Cost functions always need the truth outputs to compare against (this is supervised learning).\n",
163 |     "# From before, we saw the labels were a vector of ints - so let's make a symbolic variable for this!\n",
164 |     "correct_labels = T.ivector(\"labels\")  # integer vector\n",
165 |     "\n",
166 |     "# Now we can compare our output probability from y with the true labels.\n",
167 |     "# Because the labels are integers, we will want to make an indexing mask to pick out the probabilities\n",
168 |     "# our model thought was the likelihood of the correct label.\n",
169 |     "log_likelihood = T.log(y)[T.arange(correct_labels.shape[0]), correct_labels]\n",
170 |     "# We use mean instead of sum to be less dependent on batch size (better for flexibility)\n",
171 |     "cost = -T.mean(log_likelihood)\n"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "code",
176 |    "execution_count": null,
177 |    "metadata": {
178 |     "collapsed": false
179 |    },
180 |    "outputs": [],
181 |    "source": [
182 |     "# Easiest way to train neural nets is with Stochastic Gradient Descent\n",
183 |     "# This takes each example, calculates the gradient, and changes the model parameters a small amount\n",
184 |     "# in the direction opposite to the gradient (i.e. downhill on the cost).\n",
185 |     "\n",
186 |     "# Fancier add-ons to stochastic gradient descent will reduce the learning rate over time, add a momentum\n",
187 |     "# factor to the parameters, etc.\n",
188 |     "\n",
189 |     "# Before we can start training, we need to know what the gradients are.\n",
190 |     "# Luckily we don't have to do any math! Theano has symbolic auto-differentiation which means it can\n",
191 |     "# calculate the gradients for arbitrary equations with respect to a cost and parameters.\n",
192 |     "parameters = [W_x, b_h, W_h, b_y]\n",
193 |     "gradients = T.grad(cost, parameters)\n",
194 |     "# Now gradients contains the list of derivatives: [d_cost/d_W_x, d_cost/d_b_h, d_cost/d_W_h, d_cost/d_b_y]\n",
195 |     "\n",
196 |     "# One last thing we need to do before training is to use these gradients to update the parameters!\n",
197 |     "# Remember how parameters are shared variables? Well, Theano uses something called updates\n",
198 |     "# which are just pairs of (shared_variable, new_variable_expression) to change its value.\n",
199 |     "# So, let's create these updates to show how we change the parameter values during training with gradients!\n",
200 |     "# We use a learning rate to make small steps over time.\n",
201 |     "learning_rate = 0.01\n",
202 |     "train_updates = [(param, param - learning_rate*gradient) for param, gradient in zip(parameters, gradients)]\n"
203 |    ]
204 |   },
205 |   {
206 |    "cell_type": "code",
207 |    "execution_count": null,
208 |    "metadata": {
209 |     "collapsed": false
210 |    },
211 |    "outputs": [],
212 |    "source": [
213 |     "# Now we can create a Theano function that takes in real inputs and trains our model.\n",
214 |     "f_train = theano.function(inputs=[x, correct_labels], outputs=cost, updates=train_updates, allow_input_downcast=True)\n",
215 |     "\n",
216 |     "# For testing purposes, we don't want to use updates to change the parameters - so create a separate function!\n",
217 |     "# We also care more about the output guesses, so let's return those instead of the cost.\n",
218 |     "# error = sum(T.neq(y_hat, correct_labels))/float(y_hat.shape[0])\n",
219 |     "f_test = theano.function(inputs=[x], outputs=y_hat, allow_input_downcast=True)\n"
220 |    ]
221 |   },
222 |   {
223 |    "cell_type": "code",
224 |    "execution_count": null,
225 |    "metadata": {
226 |     "collapsed": false
227 |    },
228 |    "outputs": [],
229 |    "source": [
230 |     "# Our training can begin!\n",
231 |     "# The two hyperparameters we have for this part are minibatch size (how many examples to process in parallel)\n",
232 |     "# and the total number of passes over all examples (epochs).\n",
233 |     "batch_size = 100\n",
234 |     "epochs = 30\n",
235 |     "\n",
236 |     "# Given our batch size, compute how many batches we can fit into each data set\n",
237 |     "train_batches = len(train_x) / batch_size\n",
238 |     "valid_batches = len(valid_x) / batch_size\n",
239 |     "test_batches = len(test_x) / batch_size\n",
240 |     "\n",
241 |     "# Our main training loop!\n",
242 |     "for epoch in range(epochs):\n",
243 |     "    print epoch+1, \":\",\n",
244 |     "    \n",
245 |     "    train_costs = []\n",
246 |     "    train_accuracy = []\n",
247 |     "    for i in range(train_batches):\n",
248 |     "        # Grab our minibatch of examples from the whole train set.\n",
249 |     "        batch_x = train_x[i*batch_size:(i+1)*batch_size]\n",
250 |     "        batch_labels = train_y[i*batch_size:(i+1)*batch_size]\n",
251 |     "        # Compute the costs from the train function (which also updates the parameters)\n",
252 |     "        costs = f_train(batch_x, batch_labels)\n",
253 |     "        # Compute the predictions from the test function (which does not update parameters)\n",
254 |     "        preds = f_test(batch_x)\n",
255 |     "        # Compute the accuracy of our predictions against the correct batch labels\n",
256 |     "        acc = sum(preds==batch_labels)/float(len(batch_labels))\n",
257 |     "        \n",
258 |     "        train_costs.append(costs)\n",
259 |     "        train_accuracy.append(acc)\n",
260 |     "    # Show the mean cost and accuracy across minibatches (the entire train set!)\n",
261 |     "    print \"cost:\", numpy.mean(train_costs), \"\\ttrain:\", str(numpy.mean(train_accuracy)*100)+\"%\",\n",
262 |     "    \n",
263 |     "    valid_accuracy = []\n",
264 |     "    for i in range(valid_batches):\n",
265 |     "        batch_x = valid_x[i*batch_size:(i+1)*batch_size]\n",
266 |     "        batch_labels = valid_y[i*batch_size:(i+1)*batch_size]\n",
267 |     "        \n",
268 |     "        preds = f_test(batch_x)\n",
269 |     "        acc = sum(preds==batch_labels)/float(len(batch_labels))\n",
270 |     "        \n",
271 |     "        valid_accuracy.append(acc)\n",
272 |     "    print \"\\tvalid:\", str(numpy.mean(valid_accuracy)*100)+\"%\",\n",
273 |     "    \n",
274 |     "    test_accuracy = []\n",
275 |     "    for i in range(test_batches):\n",
276 |     "        batch_x = test_x[i*batch_size:(i+1)*batch_size]\n",
277 |     "        batch_labels = test_y[i*batch_size:(i+1)*batch_size]\n",
278 |     "        \n",
279 |     "        preds = f_test(batch_x)\n",
280 |     "        acc = sum(preds==batch_labels)/float(len(batch_labels))\n",
281 |     "        \n",
282 |     "        test_accuracy.append(acc)\n",
283 |     "    print \"\\ttest:\", str(numpy.mean(test_accuracy)*100)+\"%\""
284 |    ]
285 |   },
286 |   {
287 |    "cell_type": "code",
288 |    "execution_count": null,
289 |    "metadata": {
290 |     "collapsed": true
291 |    },
292 |    "outputs": [],
293 |    "source": []
294 |   }
295 |  ],
296 |  "metadata": {
297 |   "kernelspec": {
298 |    "display_name": "Python 2",
299 |    "language": "python",
300 |    "name": "python2"
301 |   },
302 |   "language_info": {
303 |    "codemirror_mode": {
304 |     "name": "ipython",
305 |     "version": 2
306 |    },
307 |    "file_extension": ".py",
308 |    "mimetype": "text/x-python",
309 |    "name": "python",
310 |    "nbconvert_exporter": "python",
311 |    "pygments_lexer": "ipython2",
312 |    "version": "2.7.6"
313 |   }
314 |  },
315 |  "nbformat": 4,
316 |  "nbformat_minor": 0
317 | }
318 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # intro_deep
2 | Introduction tutorials to deep learning with Theano and OpenDeep
3 | 
4 | ## Dependencies
5 | Install [OpenDeep](https://github.com/vitruvianscience/opendeep) and its dependencies. GPU support is highly recommended.
6 | You need at least version 0.0.9a of OpenDeep.
7 | 


--------------------------------------------------------------------------------
/RNN-GSN_opendeep.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "# Imports!\n",
 12 |     "# standard libraries\n",
 13 |     "import logging\n",
 14 |     "import math\n",
 15 |     "# third party\n",
 16 |     "import theano\n",
 17 |     "# internal references\n",
 18 |     "from opendeep.data import MNIST\n",
 19 |     "from opendeep.log import config_root_logger\n",
 20 |     "from opendeep.models import Model, RNN, GSN\n",
 21 |     "from opendeep.optimization import RMSProp\n",
 22 |     "\n",
 23 |     "config_root_logger()\n"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": null,
 29 |    "metadata": {
 30 |     "collapsed": true
 31 |    },
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "# Let's define a new model combining the RNN and GSNs.\n",
 35 |     "class RNN_GSN(Model):\n",
 36 |     "    def __init__(self):\n",
 37 |     "        super(RNN_GSN, self).__init__()\n",
 38 |     "\n",
 39 |     "        gsn_hiddens = 500\n",
 40 |     "        gsn_layers = 2\n",
 41 |     "\n",
 42 |     "        # RNN that takes in images (3D sequences) and outputs gsn hiddens (3D sequence of them)\n",
 43 |     "        self.rnn = RNN(\n",
 44 |     "            input_size=28 * 28,\n",
 45 |     "            hidden_size=100,\n",
 46 |     "            # needs to output hidden units for odd layers of GSN\n",
 47 |     "            output_size=gsn_hiddens * (math.ceil(gsn_layers/2.)),\n",
 48 |     "            layers=1,\n",
 49 |     "            activation='tanh',\n",
 50 |     "            hidden_activation='relu',\n",
 51 |     "            weights_init='uniform', weights_interval='montreal',\n",
 52 |     "            r_weights_init='identity'\n",
 53 |     "        )\n",
 54 |     "\n",
 55 |     "        # Create the GSN that will encode the input space\n",
 56 |     "        gsn = GSN(\n",
 57 |     "            input_size=28 * 28,\n",
 58 |     "            hidden_size=gsn_hiddens,\n",
 59 |     "            layers=gsn_layers,\n",
 60 |     "            walkbacks=4,\n",
 61 |     "            visible_activation='sigmoid',\n",
 62 |     "            hidden_activation='tanh',\n",
 63 |     "            image_height=28,\n",
 64 |     "            image_width=28\n",
 65 |     "        )\n",
 66 |     "        # grab the input arguments\n",
 67 |     "        gsn_args = gsn.args.copy()\n",
 68 |     "        # grab the parameters it initialized\n",
 69 |     "        gsn_params = gsn.get_params()\n",
 70 |     "\n",
 71 |     "        # Now hook the two up! RNN should output hiddens for GSN into a 3D tensor (1 set for each timestep)\n",
 72 |     "        # Therefore, we need to use scan to create the GSN reconstruction for each timestep given the hiddens\n",
 73 |     "        def step(hiddens, x):\n",
 74 |     "            gsn = GSN(\n",
 75 |     "                inputs_hook=(28*28, x),\n",
 76 |     "                hiddens_hook=(gsn_hiddens, hiddens),\n",
 77 |     "                params_hook=(gsn_params),\n",
 78 |     "                **gsn_args\n",
 79 |     "            )\n",
 80 |     "            # return the reconstruction and cost!\n",
 81 |     "            return gsn.get_outputs(), gsn.get_train_cost()\n",
 82 |     "\n",
 83 |     "        (outputs, costs), scan_updates = theano.scan(\n",
 84 |     "            fn=lambda h, x: step(h, x),\n",
 85 |     "            sequences=[self.rnn.output, self.rnn.input],\n",
 86 |     "            outputs_info=[None, None]\n",
 87 |     "        )\n",
 88 |     "\n",
 89 |     "        self.outputs = outputs\n",
 90 |     "\n",
 91 |     "        self.updates = dict()\n",
 92 |     "        self.updates.update(self.rnn.get_updates())\n",
 93 |     "        self.updates.update(scan_updates)\n",
 94 |     "\n",
 95 |     "        self.cost = costs.sum()\n",
 96 |     "        self.params = gsn_params + self.rnn.get_params()\n",
 97 |     "\n",
 98 |     "    # Model functions necessary for training\n",
 99 |     "    def get_inputs(self):\n",
100 |     "        return self.rnn.get_inputs()\n",
101 |     "    def get_params(self):\n",
102 |     "        return self.params\n",
103 |     "    def get_train_cost(self):\n",
104 |     "        return self.cost\n",
105 |     "    def get_updates(self):\n",
106 |     "        return self.updates\n",
107 |     "    def get_outputs(self):\n",
108 |     "        return self.outputs\n",
109 |     "    "
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": null,
115 |    "metadata": {
116 |     "collapsed": false
117 |    },
118 |    "outputs": [],
119 |    "source": [
120 |     "# Now we can instantiate and train the model!\n",
121 |     "rnn_gsn = RNN_GSN()\n",
122 |     "\n",
123 |     "# data! (needs to be 3d for rnn).\n",
124 |     "mnist = MNIST(sequence_number=1, seq_3d=True, seq_length=30)\n",
125 |     "\n",
126 |     "# optimizer!\n",
127 |     "optimizer = RMSProp(\n",
128 |     "    model=rnn_gsn,\n",
129 |     "    dataset=mnist,\n",
130 |     "    epochs=500,\n",
131 |     "    batch_size=50,\n",
132 |     "    save_freq=10,\n",
133 |     "    stop_patience=30,\n",
134 |     "    stop_threshold=.9995,\n",
135 |     "    learning_rate=1e-6,\n",
136 |     "    decay=.95,\n",
137 |     "    max_scaling=1e5,\n",
138 |     "    grad_clip=5.,\n",
139 |     "    hard_clip=False\n",
140 |     ")\n",
141 |     "# train!\n",
142 |     "optimizer.train()\n"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "code",
147 |    "execution_count": null,
148 |    "metadata": {
149 |     "collapsed": true
150 |    },
151 |    "outputs": [],
152 |    "source": []
153 |   }
154 |  ],
155 |  "metadata": {
156 |   "kernelspec": {
157 |    "display_name": "Python 2",
158 |    "language": "python",
159 |    "name": "python2"
160 |   },
161 |   "language_info": {
162 |    "codemirror_mode": {
163 |     "name": "ipython",
164 |     "version": 2
165 |    },
166 |    "file_extension": ".py",
167 |    "mimetype": "text/x-python",
168 |    "name": "python",
169 |    "nbconvert_exporter": "python",
170 |    "pygments_lexer": "ipython2",
171 |    "version": "2.7.6"
172 |   }
173 |  },
174 |  "nbformat": 4,
175 |  "nbformat_minor": 0
176 | }
177 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | """
  2 | (Copied from OpenDeep https://github.com/vitruvianscience/opendeep)
  3 | 
  4 | This module contains utility functions that are not connected in any
  5 | way to the networks presented in the tutorials, but rather help in
  6 | processing the outputs into a more understandable form.
  7 | 
  8 | For example, ``tile_raster_images`` helps in generating an easy-to-grasp
  9 | image from a set of samples or weights.
 10 | 
 11 | Written by Li Yao (University of Montreal)
 12 | https://github.com/yaoli/GSN
 13 | """
 14 | 
 15 | import numpy
 16 | 
 17 | def scale_to_unit_interval(ndar, eps=1e-8):
 18 |     """
 19 |     Scales all values in the ndarray 'ndar' to be between 0 and 1.
 20 | 
 21 |     Parameters
 22 |     ----------
 23 |     ndar : numpy.ndarray
 24 |         The input array to scale values.
 25 |     eps : float
 26 |         Small value to avoid divide-by-zero when scaling.
 27 | 
 28 |     Returns
 29 |     -------
 30 |     numpy.ndarray
 31 |         The input array scaled to be between 0 and 1.
 32 |     """
 33 |     ndar = ndar.copy()
 34 |     ndar -= ndar.min()
 35 |     ndar *= 1.0 / (ndar.max() + eps)
 36 |     return ndar
 37 | 
 38 | def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0),
 39 |                        scale_rows_to_unit_interval=True,
 40 |                        output_pixel_vals=True):
 41 |     """   
 42 |     Transform an array with one flattened image per row, into an array in
 43 |     which images are reshaped and layed out like tiles on a floor.
 44 | 
 45 |     This function is useful for visualizing datasets whose rows are images,
 46 |     and also columns of matrices for transforming those rows
 47 |     (such as the first layer of a neural net).
 48 | 
 49 |     Parameters
 50 |     ----------
 51 |     X : 2D ndarray or a tuple of 4 channels, elements of which can be 2D ndarrays or None
 52 |         A 2D array in which every row is a flattened image.
 53 |     img_shape : tuple
 54 |         The original (height, width) shape of each image.
 55 |     tile_shape : tuple
 56 |         The number of images to tile (rows, cols).
 57 |     tile_spacing : tuple
 58 |         The amount of pixels to put between image tiles (like a border size).
 59 |     scale_rows_to_unit_interval : bool
 60 |         If the values need to be scaled before being plotted to [0,1] or not.
 61 |     output_pixel_vals : bool
 62 |         If output should be pixel values (i.e. int8 values) or floats.
 63 | 
 64 |     Returns
 65 |     -------
 66 |     2D array
 67 |         Array suitable for viewing as an image. (See:`PIL.Image.fromarray`.)
 68 |     """
 69 | 
 70 |     assert len(img_shape) == 2
 71 |     assert len(tile_shape) == 2
 72 |     assert len(tile_spacing) == 2
 73 | 
 74 |     # The expression below can be re-written in a more C style as
 75 |     # follows :
 76 |     #
 77 |     # out_shape    = [0,0]
 78 |     # out_shape[0] = (img_shape[0]+tile_spacing[0])*tile_shape[0] -
 79 |     #                tile_spacing[0]
 80 |     # out_shape[1] = (img_shape[1]+tile_spacing[1])*tile_shape[1] -
 81 |     #                tile_spacing[1]
 82 |     out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp
 83 |                         in zip(img_shape, tile_shape, tile_spacing)]
 84 | 
 85 |     if isinstance(X, tuple):
 86 |         assert len(X) == 4
 87 |         # Create an output numpy ndarray to store the image
 88 |         if output_pixel_vals:
 89 |             out_array = numpy.zeros((out_shape[0], out_shape[1], 4),
 90 |                                     dtype='uint8')
 91 |         else:
 92 |             out_array = numpy.zeros((out_shape[0], out_shape[1], 4),
 93 |                                     dtype=X.dtype)
 94 | 
 95 |         #colors default to 0, alpha defaults to 1 (opaque)
 96 |         if output_pixel_vals:
 97 |             channel_defaults = [0, 0, 0, 255]
 98 |         else:
 99 |             channel_defaults = [0., 0., 0., 1.]
100 | 
101 |         for i in xrange(4):
102 |             if X[i] is None:
103 |                 # if channel is None, fill it with zeros of the correct
104 |                 # dtype
105 |                 dt = out_array.dtype
106 |                 if output_pixel_vals:
107 |                     dt = 'uint8'
108 |                 out_array[:, :, i] = numpy.zeros(out_shape,
109 |                         dtype=dt) + channel_defaults[i]
110 |             else:
111 |                 # use a recurrent call to run the channel and store it
112 |                 # in the output
113 |                 out_array[:, :, i] = tile_raster_images(
114 |                     X[i], img_shape, tile_shape, tile_spacing,
115 |                     scale_rows_to_unit_interval, output_pixel_vals)
116 |         return out_array
117 | 
118 |     else:
119 |         # if we are dealing with only one channel
120 |         H, W = img_shape
121 |         Hs, Ws = tile_spacing
122 | 
123 |         # generate a matrix to store the output
124 |         dt = X.dtype
125 |         if output_pixel_vals:
126 |             dt = 'uint8'
127 |         out_array = numpy.zeros(out_shape, dtype=dt)
128 | 
129 |         for tile_row in xrange(tile_shape[0]):
130 |             for tile_col in xrange(tile_shape[1]):
131 |                 if tile_row * tile_shape[1] + tile_col < X.shape[0]:
132 |                     this_x = X[tile_row * tile_shape[1] + tile_col]
133 |                     if scale_rows_to_unit_interval:
134 |                         # if we should scale values to be between 0 and 1
135 |                         # do this by calling the `scale_to_unit_interval`
136 |                         # function
137 |                         this_img = scale_to_unit_interval(
138 |                             this_x.reshape(img_shape))
139 |                     else:
140 |                         this_img = this_x.reshape(img_shape)
141 |                     # add the slice to the corresponding position in the
142 |                     # output array
143 |                     c = 1
144 |                     if output_pixel_vals:
145 |                         c = 255
146 |                     out_array[
147 |                         tile_row * (H + Hs): tile_row * (H + Hs) + H,
148 |                         tile_col * (W + Ws): tile_col * (W + Ws) + W
149 |                         ] = this_img * c
150 |         return out_array


--------------------------------------------------------------------------------