├── .gitignore ├── .ipynb_checkpoints ├── DAE_opendeep-checkpoint.ipynb ├── DAE_theano-checkpoint.ipynb ├── DAE_theano_with_comments-checkpoint.ipynb ├── MLP_opendeep-checkpoint.ipynb ├── MLP_theano-checkpoint.ipynb ├── MLP_theano_with_comments-checkpoint.ipynb └── RNN-GSN_opendeep-checkpoint.ipynb ├── DAE_opendeep.ipynb ├── DAE_theano.ipynb ├── DAE_theano_with_comments.ipynb ├── LICENSE ├── MLP_opendeep.ipynb ├── MLP_theano.ipynb ├── MLP_theano_with_comments.ipynb ├── README.md ├── RNN-GSN_opendeep.ipynb └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | outputs/ 3 | datasets/ 4 | 5 | *.pkl 6 | *.png 7 | *.jpg 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | env/ 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *,cover 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | 61 | # Sphinx documentation 62 | docs/_build/ 63 | 64 | # PyBuilder 65 | target/ 66 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/DAE_opendeep-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# imports and logger!\n", 12 | "from opendeep.log import config_root_logger\n", 13 | "from opendeep.models import GSN\n", 14 | "from opendeep.optimization import SGD\n", 15 | "from opendeep.data import MNIST\n", 16 | "from opendeep.utils.misc import closest_to_square_factors\n", 17 | "from PIL import Image as pil_img\n", 18 | "from opendeep.utils.image import tile_raster_images\n", 19 | "import numpy\n", 20 | "\n", 21 | "config_root_logger()" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": { 28 | "collapsed": false 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "# A denoising autoencoder (DAE) is a special case of a generative stochastic network (GSN).\n", 33 | "# GSN's can have multiple denoising layers that interact with each other both above and below.\n", 34 | "dae = GSN(\n", 35 | " input_size=28*28,\n", 36 | " hidden_size=1000,\n", 37 | " visible_activation='sigmoid',\n", 38 | " hidden_activation='tanh',\n", 39 | " layers=1,\n", 40 | " walkbacks=3,\n", 41 | " input_noise='salt_and_pepper',\n", 42 | " input_noise_level=0.3\n", 43 | ")\n" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | 
"execution_count": null, 49 | "metadata": { 50 | "collapsed": false 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "# Create the MNIST data object\n", 55 | "mnist = MNIST(concat_train_valid=True)\n", 56 | "\n", 57 | "# Create the optimizer object\n", 58 | "optimizer = SGD(dataset=mnist,\n", 59 | " epochs=40, \n", 60 | " batch_size=100, \n", 61 | " learning_rate=.25,\n", 62 | " lr_decay='exponential',\n", 63 | " lr_decay_factor=.995,\n", 64 | " momentum=.5,\n", 65 | " nesterov_momentum=False)\n" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": { 72 | "collapsed": false 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "# Train the model with the optimizer on the mnist dataset!\n", 77 | "dae.train(optimizer)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "# Run some numbers to see the output\n", 89 | "n_examples = 100\n", 90 | "xs_test = mnist.test_inputs[:n_examples]\n", 91 | "noisy_xs_test = dae.f_noise(xs_test)\n", 92 | "reconstructed = dae.run(noisy_xs_test)\n", 93 | "# Concatenate stuff\n", 94 | "stacked = numpy.vstack(\n", 95 | " [numpy.vstack([xs_test[i * 10: (i + 1) * 10],\n", 96 | " noisy_xs_test[i * 10: (i + 1) * 10],\n", 97 | " reconstructed[i * 10: (i + 1) * 10]])\n", 98 | " for i in range(10)])\n", 99 | "number_reconstruction = pil_img.fromarray(\n", 100 | " tile_raster_images(stacked, (dae.image_height, dae.image_width), (10, 30), (1, 1))\n", 101 | ")\n", 102 | "\n", 103 | "number_reconstruction.save(\"dae_opendeep_test.png\")\n", 104 | "\n", 105 | "# Construct image from the weight matrix\n", 106 | "image = pil_img.fromarray(\n", 107 | " tile_raster_images(\n", 108 | " X=dae.weights_list[0].get_value(borrow=True).T,\n", 109 | " img_shape=(28, 28),\n", 110 | " tile_shape=closest_to_square_factors(dae.layer_sizes[1]),\n", 111 | " tile_spacing=(1, 1)\n", 112 | " )\n", 113 | ")\n", 114 
| "image.save(\"dae_opendeep_filters.png\")\n", 115 | "print \"Done!\"" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "collapsed": true 123 | }, 124 | "outputs": [], 125 | "source": [] 126 | } 127 | ], 128 | "metadata": { 129 | "kernelspec": { 130 | "display_name": "Python 2", 131 | "language": "python", 132 | "name": "python2" 133 | }, 134 | "language_info": { 135 | "codemirror_mode": { 136 | "name": "ipython", 137 | "version": 2 138 | }, 139 | "file_extension": ".py", 140 | "mimetype": "text/x-python", 141 | "name": "python", 142 | "nbconvert_exporter": "python", 143 | "pygments_lexer": "ipython2", 144 | "version": "2.7.6" 145 | } 146 | }, 147 | "nbformat": 4, 148 | "nbformat_minor": 0 149 | } 150 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/DAE_theano-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# This tutorial is meant to be done after MLP_theano_with_comments.ipynb\n", 12 | "\n", 13 | "# We are working with MNIST again, this time no labels are necessary - \n", 14 | "# the denoising autoencoder (DAE) is an unsupervised model that tries to reconstruct the original input.\n", 15 | "\n", 16 | "# All imports up here this time\n", 17 | "import pickle\n", 18 | "import numpy\n", 19 | "import numpy.random as rng\n", 20 | "import theano\n", 21 | "import theano.tensor as T\n", 22 | "import theano.sandbox.rng_mrg as RNG_MRG\n", 23 | "from utils import tile_raster_images\n", 24 | "from PIL import Image as pil_img\n", 25 | "from IPython.display import Image\n", 26 | "\n", 27 | "# Load our data \n", 28 | "# Download and unzip pickled version from here: http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz\n", 29 | "(train_x, 
_), (valid_x, _), (test_x, _) = pickle.load(open('data/mnist.pkl', 'r'))\n", 30 | "print \"Shapes:\"\n", 31 | "print train_x.shape\n", 32 | "print valid_x.shape\n", 33 | "print test_x.shape" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "# We can specify any hyperparameters to play with up here:\n", 45 | "input_size = 784 # 28x28 images\n", 46 | "hidden_size = 1000\n", 47 | "w_mean = 0.0\n", 48 | "w_std = 0.05\n", 49 | "w_interval = numpy.sqrt(6. / (input_size + hidden_size))\n", 50 | "noise = 0.3\n", 51 | "walkbacks = 3\n", 52 | "learning_rate = 0.25\n", 53 | "lr_decay = .985\n", 54 | "batch_size = 100\n", 55 | "epochs = 200\n", 56 | "check_frequency = 10\n", 57 | "\n", 58 | "# To make the organization better, lets define all the variables and parameters here.\n", 59 | "x = T.matrix('x')\n", 60 | "# W_x = numpy.asarray(rng.normal(loc=w_mean, scale=w_std, size=(input_size, hidden_size)), dtype=theano.config.floatX)\n", 61 | "W_x = numpy.asarray(rng.uniform(low=-w_interval, high=w_interval, size=(input_size, hidden_size)), dtype=theano.config.floatX)\n", 62 | "W_x = theano.shared(W_x, \"W_x\")\n", 63 | "\n", 64 | "b_x = numpy.zeros((input_size,), dtype=theano.config.floatX)\n", 65 | "b_h = numpy.zeros((hidden_size,), dtype=theano.config.floatX)\n", 66 | "b_x = theano.shared(b_x, \"b_x\")\n", 67 | "b_h = theano.shared(b_h, \"b_h\")\n" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": { 74 | "collapsed": false 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "# Now for the most important part of a denoising autoencoder - making the input noisy!\n", 79 | "# Noise acts as regularization so the autoencoder doesn't just memorize the training set.\n", 80 | "# This makes it more effective for test data by reducing overfitting.\n", 81 | "noise_switch = theano.shared(1, \"noise_switch\")\n", 82 | "\n", 83 | 
"theano_rng = RNG_MRG.MRG_RandomStreams(1)\n", 84 | "def salt_and_pepper(variable):\n", 85 | " mask = theano_rng.binomial(size=variable.shape, n=1, p=(1-noise), dtype=theano.config.floatX)\n", 86 | " saltpepper = theano_rng.binomial(size=variable.shape, n=1, p=0.5, dtype=theano.config.floatX)\n", 87 | " ones = T.eq(mask, 0) * saltpepper\n", 88 | " noisy = variable*mask + ones\n", 89 | " return T.switch(noise_switch,\n", 90 | " noisy,\n", 91 | " variable)\n" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "# Now we are ready to create the computation graph!\n", 103 | "# Remember it is noisy_x -> hiddens -> x -> hiddens -> x .....\n", 104 | "\n", 105 | "inputs=[x]\n", 106 | "for walkback in range(walkbacks):\n", 107 | " noisy_x = salt_and_pepper(inputs[-1])\n", 108 | "\n", 109 | " h = T.tanh(\n", 110 | " T.dot(noisy_x, W_x) + b_h\n", 111 | " )\n", 112 | "\n", 113 | " reconstruction = T.nnet.sigmoid(\n", 114 | " T.dot(h, W_x.T) + b_x\n", 115 | " )\n", 116 | "\n", 117 | " inputs.append(reconstruction)\n", 118 | " \n", 119 | "reconstructions = inputs[1:]\n" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": { 126 | "collapsed": false 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "# The output of our computation graph is the last reconstructed input in the Gibbs chain.\n", 131 | "output = reconstructions[-1]\n", 132 | "\n", 133 | "# Our cost function is now the reconstruction error between all reconstructions and the original input.\n", 134 | "cost = numpy.sum([T.mean(T.nnet.binary_crossentropy(recon, x)) for recon in reconstructions])\n" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "collapsed": false 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "parameters = [W_x, b_h, b_x]\n", 146 | "gradients = T.grad(cost, 
parameters)\n", 147 | "\n", 148 | "lr = theano.shared(numpy.asarray(learning_rate, dtype='float32'), 'lr')\n", 149 | "train_updates = [(param, param - lr*gradient) for param, gradient in zip(parameters, gradients)]\n" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "collapsed": false 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "# Compile our training and testing functions like before!\n", 161 | "# Train function updates the parameters and returns the total train cost to monitor.\n", 162 | "f_train = theano.function(\n", 163 | " inputs=[x], \n", 164 | " outputs=cost, \n", 165 | " updates=train_updates, \n", 166 | " allow_input_downcast=True\n", 167 | ")\n", 168 | "\n", 169 | "# Our test function returns the final reconstruction; no scan is used here, so it needs no explicit updates.\n", 170 | "f_test = theano.function(\n", 171 | " inputs=[x], \n", 172 | " outputs=output,\n", 173 | " allow_input_downcast=True\n", 174 | ")\n" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "collapsed": false 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "# That's it! Now perform SGD like before.\n", 186 | "# Main training loop\n", 187 | "\n", 188 | "train_batches = len(train_x) / batch_size\n", 189 | "\n", 190 | "try:\n", 191 | " for epoch in range(epochs):\n", 192 | " print epoch+1,\":\",\n", 193 | "\n", 194 | " # Don't forget to turn on our noise switch for training! 
Just set the shared variable to 1 (True)\n", 195 | " noise_switch.set_value(1.)\n", 196 | "\n", 197 | " train_costs = []\n", 198 | " for i in range(train_batches):\n", 199 | " batch_x = train_x[i*batch_size:(i+1)*batch_size]\n", 200 | "\n", 201 | " costs = f_train(batch_x)\n", 202 | "\n", 203 | " train_costs.append(costs)\n", 204 | " print \"cost:\", numpy.mean(train_costs),\n", 205 | " \n", 206 | " old_lr = lr.get_value()\n", 207 | " print \"\\tlearning rate:\", old_lr\n", 208 | " new_lr = numpy.asarray(old_lr * lr_decay, dtype='float32')\n", 209 | " lr.set_value(new_lr)\n", 210 | "\n", 211 | " if (epoch+1) % check_frequency == 0:\n", 212 | " print \"Saving images...\"\n", 213 | " train_recons = f_test(train_x[:25])\n", 214 | " train_stacked = numpy.vstack(\n", 215 | " [numpy.vstack([\n", 216 | " train_x[i*5:(i+1)*5],\n", 217 | " train_recons[i*5:(i+1)*5]\n", 218 | " ])\n", 219 | " for i in range(5)]\n", 220 | " )\n", 221 | " train_image = pil_img.fromarray(\n", 222 | " tile_raster_images(train_stacked, (28, 28), (5, 10), (1, 1))\n", 223 | " )\n", 224 | " train_image.save(\"dae_train_%d.png\"%(epoch+1))\n", 225 | " \n", 226 | " # For validation, let's run a few images through and see the reconstruction \n", 227 | " # (with the noise from training still added)\n", 228 | " valid_recons = f_test(valid_x[:25])\n", 229 | " # Use the tile_raster_image helper function to rearrange the matrix into a 5x10 image of digits\n", 230 | " # (Two 5x5 images next to each other - the first the inputs, the second the reconstructions.)\n", 231 | " valid_stacked = numpy.vstack(\n", 232 | " [numpy.vstack([\n", 233 | " valid_x[i*5:(i+1)*5],\n", 234 | " valid_recons[i*5:(i+1)*5]\n", 235 | " ])\n", 236 | " for i in range(5)]\n", 237 | " )\n", 238 | " valid_image = pil_img.fromarray(\n", 239 | " # helper from utils.py\n", 240 | " tile_raster_images(valid_stacked, (28, 28), (5, 10), (1, 1))\n", 241 | " )\n", 242 | " valid_image.save(\"dae_valid_%d.png\"%(epoch+1))\n", 243 | "\n", 244 | " # 
Now do the same for test, but don't add any noise\n", 245 | " # This means set the noise switches to 0. (False)\n", 246 | " noise_switch.set_value(0.)\n", 247 | "\n", 248 | " test_recons = f_test(test_x[:25])\n", 249 | " test_stacked = numpy.vstack(\n", 250 | " [numpy.vstack([\n", 251 | " test_x[i*5:(i+1)*5],\n", 252 | " test_recons[i*5:(i+1)*5]\n", 253 | " ])\n", 254 | " for i in range(5)]\n", 255 | " )\n", 256 | " test_image = pil_img.fromarray(\n", 257 | " tile_raster_images(test_stacked, (28, 28), (5, 10), (1, 1))\n", 258 | " )\n", 259 | " test_image.save(\"dae_test_%d.png\"%(epoch+1))\n", 260 | " \n", 261 | " weight_filters = pil_img.fromarray(\n", 262 | " tile_raster_images(\n", 263 | " W_x.get_value(borrow=True).T,\n", 264 | " img_shape=(28, 28),\n", 265 | " tile_shape=(25, 40),\n", 266 | " tile_spacing=(1, 1)\n", 267 | " )\n", 268 | " )\n", 269 | " weight_filters.save(\"dae_filters_%d.png\"%(epoch+1))\n", 270 | "except KeyboardInterrupt:\n", 271 | " pass " 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": { 278 | "collapsed": true 279 | }, 280 | "outputs": [], 281 | "source": [] 282 | } 283 | ], 284 | "metadata": { 285 | "kernelspec": { 286 | "display_name": "Python 2", 287 | "language": "python", 288 | "name": "python2" 289 | }, 290 | "language_info": { 291 | "codemirror_mode": { 292 | "name": "ipython", 293 | "version": 2 294 | }, 295 | "file_extension": ".py", 296 | "mimetype": "text/x-python", 297 | "name": "python", 298 | "nbconvert_exporter": "python", 299 | "pygments_lexer": "ipython2", 300 | "version": "2.7.6" 301 | } 302 | }, 303 | "nbformat": 4, 304 | "nbformat_minor": 0 305 | } 306 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/DAE_theano_with_comments-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | 
"metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# This tutorial is meant to be done after MLP_theano_with_comments.ipynb\n", 12 | "\n", 13 | "# We are working with MNIST again, this time no labels are necessary - \n", 14 | "# the denoising autoencoder (DAE) is an unsupervised model that tries to reconstruct the original input.\n", 15 | "\n", 16 | "# All imports up here this time\n", 17 | "import pickle\n", 18 | "import numpy\n", 19 | "import numpy.random as rng\n", 20 | "import theano\n", 21 | "import theano.tensor as T\n", 22 | "import theano.sandbox.rng_mrg as RNG_MRG\n", 23 | "from utils import tile_raster_images\n", 24 | "from PIL import Image as pil_img\n", 25 | "from IPython.display import Image\n", 26 | "\n", 27 | "# Load our data \n", 28 | "# Download and unzip pickled version from here: http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz\n", 29 | "(train_x, _), (valid_x, _), (test_x, _) = pickle.load(open('data/mnist.pkl', 'r'))\n", 30 | "print \"Shapes:\"\n", 31 | "print train_x.shape\n", 32 | "print valid_x.shape\n", 33 | "print test_x.shape" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "# The DAE data flow looks like this: input -> input_(add noise) -> hiddens -> input\n", 45 | "# This can be repeated by sampling from the reconstructed input. Repeating like that is\n", 46 | "# a pseudo Gibbs sampling process. 
We can define how many times we want to repeat (known as walkbacks).\n", 47 | "\n", 48 | "# We can specify any hyperparameters to play with up here:\n", 49 | "input_size = 784 # 28x28 images\n", 50 | "hidden_size = 1000\n", 51 | "w_mean = 0.0\n", 52 | "w_std = 0.05\n", 53 | "noise = 0.3\n", 54 | "walkbacks = 3\n", 55 | "learning_rate = 0.1\n", 56 | "batch_size = 100\n", 57 | "epochs = 100\n", 58 | "check_frequency = 10\n", 59 | "\n", 60 | "# To make the organization better, let's define all the variables and parameters here.\n", 61 | "# Just like with the MLP, we need a symbolic matrix to input the images\n", 62 | "x = T.matrix('x')\n", 63 | "# Next, we need the weights matrix W_x. This will be used to go both from input -> hidden and\n", 64 | "# hidden -> input (by using its transpose). This is called tied weights.\n", 65 | "# Again, initialization has a lot of literature, but we are just going to stick with gaussian at the moment.\n", 66 | "W_x = numpy.asarray(rng.normal(loc=w_mean, scale=w_std, size=(input_size, hidden_size)), dtype=theano.config.floatX)\n", 67 | "# (Don't forget to make parameters into shared variables so they can be updated!)\n", 68 | "W_x = theano.shared(W_x, \"W_x\")\n", 69 | "# Because we are outputting back into the input space, we also need a bias vector for both the input\n", 70 | "# and hidden layers.\n", 71 | "b_x = numpy.zeros((input_size,), dtype=theano.config.floatX)\n", 72 | "b_h = numpy.zeros((hidden_size,), dtype=theano.config.floatX)\n", 73 | "b_x = theano.shared(b_x, \"b_x\")\n", 74 | "b_h = theano.shared(b_h, \"b_h\")\n" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "collapsed": false 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "# Now for the most important part of a denoising autoencoder - making the input noisy!\n", 86 | "# Noise acts as regularization so the autoencoder doesn't just memorize the training set.\n", 87 | "# This makes it more effective for test data 
by reducing overfitting.\n", 88 | "\n", 89 | "# We deal with adding noise during training but not testing in Theano with a switch variable!\n", 90 | "# Switches can be turned on or off to direct data flow in the computation graph - \n", 91 | "# so we turn on for train and off for test!\n", 92 | "# You guessed it - we need a shared variable to represent the switch condition so we can change it at runtime.\n", 93 | "noise_switch = theano.shared(1, \"noise_switch\")\n", 94 | "\n", 95 | "# One important thing to note - the type of noise has to correspond to the type of input.\n", 96 | "# i.e. we can't add real-value noise when the input is expected to be binary\n", 97 | "# So for these binary inputs, we will add salt-and-pepper masking noise!\n", 98 | "# This is a function so we can keep adding it during the computation chain when we alternate sampling\n", 99 | "# from input and reconstructing from hiddens.\n", 100 | "# Theano random number generator\n", 101 | "theano_rng = RNG_MRG.MRG_RandomStreams(1)\n", 102 | "def salt_and_pepper(variable):\n", 103 | " mask = theano_rng.binomial(size=variable.shape, n=1, p=(1-noise), dtype=theano.config.floatX)\n", 104 | " saltpepper = theano_rng.binomial(size=variable.shape, n=1, p=0.5, dtype=theano.config.floatX)\n", 105 | " ones = T.eq(mask, 0) * saltpepper\n", 106 | " # Randomly set some bits to 0 or 1 with equal probability.\n", 107 | " noisy = variable*mask + ones\n", 108 | " return T.switch(noise_switch,\n", 109 | " # true condition\n", 110 | " noisy,\n", 111 | " # false condition\n", 112 | " variable)\n" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": { 119 | "collapsed": false 120 | }, 121 | "outputs": [], 122 | "source": [ 123 | "# Now we are ready to create the computation graph!\n", 124 | "# Remember it is noisy_x -> hiddens -> x -> hiddens -> x .....\n", 125 | "\n", 126 | "inputs=[x]\n", 127 | "for walkback in range(walkbacks):\n", 128 | " # First, we want to corrupt 
the input x\n", 129 | " noisy_x = salt_and_pepper(inputs[-1])\n", 130 | " # Now calculate the hiddens\n", 131 | " h = T.tanh(\n", 132 | " T.dot(noisy_x, W_x) + b_h\n", 133 | " )\n", 134 | " # From the hiddens, reconstruct x.\n", 135 | " # We have to use an appropriate activation function for the type of inputs!\n", 136 | " # In our case with MNIST images, it is binary so sigmoid works.\n", 137 | " reconstruction = T.nnet.sigmoid(\n", 138 | " T.dot(h, W_x.T) + b_x\n", 139 | " )\n", 140 | " # That is all for an autoencoder!\n", 141 | " inputs.append(reconstruction)\n", 142 | " \n", 143 | "# Remove the original input from our reconstructions list\n", 144 | "reconstructions = inputs[1:]\n" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "# The output of our computation graph is the last reconstructed input in the Gibbs chain.\n", 156 | "output = reconstructions[-1]\n", 157 | "\n", 158 | "# Our cost function is now the reconstruction error between all reconstructions and the original input.\n", 159 | "# Again, because our input space is binary, using mean binary cross-entropy is a good analog for \n", 160 | "# reconstruction error.\n", 161 | "# For real-valued inputs, we could use mean square error.\n", 162 | "cost = numpy.sum([T.mean(T.nnet.binary_crossentropy(recon, x)) for recon in reconstructions])\n" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": { 169 | "collapsed": false 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "# Just like with the MLP, compute gradient updates for the parameters to use with training.\n", 174 | "parameters = [W_x, b_h, b_x]\n", 175 | "# Automagic differentiation! 
(Still love it)\n", 176 | "gradients = T.grad(cost, parameters)\n", 177 | "\n", 178 | "# Update the parameters for stochastic gradient descent!\n", 179 | "train_updates = [(param, param - learning_rate*gradient) for param, gradient in zip(parameters, gradients)]\n" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "collapsed": false 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "# Compile our training and testing functions like before!\n", 191 | "# Train function updates the parameters and returns the total train cost to monitor.\n", 192 | "f_train = theano.function(\n", 193 | " inputs=[x], \n", 194 | " outputs=cost, \n", 195 | " updates=train_updates, \n", 196 | " allow_input_downcast=True\n", 197 | ")\n", 198 | "\n", 199 | "# Our test function returns the final reconstruction; no scan is used here, so it needs no explicit updates.\n", 200 | "f_test = theano.function(\n", 201 | " inputs=[x], \n", 202 | " outputs=output,\n", 203 | " allow_input_downcast=True\n", 204 | ")\n" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": { 211 | "collapsed": false 212 | }, 213 | "outputs": [], 214 | "source": [ 215 | "# That's it! Now perform SGD like before.\n", 216 | "# Main training loop\n", 217 | "\n", 218 | "train_batches = len(train_x) / batch_size\n", 219 | "\n", 220 | "try:\n", 221 | " for epoch in range(epochs):\n", 222 | " print epoch+1, \":\",\n", 223 | "\n", 224 | " # Don't forget to turn on our noise switch for training! 
Just set the shared variable to 1 (True)\n", 225 | " noise_switch.set_value(1.)\n", 226 | "\n", 227 | " train_costs = []\n", 228 | " for i in range(train_batches):\n", 229 | " batch_x = train_x[i*batch_size:(i+1)*batch_size]\n", 230 | "\n", 231 | " costs = f_train(batch_x)\n", 232 | "\n", 233 | " train_costs.append(costs)\n", 234 | " print \"cost:\", numpy.mean(train_costs)\n", 235 | "\n", 236 | " if (epoch+1) % check_frequency == 0:\n", 237 | " print \"Saving images...\"\n", 238 | " # For validation, let's run a few images through and see the reconstruction \n", 239 | " # (with the noise from training still added)\n", 240 | " valid_recons = f_test(valid_x[:25])\n", 241 | " # Use the tile_raster_image helper function to rearrange the matrix into a 5x10 image of digits\n", 242 | " # (Two 5x5 images next to each other - the first the inputs, the second the reconstructions.)\n", 243 | " valid_stacked = numpy.vstack(\n", 244 | " [numpy.vstack([\n", 245 | " valid_x[i*5:(i+1)*5],\n", 246 | " valid_recons[i*5:(i+1)*5]\n", 247 | " ])\n", 248 | " for i in range(5)]\n", 249 | " )\n", 250 | " valid_image = pil_img.fromarray(\n", 251 | " # helper from utils.py\n", 252 | " tile_raster_images(valid_stacked, (28, 28), (5, 10), (1, 1))\n", 253 | " )\n", 254 | " valid_image.save(\"dae_valid_%d.png\"%(epoch+1))\n", 255 | "\n", 256 | " # Now do the same for test, but don't add any noise\n", 257 | " # This means set the noise switches to 0. 
(False)\n", 258 | " noise_switch.set_value(0.)\n", 259 | "\n", 260 | " test_recons = f_test(test_x[:25])\n", 261 | " test_stacked = numpy.vstack(\n", 262 | " [numpy.vstack([\n", 263 | " test_x[i*5:(i+1)*5],\n", 264 | " test_recons[i*5:(i+1)*5]\n", 265 | " ])\n", 266 | " for i in range(5)]\n", 267 | " )\n", 268 | " test_image = pil_img.fromarray(\n", 269 | " tile_raster_images(test_stacked, (28, 28), (5, 10), (1, 1))\n", 270 | " )\n", 271 | " test_image.save(\"dae_test_%d.png\"%(epoch+1))\n", 272 | "except KeyboardInterrupt:\n", 273 | " pass\n", 274 | " \n", 275 | "# Let's finally save an image of the filters the DAE learned - this is simply the transpose of the weights!\n", 276 | "weight_filters = pil_img.fromarray(\n", 277 | " tile_raster_images(\n", 278 | " W_x.get_value(borrow=True).T,\n", 279 | " img_shape=(28, 28),\n", 280 | " tile_shape=(25, 40),\n", 281 | " tile_spacing=(1, 1)\n", 282 | " )\n", 283 | ")\n", 284 | "print \"Saving filters...\"\n", 285 | "weight_filters.save(\"dae_filters.png\")\n", 286 | "print \"Done!\"\n", 287 | " " 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "metadata": { 294 | "collapsed": true 295 | }, 296 | "outputs": [], 297 | "source": [] 298 | } 299 | ], 300 | "metadata": { 301 | "kernelspec": { 302 | "display_name": "Python 2", 303 | "language": "python", 304 | "name": "python2" 305 | }, 306 | "language_info": { 307 | "codemirror_mode": { 308 | "name": "ipython", 309 | "version": 2 310 | }, 311 | "file_extension": ".py", 312 | "mimetype": "text/x-python", 313 | "name": "python", 314 | "nbconvert_exporter": "python", 315 | "pygments_lexer": "ipython2", 316 | "version": "2.7.6" 317 | } 318 | }, 319 | "nbformat": 4, 320 | "nbformat_minor": 0 321 | } 322 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/MLP_opendeep-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 
| "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# imports and logger!\n", 12 | "from opendeep.log import config_root_logger\n", 13 | "from opendeep.models import Prototype, Dense, SoftmaxLayer\n", 14 | "from opendeep.optimization import SGD\n", 15 | "from opendeep.data import MNIST\n", 16 | "from opendeep.monitor import Monitor, FileService\n", 17 | "\n", 18 | "config_root_logger()" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "# Create the MLP with two hidden layers and one classification layer\n", 30 | "mlp = Prototype()\n", 31 | "mlp.add(\n", 32 | " Dense(input_size=28*28, output_size=1000, activation='tanh', noise='dropout', noise_level=0.3)\n", 33 | ")\n", 34 | "mlp.add(\n", 35 | " Dense(output_size=1000, activation='tanh', noise='dropout', noise_level=0.3)\n", 36 | ")\n", 37 | "mlp.add(\n", 38 | " SoftmaxLayer(output_size=10)\n", 39 | ")" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "collapsed": false 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "# Create the MNIST data object\n", 51 | "mnist = MNIST(concat_train_valid=True)\n", 52 | "\n", 53 | "# Create the optimizer object\n", 54 | "optimizer = SGD(model=mlp, \n", 55 | " dataset=mnist, \n", 56 | " epochs=100, \n", 57 | " batch_size=500, \n", 58 | " learning_rate=.01, \n", 59 | " momentum=.9,\n", 60 | " nesterov_momentum=True)\n", 61 | "\n", 62 | "# Make a monitor to watch the train and test prediction errors\n", 63 | "errorMonitor = Monitor('error', mlp.get_monitors()['softmax_error'], train=True, test=True)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "collapsed": false 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "# Train the model with the optimizer!\n", 75 | 
"optimizer.train(monitor_channels=[errorMonitor])" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "# Make some predictions on test data!\n", 87 | "test_data, test_labels = mnist.test_inputs, mnist.test_targets\n", 88 | "\n", 89 | "n=30\n", 90 | "predictions = mlp.run(test_data)\n", 91 | "labels = test_labels.astype('int32')\n", 92 | "\n", 93 | "print \"Predictions:\", predictions[:n]\n", 94 | "print \"Correct: \", labels[:n]\n", 95 | "print \"Accuracy: \", sum((predictions==labels) * 1./len(labels))*100, \"%\"" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "collapsed": true 103 | }, 104 | "outputs": [], 105 | "source": [] 106 | } 107 | ], 108 | "metadata": { 109 | "kernelspec": { 110 | "display_name": "Python 2", 111 | "language": "python", 112 | "name": "python2" 113 | }, 114 | "language_info": { 115 | "codemirror_mode": { 116 | "name": "ipython", 117 | "version": 2 118 | }, 119 | "file_extension": ".py", 120 | "mimetype": "text/x-python", 121 | "name": "python", 122 | "nbconvert_exporter": "python", 123 | "pygments_lexer": "ipython2", 124 | "version": "2.7.6" 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 0 129 | } 130 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/MLP_theano-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from IPython.display import Image\n", 12 | "import pickle\n", 13 | "\n", 14 | "# Download and unzip pickled version from here: http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz\n", 15 | "(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = 
pickle.load(open('data/mnist.pkl', 'r'))\n", 16 | "print \"Shapes:\"\n", 17 | "print train_x.shape, train_y.shape\n", 18 | "print valid_x.shape, valid_y.shape\n", 19 | "print test_x.shape, test_y.shape\n", 20 | "\n", 21 | "print \"--------------\"\n", 22 | "print \"Example input:\"\n", 23 | "print train_x[0]\n", 24 | "print \"Example label:\"\n", 25 | "print train_y[0]\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "# Show example images - using tile_raster_images helper function from OpenDeep to get 28x28 image from 784 array.\n", 37 | "from utils import tile_raster_images\n", 38 | "from PIL import Image as pil_img\n", 39 | "\n", 40 | "input_images = train_x[:25]\n", 41 | "im = pil_img.fromarray(\n", 42 | " tile_raster_images(input_images, \n", 43 | " img_shape=(28, 28), \n", 44 | " tile_shape=(1, 25),\n", 45 | " tile_spacing=(1, 1))\n", 46 | ")\n", 47 | "im.save(\"some_mnist_numbers.png\")\n", 48 | "Image(filename=\"some_mnist_numbers.png\")\n" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": { 55 | "collapsed": false 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "# Your basic Theano imports.\n", 60 | "import theano\n", 61 | "import theano.tensor as T\n", 62 | "\n", 63 | "x = T.matrix('x')\n" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "collapsed": false 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "# Compute the hidden layer from the input\n", 75 | "import numpy\n", 76 | "import numpy.random as rng\n", 77 | "\n", 78 | "i = numpy.sqrt(6. 
/ (784+500))\n", 79 | "# W_x = numpy.asarray(rng.normal(loc=0.0, scale=.05, size=(28*28, 500)), dtype=theano.config.floatX)\n", 80 | "W_x = numpy.asarray(rng.uniform(low=-i, high=i, size=(28*28, 500)), dtype=theano.config.floatX)\n", 81 | "b_h = numpy.zeros(shape=(500,), dtype=theano.config.floatX)\n", 82 | "\n", 83 | "W_x = theano.shared(W_x, name=\"W_x\")\n", 84 | "b_h = theano.shared(b_h, name=\"b_h\")\n", 85 | "\n", 86 | "h = T.tanh(\n", 87 | " T.dot(x, W_x) + b_h\n", 88 | ")\n" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "collapsed": false 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "# Compute the output class probabilities from the hidden layer\n", 100 | "i = numpy.sqrt(6. / (510))\n", 101 | "# W_h = numpy.asarray(rng.normal(loc=0.0, scale=.05, size=(500, 10)), dtype=theano.config.floatX)\n", 102 | "W_h = numpy.asarray(rng.uniform(low=-i, high=i, size=(500, 10)), dtype=theano.config.floatX)\n", 103 | "b_y = numpy.zeros(shape=(10,), dtype=\"float32\")\n", 104 | "\n", 105 | "W_h = theano.shared(W_h, name=\"W_h\")\n", 106 | "b_y = theano.shared(b_y, name=\"b_y\")\n", 107 | "\n", 108 | "y = T.nnet.softmax(\n", 109 | " T.dot(h, W_h) + b_y\n", 110 | ")\n", 111 | "\n", 112 | "# The actual predicted label\n", 113 | "y_hat = T.argmax(y, axis=1)\n" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": { 120 | "collapsed": false 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "# Find cost compared to correct labels\n", 125 | "correct_labels = T.ivector(\"labels\")\n", 126 | "\n", 127 | "log_likelihood = T.log(y)[T.arange(correct_labels.shape[0]), correct_labels]\n", 128 | "cost = -T.mean(log_likelihood)\n" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "collapsed": false 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "# Compute gradient updates for the parameters\n", 140 | "parameters = 
[W_x, b_h, W_h, b_y]\n", 141 | "gradients = T.grad(cost, parameters)\n", 142 | "\n", 143 | "learning_rate = 0.01\n", 144 | "train_updates = [(param, param - learning_rate*gradient) for param, gradient in zip(parameters, gradients)]\n" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "# Compile function for training (changes parameters via updates) and testing (no updates)\n", 156 | "f_train = theano.function(\n", 157 | " inputs=[x, correct_labels], \n", 158 | " outputs=cost, \n", 159 | " updates=train_updates, \n", 160 | " allow_input_downcast=True\n", 161 | ")\n", 162 | "\n", 163 | "f_test = theano.function(\n", 164 | " inputs=[x], \n", 165 | " outputs=y_hat, \n", 166 | " allow_input_downcast=True\n", 167 | ")\n" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": { 174 | "collapsed": false 175 | }, 176 | "outputs": [], 177 | "source": [ 178 | "# Main training loop\n", 179 | "batch_size = 100\n", 180 | "epochs = 300\n", 181 | "check_frequency = 3\n", 182 | "\n", 183 | "train_batches = len(train_x) / batch_size\n", 184 | "valid_batches = len(valid_x) / batch_size\n", 185 | "test_batches = len(test_x) / batch_size\n", 186 | "\n", 187 | "for epoch in range(epochs):\n", 188 | " print epoch+1, \":\",\n", 189 | " \n", 190 | " train_costs = []\n", 191 | " train_accuracy = []\n", 192 | " for i in range(train_batches):\n", 193 | " batch_x = train_x[i*batch_size:(i+1)*batch_size]\n", 194 | " batch_labels = train_y[i*batch_size:(i+1)*batch_size]\n", 195 | "\n", 196 | " costs = f_train(batch_x, batch_labels)\n", 197 | " preds = f_test(batch_x)\n", 198 | " acc = sum(preds==batch_labels)/float(len(batch_labels))\n", 199 | " \n", 200 | " train_costs.append(costs)\n", 201 | " train_accuracy.append(acc)\n", 202 | " print \"cost:\", numpy.mean(train_costs), \"\\ttrain:\", 
str(numpy.mean(train_accuracy)*100)+\"%\",\n", 203 | " \n", 204 | " valid_accuracy = []\n", 205 | " for i in range(valid_batches):\n", 206 | " batch_x = valid_x[i*batch_size:(i+1)*batch_size]\n", 207 | " batch_labels = valid_y[i*batch_size:(i+1)*batch_size]\n", 208 | " \n", 209 | " preds = f_test(batch_x)\n", 210 | " acc = sum(preds==batch_labels)/float(len(batch_labels))\n", 211 | " \n", 212 | " valid_accuracy.append(acc)\n", 213 | " print \"\\tvalid:\", str(numpy.mean(valid_accuracy)*100)+\"%\",\n", 214 | " \n", 215 | " test_accuracy = []\n", 216 | " for i in range(test_batches):\n", 217 | " batch_x = test_x[i*batch_size:(i+1)*batch_size]\n", 218 | " batch_labels = test_y[i*batch_size:(i+1)*batch_size]\n", 219 | " \n", 220 | " preds = f_test(batch_x)\n", 221 | " acc = sum(preds==batch_labels)/float(len(batch_labels))\n", 222 | " \n", 223 | " test_accuracy.append(acc)\n", 224 | " print \"\\ttest:\", str(numpy.mean(test_accuracy)*100)+\"%\"\n", 225 | " \n", 226 | " if (epoch+1) % check_frequency == 0:\n", 227 | " print 'saving filters...'\n", 228 | " weight_filters = pil_img.fromarray(\n", 229 | " tile_raster_images(\n", 230 | " W_x.get_value(borrow=True).T,\n", 231 | " img_shape=(28, 28),\n", 232 | " tile_shape=(20, 25),\n", 233 | " tile_spacing=(1, 1)\n", 234 | " )\n", 235 | " )\n", 236 | " weight_filters.save(\"mlp_filters_%d.png\"%(epoch+1))" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": { 243 | "collapsed": true 244 | }, 245 | "outputs": [], 246 | "source": [] 247 | } 248 | ], 249 | "metadata": { 250 | "kernelspec": { 251 | "display_name": "Python 2", 252 | "language": "python", 253 | "name": "python2" 254 | }, 255 | "language_info": { 256 | "codemirror_mode": { 257 | "name": "ipython", 258 | "version": 2 259 | }, 260 | "file_extension": ".py", 261 | "mimetype": "text/x-python", 262 | "name": "python", 263 | "nbconvert_exporter": "python", 264 | "pygments_lexer": "ipython2", 265 | "version": "2.7.6" 266 | 
} 267 | }, 268 | "nbformat": 4, 269 | "nbformat_minor": 0 270 | } 271 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/MLP_theano_with_comments-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# This tutorial covers your simplest neural network: a multilayer perceptron (MLP)\n", 12 | "# Also known as feedforward neural network.\n", 13 | "# We will learn to classify MNIST handwritten digit images into their correct label (0-9).\n", 14 | "\n", 15 | "from IPython.display import Image\n", 16 | "# First, let's load our data and take a look!\n", 17 | "import pickle\n", 18 | "\n", 19 | "# Load our data \n", 20 | "# Download and unzip pickled version from here: http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz\n", 21 | "(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = pickle.load(open('data/mnist.pkl', 'r'))\n", 22 | "print \"Shapes:\"\n", 23 | "print train_x.shape, train_y.shape\n", 24 | "print valid_x.shape, valid_y.shape\n", 25 | "print test_x.shape, test_y.shape\n", 26 | "\n", 27 | "print \"--------------\"\n", 28 | "print \"Example input:\"\n", 29 | "print train_x[0]\n", 30 | "print \"Example label:\"\n", 31 | "print train_y[0]\n" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "collapsed": false 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "# Show example images - using tile_raster_images helper function from OpenDeep to get 28x28 image from 784 array.\n", 43 | "from utils import tile_raster_images\n", 44 | "from PIL import Image as pil_img\n", 45 | "\n", 46 | "input_images = train_x[:25]\n", 47 | "im = pil_img.fromarray(\n", 48 | " tile_raster_images(input_images, \n", 49 | " img_shape=(28, 28), \n", 50 | " 
tile_shape=(1, 25),\n", 51 | " tile_spacing=(1, 1))\n", 52 | ")\n", 53 | "im.save(\"some_mnist_numbers.png\")\n", 54 | "Image(filename=\"some_mnist_numbers.png\")\n" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": { 61 | "collapsed": false 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "# Cool, now we know a little about the input data, let's design the MLP to work with it!\n", 66 | "# An MLP looks like this: input -> hiddens -> output classification\n", 67 | "# Each stage is just a matrix multiplication with a nonlinear function applied after.\n", 68 | "\n", 69 | "# Your basic Theano imports.\n", 70 | "import theano\n", 71 | "import theano.tensor as T\n", 72 | "\n", 73 | "# Inputs are matrices where rows are examples and columns are pixels - so create a symbolic Theano matrix.\n", 74 | "x = T.matrix('x')\n" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "collapsed": false 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "# Now let's start building the equation for our MLP!\n", 86 | "\n", 87 | "# The first transformation is the input x -> hidden layer h.\n", 88 | "# We defined this transformation with h = tanh(x.dot(W_x) + b_h)\n", 89 | "# where the learnable model parameters are W_x and b_h.\n", 90 | "\n", 91 | "# Therefore, we will need a weights matrix W_x and a bias vector b_h.\n", 92 | "# W_x has shape (input_size, hidden_size) and b_h has shape (hidden_size,).\n", 93 | "# Initialization is important in deep learning; we want something random so the model doesn't get stuck early.\n", 94 | "# Many papers in this subject, but for now we will just use a normal distribution with mean=0 and std=0.05.\n", 95 | "# Another good option for tanh layers is to use a uniform distribution with interval +- sqrt(6/sum(shape)).\n", 96 | "# These are hyperparameters to play with.\n", 97 | "# Bias starting as zero is fine.\n", 98 | "import numpy.random as rng\n", 99 | "W_x = 
numpy.asarray(rng.normal(loc=0.0, scale=.05, size=(28*28, 500)), dtype=theano.config.floatX)\n", 100 | "b_h = numpy.zeros(shape=(500,), dtype=theano.config.floatX)\n", 101 | "\n", 102 | "# To update a variable used in an equation (for example, while learning), \n", 103 | "# Theano needs it to be in a special wrapper called a shared variable.\n", 104 | "# These are the model parameters for our first hidden layer!\n", 105 | "W_x = theano.shared(W_x, name=\"W_x\")\n", 106 | "b_h = theano.shared(b_h, name=\"b_h\")\n", 107 | "\n", 108 | "# Now, we can finally write the equation to give our symbolic hidden layer h!\n", 109 | "h = T.tanh(\n", 110 | " T.dot(x, W_x) + b_h\n", 111 | ")\n", 112 | "\n", 113 | "# Side note - if we used softmax instead of tanh for the activation, this would be performing logistic regression!\n" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": { 120 | "collapsed": false 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "# We have the hidden layer h, let's put that softmax layer on top for classification output y!\n", 125 | "\n", 126 | "# Same deal as before, the transformation is defined as:\n", 127 | "# y = softmax(h.dot(W_h) + b_y)\n", 128 | "# where the learnable parameters are W_h and b_y.\n", 129 | "# W_h has shape (hidden_size, output_size) and b_y has shape (output_size,).\n", 130 | "\n", 131 | "# We will use the same random initialization strategy as before.\n", 132 | "W_h = numpy.asarray(rng.normal(loc=0.0, scale=.05, size=(500, 10)), dtype=theano.config.floatX)\n", 133 | "b_y = numpy.zeros(shape=(10,), dtype=theano.config.floatX)\n", 134 | "# Don't forget to make them shared variables!\n", 135 | "W_h = theano.shared(W_h, name=\"W_h\")\n", 136 | "b_y = theano.shared(b_y, name=\"b_y\")\n", 137 | "\n", 138 | "# Now write the equation for the output!\n", 139 | "y = T.nnet.softmax(\n", 140 | " T.dot(h, W_h) + b_y\n", 141 | ")\n", 142 | "\n", 143 | "# The output (due to softmax) is a 
vector of class probabilities.\n", 144 | "# To get the output class 'guess' from the model, just take the index of the highest probability!\n", 145 | "y_hat = T.argmax(y, axis=1)\n", 146 | "\n", 147 | "# That's everything! Just four model parameters and one input variable.\n" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": { 154 | "collapsed": false 155 | }, 156 | "outputs": [], 157 | "source": [ 158 | "# The variable y_hat represents the output of running our model, but we need a cost function to use for training.\n", 159 | "# For a softmax (probability) output, we want to maximize the likelihood of P(Y=y|X).\n", 160 | "# This means we want to minimize the negative log-likelihood cost! (For a primer, see machine learning Coursera.)\n", 161 | "\n", 162 | "# Cost functions always need the truth outputs to compare against (this is supervised learning).\n", 163 | "# From before, we saw the labels were a vector of ints - so let's make a symbolic variable for this!\n", 164 | "correct_labels = T.ivector(\"labels\") # integer vector\n", 165 | "\n", 166 | "# Now we can compare our output probability from y with the true labels.\n", 167 | "# Because the labels are integers, we will want to make an indexing mask to pick out the probability\n", 168 | "# that our model assigned to the correct label of each example.\n", 169 | "log_likelihood = T.log(y)[T.arange(correct_labels.shape[0]), correct_labels]\n", 170 | "# We use mean instead of sum to be less dependent on batch size (better for flexibility)\n", 171 | "cost = -T.mean(log_likelihood)\n" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": { 178 | "collapsed": false 179 | }, 180 | "outputs": [], 181 | "source": [ 182 | "# Easiest way to train neural nets is with Stochastic Gradient Descent\n", 183 | "# This takes each example, calculates the gradient, and changes the model parameters a small amount\n", 184 | "# in the 
direction opposite to the gradient (downhill on the cost).\n", 185 | "\n", 186 | "# Fancier add-ons to stochastic gradient descent will reduce the learning rate over time, add a momentum\n", 187 | "# factor to the parameters, etc.\n", 188 | "\n", 189 | "# Before we can start training, we need to know what the gradients are.\n", 190 | "# Luckily we don't have to do any math! Theano has symbolic auto-differentiation which means it can\n", 191 | "# calculate the gradients for arbitrary equations with respect to a cost and parameters.\n", 192 | "parameters = [W_x, b_h, W_h, b_y]\n", 193 | "gradients = T.grad(cost, parameters)\n", 194 | "# Now gradients contains the list of derivatives: [d_cost/d_W_x, d_cost/d_b_h, d_cost/d_W_h, d_cost/d_b_y]\n", 195 | "\n", 196 | "# One last thing we need to do before training is to use these gradients to update the parameters!\n", 197 | "# Remember how parameters are shared variables? Well, Theano uses something called updates\n", 198 | "# which are just pairs of (shared_variable, new_variable_expression) to change its value.\n", 199 | "# So, let's create these updates to show how we change the parameter values during training with gradients!\n", 200 | "# We use a learning rate to make small steps over time.\n", 201 | "learning_rate = 0.01\n", 202 | "train_updates = [(param, param - learning_rate*gradient) for param, gradient in zip(parameters, gradients)]\n" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": { 209 | "collapsed": false 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "# Now we can create a Theano function that takes in real inputs and trains our model.\n", 214 | "f_train = theano.function(inputs=[x, correct_labels], outputs=cost, updates=train_updates, allow_input_downcast=True)\n", 215 | "\n", 216 | "# For testing purposes, we don't want to use updates to change the parameters - so create a separate function!\n", 217 | "# We also care more about the output guesses, so let's return those instead 
of the cost.\n", 218 | "# error = sum(T.neq(y_hat, correct_labels))/float(y_hat.shape[0])\n", 219 | "f_test = theano.function(inputs=[x], outputs=y_hat, allow_input_downcast=True)\n" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": { 226 | "collapsed": false 227 | }, 228 | "outputs": [], 229 | "source": [ 230 | "# Our training can begin!\n", 231 | "# The two hyperparameters we have for this part are minibatch size (how many examples to process in parallel)\n", 232 | "# and the total number of passes over all examples (epochs).\n", 233 | "batch_size = 100\n", 234 | "epochs = 30\n", 235 | "\n", 236 | "# Given our batch size, compute how many batches we can fit into each data set\n", 237 | "train_batches = len(train_x) / batch_size\n", 238 | "valid_batches = len(valid_x) / batch_size\n", 239 | "test_batches = len(test_x) / batch_size\n", 240 | "\n", 241 | "# Our main training loop!\n", 242 | "for epoch in range(epochs):\n", 243 | " print epoch+1, \":\",\n", 244 | " \n", 245 | " train_costs = []\n", 246 | " train_accuracy = []\n", 247 | " for i in range(train_batches):\n", 248 | " # Grab our minibatch of examples from the whole train set.\n", 249 | " batch_x = train_x[i*batch_size:(i+1)*batch_size]\n", 250 | " batch_labels = train_y[i*batch_size:(i+1)*batch_size]\n", 251 | " # Compute the costs from the train function (which also updates the parameters)\n", 252 | " costs = f_train(batch_x, batch_labels)\n", 253 | " # Compute the predictions from the test function (which does not update parameters)\n", 254 | " preds = f_test(batch_x)\n", 255 | " # Compute the accuracy of our predictions against the correct batch labels\n", 256 | " acc = sum(preds==batch_labels)/float(len(batch_labels))\n", 257 | " \n", 258 | " train_costs.append(costs)\n", 259 | " train_accuracy.append(acc)\n", 260 | " # Show the mean cost and accuracy across minibatches (the entire train set!)\n", 261 | " print \"cost:\", numpy.mean(train_costs), 
\"\\ttrain:\", str(numpy.mean(train_accuracy)*100)+\"%\",\n", 262 | " \n", 263 | " valid_accuracy = []\n", 264 | " for i in range(valid_batches):\n", 265 | " batch_x = valid_x[i*batch_size:(i+1)*batch_size]\n", 266 | " batch_labels = valid_y[i*batch_size:(i+1)*batch_size]\n", 267 | " \n", 268 | " preds = f_test(batch_x)\n", 269 | " acc = sum(preds==batch_labels)/float(len(batch_labels))\n", 270 | " \n", 271 | " valid_accuracy.append(acc)\n", 272 | " print \"\\tvalid:\", str(numpy.mean(valid_accuracy)*100)+\"%\",\n", 273 | " \n", 274 | " test_accuracy = []\n", 275 | " for i in range(test_batches):\n", 276 | " batch_x = test_x[i*batch_size:(i+1)*batch_size]\n", 277 | " batch_labels = test_y[i*batch_size:(i+1)*batch_size]\n", 278 | " \n", 279 | " preds = f_test(batch_x)\n", 280 | " acc = sum(preds==batch_labels)/float(len(batch_labels))\n", 281 | " \n", 282 | " test_accuracy.append(acc)\n", 283 | " print \"\\ttest:\", str(numpy.mean(test_accuracy)*100)+\"%\"" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": { 290 | "collapsed": true 291 | }, 292 | "outputs": [], 293 | "source": [] 294 | } 295 | ], 296 | "metadata": { 297 | "kernelspec": { 298 | "display_name": "Python 2", 299 | "language": "python", 300 | "name": "python2" 301 | }, 302 | "language_info": { 303 | "codemirror_mode": { 304 | "name": "ipython", 305 | "version": 2 306 | }, 307 | "file_extension": ".py", 308 | "mimetype": "text/x-python", 309 | "name": "python", 310 | "nbconvert_exporter": "python", 311 | "pygments_lexer": "ipython2", 312 | "version": "2.7.6" 313 | } 314 | }, 315 | "nbformat": 4, 316 | "nbformat_minor": 0 317 | } 318 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/RNN-GSN_opendeep-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | 
"collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# Imports!\n", 12 | "# standard libraries\n", 13 | "import logging\n", 14 | "import math\n", 15 | "# third party\n", 16 | "import theano\n", 17 | "# internal references\n", 18 | "from opendeep.data import MNIST\n", 19 | "from opendeep.log import config_root_logger\n", 20 | "from opendeep.models import Model, RNN, GSN\n", 21 | "from opendeep.optimization import RMSProp\n", 22 | "\n", 23 | "config_root_logger()\n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "# Let's define a new model combining the RNN and GSNs.\n", 35 | "class RNN_GSN(Model):\n", 36 | " def __init__(self):\n", 37 | " super(RNN_GSN, self).__init__()\n", 38 | "\n", 39 | " gsn_hiddens = 500\n", 40 | " gsn_layers = 2\n", 41 | "\n", 42 | " # RNN that takes in images (3D sequences) and outputs gsn hiddens (3D sequence of them)\n", 43 | " self.rnn = RNN(\n", 44 | " input_size=28 * 28,\n", 45 | " hidden_size=100,\n", 46 | " # needs to output hidden units for odd layers of GSN\n", 47 | " output_size=gsn_hiddens * (math.ceil(gsn_layers/2.)),\n", 48 | " layers=1,\n", 49 | " activation='tanh',\n", 50 | " hidden_activation='relu',\n", 51 | " weights_init='uniform', weights_interval='montreal',\n", 52 | " r_weights_init='identity'\n", 53 | " )\n", 54 | "\n", 55 | " # Create the GSN that will encode the input space\n", 56 | " gsn = GSN(\n", 57 | " input_size=28 * 28,\n", 58 | " hidden_size=gsn_hiddens,\n", 59 | " layers=gsn_layers,\n", 60 | " walkbacks=4,\n", 61 | " visible_activation='sigmoid',\n", 62 | " hidden_activation='tanh',\n", 63 | " image_height=28,\n", 64 | " image_width=28\n", 65 | " )\n", 66 | " # grab the input arguments\n", 67 | " gsn_args = gsn.args.copy()\n", 68 | " # grab the parameters it initialized\n", 69 | " gsn_params = gsn.get_params()\n", 70 | "\n", 71 | " # Now hook the two up! 
RNN should output hiddens for GSN into a 3D tensor (1 set for each timestep)\n", 72 | " # Therefore, we need to use scan to create the GSN reconstruction for each timestep given the hiddens\n", 73 | " def step(hiddens, x):\n", 74 | " gsn = GSN(\n", 75 | " inputs_hook=(28*28, x),\n", 76 | " hiddens_hook=(gsn_hiddens, hiddens),\n", 77 | " params_hook=(gsn_params),\n", 78 | " **gsn_args\n", 79 | " )\n", 80 | " # return the reconstruction and cost!\n", 81 | " return gsn.get_outputs(), gsn.get_train_cost()\n", 82 | "\n", 83 | " (outputs, costs), scan_updates = theano.scan(\n", 84 | " fn=lambda h, x: step(h, x),\n", 85 | " sequences=[self.rnn.output, self.rnn.input],\n", 86 | " outputs_info=[None, None]\n", 87 | " )\n", 88 | "\n", 89 | " self.outputs = outputs\n", 90 | "\n", 91 | " self.updates = dict()\n", 92 | " self.updates.update(self.rnn.get_updates())\n", 93 | " self.updates.update(scan_updates)\n", 94 | "\n", 95 | " self.cost = costs.sum()\n", 96 | " self.params = gsn_params + self.rnn.get_params()\n", 97 | "\n", 98 | " # Model functions necessary for training\n", 99 | " def get_inputs(self):\n", 100 | " return self.rnn.get_inputs()\n", 101 | " def get_params(self):\n", 102 | " return self.params\n", 103 | " def get_train_cost(self):\n", 104 | " return self.cost\n", 105 | " def get_updates(self):\n", 106 | " return self.updates\n", 107 | " def get_outputs(self):\n", 108 | " return self.outputs\n", 109 | " " 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": { 116 | "collapsed": false 117 | }, 118 | "outputs": [], 119 | "source": [ 120 | "# Now we can instantiate and train the model!\n", 121 | "rnn_gsn = RNN_GSN()\n", 122 | "\n", 123 | "# data! 
(needs to be 3d for rnn).\n", 124 | "mnist = MNIST(sequence_number=1, seq_3d=True, seq_length=30)\n", 125 | "\n", 126 | "# optimizer!\n", 127 | "optimizer = RMSProp(\n", 128 | " model=rnn_gsn,\n", 129 | " dataset=mnist,\n", 130 | " epochs=500,\n", 131 | " batch_size=50,\n", 132 | " save_freq=10,\n", 133 | " stop_patience=30,\n", 134 | " stop_threshold=.9995,\n", 135 | " learning_rate=1e-6,\n", 136 | " decay=.95,\n", 137 | " max_scaling=1e5,\n", 138 | " grad_clip=5.,\n", 139 | " hard_clip=False\n", 140 | ")\n", 141 | "# train!\n", 142 | "optimizer.train()\n" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": { 149 | "collapsed": true 150 | }, 151 | "outputs": [], 152 | "source": [] 153 | } 154 | ], 155 | "metadata": { 156 | "kernelspec": { 157 | "display_name": "Python 2", 158 | "language": "python", 159 | "name": "python2" 160 | }, 161 | "language_info": { 162 | "codemirror_mode": { 163 | "name": "ipython", 164 | "version": 2 165 | }, 166 | "file_extension": ".py", 167 | "mimetype": "text/x-python", 168 | "name": "python", 169 | "nbconvert_exporter": "python", 170 | "pygments_lexer": "ipython2", 171 | "version": "2.7.6" 172 | } 173 | }, 174 | "nbformat": 4, 175 | "nbformat_minor": 0 176 | } 177 | -------------------------------------------------------------------------------- /DAE_opendeep.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# imports and logger!\n", 12 | "from opendeep.log import config_root_logger\n", 13 | "from opendeep.models import GSN\n", 14 | "from opendeep.optimization import SGD\n", 15 | "from opendeep.data import MNIST\n", 16 | "from opendeep.utils.misc import closest_to_square_factors\n", 17 | "from PIL import Image as pil_img\n", 18 | "from opendeep.utils.image import 
tile_raster_images\n", 19 | "import numpy\n", 20 | "\n", 21 | "config_root_logger()" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": { 28 | "collapsed": false 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "# A denoising autoencoder (DAE) is a special case of a generative stochastic network (GSN).\n", 33 | "# GSN's can have multiple denoising layers that interact with each other both above and below.\n", 34 | "dae = GSN(\n", 35 | " input_size=28*28,\n", 36 | " hidden_size=1000,\n", 37 | " visible_activation='sigmoid',\n", 38 | " hidden_activation='tanh',\n", 39 | " layers=1,\n", 40 | " walkbacks=3,\n", 41 | " input_noise='salt_and_pepper',\n", 42 | " input_noise_level=0.3\n", 43 | ")\n" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": { 50 | "collapsed": false 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "# Create the MNIST data object\n", 55 | "mnist = MNIST(concat_train_valid=True)\n", 56 | "\n", 57 | "# Create the optimizer object\n", 58 | "optimizer = SGD(dataset=mnist,\n", 59 | " epochs=40, \n", 60 | " batch_size=100, \n", 61 | " learning_rate=.25,\n", 62 | " lr_decay='exponential',\n", 63 | " lr_decay_factor=.995,\n", 64 | " momentum=.5,\n", 65 | " nesterov_momentum=False)\n" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": { 72 | "collapsed": false 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "# Train the model with the optimizer on the mnist dataset!\n", 77 | "dae.train(optimizer)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "# Run some numbers to see the output\n", 89 | "n_examples = 100\n", 90 | "xs_test = mnist.test_inputs[:n_examples]\n", 91 | "noisy_xs_test = dae.f_noise(xs_test)\n", 92 | "reconstructed = dae.run(noisy_xs_test)\n", 93 | "# Concatenate stuff\n", 94 | "stacked = 
numpy.vstack(\n", 95 | " [numpy.vstack([xs_test[i * 10: (i + 1) * 10],\n", 96 | " noisy_xs_test[i * 10: (i + 1) * 10],\n", 97 | " reconstructed[i * 10: (i + 1) * 10]])\n", 98 | " for i in range(10)])\n", 99 | "number_reconstruction = pil_img.fromarray(\n", 100 | " tile_raster_images(stacked, (dae.image_height, dae.image_width), (10, 30), (1, 1))\n", 101 | ")\n", 102 | "\n", 103 | "number_reconstruction.save(\"dae_opendeep_test.png\")\n", 104 | "\n", 105 | "# Construct image from the weight matrix\n", 106 | "image = pil_img.fromarray(\n", 107 | " tile_raster_images(\n", 108 | " X=dae.weights_list[0].get_value(borrow=True).T,\n", 109 | " img_shape=(28, 28),\n", 110 | " tile_shape=closest_to_square_factors(dae.layer_sizes[1]),\n", 111 | " tile_spacing=(1, 1)\n", 112 | " )\n", 113 | ")\n", 114 | "image.save(\"dae_opendeep_filters.png\")\n", 115 | "print \"Done!\"" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "collapsed": true 123 | }, 124 | "outputs": [], 125 | "source": [] 126 | } 127 | ], 128 | "metadata": { 129 | "kernelspec": { 130 | "display_name": "Python 2", 131 | "language": "python", 132 | "name": "python2" 133 | }, 134 | "language_info": { 135 | "codemirror_mode": { 136 | "name": "ipython", 137 | "version": 2 138 | }, 139 | "file_extension": ".py", 140 | "mimetype": "text/x-python", 141 | "name": "python", 142 | "nbconvert_exporter": "python", 143 | "pygments_lexer": "ipython2", 144 | "version": "2.7.6" 145 | } 146 | }, 147 | "nbformat": 4, 148 | "nbformat_minor": 0 149 | } 150 | -------------------------------------------------------------------------------- /DAE_theano.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# This tutorial is meant to be done after 
MLP_theano_with_comments.ipynb\n", 12 | "\n", 13 | "# We are working with MNIST again, this time no labels are necessary - \n", 14 | "# the denoising autoencoder (DAE) is an unsupervised model that tries to reconstruct the original input.\n", 15 | "\n", 16 | "# All imports up here this time\n", 17 | "import pickle\n", 18 | "import numpy\n", 19 | "import numpy.random as rng\n", 20 | "import theano\n", 21 | "import theano.tensor as T\n", 22 | "import theano.sandbox.rng_mrg as RNG_MRG\n", 23 | "from utils import tile_raster_images\n", 24 | "from PIL import Image as pil_img\n", 25 | "from IPython.display import Image\n", 26 | "\n", 27 | "# Load our data \n", 28 | "# Download and unzip pickled version from here: http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz\n", 29 | "(train_x, _), (valid_x, _), (test_x, _) = pickle.load(open('data/mnist.pkl', 'r'))\n", 30 | "print \"Shapes:\"\n", 31 | "print train_x.shape\n", 32 | "print valid_x.shape\n", 33 | "print test_x.shape" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "# We can specify any hyperparameters to play with up here:\n", 45 | "input_size = 784 # 28x28 images\n", 46 | "hidden_size = 1000\n", 47 | "w_mean = 0.0\n", 48 | "w_std = 0.05\n", 49 | "w_interval = numpy.sqrt(6. 
/ (input_size + hidden_size))\n", 50 | "noise = 0.3\n", 51 | "walkbacks = 3\n", 52 | "learning_rate = 0.25\n", 53 | "lr_decay = .985\n", 54 | "batch_size = 100\n", 55 | "epochs = 200\n", 56 | "check_frequency = 10\n", 57 | "\n", 58 | "# To make the organization better, lets define all the variables and parameters here.\n", 59 | "x = T.matrix('x')\n", 60 | "# W_x = numpy.asarray(rng.normal(loc=w_mean, scale=w_std, size=(input_size, hidden_size)), dtype=theano.config.floatX)\n", 61 | "W_x = numpy.asarray(rng.uniform(low=-w_interval, high=w_interval, size=(input_size, hidden_size)), dtype=theano.config.floatX)\n", 62 | "W_x = theano.shared(W_x, \"W_x\")\n", 63 | "\n", 64 | "b_x = numpy.zeros((input_size,), dtype=theano.config.floatX)\n", 65 | "b_h = numpy.zeros((hidden_size,), dtype=theano.config.floatX)\n", 66 | "b_x = theano.shared(b_x, \"b_x\")\n", 67 | "b_h = theano.shared(b_h, \"b_h\")\n" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": { 74 | "collapsed": false 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "# Now for the most important part of a denoising autoencoder - making the input noisy!\n", 79 | "# Noise acts as regularization so the autoencoder doesn't just memorize the training set.\n", 80 | "# This makes it more effective for test data by reducing overfitting.\n", 81 | "noise_switch = theano.shared(1, \"noise_switch\")\n", 82 | "\n", 83 | "theano_rng = RNG_MRG.MRG_RandomStreams(1)\n", 84 | "def salt_and_pepper(variable):\n", 85 | " mask = theano_rng.binomial(size=variable.shape, n=1, p=(1-noise), dtype=theano.config.floatX)\n", 86 | " saltpepper = theano_rng.binomial(size=variable.shape, n=1, p=0.5, dtype=theano.config.floatX)\n", 87 | " ones = T.eq(mask, 0) * saltpepper\n", 88 | " noisy = variable*mask + ones\n", 89 | " return T.switch(noise_switch,\n", 90 | " noisy,\n", 91 | " variable)\n" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": 
false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "# Now we are ready to create the computation graph!\n", 103 | "# Remember it is noisy_x -> hiddens -> x -> hiddens -> x .....\n", 104 | "\n", 105 | "inputs=[x]\n", 106 | "for walkback in range(walkbacks):\n", 107 | " noisy_x = salt_and_pepper(inputs[-1])\n", 108 | "\n", 109 | " h = T.tanh(\n", 110 | " T.dot(noisy_x, W_x) + b_h\n", 111 | " )\n", 112 | "\n", 113 | " reconstruction = T.nnet.sigmoid(\n", 114 | " T.dot(h, W_x.T) + b_x\n", 115 | " )\n", 116 | "\n", 117 | " inputs.append(reconstruction)\n", 118 | " \n", 119 | "reconstructions = inputs[1:]\n" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": { 126 | "collapsed": false 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "# The output of our computation graph is the last reconstructed input in the Gibbs chain.\n", 131 | "output = reconstructions[-1]\n", 132 | "\n", 133 | "# Our cost function is now the reconstruction error between all reconstructions and the original input.\n", 134 | "cost = numpy.sum([T.mean(T.nnet.binary_crossentropy(recon, x)) for recon in reconstructions])\n" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "collapsed": false 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "parameters = [W_x, b_h, b_x]\n", 146 | "gradients = T.grad(cost, parameters)\n", 147 | "\n", 148 | "lr = theano.shared(numpy.asarray(learning_rate, dtype='float32'), 'lr')\n", 149 | "train_updates = [(param, param - lr*gradient) for param, gradient in zip(parameters, gradients)]\n" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "collapsed": false 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "# Compile our training and testing function like before!\n", 161 | "# Train function updates the parameters and returns the total train cost to monitor.\n", 162 | "f_train = 
theano.function(\n", 163 | " inputs=[x], \n", 164 | " outputs=cost, \n", 165 | " updates=train_updates, \n", 166 | " allow_input_downcast=True\n", 167 | ")\n", 168 | "\n", 169 | "# Our test function will return the final reconstruction, and it needs to include updates from scan.\n", 170 | "f_test = theano.function(\n", 171 | " inputs=[x], \n", 172 | " outputs=output,\n", 173 | " allow_input_downcast=True\n", 174 | ")\n" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "collapsed": false 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "# That's it! Now perform SGD like before.\n", 186 | "# Main training loop\n", 187 | "\n", 188 | "train_batches = len(train_x) / batch_size\n", 189 | "\n", 190 | "try:\n", 191 | " for epoch in range(epochs):\n", 192 | " print epoch+1,\":\",\n", 193 | "\n", 194 | " # Don't forget to turn on our noise switch for training! Just set the shared variable to 1 (True)\n", 195 | " noise_switch.set_value(1.)\n", 196 | "\n", 197 | " train_costs = []\n", 198 | " for i in range(train_batches):\n", 199 | " batch_x = train_x[i*batch_size:(i+1)*batch_size]\n", 200 | "\n", 201 | " costs = f_train(batch_x)\n", 202 | "\n", 203 | " train_costs.append(costs)\n", 204 | " print \"cost:\", numpy.mean(train_costs),\n", 205 | " \n", 206 | " old_lr = lr.get_value()\n", 207 | " print \"\\tlearning rate:\", old_lr\n", 208 | " new_lr = numpy.asarray(old_lr * lr_decay, dtype='float32')\n", 209 | " lr.set_value(new_lr)\n", 210 | "\n", 211 | " if (epoch+1) % check_frequency == 0:\n", 212 | " print \"Saving images...\"\n", 213 | " train_recons = f_test(train_x[:25])\n", 214 | " train_stacked = numpy.vstack(\n", 215 | " [numpy.vstack([\n", 216 | " train_x[i*5:(i+1)*5],\n", 217 | " train_recons[i*5:(i+1)*5]\n", 218 | " ])\n", 219 | " for i in range(5)]\n", 220 | " )\n", 221 | " train_image = pil_img.fromarray(\n", 222 | " tile_raster_images(train_stacked, (28, 28), (5, 10), (1, 1))\n", 223 | " )\n", 224 | 
" train_image.save(\"dae_train_%d.png\"%(epoch+1))\n", 225 | " \n", 226 | " # For validation, let's run a few images through and see the reconstruction \n", 227 | " # (with the noise from training still added)\n", 228 | " valid_recons = f_test(valid_x[:25])\n", 229 | " # Use the tile_raster_image helper function to rearrange the matrix into a 5x10 image of digits\n", 230 | " # (Two 5x5 images next to each other - the first the inputs, the second the reconstructions.)\n", 231 | " valid_stacked = numpy.vstack(\n", 232 | " [numpy.vstack([\n", 233 | " valid_x[i*5:(i+1)*5],\n", 234 | " valid_recons[i*5:(i+1)*5]\n", 235 | " ])\n", 236 | " for i in range(5)]\n", 237 | " )\n", 238 | " valid_image = pil_img.fromarray(\n", 239 | " # helper from utils.py\n", 240 | " tile_raster_images(valid_stacked, (28, 28), (5, 10), (1, 1))\n", 241 | " )\n", 242 | " valid_image.save(\"dae_valid_%d.png\"%(epoch+1))\n", 243 | "\n", 244 | " # Now do the same for test, but don't add any noise\n", 245 | " # This means set the noise switches to 0. 
(False)\n", 246 | " noise_switch.set_value(0.)\n", 247 | "\n", 248 | " test_recons = f_test(test_x[:25])\n", 249 | " test_stacked = numpy.vstack(\n", 250 | " [numpy.vstack([\n", 251 | " test_x[i*5:(i+1)*5],\n", 252 | " test_recons[i*5:(i+1)*5]\n", 253 | " ])\n", 254 | " for i in range(5)]\n", 255 | " )\n", 256 | " test_image = pil_img.fromarray(\n", 257 | " tile_raster_images(test_stacked, (28, 28), (5, 10), (1, 1))\n", 258 | " )\n", 259 | " test_image.save(\"dae_test_%d.png\"%(epoch+1))\n", 260 | " \n", 261 | " weight_filters = pil_img.fromarray(\n", 262 | " tile_raster_images(\n", 263 | " W_x.get_value(borrow=True).T,\n", 264 | " img_shape=(28, 28),\n", 265 | " tile_shape=(25, 40),\n", 266 | " tile_spacing=(1, 1)\n", 267 | " )\n", 268 | " )\n", 269 | " weight_filters.save(\"dae_filters_%d.png\"%(epoch+1))\n", 270 | "except KeyboardInterrupt:\n", 271 | " pass " 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": { 278 | "collapsed": true 279 | }, 280 | "outputs": [], 281 | "source": [] 282 | } 283 | ], 284 | "metadata": { 285 | "kernelspec": { 286 | "display_name": "Python 2", 287 | "language": "python", 288 | "name": "python2" 289 | }, 290 | "language_info": { 291 | "codemirror_mode": { 292 | "name": "ipython", 293 | "version": 2 294 | }, 295 | "file_extension": ".py", 296 | "mimetype": "text/x-python", 297 | "name": "python", 298 | "nbconvert_exporter": "python", 299 | "pygments_lexer": "ipython2", 300 | "version": "2.7.6" 301 | } 302 | }, 303 | "nbformat": 4, 304 | "nbformat_minor": 0 305 | } 306 | -------------------------------------------------------------------------------- /DAE_theano_with_comments.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# This tutorial is meant to be done after 
MLP_theano_with_comments.ipynb\n", 12 | "\n", 13 | "# We are working with MNIST again, this time no labels are necessary - \n", 14 | "# the denoising autoencoder (DAE) is an unsupervised model that tries to reconstruct the original input.\n", 15 | "\n", 16 | "# All imports up here this time\n", 17 | "import pickle\n", 18 | "import numpy\n", 19 | "import numpy.random as rng\n", 20 | "import theano\n", 21 | "import theano.tensor as T\n", 22 | "import theano.sandbox.rng_mrg as RNG_MRG\n", 23 | "from utils import tile_raster_images\n", 24 | "from PIL import Image as pil_img\n", 25 | "from IPython.display import Image\n", 26 | "\n", 27 | "# Load our data \n", 28 | "# Download and unzip pickled version from here: http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz\n", 29 | "(train_x, _), (valid_x, _), (test_x, _) = pickle.load(open('data/mnist.pkl', 'r'))\n", 30 | "print \"Shapes:\"\n", 31 | "print train_x.shape\n", 32 | "print valid_x.shape\n", 33 | "print test_x.shape" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "# The DAE data flow looks like this: input -> input_(add noise) -> hiddens -> input\n", 45 | "# This can be repeated by sampling from the reconstructed input. Repeating like that is\n", 46 | "# a pseudo Gibbs sampling process. 
We can define how many times we want to repeat (known as walkbacks).\n", 47 | "\n", 48 | "# We can specify any hyperparameters to play with up here:\n", 49 | "input_size = 784 # 28x28 images\n", 50 | "hidden_size = 1000\n", 51 | "w_mean = 0.0\n", 52 | "w_std = 0.05\n", 53 | "noise = 0.3\n", 54 | "walkbacks = 3\n", 55 | "learning_rate = 0.1\n", 56 | "batch_size = 100\n", 57 | "epochs = 100\n", 58 | "check_frequency = 10\n", 59 | "\n", 60 | "# To make the organization better, let's define all the variables and parameters here.\n", 61 | "# Just like with the MLP, we need a symbolic matrix to input the images\n", 62 | "x = T.matrix('x')\n", 63 | "# Next, we need the weights matrix W_x. This will be used to go both from input -> hidden and\n", 64 | "# hidden -> input (by using its transpose). This is called tied weights.\n", 65 | "# Again, initialization has a lot of literature, but we are just going to stick with gaussian at the moment.\n", 66 | "W_x = numpy.asarray(rng.normal(loc=w_mean, scale=w_std, size=(input_size, hidden_size)), dtype=theano.config.floatX)\n", 67 | "# (Don't forget to make parameters into shared variables so they can be updated!)\n", 68 | "W_x = theano.shared(W_x, \"W_x\")\n", 69 | "# Because we are outputting back into the input space, we also need a bias vector for both the input\n", 70 | "# and hidden layers.\n", 71 | "b_x = numpy.zeros((input_size,), dtype=theano.config.floatX)\n", 72 | "b_h = numpy.zeros((hidden_size,), dtype=theano.config.floatX)\n", 73 | "b_x = theano.shared(b_x, \"b_x\")\n", 74 | "b_h = theano.shared(b_h, \"b_h\")\n" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "collapsed": false 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "# Now for the most important part of a denoising autoencoder - making the input noisy!\n", 86 | "# Noise acts as regularization so the autoencoder doesn't just memorize the training set.\n", 87 | "# This makes it more effective for test data 
by reducing overfitting.\n", 88 | "\n", 89 | "# We deal with adding noise during training but not testing in Theano with a switch variable!\n", 90 | "# Switches can be turned on or off to direct data flow in the computation graph - \n", 91 | "# so we turn on for train and off for test!\n", 92 | "# You guessed it - we need a shared variable to represent the switch condition so we can change it at runtime.\n", 93 | "noise_switch = theano.shared(1, \"noise_switch\")\n", 94 | "\n", 95 | "# One important thing to note - the type of noise has to correspond to the type of input.\n", 96 | "# i.e. we can't add real-value noise when the input is expected to be binary\n", 97 | "# So for these binary inputs, we will add salt-and-pepper masking noise!\n", 98 | "# This is a function so we can keep adding it during the computation chain when we alternate sampling\n", 99 | "# from input and reconstructing from hiddens.\n", 100 | "# Theano random number generator\n", 101 | "theano_rng = RNG_MRG.MRG_RandomStreams(1)\n", 102 | "def salt_and_pepper(variable):\n", 103 | " mask = theano_rng.binomial(size=variable.shape, n=1, p=(1-noise), dtype=theano.config.floatX)\n", 104 | " saltpepper = theano_rng.binomial(size=variable.shape, n=1, p=0.5, dtype=theano.config.floatX)\n", 105 | " ones = T.eq(mask, 0) * saltpepper\n", 106 | " # Randomly set some bits to 0 or 1 with equal probability.\n", 107 | " noisy = variable*mask + ones\n", 108 | " return T.switch(noise_switch,\n", 109 | " # true condition\n", 110 | " noisy,\n", 111 | " # false condition\n", 112 | " variable)\n" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": { 119 | "collapsed": false 120 | }, 121 | "outputs": [], 122 | "source": [ 123 | "# Now we are ready to create the computation graph!\n", 124 | "# Remember it is noisy_x -> hiddens -> x -> hiddens -> x .....\n", 125 | "\n", 126 | "inputs=[x]\n", 127 | "for walkback in range(walkbacks):\n", 128 | " # First, we want to corrupt 
the input x\n", 129 | " noisy_x = salt_and_pepper(inputs[-1])\n", 130 | " # Now calculate the hiddens\n", 131 | " h = T.tanh(\n", 132 | " T.dot(noisy_x, W_x) + b_h\n", 133 | " )\n", 134 | " # From the hiddens, reconstruct x.\n", 135 | " # We have to use an appropriate activation function for the type of inputs!\n", 136 | " # In our case with MNIST images, it is binary so sigmoid works.\n", 137 | " reconstruction = T.nnet.sigmoid(\n", 138 | " T.dot(h, W_x.T) + b_x\n", 139 | " )\n", 140 | " # That is all for an autoencoder!\n", 141 | " inputs.append(reconstruction)\n", 142 | " \n", 143 | "# Remove the original input from our reconstructions list\n", 144 | "reconstructions = inputs[1:]\n" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "# The output of our computation graph is the last reconstructed input in the Gibbs chain.\n", 156 | "output = reconstructions[-1]\n", 157 | "\n", 158 | "# Our cost function is now the reconstruction error between all reconstructions and the original input.\n", 159 | "# Again, because our input space is binary, using mean binary cross-entropy is a good analog for \n", 160 | "# reconstruction error.\n", 161 | "# For real-valued inputs, we could use mean square error.\n", 162 | "cost = numpy.sum([T.mean(T.nnet.binary_crossentropy(recon, x)) for recon in reconstructions])\n" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": { 169 | "collapsed": false 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "# Just like with the MLP, compute gradient updates for the parameters to use with training.\n", 174 | "parameters = [W_x, b_h, b_x]\n", 175 | "# Automagic differentiation! 
(Still love it)\n", 176 | "gradients = T.grad(cost, parameters)\n", 177 | "\n", 178 | "# Update the parameters for stochastic gradient descent!\n", 179 | "train_updates = [(param, param - learning_rate*gradient) for param, gradient in zip(parameters, gradients)]\n" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "collapsed": false 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "# Compile our training and testing function like before!\n", 191 | "# Train function updates the parameters and returns the total train cost to monitor.\n", 192 | "f_train = theano.function(\n", 193 | " inputs=[x], \n", 194 | " outputs=cost, \n", 195 | " updates=train_updates, \n", 196 | " allow_input_downcast=True\n", 197 | ")\n", 198 | "\n", 199 | "# Our test function will return the final reconstruction. It takes no `updates` argument, so the parameters are left unchanged.\n", 200 | "f_test = theano.function(\n", 201 | " inputs=[x], \n", 202 | " outputs=output,\n", 203 | " allow_input_downcast=True\n", 204 | ")\n" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": { 211 | "collapsed": false 212 | }, 213 | "outputs": [], 214 | "source": [ 215 | "# That's it! Now perform SGD like before.\n", 216 | "# Main training loop\n", 217 | "\n", 218 | "train_batches = len(train_x) / batch_size\n", 219 | "\n", 220 | "try:\n", 221 | " for epoch in range(epochs):\n", 222 | " print epoch+1, \":\",\n", 223 | "\n", 224 | " # Don't forget to turn on our noise switch for training! 
Just set the shared variable to 1 (True)\n", 225 | " noise_switch.set_value(1.)\n", 226 | "\n", 227 | " train_costs = []\n", 228 | " for i in range(train_batches):\n", 229 | " batch_x = train_x[i*batch_size:(i+1)*batch_size]\n", 230 | "\n", 231 | " costs = f_train(batch_x)\n", 232 | "\n", 233 | " train_costs.append(costs)\n", 234 | " print \"cost:\", numpy.mean(train_costs)\n", 235 | "\n", 236 | " if (epoch+1) % check_frequency == 0:\n", 237 | " print \"Saving images...\"\n", 238 | " # For validation, let's run a few images through and see the reconstruction \n", 239 | " # (with the noise from training still added)\n", 240 | " valid_recons = f_test(valid_x[:25])\n", 241 | " # Use the tile_raster_image helper function to rearrange the matrix into a 5x10 image of digits\n", 242 | " # (Two 5x5 images next to each other - the first the inputs, the second the reconstructions.)\n", 243 | " valid_stacked = numpy.vstack(\n", 244 | " [numpy.vstack([\n", 245 | " valid_x[i*5:(i+1)*5],\n", 246 | " valid_recons[i*5:(i+1)*5]\n", 247 | " ])\n", 248 | " for i in range(5)]\n", 249 | " )\n", 250 | " valid_image = pil_img.fromarray(\n", 251 | " # helper from utils.py\n", 252 | " tile_raster_images(valid_stacked, (28, 28), (5, 10), (1, 1))\n", 253 | " )\n", 254 | " valid_image.save(\"dae_valid_%d.png\"%(epoch+1))\n", 255 | "\n", 256 | " # Now do the same for test, but don't add any noise\n", 257 | " # This means set the noise switches to 0. 
(False)\n", 258 | " noise_switch.set_value(0.)\n", 259 | "\n", 260 | " test_recons = f_test(test_x[:25])\n", 261 | " test_stacked = numpy.vstack(\n", 262 | " [numpy.vstack([\n", 263 | " test_x[i*5:(i+1)*5],\n", 264 | " test_recons[i*5:(i+1)*5]\n", 265 | " ])\n", 266 | " for i in range(5)]\n", 267 | " )\n", 268 | " test_image = pil_img.fromarray(\n", 269 | " tile_raster_images(test_stacked, (28, 28), (5, 10), (1, 1))\n", 270 | " )\n", 271 | " test_image.save(\"dae_test_%d.png\"%(epoch+1))\n", 272 | "except KeyboardInterrupt:\n", 273 | " pass\n", 274 | " \n", 275 | "# Let's finally save an image of the filters the DAE learned - this is simply the transpose of the weights!\n", 276 | "weight_filters = pil_img.fromarray(\n", 277 | " tile_raster_images(\n", 278 | " W_x.get_value(borrow=True).T,\n", 279 | " img_shape=(28, 28),\n", 280 | " tile_shape=(25, 40),\n", 281 | " tile_spacing=(1, 1)\n", 282 | " )\n", 283 | ")\n", 284 | "print \"Saving filters...\"\n", 285 | "weight_filters.save(\"dae_filters.png\")\n", 286 | "print \"Done!\"\n", 287 | " " 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "metadata": { 294 | "collapsed": true 295 | }, 296 | "outputs": [], 297 | "source": [] 298 | } 299 | ], 300 | "metadata": { 301 | "kernelspec": { 302 | "display_name": "Python 2", 303 | "language": "python", 304 | "name": "python2" 305 | }, 306 | "language_info": { 307 | "codemirror_mode": { 308 | "name": "ipython", 309 | "version": 2 310 | }, 311 | "file_extension": ".py", 312 | "mimetype": "text/x-python", 313 | "name": "python", 314 | "nbconvert_exporter": "python", 315 | "pygments_lexer": "ipython2", 316 | "version": "2.7.6" 317 | } 318 | }, 319 | "nbformat": 4, 320 | "nbformat_minor": 0 321 | } 322 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Markus 
Beissinger 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /MLP_opendeep.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# imports and logger!\n", 12 | "from opendeep.log import config_root_logger\n", 13 | "from opendeep.models import Prototype, Dense, SoftmaxLayer\n", 14 | "from opendeep.optimization import SGD\n", 15 | "from opendeep.data import MNIST\n", 16 | "from opendeep.monitor import Monitor, FileService\n", 17 | "\n", 18 | "config_root_logger()" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "# Create the MLP with two hidden layers and one classification layer\n", 30 | "mlp = Prototype()\n", 31 | "mlp.add(\n", 32 | " Dense(input_size=28*28, output_size=1000, activation='tanh', noise='dropout', noise_level=0.3)\n", 33 | ")\n", 34 | "mlp.add(\n", 35 | " Dense(output_size=1000, activation='tanh', noise='dropout', noise_level=0.3)\n", 36 | ")\n", 37 | "mlp.add(\n", 38 | " SoftmaxLayer(output_size=10)\n", 39 | ")" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "collapsed": false 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "# Create the MNIST data object\n", 51 | "mnist = MNIST(concat_train_valid=True)\n", 52 | "\n", 53 | "# Create the optimizer object\n", 54 | "optimizer = SGD(model=mlp, \n", 55 | " dataset=mnist, \n", 56 | " epochs=100, \n", 57 | " batch_size=500, \n", 58 | " learning_rate=.01, \n", 59 | " momentum=.9,\n", 60 | " nesterov_momentum=True)\n", 61 | "\n", 62 | "# Make a monitor to watch the train and test prediction errors\n", 63 | "errorMonitor = Monitor('error', mlp.get_monitors()['softmax_error'], train=True, test=True)" 64 
| ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "collapsed": false 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "# Train the model with the optimizer!\n", 75 | "optimizer.train(monitor_channels=[errorMonitor])" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "# Make some predictions on test data!\n", 87 | "test_data, test_labels = mnist.test_inputs, mnist.test_targets\n", 88 | "\n", 89 | "n=30\n", 90 | "predictions = mlp.run(test_data)\n", 91 | "labels = test_labels.astype('int32')\n", 92 | "\n", 93 | "print \"Predictions:\", predictions[:n]\n", 94 | "print \"Correct: \", labels[:n]\n", 95 | "print \"Accuracy: \", sum((predictions==labels) * 1./len(labels))*100, \"%\"" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "collapsed": true 103 | }, 104 | "outputs": [], 105 | "source": [] 106 | } 107 | ], 108 | "metadata": { 109 | "kernelspec": { 110 | "display_name": "Python 2", 111 | "language": "python", 112 | "name": "python2" 113 | }, 114 | "language_info": { 115 | "codemirror_mode": { 116 | "name": "ipython", 117 | "version": 2 118 | }, 119 | "file_extension": ".py", 120 | "mimetype": "text/x-python", 121 | "name": "python", 122 | "nbconvert_exporter": "python", 123 | "pygments_lexer": "ipython2", 124 | "version": "2.7.6" 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 0 129 | } 130 | -------------------------------------------------------------------------------- /MLP_theano.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 41, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | " Shapes:\n", 15 | "(50000, 784) 
(50000,)\n", 16 | "(10000, 784) (10000,)\n", 17 | "(10000, 784) (10000,)\n", 18 | "--------------\n", 19 | "Example input:\n", 20 | "[ 0. 0. 0. 0. 0. 0. 0.\n", 21 | " 0. 0. 0. 0. 0. 0. 0.\n", 22 | " 0. 0. 0. 0. 0. 0. 0.\n", 23 | " 0. 0. 0. 0. 0. 0. 0.\n", 24 | " 0. 0. 0. 0. 0. 0. 0.\n", 25 | " 0. 0. 0. 0. 0. 0. 0.\n", 26 | " 0. 0. 0. 0. 0. 0. 0.\n", 27 | " 0. 0. 0. 0. 0. 0. 0.\n", 28 | " 0. 0. 0. 0. 0. 0. 0.\n", 29 | " 0. 0. 0. 0. 0. 0. 0.\n", 30 | " 0. 0. 0. 0. 0. 0. 0.\n", 31 | " 0. 0. 0. 0. 0. 0. 0.\n", 32 | " 0. 0. 0. 0. 0. 0. 0.\n", 33 | " 0. 0. 0. 0. 0. 0. 0.\n", 34 | " 0. 0. 0. 0. 0. 0. 0.\n", 35 | " 0. 0. 0. 0. 0. 0. 0.\n", 36 | " 0. 0. 0. 0. 0. 0. 0.\n", 37 | " 0. 0. 0. 0. 0. 0. 0.\n", 38 | " 0. 0. 0. 0. 0. 0. 0.\n", 39 | " 0. 0. 0. 0. 0. 0. 0.\n", 40 | " 0. 0. 0. 0. 0. 0. 0.\n", 41 | " 0. 0. 0. 0. 0. 0.01171875\n", 42 | " 0.0703125 0.0703125 0.0703125 0.4921875 0.53125 0.68359375\n", 43 | " 0.1015625 0.6484375 0.99609375 0.96484375 0.49609375 0. 0.\n", 44 | " 0. 0. 0. 0. 0. 0. 0.\n", 45 | " 0. 0. 0. 0.1171875 0.140625 0.3671875\n", 46 | " 0.6015625 0.6640625 0.98828125 0.98828125 0.98828125 0.98828125\n", 47 | " 0.98828125 0.87890625 0.671875 0.98828125 0.9453125 0.76171875\n", 48 | " 0.25 0. 0. 0. 0. 0. 0.\n", 49 | " 0. 0. 0. 0. 0. 0.19140625\n", 50 | " 0.9296875 0.98828125 0.98828125 0.98828125 0.98828125 0.98828125\n", 51 | " 0.98828125 0.98828125 0.98828125 0.98046875 0.36328125 0.3203125\n", 52 | " 0.3203125 0.21875 0.15234375 0. 0. 0. 0.\n", 53 | " 0. 0. 0. 0. 0. 0. 0.\n", 54 | " 0. 0.0703125 0.85546875 0.98828125 0.98828125 0.98828125\n", 55 | " 0.98828125 0.98828125 0.7734375 0.7109375 0.96484375 0.94140625\n", 56 | " 0. 0. 0. 0. 0. 0. 0.\n", 57 | " 0. 0. 0. 0. 0. 0. 0.\n", 58 | " 0. 0. 0. 0. 0.3125 0.609375\n", 59 | " 0.41796875 0.98828125 0.98828125 0.80078125 0.04296875 0.\n", 60 | " 0.16796875 0.6015625 0. 0. 0. 0. 0.\n", 61 | " 0. 0. 0. 0. 0. 0. 0.\n", 62 | " 0. 0. 0. 0. 0. 0. 
0.\n", 63 | " 0.0546875 0.00390625 0.6015625 0.98828125 0.3515625 0. 0.\n", 64 | " 0. 0. 0. 0. 0. 0. 0.\n", 65 | " 0. 0. 0. 0. 0. 0. 0.\n", 66 | " 0. 0. 0. 0. 0. 0. 0.\n", 67 | " 0. 0. 0.54296875 0.98828125 0.7421875 0.0078125 0.\n", 68 | " 0. 0. 0. 0. 0. 0. 0.\n", 69 | " 0. 0. 0. 0. 0. 0. 0.\n", 70 | " 0. 0. 0. 0. 0. 0. 0.\n", 71 | " 0. 0. 0.04296875 0.7421875 0.98828125 0.2734375 0.\n", 72 | " 0. 0. 0. 0. 0. 0. 0.\n", 73 | " 0. 0. 0. 0. 0. 0. 0.\n", 74 | " 0. 0. 0. 0. 0. 0. 0.\n", 75 | " 0. 0. 0. 0.13671875 0.94140625 0.87890625\n", 76 | " 0.625 0.421875 0.00390625 0. 0. 0. 0.\n", 77 | " 0. 0. 0. 0. 0. 0. 0.\n", 78 | " 0. 0. 0. 0. 0. 0. 0.\n", 79 | " 0. 0. 0. 0. 0. 0.31640625\n", 80 | " 0.9375 0.98828125 0.98828125 0.46484375 0.09765625 0. 0.\n", 81 | " 0. 0. 0. 0. 0. 0. 0.\n", 82 | " 0. 0. 0. 0. 0. 0. 0.\n", 83 | " 0. 0. 0. 0. 0. 0. 0.\n", 84 | " 0.17578125 0.7265625 0.98828125 0.98828125 0.5859375 0.10546875\n", 85 | " 0. 0. 0. 0. 0. 0. 0.\n", 86 | " 0. 0. 0. 0. 0. 0. 0.\n", 87 | " 0. 0. 0. 0. 0. 0. 0.\n", 88 | " 0. 0. 0.0625 0.36328125 0.984375 0.98828125\n", 89 | " 0.73046875 0. 0. 0. 0. 0. 0.\n", 90 | " 0. 0. 0. 0. 0. 0. 0.\n", 91 | " 0. 0. 0. 0. 0. 0. 0.\n", 92 | " 0. 0. 0. 0. 0. 0.97265625\n", 93 | " 0.98828125 0.97265625 0.25 0. 0. 0. 0.\n", 94 | " 0. 0. 0. 0. 0. 0. 0.\n", 95 | " 0. 0. 0. 0. 0. 0. 0.\n", 96 | " 0. 0. 0. 0.1796875 0.5078125 0.71484375\n", 97 | " 0.98828125 0.98828125 0.80859375 0.0078125 0. 0. 0.\n", 98 | " 0. 0. 0. 0. 0. 0. 0.\n", 99 | " 0. 0. 0. 0. 0. 0. 0.\n", 100 | " 0. 0. 0.15234375 0.578125 0.89453125 0.98828125\n", 101 | " 0.98828125 0.98828125 0.9765625 0.7109375 0. 0. 0.\n", 102 | " 0. 0. 0. 0. 0. 0. 0.\n", 103 | " 0. 0. 0. 0. 0. 0. 0.\n", 104 | " 0. 0.09375 0.4453125 0.86328125 0.98828125 0.98828125\n", 105 | " 0.98828125 0.98828125 0.78515625 0.3046875 0. 0. 0.\n", 106 | " 0. 0. 0. 0. 0. 0. 0.\n", 107 | " 0. 0. 0. 0. 0. 0. 
0.\n", 108 | " 0.08984375 0.2578125 0.83203125 0.98828125 0.98828125 0.98828125\n", 109 | " 0.98828125 0.7734375 0.31640625 0.0078125 0. 0. 0.\n", 110 | " 0. 0. 0. 0. 0. 0. 0.\n", 111 | " 0. 0. 0. 0. 0. 0. 0.0703125\n", 112 | " 0.66796875 0.85546875 0.98828125 0.98828125 0.98828125 0.98828125\n", 113 | " 0.76171875 0.3125 0.03515625 0. 0. 0. 0.\n", 114 | " 0. 0. 0. 0. 0. 0. 0.\n", 115 | " 0. 0. 0. 0. 0. 0.21484375\n", 116 | " 0.671875 0.8828125 0.98828125 0.98828125 0.98828125 0.98828125\n", 117 | " 0.953125 0.51953125 0.04296875 0. 0. 0. 0.\n", 118 | " 0. 0. 0. 0. 0. 0. 0.\n", 119 | " 0. 0. 0. 0. 0. 0. 0.\n", 120 | " 0.53125 0.98828125 0.98828125 0.98828125 0.828125 0.52734375\n", 121 | " 0.515625 0.0625 0. 0. 0. 0. 0.\n", 122 | " 0. 0. 0. 0. 0. 0. 0.\n", 123 | " 0. 0. 0. 0. 0. 0. 0.\n", 124 | " 0. 0. 0. 0. 0. 0. 0.\n", 125 | " 0. 0. 0. 0. 0. 0. 0.\n", 126 | " 0. 0. 0. 0. 0. 0. 0.\n", 127 | " 0. 0. 0. 0. 0. 0. 0.\n", 128 | " 0. 0. 0. 0. 0. 0. 0.\n", 129 | " 0. 0. 0. 0. 0. 0. 0.\n", 130 | " 0. 0. 0. 0. 0. 0. 0.\n", 131 | " 0. 0. 0. 0. 0. 0. 0.\n", 132 | " 0. 0. 0. 0. 0. 0. 0.\n", 133 | " 0. 0. 0. 0. 0. 0. 0.\n", 134 | " 0. 0. 0. 0. 0. 0. 0.\n", 135 | " 0. 0. 0. 0. 
]\n", 136 | "Example label:\n", 137 | "5\n" 138 | ] 139 | } 140 | ], 141 | "source": [ 142 | "from IPython.display import Image\n", 143 | "import pickle\n", 144 | "\n", 145 | "# Download and unzip pickled version from here: http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz\n", 146 | "(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = pickle.load(open('data/mnist.pkl', 'r'))\n", 147 | "print \"Shapes:\"\n", 148 | "print train_x.shape, train_y.shape\n", 149 | "print valid_x.shape, valid_y.shape\n", 150 | "print test_x.shape, test_y.shape\n", 151 | "\n", 152 | "print \"--------------\"\n", 153 | "print \"Example input:\"\n", 154 | "print train_x[0]\n", 155 | "print \"Example label:\"\n", 156 | "print train_y[0]\n" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 42, 162 | "metadata": { 163 | "collapsed": false 164 | }, 165 | "outputs": [ 166 | { 167 | "data": { 168 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAtQAAAAcCAAAAABkYnfcAAAQtklEQVR4nO1beVxV5dZ+EgQHUJFS\nLBw+FUcKmhyupab3NthgJlamxrW+zMzUrlfDm+ZshqalmUJq95opNpioOXyCQ0maESY4oCiiIIMT\ng4Cw13rPvn+cI3DOXu9RKX99f/D8tfdae717nX2e/b5reDdQgxrUoAY1qEENbgRBpzL+bBdqUIM/\nFIvy6Ls/2wdn/E9Mefs/24cbxpr0rtW0vG8lr7z3D/Xl9+AjW3KLW3qDuHhXSaeJaWsmTvT642/V\nZC/TwcZ//Li/A90LTy5oVE3boPgAvbIX23pWc1w9EswTtWVNLT8/vynvr2/2pa30PUEdcomILrod\nu09OO53q3er9Fp+AVyeJLGp5kelRnVVQp5E2Zmb+Vvqpnj333sCdF5Quc5G8XmSapmn2uQHja6j/\nyksfRpd8Hx0dHT3jfu1VQbHE/+ytUa4tvOsmblgFQ79ILi/f08D9RfUOZLWU5P1KF9S9iXv5BFS9\neixN9tBdGZ5ocOIYrdotItT7sqJ5uWkK7jZvMyxqHTMzZ3zNhT/0sl7x4Fmmy7nUrSpNHn7W+Zp3\nNulIHX7F4IdvwG9ntPr0IBF9LKnqrdeSulPk6TPEREREK3yten91rul1b/1+aeEgF1HjXNM0TfOy\n9l2y4gNVCTo0qZV8VTciflEzQt1M9eqN368S/t/xxU2bitQR/SXN7rmn+cvqqI+galu89babudtM\nGlfl7CGiNpoLw+MMgw1DXmNbLCxXX+pv4nOOr74iau42zfW1LNLQS+yAMXTAgK5WatbtcZqYfh5I\nPKmKdMoqp4tuW3ZIF4a9Z8ik7rL4EPO45xd3EXTtl14hPp1MOeKgC7WkjiWia6Smv1j1/kqFaNys\nxC76P4tsZLGZYZrmh+4tW7Sflpe30n6cppQ6HxcXF7ckLlEp1U80CDrF/Ix2uHg1SasDgH9E/If5\nsEX8y4U5fkD7QpoiWgVHbduWzjxrvW2HEPZ479onTAbX4D
fyLLOzUzMpv8ovGKghdcNe6VcMI/mw\nhtTDC375+xTWBvIeE5gzOkoaz9Wm6ToDAfA7wczMCd+XFsgjrrLzJDyO11SRpjmT+k71H41DfS8Y\nyS28rfLnc4jjDhHxWouqwdLLRHSsRVuiHsKIDXdqST2a6NzMWTNnxmlJbdOT+uFtfgDw4oXUe6zK\ng2ayaZqttcYA+i65rJj5qP2s9ROtWzviS5/TSrkGNHbMoI136gd8TvtQAfQctc4gIip3nZD/6vif\npqt00fAtZi75dyazGiJoI0vdhDzdfmIiopVVZTOJLlWEV/UTiKQXsf9mgw3DGBYukrp2ROmshriX\ntY/iIWZ+QtQsMkVSo/9no5gT66FTlGh23yXm+Lc5855nbFXpd8qZ1FvUZNmfHpmGMcwq9uhWSPG9\nPet/TzzeogwnIkoNhIbUzU4yvSuvYh6BgU0BwPcM0ddCQO6vVDfZTwDHuAcApNieFZRhSaZpmuJs\nYcdn+5VSBUuGC+/vYKVKH5BsEkrS2upHRKC6KqddAbvOnCkg/pmIiDJclE+k2mfOu1W2NOdOLeYV\nc29HSC7nCq56ZW/R++OfTDlLH19DR6vGoa8Q0eZGjpNQEkk9hJltzPxyOLPwx43g0QDeydbdt+Wv\nzNvrSJrXSjSkhi+ieLBuwJBLRBvr94u4HeCiygLI3cXOpE5QmsJKNHOcIA4n2uILDCHKuN2i3EyU\ntqY58JRMakxmojd1DgMAwoqIFgpyf6X0hr9SXwAhhXKQG3DINM2vdbaNo9SFA8+1ay6oai8tUSpU\nMnqGaY67VDDQpkZI8r7pRETUrnG73qeJXFno7cib2in1umAcqdIDgDbr1JU3BO3kIjdFrr30PYC2\n54uqrmS1JhPRtdi/2XEp/BhywShKyzWMogHiTO2XF+MBtEjTkjqZ+fJfJcXwMjNRQ2ogkuM1yUHQ\nas49ONB+zLS6Qv6OciJ1k2wVKNr7s3H+Eat4JtPHvgCOEgkhZbOp3e8AgFc1pMb1SP1CHBFJ01TD\ny2qBzmiGkXI7UG8N7fUUtEM+UKZpjtMZL+SP6suaR1YoVTZCmBTRcB7TBAAYM2+eaBqo1GhJvp2I\nit98AMASojR/jUueKWq+IO6Swv+u1yyWL4yTjH7YqhkNAHbQCABtz2c5xQkNjhElOaLzUBJI3Z8N\nYwfCDWM0JFJ7HE3xA/CDitTdVzHPcpX59Hhl6SXTfKuNltT14vlvosIrlvIfbeyYTZh+qFCsVO9U\nvW6VOtZQsm+ZyIaQrkzh0u/qAN5PF/M02SMAwHIdqW3Mbkj9UkopEf0irlexWlIH5pT2BLCMzgjK\nDkcN09TG1HWnnXr6GYm2AB40lFJXn5DeE5+dNu4KvD3uFLNNiic1pP5bIdEpe8IQSyQR144kkdRe\n0ZzZP51ZfF0eMjoA6NVJHjCOB3l3ioxPDnYWLyeiEAC1R4/eI5A6PJ+LdnRE+C+jPVB/r5XUL3BP\nAIONwiaa3/Ghjbdbnm7HVNPMjwrybKUlNVoXZHwu0aQbUWWJ2ZnUlTVb30GxV9VL4rivG7zVWi5t\nmEPfAWiznyimnsajtyImRfxMe6zVGrsnmpm65eTdu3czEV16TV7XtaQOPkELAIwvI2nFH3DVNE3T\nNBeJtrN5jYbSwDx7Ue/nycEW1ZPE6W0R8i1R4RH+SYg1NaTeRrSnDwA0GpzvOJLgdVRNleTzmVlx\nlLiyLj3khfCLqnSUOGIO7dtPNNBV/CoRjUT38VOJiCjFtaRy0uAIAG2aAEC8ldRbjngCTXOtk7ED\nn2SpJGuICu/g4BYAWpnm3zWGeDafeaI1K0ngKr01G/9YcbxShQG4J3T8wiUFRXkbC0gsx/TPN3YL\n798dRM3vmLi3gMl4SvSm7gMbmW3MmZpqg47UwaeulfQ2yIaIlQsKHuFs432TvAL2l68Q7caUmqY2\nprappzW3A7pvznPUqu
fd4azxGU2Z0xG0mnO/CO3JRyRS20RSP5cYby+2RxD9pi+7t1OqK+Df512X\nKu1bzKw2Bok25QNRO32gz/Mlj0naw8XEVGhNlr+wl4TtTS92qSeH5HGVs53WRFG9C/ju5U80lcQH\ns1iNlFUA4J9trtcqg7czL3FdA58sobGVZ0yLK46X8MWkpCRW5fkJC166yzO3XBqzJTOvFOQNs4mJ\n6MxZEnMDzwfPUlHmukKi7H/KHVAtqdMrHq5cAUKsypfEQ4g4lWifxiMATwwePKxAQ+r9fEbMY+xo\nfu/j0ayUUjud85bHiaagSSzlL/bqfDR/sWCpi6kdeKqUrkrJHgDAq/VwpQ4uT8pQBZ87KWp9pZTa\nKBt1Uv3R9VMAi3aL+q4DmT63ikPtfQFHfyDaSdf5pGFUngnhRx8VjEdPq+O6/udSpQ430+gAIMEc\no1c2HErs2nQIo3MVs7fXHN5eJReauGHDhg0bhttrHq+pNGnITw3DEPuMXc5z6gcdm+4iKRao/TTR\n5L/A7yAR0fNio9zGvE78ES3+dX/nzp07LyDSkHqcSOrnjdLs3iFxREzGWW0t+rZpZpq1sdylNvym\nckEHnRUA4KV9Sik1wUk2kQjYS9QT3YjETDFQKXf7C5joNUFcp8WAuQcOpCillJGe/t79rZzVXzEz\nx8oj9lEd4NMYQEeWLwhmEqZ4B6lTj82fYiF1vFGV1EKiGFxy5OzVApuOmWMNZnecRoI2pgYAlHGZ\nS488jCrq914zKEPbJI5RcwVpyEnD0NbAADxsk7IVzzlEmxri9gNcOu0boq2PhIZarmEiclMyRgMt\nqZ9TxcI6H39yOICOPxIT6fsd3qZ51DUSDUg8PwTwZ+7uxh0AHruUUs6NgDn8LUKyeCyCTvFY0ShQ\nKTcVv9k2ZuuUUWfuEaWUys8sVyrKWp5r9i/FBz7jffKQfZSjce6rIfWLNtaQOu+XFx1HFlKPqzhp\nn2ecsISjw3avefpMnCYpCUzh8k9klQMJStjY4cDd07cwJ7kU9sLoI8dRyGr6Rj9wjNjSyDOMHzV1\nLgDAo0zWBKDW+1TwRiM8sI+O9YbvY6sKqPLFqsAnJJehr2GQltTPqBLhXxkTCAAPX6ZBHTvqm8SR\npvlPV1l28ZsAZvI2N71lAMB8pZRzm2kOfYOQs7TiTN7+O+XH5JbUtbcwjbIWYrep0o0f9+lxF46p\nNGHUoawifIaynDigr81B6qfEIA3oz3HCJijfFbs+tefBoUSU0aiqLr5K7619KmcLzVrgozKxNwW0\nOcIszZd2BHXpcvedYafC7mi5Stpr0G5xFjOXf+8iHsQZ9oO3L7Gbjq2G1GwYur06jgsEUo+kwhf8\nHl93habYJ8UXN22yNt1Gi6T27Oeo4g0v1JIaR9QSjabBYj4uyf1j7ZWdZgVCSS+iWCmVqtI1LYuA\nKY61sdYOpcofctJ1I+r2ej4R5+p8DVRKGwvVfY14lVBGtZ0KBQCPuVdyBJ96XeYnvVueYHlXSMVM\n7blZLvN02JhjqX04IZSIyGku3snsmPXrf8t8QoxGe/F0zXj9mFnY7QAAXh0/LDbNsoumaeblm9Ya\nd9O3TzIz77fk8GFU9nFIYFhsBqevcbcVO8YmdMJX2sSeaCXEmTqbihOPEdG7cjHPgePMNtf/+6Et\nFAgAfkMuExX11lguLNBV3yIoW5wXV5vHerXF/YOTTHOetfY9fnVubt6mdrK3TX9T9vynyVyl1G/O\nyvsK7amVmCMCcJso+qwlektqmKlfPQB4b1Sl0qo8S+2E55t5SpNfBmSNBADPzw6LHZ0GGfQPna92\ntMokouVVd5f2uWAYO+PHPBa/c69RFCHv5Dx3VregD2WOayRqmq41TdM8l7U9MjIyMjLStWDa5JHD\nzMwJz1qfURgRZR0loh91r5IdMSrcIgvJ5NJ52vItALwhkTqJiCh2fBv3227XE7ErqQ8S
LZo9e/bs\nA0y04zmd5cJ8Te+0xUljmqjonmCa6ZsLTVMdcRdMSVirVEgdoM57BUrZCl2Tvn5xTLR8rD4XrJ2i\nJXUHolRRcUyt2DA9/DD9FCppZ3K8ZxhfkHf5ABhVMtI39OXU3+StRcvoC62vDnTJIiKn3kPPCwYb\nhmGwsUOY+QDg/nKpnQ8AOM2sWRreNk1zU2/N5wF+X51gZv6hv9SAu+snIibK/Uh3UwdihE1ovQwW\nSyKVCLaxldQ+QxdENNH4WonHZVI7SkvnlulfpoVqgKw4LhWrAADz37B3Xtx/JSHhf5VSifHxiUop\nVXgzXxg4cEBpyhTto0kqbAPAjNKysrKvxDozsIxjdjHLzQEAwKgS5vzp8vPvW1yk3yZ7DQ/kVO3X\nAcCd09gwDCNrraZo5/1ruq4D1+m8LlBCq0Nbh2tUXb4+w8x8ZZZm2ICpxDTf3U4yAECMrTqkxnGq\n7udlLZKtpA5dbi8sJX1sbd5V4lypZtfuJNKwHfCeMOFL08y/76bdbPXltc8Eyj+Qto1fD9HK8l2Z\nHauJtBVqdxjLrC5MEzcQXBctLxZrn1AVDP51hiuZXk4xkodpQmNgJGv/r1eu8sQbda8S7zNzypyZ\n4r6Nm0C4MFM33X1dUodTnLvC3M3Ca8R5+nrEdb5sWfvbrf280RleL0SN37MnKuqF0GqZt/xJXpg7\nbaQl2q/n3KHRhKKd46rlCuosppjqWV4HR5L0oWbG+dBbcs9bCN+ttE638tTADebSyWpx+vfgDf7x\nFnx3DCCnWmvO/1/4LnLbQ6mBBn2kDbu3Fg9mTqvmV8A1qEENfgf+C/434fbpcuVtAAAAAElFTkSu\nQmCC\n", 169 | "text/plain": [ 170 | "" 171 | ] 172 | }, 173 | "execution_count": 42, 174 | "metadata": {}, 175 | "output_type": "execute_result" 176 | } 177 | ], 178 | "source": [ 179 | "# Show example images - using tile_raster_images helper function from OpenDeep to get 28x28 image from 784 array.\n", 180 | "from utils import tile_raster_images\n", 181 | "from PIL import Image as pil_img\n", 182 | "\n", 183 | "input_images = train_x[:25]\n", 184 | "im = pil_img.fromarray(\n", 185 | " tile_raster_images(input_images, \n", 186 | " img_shape=(28, 28), \n", 187 | " tile_shape=(1, 25),\n", 188 | " tile_spacing=(1, 1))\n", 189 | ")\n", 190 | "im.save(\"some_mnist_numbers.png\")\n", 191 | "Image(filename=\"some_mnist_numbers.png\")\n" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 43, 197 | "metadata": { 198 | "collapsed": false 199 | }, 200 | "outputs": [], 201 | "source": [ 202 | "# Your basic Theano imports.\n", 203 | "import theano\n", 204 | "import theano.tensor as T\n", 205 | "\n", 206 | "x = T.matrix('x')\n" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | 
"execution_count": 44, 212 | "metadata": { 213 | "collapsed": false 214 | }, 215 | "outputs": [], 216 | "source": [ 217 | "# Compute the hidden layer from the input\n", 218 | "import numpy\n", 219 | "import numpy.random as rng\n", 220 | "\n", 221 | "i = numpy.sqrt(6. / (784+500))\n", 222 | "# W_x = numpy.asarray(rng.normal(loc=0.0, scale=.05, size=(28*28, 500)), dtype=theano.config.floatX)\n", 223 | "W_x = numpy.asarray(rng.uniform(low=-i, high=i, size=(28*28, 500)), dtype=theano.config.floatX)\n", 224 | "b_h = numpy.zeros(shape=(500,), dtype=theano.config.floatX)\n", 225 | "\n", 226 | "W_x = theano.shared(W_x, name=\"W_x\")\n", 227 | "b_h = theano.shared(b_h, name=\"b_h\")\n", 228 | "\n", 229 | "h = T.tanh(\n", 230 | " T.dot(x, W_x) + b_h\n", 231 | ")\n" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 45, 237 | "metadata": { 238 | "collapsed": false 239 | }, 240 | "outputs": [], 241 | "source": [ 242 | "# Compute the output class probabilities from the hidden layer\n", 243 | "i = numpy.sqrt(6. 
/ (510))\n", 244 | "# W_h = numpy.asarray(rng.normal(loc=0.0, scale=.05, size=(500, 10)), dtype=theano.config.floatX)\n", 245 | "W_h = numpy.asarray(rng.uniform(low=-i, high=i, size=(500, 10)), dtype=theano.config.floatX)\n", 246 | "b_y = numpy.zeros(shape=(10,), dtype=\"float32\")\n", 247 | "\n", 248 | "W_h = theano.shared(W_h, name=\"W_h\")\n", 249 | "b_y = theano.shared(b_y, name=\"b_y\")\n", 250 | "\n", 251 | "y = T.nnet.softmax(\n", 252 | " T.dot(h, W_h) + b_y\n", 253 | ")\n", 254 | "\n", 255 | "# The actual predicted label\n", 256 | "y_hat = T.argmax(y, axis=1)\n" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 46, 262 | "metadata": { 263 | "collapsed": false 264 | }, 265 | "outputs": [], 266 | "source": [ 267 | "# Find cost compared to correct labels\n", 268 | "correct_labels = T.ivector(\"labels\")\n", 269 | "\n", 270 | "log_likelihood = T.log(y)[T.arange(correct_labels.shape[0]), correct_labels]\n", 271 | "cost = -T.mean(log_likelihood)\n" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 47, 277 | "metadata": { 278 | "collapsed": false 279 | }, 280 | "outputs": [], 281 | "source": [ 282 | "# Compute gradient updates for the parameters\n", 283 | "parameters = [W_x, b_h, W_h, b_y]\n", 284 | "gradients = T.grad(cost, parameters)\n", 285 | "\n", 286 | "learning_rate = 0.01\n", 287 | "train_updates = [(param, param - learning_rate*gradient) for param, gradient in zip(parameters, gradients)]\n" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 48, 293 | "metadata": { 294 | "collapsed": false 295 | }, 296 | "outputs": [], 297 | "source": [ 298 | "# Compile function for training (changes parameters via updates) and testing (no updates)\n", 299 | "f_train = theano.function(\n", 300 | " inputs=[x, correct_labels], \n", 301 | " outputs=cost, \n", 302 | " updates=train_updates, \n", 303 | " allow_input_downcast=True\n", 304 | ")\n", 305 | "\n", 306 | "f_test = theano.function(\n", 307 
| " inputs=[x], \n", 308 | " outputs=y_hat, \n", 309 | " allow_input_downcast=True\n", 310 | ")\n" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 49, 316 | "metadata": { 317 | "collapsed": false 318 | }, 319 | "outputs": [ 320 | { 321 | "name": "stdout", 322 | "output_type": "stream", 323 | "text": [ 324 | "1 : cost: 0.910093 \ttrain: 79.788% \tvalid: 88.05% \ttest: 87.49%\n", 325 | "2 : cost: 0.492869 \ttrain: 87.558% \tvalid: 89.6% \ttest: 88.99%\n", 326 | "3 : cost: 0.419445 \ttrain: 88.848% \tvalid: 90.26% \ttest: 89.9%\n", 327 | "saving filters...\n", 328 | "4 : cost: 0.38414 \ttrain: 89.536% \tvalid: 90.67% \ttest: 90.37%\n", 329 | "5 : cost: 0.362308 \ttrain: 89.99% \tvalid: 91.1% \ttest: 90.69%\n", 330 | "6 : cost: 0.346996 \ttrain: 90.424% \tvalid: 91.34% \ttest: 90.95%\n", 331 | "saving filters...\n", 332 | "7 : cost: 0.335397 \ttrain: 90.724% \tvalid: 91.51% \ttest: 91.15%\n", 333 | "8 : cost: 0.326137 \ttrain: 90.982% \tvalid: 91.76% \ttest: 91.42%\n", 334 | "9 : cost: 0.318454 \ttrain: 91.17% \tvalid: 91.89% \ttest: 91.65%\n", 335 | "saving filters...\n", 336 | "10 : cost: 0.311887 \ttrain: 91.354% \tvalid: 92.02% \ttest: 91.8%\n", 337 | "11 : cost: 0.306141 \ttrain: 91.554% \tvalid: 92.08% \ttest: 91.98%\n", 338 | "12 : cost: 0.301014 \ttrain: 91.684% \tvalid: 92.2% \ttest: 92.04%\n", 339 | "saving filters...\n", 340 | "13 : cost: 0.296363 \ttrain: 91.804% \tvalid: 92.22% \ttest: 92.08%\n", 341 | "14 : cost: 0.292087 \ttrain: 91.922% \tvalid: 92.22% \ttest: 92.17%\n", 342 | "15 : cost: 0.288107 \ttrain: 92.034% \tvalid: 92.31% \ttest: 92.23%\n", 343 | "saving filters...\n", 344 | "16 : cost: 0.284364 \ttrain: 92.126% \tvalid: 92.35% \ttest: 92.32%\n", 345 | "17 : cost: 0.280813 \ttrain: 92.228% \tvalid: 92.4% \ttest: 92.39%\n", 346 | "18 : cost: 0.277417 \ttrain: 92.332% \tvalid: 92.45% \ttest: 92.44%\n", 347 | "saving filters...\n", 348 | "19 : cost: 0.274148 \ttrain: 92.442% \tvalid: 92.52% \ttest: 92.48%\n", 349 | "20 
: cost: 0.270981 \ttrain: 92.53% \tvalid: 92.6% \ttest: 92.57%\n", 350 | "21 : cost: 0.267898 \ttrain: 92.618% \tvalid: 92.64% \ttest: 92.67%\n", 351 | "saving filters...\n", 352 | "22 : cost: 0.264884 \ttrain: 92.734% \tvalid: 92.73% \ttest: 92.72%\n", 353 | "23 : cost: 0.261926 \ttrain: 92.814% \tvalid: 92.85% \ttest: 92.8%\n", 354 | "24 : cost: 0.259013 \ttrain: 92.89% \tvalid: 92.93% \ttest: 92.93%\n", 355 | "saving filters...\n", 356 | "25 : cost: 0.256139 \ttrain: 92.984% \tvalid: 93.05% \ttest: 92.95%\n", 357 | "26 : cost: 0.253295 \ttrain: 93.068% \tvalid: 93.16% \ttest: 92.98%\n", 358 | "27 : cost: 0.250478 \ttrain: 93.148% \tvalid: 93.22% \ttest: 93.08%\n", 359 | "saving filters...\n", 360 | "28 : cost: 0.247683 \ttrain: 93.214% \tvalid: 93.29% \ttest: 93.14%\n", 361 | "29 : cost: 0.244908 \ttrain: 93.292% \tvalid: 93.42% \ttest: 93.14%\n", 362 | "30 : cost: 0.24215 \ttrain: 93.374% \tvalid: 93.53% \ttest: 93.21%\n", 363 | "saving filters...\n", 364 | "31 : cost: 0.239409 \ttrain: 93.462% \tvalid: 93.61% \ttest: 93.26%\n", 365 | "32 : cost: 0.236685 \ttrain: 93.538% \tvalid: 93.72% \ttest: 93.36%\n", 366 | "33 : cost: 0.233977 \ttrain: 93.642% \tvalid: 93.79% \ttest: 93.39%\n", 367 | "saving filters...\n", 368 | "34 : cost: 0.231286 \ttrain: 93.712% \tvalid: 93.86% \ttest: 93.43%\n", 369 | "35 : cost: 0.228612 \ttrain: 93.782% \tvalid: 93.94% \ttest: 93.47%\n", 370 | "36 : cost: 0.225956 \ttrain: 93.858% \tvalid: 94.06% \ttest: 93.55%\n", 371 | "saving filters...\n", 372 | "37 : cost: 0.223321 \ttrain: 93.916% \tvalid: 94.19% \ttest: 93.62%\n", 373 | "38 : cost: 0.220706 \ttrain: 94.0% \tvalid: 94.35% \ttest: 93.72%\n", 374 | "39 : cost: 0.218114 \ttrain: 94.068% \tvalid: 94.43% \ttest: 93.84%\n", 375 | "saving filters...\n", 376 | "40 : cost: 0.215546 \ttrain: 94.152% \tvalid: 94.48% \ttest: 93.89%\n", 377 | "41 : cost: 0.213002 \ttrain: 94.232% \tvalid: 94.54% \ttest: 93.96%\n", 378 | "42 : cost: 0.210484 \ttrain: 94.286% \tvalid: 94.61% \ttest: 
94.05%\n", 379 | "saving filters...\n", 380 | "43 : cost: 0.207995 \ttrain: 94.368% \tvalid: 94.69% \ttest: 94.09%\n", 381 | "44 : cost: 0.205533 \ttrain: 94.46% \tvalid: 94.75% \ttest: 94.15%\n", 382 | "45 : cost: 0.203102 \ttrain: 94.514% \tvalid: 94.81% \ttest: 94.22%\n", 383 | "saving filters...\n", 384 | "46 : cost: 0.2007 \ttrain: 94.576% \tvalid: 94.91% \ttest: 94.29%\n", 385 | "47 : cost: 0.19833 \ttrain: 94.64% \tvalid: 94.94% \ttest: 94.38%\n", 386 | "48 : cost: 0.195992 \ttrain: 94.71% \tvalid: 94.99% \ttest: 94.45%\n", 387 | "saving filters...\n", 388 | "49 : cost: 0.193687 \ttrain: 94.764% \tvalid: 95.02% \ttest: 94.49%\n", 389 | "50 : cost: 0.191414 \ttrain: 94.826% \tvalid: 95.06% \ttest: 94.53%\n", 390 | "51 : cost: 0.189174 \ttrain: 94.872% \tvalid: 95.13% \ttest: 94.56%\n", 391 | "saving filters...\n", 392 | "52 : cost: 0.186968 \ttrain: 94.924% \tvalid: 95.18% \ttest: 94.61%\n", 393 | "53 : cost: 0.184796 \ttrain: 94.982% \tvalid: 95.18% \ttest: 94.65%\n", 394 | "54 : cost: 0.182657 \ttrain: 95.036% \tvalid: 95.24% \ttest: 94.68%\n", 395 | "saving filters...\n", 396 | "55 : cost: 0.180553 \ttrain: 95.088% \tvalid: 95.3% \ttest: 94.68%\n", 397 | "56 : cost: 0.178481 \ttrain: 95.134% \tvalid: 95.34% \ttest: 94.78%\n", 398 | "57 : cost: 0.176444 \ttrain: 95.194% \tvalid: 95.38% \ttest: 94.83%\n", 399 | "saving filters...\n", 400 | "58 : cost: 0.174439 \ttrain: 95.24% \tvalid: 95.41% \ttest: 94.9%\n", 401 | "59 : cost: 0.172468 \ttrain: 95.288% \tvalid: 95.4% \ttest: 94.98%\n", 402 | "60 : cost: 0.170529 \ttrain: 95.358% \tvalid: 95.44% \ttest: 95.01%\n", 403 | "saving filters...\n", 404 | "61 : cost: 0.168623 \ttrain: 95.422% \tvalid: 95.5% \ttest: 95.02%\n", 405 | "62 : cost: 0.166749 \ttrain: 95.47% \tvalid: 95.54% \ttest: 95.05%\n", 406 | "63 : cost: 0.164906 \ttrain: 95.518% \tvalid: 95.62% \ttest: 95.07%\n", 407 | "saving filters...\n", 408 | "64 : cost: 0.163095 \ttrain: 95.568% \tvalid: 95.67% \ttest: 95.13%\n", 409 | "65 : cost: 0.161314 
\ttrain: 95.612% \tvalid: 95.71% \ttest: 95.19%\n", 410 | "66 : cost: 0.159564 \ttrain: 95.666% \tvalid: 95.72% \ttest: 95.21%\n", 411 | "saving filters...\n", 412 | "67 : cost: 0.157843 \ttrain: 95.706% \tvalid: 95.76% \ttest: 95.27%\n", 413 | "68 : cost: 0.156151 \ttrain: 95.756% \tvalid: 95.83% \ttest: 95.31%\n", 414 | "69 : cost: 0.154488 \ttrain: 95.806% \tvalid: 95.87% \ttest: 95.31%\n", 415 | "saving filters...\n", 416 | "70 : cost: 0.152853 \ttrain: 95.854% \tvalid: 95.88% \ttest: 95.36%\n", 417 | "71 : cost: 0.151246 \ttrain: 95.904% \tvalid: 95.94% \ttest: 95.38%\n", 418 | "72 : cost: 0.149666 \ttrain: 95.964% \tvalid: 95.96% \ttest: 95.46%\n", 419 | "saving filters...\n", 420 | "73 : cost: 0.148112 \ttrain: 96.016% \tvalid: 95.99% \ttest: 95.5%\n", 421 | "74 : cost: 0.146585 \ttrain: 96.072% \tvalid: 96.01% \ttest: 95.56%\n", 422 | "75 : cost: 0.145083 \ttrain: 96.138% \tvalid: 96.05% \ttest: 95.6%\n", 423 | "saving filters...\n", 424 | "76 : cost: 0.143606 \ttrain: 96.18% \tvalid: 96.09% \ttest: 95.64%\n", 425 | "77 : cost: 0.142153 \ttrain: 96.224% \tvalid: 96.1% \ttest: 95.68%\n", 426 | "78 : cost: 0.140724 \ttrain: 96.254% \tvalid: 96.12% \ttest: 95.74%\n", 427 | "saving filters...\n", 428 | "79 : cost: 0.139319 \ttrain: 96.306% \tvalid: 96.14% \ttest: 95.77%\n", 429 | "80 : cost: 0.137937 \ttrain: 96.348% \tvalid: 96.15% \ttest: 95.77%\n", 430 | "81 : cost: 0.136577 \ttrain: 96.384% \tvalid: 96.16% \ttest: 95.81%\n", 431 | "saving filters...\n", 432 | "82 : cost: 0.13524 \ttrain: 96.42% \tvalid: 96.22% \ttest: 95.86%\n", 433 | "83 : cost: 0.133923 \ttrain: 96.468% \tvalid: 96.24% \ttest: 95.87%\n", 434 | "84 : cost: 0.132628 \ttrain: 96.502% \tvalid: 96.3% \ttest: 95.93%\n", 435 | "saving filters...\n", 436 | "85 : cost: 0.131354 \ttrain: 96.544% \tvalid: 96.33% \ttest: 95.96%\n", 437 | "86 : cost: 0.130099 \ttrain: 96.58% \tvalid: 96.36% \ttest: 96.03%\n", 438 | "87 : cost: 0.128864 \ttrain: 96.608% \tvalid: 96.38% \ttest: 96.07%\n", 439 | "saving 
filters...\n", 440 | "88 : cost: 0.127649 \ttrain: 96.658% \tvalid: 96.42% \ttest: 96.09%\n", 441 | "89 : cost: 0.126453 \ttrain: 96.688% \tvalid: 96.43% \ttest: 96.1%\n", 442 | "90 : cost: 0.125275 \ttrain: 96.71% \tvalid: 96.44% \ttest: 96.14%\n", 443 | "saving filters...\n", 444 | "91 : cost: 0.124115 \ttrain: 96.742% \tvalid: 96.44% \ttest: 96.16%\n", 445 | "92 : cost: 0.122972 \ttrain: 96.79% \tvalid: 96.5% \ttest: 96.18%\n", 446 | "93 : cost: 0.121848 \ttrain: 96.822% \tvalid: 96.51% \ttest: 96.18%\n", 447 | "saving filters...\n", 448 | "94 : cost: 0.120739 \ttrain: 96.854% \tvalid: 96.52% \ttest: 96.2%\n", 449 | "95 : cost: 0.119648 \ttrain: 96.892% \tvalid: 96.53% \ttest: 96.22%\n", 450 | "96 : cost: 0.118573 \ttrain: 96.93% \tvalid: 96.56% \ttest: 96.26%\n", 451 | "saving filters...\n", 452 | "97 : cost: 0.117514 \ttrain: 96.954% \tvalid: 96.58% \ttest: 96.29%\n", 453 | "98 : cost: 0.11647 \ttrain: 96.97% \tvalid: 96.58% \ttest: 96.3%\n", 454 | "99 : cost: 0.115442 \ttrain: 96.994% \tvalid: 96.6% \ttest: 96.33%\n", 455 | "saving filters...\n", 456 | "100 : cost: 0.114428 \ttrain: 97.022% \tvalid: 96.61% \ttest: 96.33%\n", 457 | "101 : cost: 0.113429 \ttrain: 97.062% \tvalid: 96.62% \ttest: 96.33%\n", 458 | "102 : cost: 0.112444 \ttrain: 97.092% \tvalid: 96.61% \ttest: 96.36%\n", 459 | "saving filters...\n", 460 | "103 : cost: 0.111474 \ttrain: 97.136% \tvalid: 96.64% \ttest: 96.37%\n", 461 | "104 : cost: 0.110517 \ttrain: 97.154% \tvalid: 96.69% \ttest: 96.39%\n", 462 | "105 : cost: 0.109573 \ttrain: 97.18% \tvalid: 96.72% \ttest: 96.4%\n", 463 | "saving filters...\n", 464 | "106 : cost: 0.108643 \ttrain: 97.208% \tvalid: 96.73% \ttest: 96.44%\n", 465 | "107 : cost: 0.107725 \ttrain: 97.246% \tvalid: 96.74% \ttest: 96.45%\n", 466 | "108 : cost: 0.10682 \ttrain: 97.27% \tvalid: 96.74% \ttest: 96.48%\n", 467 | "saving filters...\n", 468 | "109 : cost: 0.105928 \ttrain: 97.288% \tvalid: 96.76% \ttest: 96.5%\n", 469 | "110 : cost: 0.105048 \ttrain: 97.322% 
\tvalid: 96.78% \ttest: 96.53%\n", 470 | "111 : cost: 0.104179 \ttrain: 97.338% \tvalid: 96.8% \ttest: 96.56%\n", 471 | "saving filters...\n", 472 | "112 : cost: 0.103322 \ttrain: 97.364% \tvalid: 96.8% \ttest: 96.59%\n", 473 | "113 : cost: 0.102477 \ttrain: 97.386% \tvalid: 96.8% \ttest: 96.63%\n", 474 | "114 : cost: 0.101643 \ttrain: 97.41% \tvalid: 96.81% \ttest: 96.64%\n", 475 | "saving filters...\n", 476 | "115 : cost: 0.10082 \ttrain: 97.438% \tvalid: 96.82% \ttest: 96.65%\n", 477 | "116 : cost: 0.100007 \ttrain: 97.46% \tvalid: 96.84% \ttest: 96.66%\n", 478 | "117 : cost: 0.0992054 \ttrain: 97.476% \tvalid: 96.85% \ttest: 96.67%\n", 479 | "saving filters...\n", 480 | "118 : cost: 0.0984139 \ttrain: 97.498% \tvalid: 96.87% \ttest: 96.68%\n", 481 | "119 : cost: 0.0976325 \ttrain: 97.532% \tvalid: 96.89% \ttest: 96.68%\n", 482 | "120 : cost: 0.0968612 \ttrain: 97.546% \tvalid: 96.89% \ttest: 96.69%\n", 483 | "saving filters...\n", 484 | "121 : cost: 0.0960996 \ttrain: 97.558% \tvalid: 96.9% \ttest: 96.7%\n", 485 | "122 : cost: 0.0953476 \ttrain: 97.592% \tvalid: 96.91% \ttest: 96.7%\n", 486 | "123 : cost: 0.094605 \ttrain: 97.612% \tvalid: 96.92% \ttest: 96.72%\n", 487 | "saving filters...\n", 488 | "124 : cost: 0.0938718 \ttrain: 97.63% \tvalid: 96.94% \ttest: 96.74%\n", 489 | "125 : cost: 0.0931474 \ttrain: 97.652% \tvalid: 96.95% \ttest: 96.74%\n", 490 | "126 : cost: 0.0924321 \ttrain: 97.668% \tvalid: 96.98% \ttest: 96.74%\n", 491 | "saving filters...\n", 492 | "127 : cost: 0.0917255 \ttrain: 97.684% \tvalid: 96.99% \ttest: 96.74%\n", 493 | "128 : cost: 0.0910275 \ttrain: 97.706% \tvalid: 97.0% \ttest: 96.77%\n", 494 | "129 : cost: 0.0903379 \ttrain: 97.724% \tvalid: 97.01% \ttest: 96.79%\n", 495 | "saving filters...\n", 496 | "130 : cost: 0.0896565 \ttrain: 97.736% \tvalid: 97.02% \ttest: 96.79%\n", 497 | "131 : cost: 0.0889833 \ttrain: 97.756% \tvalid: 97.02% \ttest: 96.79%\n", 498 | "132 : cost: 0.0883181 \ttrain: 97.772% \tvalid: 97.03% \ttest: 
96.8%\n", 499 | "saving filters...\n", 500 | "133 : cost: 0.0876607 \ttrain: 97.786% \tvalid: 97.07% \ttest: 96.8%\n", 501 | "134 : cost: 0.087011 \ttrain: 97.804% \tvalid: 97.08% \ttest: 96.81%\n", 502 | "135 : cost: 0.0863689 \ttrain: 97.812% \tvalid: 97.09% \ttest: 96.82%\n", 503 | "saving filters...\n", 504 | "136 : cost: 0.0857343 \ttrain: 97.824% \tvalid: 97.12% \ttest: 96.83%\n", 505 | "137 : cost: 0.085107 \ttrain: 97.842% \tvalid: 97.12% \ttest: 96.86%\n", 506 | "138 : cost: 0.0844868 \ttrain: 97.86% \tvalid: 97.13% \ttest: 96.88%\n", 507 | "saving filters...\n", 508 | "139 : cost: 0.0838737 \ttrain: 97.884% \tvalid: 97.12% \ttest: 96.9%\n", 509 | "140 : cost: 0.0832676 \ttrain: 97.898% \tvalid: 97.11% \ttest: 96.91%\n", 510 | "141 : cost: 0.0826682 \ttrain: 97.906% \tvalid: 97.11% \ttest: 96.94%\n", 511 | "saving filters...\n", 512 | "142 : cost: 0.0820756 \ttrain: 97.916% \tvalid: 97.12% \ttest: 96.93%\n", 513 | "143 : cost: 0.0814896 \ttrain: 97.93% \tvalid: 97.14% \ttest: 96.93%\n", 514 | "144 : cost: 0.0809101 \ttrain: 97.948% \tvalid: 97.15% \ttest: 96.96%\n", 515 | "saving filters...\n", 516 | "145 : cost: 0.080337 \ttrain: 97.966% \tvalid: 97.15% \ttest: 96.97%\n", 517 | "146 : cost: 0.0797701 \ttrain: 97.99% \tvalid: 97.16% \ttest: 96.97%\n", 518 | "147 : cost: 0.0792095 \ttrain: 98.012% \tvalid: 97.18% \ttest: 96.98%\n", 519 | "saving filters...\n", 520 | "148 : cost: 0.0786549 \ttrain: 98.03% \tvalid: 97.19% \ttest: 96.98%\n", 521 | "149 : cost: 0.0781063 \ttrain: 98.058% \tvalid: 97.21% \ttest: 97.0%\n", 522 | "150 : cost: 0.0775635 \ttrain: 98.074% \tvalid: 97.21% \ttest: 97.01%\n", 523 | "saving filters...\n", 524 | "151 : cost: 0.0770266 \ttrain: 98.082% \tvalid: 97.21% \ttest: 97.02%\n", 525 | "152 : cost: 0.0764954 \ttrain: 98.096% \tvalid: 97.21% \ttest: 97.02%\n", 526 | "153 : cost: 0.0759698 \ttrain: 98.12% \tvalid: 97.22% \ttest: 97.02%\n", 527 | "saving filters...\n", 528 | "154 : cost: 0.0754497 \ttrain: 98.134% \tvalid: 97.22% 
\ttest: 97.03%\n", 529 | "155 : cost: 0.0749351 \ttrain: 98.144% \tvalid: 97.22% \ttest: 97.04%\n", 530 | "156 : cost: 0.0744257 \ttrain: 98.166% \tvalid: 97.24% \ttest: 97.06%\n", 531 | "saving filters...\n", 532 | "157 : cost: 0.0739218 \ttrain: 98.184% \tvalid: 97.24% \ttest: 97.06%\n", 533 | "158 : cost: 0.0734229 \ttrain: 98.208% \tvalid: 97.27% \ttest: 97.07%\n", 534 | "159 : cost: 0.0729292 \ttrain: 98.224% \tvalid: 97.28% \ttest: 97.07%\n", 535 | "saving filters...\n", 536 | "160 : cost: 0.0724405 \ttrain: 98.234% \tvalid: 97.29% \ttest: 97.08%\n", 537 | "161 : cost: 0.0719567 \ttrain: 98.246% \tvalid: 97.3% \ttest: 97.1%\n", 538 | "162 : cost: 0.0714778 \ttrain: 98.262% \tvalid: 97.29% \ttest: 97.12%\n", 539 | "saving filters...\n", 540 | "163 : cost: 0.0710037 \ttrain: 98.27% \tvalid: 97.3% \ttest: 97.12%\n", 541 | "164 : cost: 0.0705344 \ttrain: 98.284% \tvalid: 97.32% \ttest: 97.13%\n", 542 | "165 : cost: 0.0700698 \ttrain: 98.292% \tvalid: 97.31% \ttest: 97.13%\n", 543 | "saving filters...\n", 544 | "166 : cost: 0.0696097 \ttrain: 98.306% \tvalid: 97.3% \ttest: 97.14%\n", 545 | "167 : cost: 0.0691542 \ttrain: 98.314% \tvalid: 97.29% \ttest: 97.15%\n", 546 | "168 : cost: 0.0687031 \ttrain: 98.328% \tvalid: 97.31% \ttest: 97.16%\n", 547 | "saving filters...\n", 548 | "169 : cost: 0.0682565 \ttrain: 98.344% \tvalid: 97.32% \ttest: 97.16%\n", 549 | "170 : cost: 0.0678142 \ttrain: 98.344% \tvalid: 97.33% \ttest: 97.17%\n", 550 | "171 : cost: 0.0673761 \ttrain: 98.348% \tvalid: 97.34% \ttest: 97.17%\n", 551 | "saving filters...\n", 552 | "172 : cost: 0.0669422 \ttrain: 98.366% \tvalid: 97.34% \ttest: 97.17%\n", 553 | "173 : cost: 0.0665125 \ttrain: 98.382% \tvalid: 97.33% \ttest: 97.17%\n", 554 | "174 : cost: 0.0660869 \ttrain: 98.39% \tvalid: 97.33% \ttest: 97.18%\n", 555 | "saving filters...\n", 556 | "175 : cost: 0.0656654 \ttrain: 98.406% \tvalid: 97.33% \ttest: 97.18%\n", 557 | "176 : cost: 0.0652478 \ttrain: 98.418% \tvalid: 97.33% \ttest: 97.16%\n", 
558 | "177 : cost: 0.0648342 \ttrain: 98.43% \tvalid: 97.33% \ttest: 97.17%\n", 559 | "saving filters...\n", 560 | "178 : cost: 0.0644244 \ttrain: 98.442% \tvalid: 97.35% \ttest: 97.17%\n", 561 | "179 : cost: 0.0640184 \ttrain: 98.458% \tvalid: 97.35% \ttest: 97.17%\n", 562 | "180 : cost: 0.0636162 \ttrain: 98.466% \tvalid: 97.34% \ttest: 97.18%\n", 563 | "saving filters...\n", 564 | "181 : cost: 0.0632178 \ttrain: 98.48% \tvalid: 97.34% \ttest: 97.18%\n", 565 | "182 : cost: 0.0628229 \ttrain: 98.494% \tvalid: 97.37% \ttest: 97.19%\n", 566 | "183 : cost: 0.0624317 \ttrain: 98.508% \tvalid: 97.37% \ttest: 97.2%\n", 567 | "saving filters...\n", 568 | "184 : cost: 0.0620441 \ttrain: 98.53% \tvalid: 97.37% \ttest: 97.21%\n", 569 | "185 : cost: 0.06166 \ttrain: 98.538% \tvalid: 97.37% \ttest: 97.23%\n", 570 | "186 : cost: 0.0612793 \ttrain: 98.55% \tvalid: 97.36% \ttest: 97.24%\n", 571 | "saving filters...\n", 572 | "187 : cost: 0.0609021 \ttrain: 98.568% \tvalid: 97.36% \ttest: 97.24%\n", 573 | "188 : cost: 0.0605283 \ttrain: 98.582% \tvalid: 97.36% \ttest: 97.26%\n", 574 | "189 : cost: 0.0601578 \ttrain: 98.6% \tvalid: 97.37% \ttest: 97.27%\n", 575 | "saving filters...\n", 576 | "190 : cost: 0.0597905 \ttrain: 98.606% \tvalid: 97.37% \ttest: 97.28%\n", 577 | "191 : cost: 0.0594266 \ttrain: 98.62% \tvalid: 97.38% \ttest: 97.29%\n", 578 | "192 : cost: 0.0590659 \ttrain: 98.624% \tvalid: 97.38% \ttest: 97.3%\n", 579 | "saving filters...\n", 580 | "193 : cost: 0.0587082 \ttrain: 98.628% \tvalid: 97.38% \ttest: 97.3%\n", 581 | "194 : cost: 0.0583538 \ttrain: 98.632% \tvalid: 97.38% \ttest: 97.3%\n", 582 | "195 : cost: 0.0580024 \ttrain: 98.634% \tvalid: 97.39% \ttest: 97.31%\n", 583 | "saving filters...\n", 584 | "196 : cost: 0.057654 \ttrain: 98.64% \tvalid: 97.38% \ttest: 97.3%\n", 585 | "197 : cost: 0.0573087 \ttrain: 98.644% \tvalid: 97.39% \ttest: 97.31%\n", 586 | "198 : cost: 0.0569663 \ttrain: 98.658% \tvalid: 97.38% \ttest: 97.33%\n", 587 | "saving filters...\n", 
588 | "199 : cost: 0.0566269 \ttrain: 98.666% \tvalid: 97.38% \ttest: 97.33%\n", 589 | "200 : cost: 0.0562904 \ttrain: 98.674% \tvalid: 97.39% \ttest: 97.34%\n", 590 | "201 : cost: 0.0559567 \ttrain: 98.688% \tvalid: 97.39% \ttest: 97.34%\n", 591 | "saving filters...\n", 592 | "202 : cost: 0.0556259 \ttrain: 98.704% \tvalid: 97.39% \ttest: 97.35%\n", 593 | "203 : cost: 0.0552979 \ttrain: 98.72% \tvalid: 97.4% \ttest: 97.35%\n", 594 | "204 : cost: 0.0549725 \ttrain: 98.738% \tvalid: 97.41% \ttest: 97.36%\n", 595 | "saving filters...\n", 596 | "205 : cost: 0.05465 \ttrain: 98.742% \tvalid: 97.41% \ttest: 97.36%\n", 597 | "206 : cost: 0.0543301 \ttrain: 98.744% \tvalid: 97.41% \ttest: 97.37%\n", 598 | "207 : cost: 0.0540128 \ttrain: 98.756% \tvalid: 97.41% \ttest: 97.37%\n", 599 | "saving filters...\n", 600 | "208 : cost: 0.0536982 \ttrain: 98.762% \tvalid: 97.42% \ttest: 97.38%\n", 601 | "209 : cost: 0.0533862 \ttrain: 98.768% \tvalid: 97.43% \ttest: 97.4%\n", 602 | "210 : cost: 0.0530767 \ttrain: 98.778% \tvalid: 97.44% \ttest: 97.41%\n", 603 | "saving filters...\n", 604 | "211 : cost: 0.0527698 \ttrain: 98.786% \tvalid: 97.46% \ttest: 97.41%\n", 605 | "212 : cost: 0.0524655 \ttrain: 98.792% \tvalid: 97.46% \ttest: 97.41%\n", 606 | "213 : cost: 0.0521635 \ttrain: 98.804% \tvalid: 97.46% \ttest: 97.41%\n", 607 | "saving filters...\n", 608 | "214 : cost: 0.051864 \ttrain: 98.814% \tvalid: 97.46% \ttest: 97.41%\n", 609 | "215 : cost: 0.0515669 \ttrain: 98.816% \tvalid: 97.46% \ttest: 97.41%\n", 610 | "216 : cost: 0.0512722 \ttrain: 98.824% \tvalid: 97.46% \ttest: 97.41%\n", 611 | "saving filters...\n", 612 | "217 : cost: 0.0509799 \ttrain: 98.838% \tvalid: 97.46% \ttest: 97.4%\n", 613 | "218 : cost: 0.0506899 \ttrain: 98.842% \tvalid: 97.45% \ttest: 97.4%\n", 614 | "219 : cost: 0.0504022 \ttrain: 98.852% \tvalid: 97.46% \ttest: 97.41%\n", 615 | "saving filters...\n", 616 | "220 : cost: 0.0501168 \ttrain: 98.86% \tvalid: 97.46% \ttest: 97.43%\n", 617 | "221 : cost: 
0.0498336 \ttrain: 98.876% \tvalid: 97.47% \ttest: 97.43%\n", 618 | "222 : cost: 0.0495527 \ttrain: 98.888% \tvalid: 97.48% \ttest: 97.43%\n", 619 | "saving filters...\n", 620 | "223 : cost: 0.0492739 \ttrain: 98.894% \tvalid: 97.49% \ttest: 97.44%\n", 621 | "224 : cost: 0.0489974 \ttrain: 98.904% \tvalid: 97.5% \ttest: 97.44%\n", 622 | "225 : cost: 0.048723 \ttrain: 98.914% \tvalid: 97.51% \ttest: 97.44%\n", 623 | "saving filters...\n", 624 | "226 : cost: 0.0484507 \ttrain: 98.926% \tvalid: 97.52% \ttest: 97.45%\n", 625 | "227 : cost: 0.0481805 \ttrain: 98.942% \tvalid: 97.52% \ttest: 97.45%\n", 626 | "228 : cost: 0.0479125 \ttrain: 98.946% \tvalid: 97.53% \ttest: 97.45%\n", 627 | "saving filters...\n", 628 | "229 : cost: 0.0476465 \ttrain: 98.956% \tvalid: 97.54% \ttest: 97.47%\n", 629 | "230 : cost: 0.0473825 \ttrain: 98.97% \tvalid: 97.54% \ttest: 97.47%\n", 630 | "231 : cost: 0.0471206 \ttrain: 98.98% \tvalid: 97.54% \ttest: 97.48%\n", 631 | "saving filters...\n", 632 | "232 : cost: 0.0468606 \ttrain: 98.99% \tvalid: 97.54% \ttest: 97.5%\n", 633 | "233 : cost: 0.0466026 \ttrain: 98.996% \tvalid: 97.54% \ttest: 97.5%\n", 634 | "234 : cost: 0.0463466 \ttrain: 99.004% \tvalid: 97.54% \ttest: 97.51%\n", 635 | "saving filters...\n", 636 | "235 : cost: 0.0460925 \ttrain: 99.014% \tvalid: 97.53% \ttest: 97.51%\n", 637 | "236 : cost: 0.0458403 \ttrain: 99.02% \tvalid: 97.52% \ttest: 97.52%\n", 638 | "237 : cost: 0.0455901 \ttrain: 99.026% \tvalid: 97.52% \ttest: 97.52%\n", 639 | "saving filters...\n", 640 | "238 : cost: 0.0453416 \ttrain: 99.032% \tvalid: 97.52% \ttest: 97.52%\n", 641 | "239 : cost: 0.0450951 \ttrain: 99.04% \tvalid: 97.52% \ttest: 97.52%\n", 642 | "240 : cost: 0.0448504 \ttrain: 99.048% \tvalid: 97.52% \ttest: 97.52%\n", 643 | "saving filters...\n", 644 | "241 : cost: 0.0446075 \ttrain: 99.06% \tvalid: 97.52% \ttest: 97.52%\n", 645 | "242 : cost: 0.0443663 \ttrain: 99.064% \tvalid: 97.52% \ttest: 97.53%\n", 646 | "243 : cost: 0.044127 \ttrain: 
99.066% \tvalid: 97.52% \ttest: 97.54%\n", 647 | "saving filters...\n", 648 | "244 : cost: 0.0438895 \ttrain: 99.072% \tvalid: 97.53% \ttest: 97.54%\n", 649 | "245 : cost: 0.0436537 \ttrain: 99.082% \tvalid: 97.54% \ttest: 97.54%\n", 650 | "246 : cost: 0.0434195 \ttrain: 99.088% \tvalid: 97.53% \ttest: 97.54%\n", 651 | "saving filters...\n", 652 | "247 : cost: 0.0431871 \ttrain: 99.094% \tvalid: 97.54% \ttest: 97.54%\n", 653 | "248 : cost: 0.0429565 \ttrain: 99.1% \tvalid: 97.55% \ttest: 97.54%\n", 654 | "249 : cost: 0.0427274 \ttrain: 99.108% \tvalid: 97.55% \ttest: 97.54%\n", 655 | "saving filters...\n", 656 | "250 : cost: 0.0425001 \ttrain: 99.11% \tvalid: 97.56% \ttest: 97.55%\n", 657 | "251 : cost: 0.0422743 \ttrain: 99.112% \tvalid: 97.56% \ttest: 97.55%\n", 658 | "252 : cost: 0.0420502 \ttrain: 99.114% \tvalid: 97.56% \ttest: 97.56%\n", 659 | "saving filters...\n", 660 | "253 : cost: 0.0418277 \ttrain: 99.122% \tvalid: 97.56% \ttest: 97.57%\n", 661 | "254 : cost: 0.0416068 \ttrain: 99.132% \tvalid: 97.56% \ttest: 97.57%\n", 662 | "255 : cost: 0.0413875 \ttrain: 99.138% \tvalid: 97.58% \ttest: 97.58%\n", 663 | "saving filters...\n", 664 | "256 : cost: 0.0411697 \ttrain: 99.146% \tvalid: 97.59% \ttest: 97.58%\n", 665 | "257 : cost: 0.0409535 \ttrain: 99.154% \tvalid: 97.59% \ttest: 97.61%\n", 666 | "258 : cost: 0.0407388 \ttrain: 99.162% \tvalid: 97.6% \ttest: 97.61%\n", 667 | "saving filters...\n", 668 | "259 : cost: 0.0405257 \ttrain: 99.166% \tvalid: 97.6% \ttest: 97.6%\n", 669 | "260 : cost: 0.040314 \ttrain: 99.17% \tvalid: 97.6% \ttest: 97.6%\n", 670 | "261 : cost: 0.0401038 \ttrain: 99.174% \tvalid: 97.61% \ttest: 97.6%\n", 671 | "saving filters...\n", 672 | "262 : cost: 0.0398951 \ttrain: 99.184% \tvalid: 97.61% \ttest: 97.6%\n", 673 | "263 : cost: 0.0396879 \ttrain: 99.19% \tvalid: 97.61% \ttest: 97.6%\n", 674 | "264 : cost: 0.0394821 \ttrain: 99.194% \tvalid: 97.61% \ttest: 97.6%\n", 675 | "saving filters...\n", 676 | "265 : cost: 0.0392777 \ttrain: 
99.204% \tvalid: 97.62% \ttest: 97.6%\n", 677 | "266 : cost: 0.0390748 \ttrain: 99.206% \tvalid: 97.62% \ttest: 97.61%\n", 678 | "267 : cost: 0.0388732 \ttrain: 99.214% \tvalid: 97.62% \ttest: 97.61%\n", 679 | "saving filters...\n", 680 | "268 : cost: 0.0386731 \ttrain: 99.22% \tvalid: 97.62% \ttest: 97.61%\n", 681 | "269 : cost: 0.0384743 \ttrain: 99.23% \tvalid: 97.64% \ttest: 97.61%\n", 682 | "270 : cost: 0.0382769 \ttrain: 99.248% \tvalid: 97.64% \ttest: 97.61%\n", 683 | "saving filters...\n", 684 | "271 : cost: 0.0380808 \ttrain: 99.25% \tvalid: 97.64% \ttest: 97.61%\n", 685 | "272 : cost: 0.0378862 \ttrain: 99.252% \tvalid: 97.63% \ttest: 97.62%\n", 686 | "273 : cost: 0.0376928 \ttrain: 99.272% \tvalid: 97.63% \ttest: 97.62%\n", 687 | "saving filters...\n", 688 | "274 : cost: 0.0375007 \ttrain: 99.274% \tvalid: 97.64% \ttest: 97.63%\n", 689 | "275 : cost: 0.03731 \ttrain: 99.278% \tvalid: 97.64% \ttest: 97.63%\n", 690 | "276 : cost: 0.0371205 \ttrain: 99.28% \tvalid: 97.64% \ttest: 97.63%\n", 691 | "saving filters...\n", 692 | "277 : cost: 0.0369324 \ttrain: 99.29% \tvalid: 97.65% \ttest: 97.63%\n", 693 | "278 : cost: 0.0367454 \ttrain: 99.294% \tvalid: 97.65% \ttest: 97.63%\n", 694 | "279 : cost: 0.0365598 \ttrain: 99.3% \tvalid: 97.65% \ttest: 97.64%\n", 695 | "saving filters...\n", 696 | "280 : cost: 0.0363754 \ttrain: 99.306% \tvalid: 97.65% \ttest: 97.64%\n", 697 | "281 : cost: 0.0361922 \ttrain: 99.31% \tvalid: 97.64% \ttest: 97.65%\n", 698 | "282 : cost: 0.0360103 \ttrain: 99.316% \tvalid: 97.65% \ttest: 97.65%\n", 699 | "saving filters...\n", 700 | "283 : cost: 0.0358296 \ttrain: 99.32% \tvalid: 97.65% \ttest: 97.65%\n", 701 | "284 : cost: 0.0356501 \ttrain: 99.322% \tvalid: 97.65% \ttest: 97.65%\n", 702 | "285 : cost: 0.0354718 \ttrain: 99.324% \tvalid: 97.65% \ttest: 97.65%\n", 703 | "saving filters...\n", 704 | "286 : cost: 0.0352947 \ttrain: 99.334% \tvalid: 97.65% \ttest: 97.66%\n", 705 | "287 : cost: 0.0351187 \ttrain: 99.334% \tvalid: 97.66% 
\ttest: 97.66%\n", 706 | "288 : cost: 0.0349439 \ttrain: 99.338% \tvalid: 97.66% \ttest: 97.66%\n", 707 | "saving filters...\n", 708 | "289 : cost: 0.0347703 \ttrain: 99.34% \tvalid: 97.66% \ttest: 97.68%\n", 709 | "290 : cost: 0.0345977 \ttrain: 99.342% \tvalid: 97.66% \ttest: 97.69%\n", 710 | "291 : cost: 0.0344264 \ttrain: 99.346% \tvalid: 97.66% \ttest: 97.69%\n", 711 | "saving filters...\n", 712 | "292 : cost: 0.0342562 \ttrain: 99.356% \tvalid: 97.66% \ttest: 97.69%\n", 713 | "293 : cost: 0.0340871 \ttrain: 99.358% \tvalid: 97.66% \ttest: 97.69%\n", 714 | "294 : cost: 0.033919 \ttrain: 99.362% \tvalid: 97.66% \ttest: 97.72%\n", 715 | "saving filters...\n", 716 | "295 : cost: 0.0337521 \ttrain: 99.368% \tvalid: 97.67% \ttest: 97.72%\n", 717 | "296 : cost: 0.0335863 \ttrain: 99.37% \tvalid: 97.67% \ttest: 97.73%\n", 718 | "297 : cost: 0.0334215 \ttrain: 99.378% \tvalid: 97.67% \ttest: 97.73%\n", 719 | "saving filters...\n", 720 | "298 : cost: 0.0332579 \ttrain: 99.388% \tvalid: 97.67% \ttest: 97.74%\n", 721 | "299 : cost: 0.0330952 \ttrain: 99.39% \tvalid: 97.67% \ttest: 97.74%\n", 722 | "300 : cost: 0.0329337 \ttrain: 99.392% \tvalid: 97.68% \ttest: 97.75%\n", 723 | "saving filters...\n" 724 | ] 725 | } 726 | ], 727 | "source": [ 728 | "# Main training loop\n", 729 | "batch_size = 100\n", 730 | "epochs = 300\n", 731 | "check_frequency = 3\n", 732 | "\n", 733 | "train_batches = len(train_x) / batch_size\n", 734 | "valid_batches = len(valid_x) / batch_size\n", 735 | "test_batches = len(test_x) / batch_size\n", 736 | "\n", 737 | "for epoch in range(epochs):\n", 738 | " print epoch+1, \":\",\n", 739 | " \n", 740 | " train_costs = []\n", 741 | " train_accuracy = []\n", 742 | " for i in range(train_batches):\n", 743 | " batch_x = train_x[i*batch_size:(i+1)*batch_size]\n", 744 | " batch_labels = train_y[i*batch_size:(i+1)*batch_size]\n", 745 | "\n", 746 | " costs = f_train(batch_x, batch_labels)\n", 747 | " preds = f_test(batch_x)\n", 748 | " acc = 
sum(preds==batch_labels)/float(len(batch_labels))\n", 749 | " \n", 750 | " train_costs.append(costs)\n", 751 | " train_accuracy.append(acc)\n", 752 | " print \"cost:\", numpy.mean(train_costs), \"\\ttrain:\", str(numpy.mean(train_accuracy)*100)+\"%\",\n", 753 | " \n", 754 | " valid_accuracy = []\n", 755 | " for i in range(valid_batches):\n", 756 | " batch_x = valid_x[i*batch_size:(i+1)*batch_size]\n", 757 | " batch_labels = valid_y[i*batch_size:(i+1)*batch_size]\n", 758 | " \n", 759 | " preds = f_test(batch_x)\n", 760 | " acc = sum(preds==batch_labels)/float(len(batch_labels))\n", 761 | " \n", 762 | " valid_accuracy.append(acc)\n", 763 | " print \"\\tvalid:\", str(numpy.mean(valid_accuracy)*100)+\"%\",\n", 764 | " \n", 765 | " test_accuracy = []\n", 766 | " for i in range(test_batches):\n", 767 | " batch_x = test_x[i*batch_size:(i+1)*batch_size]\n", 768 | " batch_labels = test_y[i*batch_size:(i+1)*batch_size]\n", 769 | " \n", 770 | " preds = f_test(batch_x)\n", 771 | " acc = sum(preds==batch_labels)/float(len(batch_labels))\n", 772 | " \n", 773 | " test_accuracy.append(acc)\n", 774 | " print \"\\ttest:\", str(numpy.mean(test_accuracy)*100)+\"%\"\n", 775 | " \n", 776 | " if (epoch+1) % check_frequency == 0:\n", 777 | " print 'saving filters...'\n", 778 | " weight_filters = pil_img.fromarray(\n", 779 | " tile_raster_images(\n", 780 | " W_x.get_value(borrow=True).T,\n", 781 | " img_shape=(28, 28),\n", 782 | " tile_shape=(20, 25),\n", 783 | " tile_spacing=(1, 1)\n", 784 | " )\n", 785 | " )\n", 786 | " weight_filters.save(\"mlp_filters_%d.png\"%(epoch+1))" 787 | ] 788 | }, 789 | { 790 | "cell_type": "code", 791 | "execution_count": null, 792 | "metadata": { 793 | "collapsed": true 794 | }, 795 | "outputs": [], 796 | "source": [] 797 | } 798 | ], 799 | "metadata": { 800 | "kernelspec": { 801 | "display_name": "Python 2", 802 | "language": "python", 803 | "name": "python2" 804 | }, 805 | "language_info": { 806 | "codemirror_mode": { 807 | "name": "ipython", 808 | 
"version": 2 809 | }, 810 | "file_extension": ".py", 811 | "mimetype": "text/x-python", 812 | "name": "python", 813 | "nbconvert_exporter": "python", 814 | "pygments_lexer": "ipython2", 815 | "version": "2.7.6" 816 | } 817 | }, 818 | "nbformat": 4, 819 | "nbformat_minor": 0 820 | } 821 | -------------------------------------------------------------------------------- /MLP_theano_with_comments.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# This tutorial covers your simplest neural network: a multilayer perceptron (MLP)\n", 12 | "# Also known as feedforward neural network.\n", 13 | "# We will learn to classify MNIST handwritten digit images into their correct label (0-9).\n", 14 | "\n", 15 | "from IPython.display import Image\n", 16 | "# First, let's load our data and take a look!\n", 17 | "import pickle\n", 18 | "\n", 19 | "# Load our data \n", 20 | "# Download and unzip pickled version from here: http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz\n", 21 | "(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = pickle.load(open('data/mnist.pkl', 'r'))\n", 22 | "print \"Shapes:\"\n", 23 | "print train_x.shape, train_y.shape\n", 24 | "print valid_x.shape, valid_y.shape\n", 25 | "print test_x.shape, test_y.shape\n", 26 | "\n", 27 | "print \"--------------\"\n", 28 | "print \"Example input:\"\n", 29 | "print train_x[0]\n", 30 | "print \"Example label:\"\n", 31 | "print train_y[0]\n" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "collapsed": false 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "# Show example images - using tile_raster_images helper function from OpenDeep to get 28x28 image from 784 array.\n", 43 | "from utils import tile_raster_images\n", 44 | "from PIL import Image as 
pil_img\n", 45 | "\n", 46 | "input_images = train_x[:25]\n", 47 | "im = pil_img.fromarray(\n", 48 | " tile_raster_images(input_images, \n", 49 | " img_shape=(28, 28), \n", 50 | " tile_shape=(1, 25),\n", 51 | " tile_spacing=(1, 1))\n", 52 | ")\n", 53 | "im.save(\"some_mnist_numbers.png\")\n", 54 | "Image(filename=\"some_mnist_numbers.png\")\n" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": { 61 | "collapsed": false 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "# Cool, now we know a little about the input data, let's design the MLP to work with it!\n", 66 | "# An MLP looks like this: input -> hiddens -> output classification\n", 67 | "# Each stage is just a matrix multiplication with a nonlinear function applied after.\n", 68 | "\n", 69 | "# Your basic Theano imports.\n", 70 | "import theano\n", 71 | "import theano.tensor as T\n", 72 | "\n", 73 | "# Inputs are matrices where rows are examples and columns are pixels - so create a symbolic Theano matrix.\n", 74 | "x = T.matrix('x')\n" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "collapsed": false 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "# Now let's start building the equation for our MLP!\n", 86 | "\n", 87 | "# The first transformation is the input x -> hidden layer h.\n", 88 | "# We defined this transformation with h = tanh(x.dot(W_x) + b_h)\n", 89 | "# where the learnable model parameters are W_x and b_h.\n", 90 | "\n", 91 | "# Therefore, we will need a weights matrix W_x and a bias vector b_h.\n", 92 | "# W_x has shape (input_size, hidden_size) and b_h has shape (hidden_size,).\n", 93 | "# Initialization is important in deep learning; we want something random so the model doesn't get stuck early.\n", 94 | "# Many papers in this subject, but for now we will just use a normal distribution with mean=0 and std=0.05.\n", 95 | "# Another good option for tanh layers is to use a uniform distribution with 
interval +- sqrt(6/sum(shape)).\n", 96 | "# These are hyperparameters to play with.\n", 97 | "# Bias starting as zero is fine.\n", 98 | "import numpy.random as rng\n", 99 | "W_x = numpy.asarray(rng.normal(loc=0.0, scale=.05, size=(28*28, 500)), dtype=theano.config.floatX)\n", 100 | "b_h = numpy.zeros(shape=(500,), dtype=theano.config.floatX)\n", 101 | "\n", 102 | "# To update a variable used in an equation (for example, while learning), \n", 103 | "# Theano needs it to be in a special wrapper called a shared variable.\n", 104 | "# These are the model parameters for our first hidden layer!\n", 105 | "W_x = theano.shared(W_x, name=\"W_x\")\n", 106 | "b_h = theano.shared(b_h, name=\"b_h\")\n", 107 | "\n", 108 | "# Now, we can finally write the equation to give our symbolic hidden layer h!\n", 109 | "h = T.tanh(\n", 110 | " T.dot(x, W_x) + b_h\n", 111 | ")\n", 112 | "\n", 113 | "# Side note - if we used softmax instead of tanh for the activation, this would be performing logistic regression!\n" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": { 120 | "collapsed": false 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "# We have the hidden layer h, let's put that softmax layer on top for classification output y!\n", 125 | "\n", 126 | "# Same deal as before, the transformation is defined as:\n", 127 | "# y = softmax(h.dot(W_h) + b_y)\n", 128 | "# where the learnable parameters are W_h and b_y.\n", 129 | "# W_h has shape (hidden_size, output_size) and b_y has shape (output_size,).\n", 130 | "\n", 131 | "# We will use the same random initialization strategy as before.\n", 132 | "W_h = numpy.asarray(rng.normal(loc=0.0, scale=.05, size=(500, 10)), dtype=theano.config.floatX)\n", 133 | "b_y = numpy.zeros(shape=(10,), dtype=theano.config.floatX)\n", 134 | "# Don't forget to make them shared variables!\n", 135 | "W_h = theano.shared(W_h, name=\"W_h\")\n", 136 | "b_y = theano.shared(b_y, name=\"b_y\")\n", 137 | "\n", 138 | 
"# Now write the equation for the output!\n", 139 | "y = T.nnet.softmax(\n", 140 | " T.dot(h, W_h) + b_y\n", 141 | ")\n", 142 | "\n", 143 | "# The output (due to softmax) is a vector of class probabilities.\n", 144 | "# To get the output class 'guess' from the model, just take the index of the highest probability!\n", 145 | "y_hat = T.argmax(y, axis=1)\n", 146 | "\n", 147 | "# That's everything! Just four model parameters and one input variable.\n" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": { 154 | "collapsed": false 155 | }, 156 | "outputs": [], 157 | "source": [ 158 | "# The variable y_hat represents the output of running our model, but we need a cost function to use for training.\n", 159 | "# For a softmax (probability) output, we want to maximize the likelihood of P(Y=y|X).\n", 160 | "# This means we want to minimize the negative log-likelihood cost! (For a primer, see machine learning Coursera.)\n", 161 | "\n", 162 | "# Cost functions always need the truth outputs to compare against (this is supervised learning).\n", 163 | "# From before, we saw the labels were a vector of ints - so let's make a symbolic variable for this!\n", 164 | "correct_labels = T.ivector(\"labels\") # integer vector\n", 165 | "\n", 166 | "# Now we can compare our output probability from y with the true labels.\n", 167 | "# Because the labels are integers, we will want to make an indexing mask to pick out the probabilities\n", 168 | "# our model thought was the likelihood of the correct label.\n", 169 | "log_likelihood = T.log(y)[T.arange(correct_labels.shape[0]), correct_labels]\n", 170 | "# We use mean instead of sum to be less dependent on batch size (better for flexibility)\n", 171 | "cost = -T.mean(log_likelihood)\n" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": { 178 | "collapsed": false 179 | }, 180 | "outputs": [], 181 | "source": [ 182 | "# Easiest way to train neural 
nets is with Stochastic Gradient Descent\n", 183 | "# This takes each example, calculates the gradient, and changes the model parameters a small amount\n", 184 | "# in the direction of the gradient.\n", 185 | "\n", 186 | "# Fancier add-ons to stochastic gradient descent will reduce the learning rate over time, add a momentum\n", 187 | "# factor to the parameters, etc.\n", 188 | "\n", 189 | "# Before we can start training, we need to know what the gradients are.\n", 190 | "# Luckily we don't have to do any math! Theano has symbolic auto-differentiation which means it can\n", 191 | "# calculate the gradients for arbitrary equations with respect to a cost and parameters.\n", 192 | "parameters = [W_x, b_h, W_h, b_y]\n", 193 | "gradients = T.grad(cost, parameters)\n", 194 | "# Now gradients contains the list of derivatives: [d_cost/d_W_x, d_cost/d_b_h, d_cost/d_W_h, d_cost/d_b_y]\n", 195 | "\n", 196 | "# One last thing we need to do before training is to use these gradients to update the parameters!\n", 197 | "# Remember how parameters are shared variables? 
Well, Theano uses something called updates\n", 198 | "# which are just pairs of (shared_variable, new_variable_expression) to change its value.\n", 199 | "# So, let's create these updates to show how we change the parameter values during training with gradients!\n", 200 | "# We use a learning rate to make small steps over time.\n", 201 | "learning_rate = 0.01\n", 202 | "train_updates = [(param, param - learning_rate*gradient) for param, gradient in zip(parameters, gradients)]\n" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": { 209 | "collapsed": false 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "# Now we can create a Theano function that takes in real inputs and trains our model.\n", 214 | "f_train = theano.function(inputs=[x, correct_labels], outputs=cost, updates=train_updates, allow_input_downcast=True)\n", 215 | "\n", 216 | "# For testing purposes, we don't want to use updates to change the parameters - so create a separate function!\n", 217 | "# We also care more about the output guesses, so let's return those instead of the cost.\n", 218 | "# error = sum(T.neq(y_hat, correct_labels))/float(y_hat.shape[0])\n", 219 | "f_test = theano.function(inputs=[x], outputs=y_hat, allow_input_downcast=True)\n" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": { 226 | "collapsed": false 227 | }, 228 | "outputs": [], 229 | "source": [ 230 | "# Our training can begin!\n", 231 | "# The two hyperparameters we have for this part are minibatch size (how many examples to process in parallel)\n", 232 | "# and the total number of passes over all examples (epochs).\n", 233 | "batch_size = 100\n", 234 | "epochs = 30\n", 235 | "\n", 236 | "# Given our batch size, compute how many batches we can fit into each data set\n", 237 | "train_batches = len(train_x) / batch_size\n", 238 | "valid_batches = len(valid_x) / batch_size\n", 239 | "test_batches = len(test_x) / 
batch_size\n", 240 | "\n", 241 | "# Our main training loop!\n", 242 | "for epoch in range(epochs):\n", 243 | " print epoch+1, \":\",\n", 244 | " \n", 245 | " train_costs = []\n", 246 | " train_accuracy = []\n", 247 | " for i in range(train_batches):\n", 248 | " # Grab our minibatch of examples from the whole train set.\n", 249 | " batch_x = train_x[i*batch_size:(i+1)*batch_size]\n", 250 | " batch_labels = train_y[i*batch_size:(i+1)*batch_size]\n", 251 | " # Compute the costs from the train function (which also updates the parameters)\n", 252 | " costs = f_train(batch_x, batch_labels)\n", 253 | " # Compute the predictions from the test function (which does not update parameters)\n", 254 | " preds = f_test(batch_x)\n", 255 | " # Compute the accuracy of our predictions against the correct batch labels\n", 256 | " acc = sum(preds==batch_labels)/float(len(batch_labels))\n", 257 | " \n", 258 | " train_costs.append(costs)\n", 259 | " train_accuracy.append(acc)\n", 260 | " # Show the mean cost and accuracy across minibatches (the entire train set!)\n", 261 | " print \"cost:\", numpy.mean(train_costs), \"\\ttrain:\", str(numpy.mean(train_accuracy)*100)+\"%\",\n", 262 | " \n", 263 | " valid_accuracy = []\n", 264 | " for i in range(valid_batches):\n", 265 | " batch_x = valid_x[i*batch_size:(i+1)*batch_size]\n", 266 | " batch_labels = valid_y[i*batch_size:(i+1)*batch_size]\n", 267 | " \n", 268 | " preds = f_test(batch_x)\n", 269 | " acc = sum(preds==batch_labels)/float(len(batch_labels))\n", 270 | " \n", 271 | " valid_accuracy.append(acc)\n", 272 | " print \"\\tvalid:\", str(numpy.mean(valid_accuracy)*100)+\"%\",\n", 273 | " \n", 274 | " test_accuracy = []\n", 275 | " for i in range(test_batches):\n", 276 | " batch_x = test_x[i*batch_size:(i+1)*batch_size]\n", 277 | " batch_labels = test_y[i*batch_size:(i+1)*batch_size]\n", 278 | " \n", 279 | " preds = f_test(batch_x)\n", 280 | " acc = sum(preds==batch_labels)/float(len(batch_labels))\n", 281 | " \n", 282 | " 
test_accuracy.append(acc)\n", 283 | " print \"\\ttest:\", str(numpy.mean(test_accuracy)*100)+\"%\"" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": { 290 | "collapsed": true 291 | }, 292 | "outputs": [], 293 | "source": [] 294 | } 295 | ], 296 | "metadata": { 297 | "kernelspec": { 298 | "display_name": "Python 2", 299 | "language": "python", 300 | "name": "python2" 301 | }, 302 | "language_info": { 303 | "codemirror_mode": { 304 | "name": "ipython", 305 | "version": 2 306 | }, 307 | "file_extension": ".py", 308 | "mimetype": "text/x-python", 309 | "name": "python", 310 | "nbconvert_exporter": "python", 311 | "pygments_lexer": "ipython2", 312 | "version": "2.7.6" 313 | } 314 | }, 315 | "nbformat": 4, 316 | "nbformat_minor": 0 317 | } 318 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # intro_deep 2 | Introduction tutorials to deep learning with Theano and OpenDeep 3 | 4 | ## Dependencies 5 | Install [OpenDeep](https://github.com/vitruvianscience/opendeep) and its dependencies. GPU support is highly recommended. 6 | You need at least version 0.0.9a of OpenDeep. 
7 | -------------------------------------------------------------------------------- /RNN-GSN_opendeep.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# Imports!\n", 12 | "# standard libraries\n", 13 | "import logging\n", 14 | "import math\n", 15 | "# third party\n", 16 | "import theano\n", 17 | "# internal references\n", 18 | "from opendeep.data import MNIST\n", 19 | "from opendeep.log import config_root_logger\n", 20 | "from opendeep.models import Model, RNN, GSN\n", 21 | "from opendeep.optimization import RMSProp\n", 22 | "\n", 23 | "config_root_logger()\n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "# Let's define a new model combining the RNN and GSNs.\n", 35 | "class RNN_GSN(Model):\n", 36 | " def __init__(self):\n", 37 | " super(RNN_GSN, self).__init__()\n", 38 | "\n", 39 | " gsn_hiddens = 500\n", 40 | " gsn_layers = 2\n", 41 | "\n", 42 | " # RNN that takes in images (3D sequences) and outputs gsn hiddens (3D sequence of them)\n", 43 | " self.rnn = RNN(\n", 44 | " input_size=28 * 28,\n", 45 | " hidden_size=100,\n", 46 | " # needs to output hidden units for odd layers of GSN\n", 47 | " output_size=gsn_hiddens * (math.ceil(gsn_layers/2.)),\n", 48 | " layers=1,\n", 49 | " activation='tanh',\n", 50 | " hidden_activation='relu',\n", 51 | " weights_init='uniform', weights_interval='montreal',\n", 52 | " r_weights_init='identity'\n", 53 | " )\n", 54 | "\n", 55 | " # Create the GSN that will encode the input space\n", 56 | " gsn = GSN(\n", 57 | " input_size=28 * 28,\n", 58 | " hidden_size=gsn_hiddens,\n", 59 | " layers=gsn_layers,\n", 60 | " walkbacks=4,\n", 61 | " visible_activation='sigmoid',\n", 62 | " hidden_activation='tanh',\n", 
63 | " image_height=28,\n", 64 | " image_width=28\n", 65 | " )\n", 66 | " # grab the input arguments\n", 67 | " gsn_args = gsn.args.copy()\n", 68 | " # grab the parameters it initialized\n", 69 | " gsn_params = gsn.get_params()\n", 70 | "\n", 71 | " # Now hook the two up! RNN should output hiddens for GSN into a 3D tensor (1 set for each timestep)\n", 72 | " # Therefore, we need to use scan to create the GSN reconstruction for each timestep given the hiddens\n", 73 | " def step(hiddens, x):\n", 74 | " gsn = GSN(\n", 75 | " inputs_hook=(28*28, x),\n", 76 | " hiddens_hook=(gsn_hiddens, hiddens),\n", 77 | " params_hook=(gsn_params),\n", 78 | " **gsn_args\n", 79 | " )\n", 80 | " # return the reconstruction and cost!\n", 81 | " return gsn.get_outputs(), gsn.get_train_cost()\n", 82 | "\n", 83 | " (outputs, costs), scan_updates = theano.scan(\n", 84 | " fn=lambda h, x: step(h, x),\n", 85 | " sequences=[self.rnn.output, self.rnn.input],\n", 86 | " outputs_info=[None, None]\n", 87 | " )\n", 88 | "\n", 89 | " self.outputs = outputs\n", 90 | "\n", 91 | " self.updates = dict()\n", 92 | " self.updates.update(self.rnn.get_updates())\n", 93 | " self.updates.update(scan_updates)\n", 94 | "\n", 95 | " self.cost = costs.sum()\n", 96 | " self.params = gsn_params + self.rnn.get_params()\n", 97 | "\n", 98 | " # Model functions necessary for training\n", 99 | " def get_inputs(self):\n", 100 | " return self.rnn.get_inputs()\n", 101 | " def get_params(self):\n", 102 | " return self.params\n", 103 | " def get_train_cost(self):\n", 104 | " return self.cost\n", 105 | " def get_updates(self):\n", 106 | " return self.updates\n", 107 | " def get_outputs(self):\n", 108 | " return self.outputs\n", 109 | " " 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": { 116 | "collapsed": false 117 | }, 118 | "outputs": [], 119 | "source": [ 120 | "# Now we can instantiate and train the model!\n", 121 | "rnn_gsn = RNN_GSN()\n", 122 | "\n", 123 | "# data! 
(needs to be 3d for rnn).\n", 124 | "mnist = MNIST(sequence_number=1, seq_3d=True, seq_length=30)\n", 125 | "\n", 126 | "# optimizer!\n", 127 | "optimizer = RMSProp(\n", 128 | " model=rnn_gsn,\n", 129 | " dataset=mnist,\n", 130 | " epochs=500,\n", 131 | " batch_size=50,\n", 132 | " save_freq=10,\n", 133 | " stop_patience=30,\n", 134 | " stop_threshold=.9995,\n", 135 | " learning_rate=1e-6,\n", 136 | " decay=.95,\n", 137 | " max_scaling=1e5,\n", 138 | " grad_clip=5.,\n", 139 | " hard_clip=False\n", 140 | ")\n", 141 | "# train!\n", 142 | "optimizer.train()\n" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": { 149 | "collapsed": true 150 | }, 151 | "outputs": [], 152 | "source": [] 153 | } 154 | ], 155 | "metadata": { 156 | "kernelspec": { 157 | "display_name": "Python 2", 158 | "language": "python", 159 | "name": "python2" 160 | }, 161 | "language_info": { 162 | "codemirror_mode": { 163 | "name": "ipython", 164 | "version": 2 165 | }, 166 | "file_extension": ".py", 167 | "mimetype": "text/x-python", 168 | "name": "python", 169 | "nbconvert_exporter": "python", 170 | "pygments_lexer": "ipython2", 171 | "version": "2.7.6" 172 | } 173 | }, 174 | "nbformat": 4, 175 | "nbformat_minor": 0 176 | } 177 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | (Copied from OpenDeep https://github.com/vitruvianscience/opendeep) 3 | 4 | This module contains different utility functions that are not connected 5 | in anyway to the networks presented in the tutorials, but rather help in 6 | processing the outputs into a more understandable way. 7 | 8 | For example ``tile_raster_images`` helps in generating a easy to grasp 9 | image from a set of samples or weights. 
# Written by Li Yao (University of Montreal)
# https://github.com/yaoli/GSN

import numpy


def scale_to_unit_interval(ndar, eps=1e-8):
    """
    Scales all values in the ndarray 'ndar' to be between 0 and 1.

    The input is never modified; a scaled copy is returned. Integer and
    boolean inputs are promoted to float64 first, because the in-place
    scaling below cannot be stored in an integer array.

    Parameters
    ----------
    ndar : numpy.ndarray
        The input array to scale values.
    eps : float
        Small value to avoid divide-by-zero when scaling.

    Returns
    -------
    numpy.ndarray
        The input array scaled to be between 0 and 1.
    """
    ndar = numpy.asarray(ndar)
    if ndar.dtype.kind in 'biu':
        # astype() already returns a fresh array, so the caller's data is safe
        ndar = ndar.astype(numpy.float64)
    else:
        ndar = ndar.copy()
    ndar -= ndar.min()
    ndar *= 1.0 / (ndar.max() + eps)
    return ndar


def _output_dtype(channel, scale_rows_to_unit_interval, output_pixel_vals):
    """Return the dtype of the tiled output built from one channel of input."""
    if output_pixel_vals:
        return numpy.dtype('uint8')
    dtype = numpy.asarray(channel).dtype
    if scale_rows_to_unit_interval and dtype.kind in 'biu':
        # Scaled values are fractions in [0, 1]; an integer buffer would
        # truncate every one of them to 0.
        return numpy.dtype('float64')
    return dtype


def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0),
                       scale_rows_to_unit_interval=True,
                       output_pixel_vals=True):
    """
    Transform an array with one flattened image per row, into an array in
    which images are reshaped and laid out like tiles on a floor.

    This function is useful for visualizing datasets whose rows are images,
    and also columns of matrices for transforming those rows
    (such as the first layer of a neural net).

    Parameters
    ----------
    X : 2D ndarray or a tuple of 4 channels, elements of which can be 2D ndarrays or None
        A 2D array in which every row is a flattened image. When a tuple is
        given, each element is tiled separately into one of the 4 output
        channels; a None channel is filled with a constant (0 for the first
        three channels, fully opaque for the last one).
    img_shape : tuple
        The original (height, width) shape of each image.
    tile_shape : tuple
        The number of images to tile (rows, cols).
    tile_spacing : tuple
        The amount of pixels to put between image tiles (like a border size).
    scale_rows_to_unit_interval : bool
        If the values need to be scaled before being plotted to [0,1] or not.
    output_pixel_vals : bool
        If output should be pixel values (i.e. uint8 values) or floats.

    Returns
    -------
    2D array
        Array suitable for viewing as an image. (See:`PIL.Image.fromarray`.)
        For a tuple input the result is 3D with a trailing axis of size 4.
    """

    assert len(img_shape) == 2
    assert len(tile_shape) == 2
    assert len(tile_spacing) == 2

    # Each axis holds `tile` images plus the spacing *between* them, i.e.
    # (img + spacing) * tiles - spacing.
    out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp
                 in zip(img_shape, tile_shape, tile_spacing)]

    if isinstance(X, tuple):
        assert len(X) == 4

        # colors default to 0, alpha defaults to opaque
        if output_pixel_vals:
            channel_defaults = [0, 0, 0, 255]
        else:
            channel_defaults = [0., 0., 0., 1.]

        # A tuple has no dtype of its own: derive it from the first channel
        # that was actually supplied (float64 when every channel is None).
        provided = [channel for channel in X if channel is not None]
        if provided:
            out_dtype = _output_dtype(provided[0],
                                      scale_rows_to_unit_interval,
                                      output_pixel_vals)
        elif output_pixel_vals:
            out_dtype = numpy.dtype('uint8')
        else:
            out_dtype = numpy.dtype('float64')

        out_array = numpy.zeros((out_shape[0], out_shape[1], 4),
                                dtype=out_dtype)

        for i in range(4):
            if X[i] is None:
                # missing channel: fill it with its default value
                out_array[:, :, i] = channel_defaults[i]
            else:
                # use a recursive call to tile the channel and store it
                # in the output
                out_array[:, :, i] = tile_raster_images(
                    X[i], img_shape, tile_shape, tile_spacing,
                    scale_rows_to_unit_interval, output_pixel_vals)
        return out_array

    # if we are dealing with only one channel
    X = numpy.asarray(X)
    H, W = img_shape
    Hs, Ws = tile_spacing

    # generate a matrix to store the output
    out_array = numpy.zeros(
        out_shape,
        dtype=_output_dtype(X, scale_rows_to_unit_interval,
                            output_pixel_vals))

    # scaled values live in [0, 1]; stretch them to [0, 255] for pixel output
    c = 255 if output_pixel_vals else 1

    # Tiles are filled in row-major order; any tile without a matching row
    # in X is left at zero.
    n_tiles = min(X.shape[0], tile_shape[0] * tile_shape[1])
    for index in range(n_tiles):
        tile_row, tile_col = divmod(index, tile_shape[1])
        this_img = X[index].reshape(img_shape)
        if scale_rows_to_unit_interval:
            this_img = scale_to_unit_interval(this_img)
        # add the slice to the corresponding position in the output array
        row0 = tile_row * (H + Hs)
        col0 = tile_col * (W + Ws)
        out_array[row0:row0 + H, col0:col0 + W] = this_img * c
    return out_array