├── .gitignore ├── 0_multiply.py ├── 1_linear_regression.py ├── 2_logistic_regression.py ├── 3_net.py ├── 4_modern_net.py ├── 5_convolutional_net.py ├── LICENSE ├── README.md ├── download_mnist.sh └── load.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Django stuff: 48 | *.log 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | # PyBuilder 54 | target/ 55 | -------------------------------------------------------------------------------- /0_multiply.py: -------------------------------------------------------------------------------- 1 | import theano 2 | from theano import tensor as T 3 | 4 | a = T.scalar() 5 | b = T.scalar() 6 | 7 | y = a * b 8 | 9 | multiply = theano.function(inputs=[a, b], outputs=y) 10 | 11 | print multiply(1, 2) #2 12 | print multiply(3, 3) #9 13 | 14 | -------------------------------------------------------------------------------- /1_linear_regression.py: -------------------------------------------------------------------------------- 1 | import theano 2 | from theano import tensor as T 3 | import numpy as np 4 | 5 | trX = np.linspace(-1, 1, 101) 6 | trY = 2 * trX + np.random.randn(*trX.shape) * 0.33 7 | 8 | X = T.scalar() 9 | Y = T.scalar() 10 | 11 | def model(X, w): 12 | return X * w 13 | 14 | w = theano.shared(np.asarray(0., dtype=theano.config.floatX)) 15 | y = model(X, w) 16 | 17 | cost = T.mean(T.sqr(y - Y)) 18 | gradient = T.grad(cost=cost, wrt=w) 19 | updates = [[w, w - gradient * 0.01]] 20 | 21 | train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True) 22 | 23 | for i in range(100): 24 | for x, y in zip(trX, trY): 25 | train(x, y) 26 | 27 | print w.get_value() #something around 2 28 | 29 | -------------------------------------------------------------------------------- /2_logistic_regression.py: -------------------------------------------------------------------------------- 1 | import theano 2 | from theano import tensor as T 3 | import numpy as np 4 | from load import mnist 5 | 6 | def floatX(X): 7 | return np.asarray(X, dtype=theano.config.floatX) 8 | 9 | def init_weights(shape): 10 | return theano.shared(floatX(np.random.randn(*shape) * 0.01)) 11 | 12 | def model(X, w): 13 | return T.nnet.softmax(T.dot(X, w)) 14 | 15 | trX, teX, trY, teY = mnist(onehot=True) 16 | 17 | X = T.fmatrix() 18 | Y = T.fmatrix() 19 | 20 | w = init_weights((784, 10)) 21 | 22 | py_x = model(X, w) 23 | y_pred = T.argmax(py_x, axis=1) 24 | 25 | cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y)) 26 | gradient = T.grad(cost=cost, wrt=w) 27 | update = [[w, w - gradient * 0.05]] 28 | 29 | train = theano.function(inputs=[X, Y], outputs=cost, updates=update, allow_input_downcast=True) 30 | predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True) 31 | 32 | for i in range(100): 33 | for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)): 34 | cost = train(trX[start:end], trY[start:end]) 35 | print i, np.mean(np.argmax(teY, axis=1) == predict(teX)) 36 | 37 | -------------------------------------------------------------------------------- /3_net.py: -------------------------------------------------------------------------------- 1 | import theano 2 | from theano import tensor as T 3 | import numpy as np 4 | from load import mnist 5 | from foxhound.utils.vis import grayscale_grid_vis, unit_scale 6 | from scipy.misc import imsave 7 | 8 | def floatX(X): 9 | return np.asarray(X, dtype=theano.config.floatX) 10 | 11 | def init_weights(shape): 12 | return theano.shared(floatX(np.random.randn(*shape) * 0.01)) 13 | 14 | def sgd(cost, params, lr=0.05): 15 | grads = T.grad(cost=cost, wrt=params) 16 | updates = [] 17 | for p, g in zip(params, grads): 18 | updates.append([p, p - g * lr]) 19 | return updates 20 | 21 | def model(X, w_h, w_o): 22 | h = T.nnet.sigmoid(T.dot(X, w_h)) 23 | pyx = T.nnet.softmax(T.dot(h, w_o)) 24 | return pyx 25 | 26 | trX, teX, trY, teY = mnist(onehot=True) 27 | 28 | X = T.fmatrix() 29 | Y = T.fmatrix() 30 | 31 | w_h = init_weights((784, 625)) 32 | w_o = init_weights((625, 10)) 33 | 34 | py_x = model(X, w_h, w_o) 35 | y_x = T.argmax(py_x, axis=1) 36 | 37 | cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y)) 38 | params = [w_h, w_o] 39 | updates = sgd(cost, params) 40 | 41 | train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True) 42 | predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True) 43 | 44 | for i in range(100): 45 | for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)): 46 | cost = train(trX[start:end], trY[start:end]) 47 | print np.mean(np.argmax(teY, axis=1) == predict(teX)) 48 | 49 | -------------------------------------------------------------------------------- /4_modern_net.py: -------------------------------------------------------------------------------- 1 | import theano 2 | from theano import tensor as T 3 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams 4 | import numpy as np 5 | from load import mnist 6 | 7 | srng = RandomStreams() 8 | 9 | def floatX(X): 10 | return np.asarray(X, dtype=theano.config.floatX) 11 | 12 | def init_weights(shape): 13 | return theano.shared(floatX(np.random.randn(*shape) * 0.01)) 14 | 15 | def rectify(X): 16 | return T.maximum(X, 0.) 17 | 18 | def softmax(X): 19 | e_x = T.exp(X - X.max(axis=1).dimshuffle(0, 'x')) 20 | return e_x / e_x.sum(axis=1).dimshuffle(0, 'x') 21 | 22 | def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6): 23 | grads = T.grad(cost=cost, wrt=params) 24 | updates = [] 25 | for p, g in zip(params, grads): 26 | acc = theano.shared(p.get_value() * 0.) 27 | acc_new = rho * acc + (1 - rho) * g ** 2 28 | gradient_scaling = T.sqrt(acc_new + epsilon) 29 | g = g / gradient_scaling 30 | updates.append((acc, acc_new)) 31 | updates.append((p, p - lr * g)) 32 | return updates 33 | 34 | def dropout(X, p=0.): 35 | if p > 0: 36 | retain_prob = 1 - p 37 | X *= srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX) 38 | X /= retain_prob 39 | return X 40 | 41 | def model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden): 42 | X = dropout(X, p_drop_input) 43 | h = rectify(T.dot(X, w_h)) 44 | 45 | h = dropout(h, p_drop_hidden) 46 | h2 = rectify(T.dot(h, w_h2)) 47 | 48 | h2 = dropout(h2, p_drop_hidden) 49 | py_x = softmax(T.dot(h2, w_o)) 50 | return h, h2, py_x 51 | 52 | trX, teX, trY, teY = mnist(onehot=True) 53 | 54 | X = T.fmatrix() 55 | Y = T.fmatrix() 56 | 57 | w_h = init_weights((784, 625)) 58 | w_h2 = init_weights((625, 625)) 59 | w_o = init_weights((625, 10)) 60 | 61 | noise_h, noise_h2, noise_py_x = model(X, w_h, w_h2, w_o, 0.2, 0.5) 62 | h, h2, py_x = model(X, w_h, w_h2, w_o, 0., 0.) 63 | y_x = T.argmax(py_x, axis=1) 64 | 65 | cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y)) 66 | params = [w_h, w_h2, w_o] 67 | updates = RMSprop(cost, params, lr=0.001) 68 | 69 | train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True) 70 | predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True) 71 | 72 | for i in range(100): 73 | for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)): 74 | cost = train(trX[start:end], trY[start:end]) 75 | print np.mean(np.argmax(teY, axis=1) == predict(teX)) 76 | 77 | -------------------------------------------------------------------------------- /5_convolutional_net.py: -------------------------------------------------------------------------------- 1 | import theano 2 | from theano import tensor as T 3 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams 4 | import numpy as np 5 | from load import mnist 6 | from theano.tensor.nnet.conv import conv2d 7 | from theano.tensor.signal.downsample import max_pool_2d 8 | 9 | srng = RandomStreams() 10 | 11 | def floatX(X): 12 | return np.asarray(X, dtype=theano.config.floatX) 13 | 14 | def init_weights(shape): 15 | return theano.shared(floatX(np.random.randn(*shape) * 0.01)) 16 | 17 | def rectify(X): 18 | return T.maximum(X, 0.) 19 | 20 | def softmax(X): 21 | e_x = T.exp(X - X.max(axis=1).dimshuffle(0, 'x')) 22 | return e_x / e_x.sum(axis=1).dimshuffle(0, 'x') 23 | 24 | def dropout(X, p=0.): 25 | if p > 0: 26 | retain_prob = 1 - p 27 | X *= srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX) 28 | X /= retain_prob 29 | return X 30 | 31 | def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6): 32 | grads = T.grad(cost=cost, wrt=params) 33 | updates = [] 34 | for p, g in zip(params, grads): 35 | acc = theano.shared(p.get_value() * 0.) 36 | acc_new = rho * acc + (1 - rho) * g ** 2 37 | gradient_scaling = T.sqrt(acc_new + epsilon) 38 | g = g / gradient_scaling 39 | updates.append((acc, acc_new)) 40 | updates.append((p, p - lr * g)) 41 | return updates 42 | 43 | def model(X, w, w2, w3, w4, p_drop_conv, p_drop_hidden): 44 | l1a = rectify(conv2d(X, w, border_mode='full')) 45 | l1 = max_pool_2d(l1a, (2, 2)) 46 | l1 = dropout(l1, p_drop_conv) 47 | 48 | l2a = rectify(conv2d(l1, w2)) 49 | l2 = max_pool_2d(l2a, (2, 2)) 50 | l2 = dropout(l2, p_drop_conv) 51 | 52 | l3a = rectify(conv2d(l2, w3)) 53 | l3b = max_pool_2d(l3a, (2, 2)) 54 | l3 = T.flatten(l3b, outdim=2) 55 | l3 = dropout(l3, p_drop_conv) 56 | 57 | l4 = rectify(T.dot(l3, w4)) 58 | l4 = dropout(l4, p_drop_hidden) 59 | 60 | pyx = softmax(T.dot(l4, w_o)) 61 | return l1, l2, l3, l4, pyx 62 | 63 | trX, teX, trY, teY = mnist(onehot=True) 64 | 65 | trX = trX.reshape(-1, 1, 28, 28) 66 | teX = teX.reshape(-1, 1, 28, 28) 67 | 68 | X = T.ftensor4() 69 | Y = T.fmatrix() 70 | 71 | w = init_weights((32, 1, 3, 3)) 72 | w2 = init_weights((64, 32, 3, 3)) 73 | w3 = init_weights((128, 64, 3, 3)) 74 | w4 = init_weights((128 * 3 * 3, 625)) 75 | w_o = init_weights((625, 10)) 76 | 77 | noise_l1, noise_l2, noise_l3, noise_l4, noise_py_x = model(X, w, w2, w3, w4, 0.2, 0.5) 78 | l1, l2, l3, l4, py_x = model(X, w, w2, w3, w4, 0., 0.) 79 | y_x = T.argmax(py_x, axis=1) 80 | 81 | 82 | cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y)) 83 | params = [w, w2, w3, w4, w_o] 84 | updates = RMSprop(cost, params, lr=0.001) 85 | 86 | train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True) 87 | predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True) 88 | 89 | for i in range(100): 90 | for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)): 91 | cost = train(trX[start:end], trY[start:end]) 92 | print np.mean(np.argmax(teY, axis=1) == predict(teX)) 93 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Alec Radford 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Theano-Tutorials 2 | ================ 3 | 4 | Bare bones introduction to machine learning from linear regression to convolutional neural networks using Theano. 5 | 6 | ***Dataset*** 7 | It's worth noting that this library assumes that the reader has access to the mnist dataset. This dataset is freely available and is accessible through Yann LeCun's [personal website](http://yann.lecun.com/exdb/mnist/). 8 | 9 | If you want to automate the download of the dataset, there is an included file that will do this for you. Simply run the following: 10 | `sudo ./download_mnist.sh` 11 | 12 | ***Known Issues*** 13 | `Library not loaded: /usr/local/opt/openssl/lib/libssl.1.0.0.dylib` 14 | This results from a broken openssl installation on mac. It can be fixed by uninstalling and reinstalling openssl: 15 | `sudo brew remove openssl` 16 | `brew install openssl` 17 | -------------------------------------------------------------------------------- /download_mnist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p /media/datasets/mnist 4 | 5 | if ! [ -e /media/datasets/mnist/train-images-idx3-ubyte.gz ] 6 | then 7 | wget -P /media/datasets/mnist/ http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz 8 | fi 9 | gzip -d /media/datasets/mnist/train-images-idx3-ubyte.gz 10 | 11 | if ! [ -e /media/datasets/mnist/train-labels-idx1-ubyte.gz ] 12 | then 13 | wget -P /media/datasets/mnist/ http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz 14 | fi 15 | gzip -d /media/datasets/mnist/train-labels-idx1-ubyte.gz 16 | 17 | if ! [ -e /media/datasets/mnist/t10k-images-idx3-ubyte.gz ] 18 | then 19 | wget -P /media/datasets/mnist/ http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz 20 | fi 21 | gzip -d /media/datasets/mnist/t10k-images-idx3-ubyte.gz 22 | 23 | if ! [ -e /media/datasets/mnist/t10k-labels-idx1-ubyte.gz ] 24 | then 25 | wget -P /media/datasets/mnist/ http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz 26 | fi 27 | gzip -d /media/datasets/mnist/t10k-labels-idx1-ubyte.gz 28 | -------------------------------------------------------------------------------- /load.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | datasets_dir = '/media/datasets/' 5 | 6 | def one_hot(x,n): 7 | if type(x) == list: 8 | x = np.array(x) 9 | x = x.flatten() 10 | o_h = np.zeros((len(x),n)) 11 | o_h[np.arange(len(x)),x] = 1 12 | return o_h 13 | 14 | def mnist(ntrain=60000,ntest=10000,onehot=True): 15 | data_dir = os.path.join(datasets_dir,'mnist/') 16 | fd = open(os.path.join(data_dir,'train-images-idx3-ubyte')) 17 | loaded = np.fromfile(file=fd,dtype=np.uint8) 18 | trX = loaded[16:].reshape((60000,28*28)).astype(float) 19 | 20 | fd = open(os.path.join(data_dir,'train-labels-idx1-ubyte')) 21 | loaded = np.fromfile(file=fd,dtype=np.uint8) 22 | trY = loaded[8:].reshape((60000)) 23 | 24 | fd = open(os.path.join(data_dir,'t10k-images-idx3-ubyte')) 25 | loaded = np.fromfile(file=fd,dtype=np.uint8) 26 | teX = loaded[16:].reshape((10000,28*28)).astype(float) 27 | 28 | fd = open(os.path.join(data_dir,'t10k-labels-idx1-ubyte')) 29 | loaded = np.fromfile(file=fd,dtype=np.uint8) 30 | teY = loaded[8:].reshape((10000)) 31 | 32 | trX = trX/255. 33 | teX = teX/255. 34 | 35 | trX = trX[:ntrain] 36 | trY = trY[:ntrain] 37 | 38 | teX = teX[:ntest] 39 | teY = teY[:ntest] 40 | 41 | if onehot: 42 | trY = one_hot(trY, 10) 43 | teY = one_hot(teY, 10) 44 | else: 45 | trY = np.asarray(trY) 46 | teY = np.asarray(teY) 47 | 48 | return trX,teX,trY,teY --------------------------------------------------------------------------------