├── .gitignore
├── 0_multiply.py
├── 1_linear_regression.py
├── 2_logistic_regression.py
├── 3_net.py
├── 4_modern_net.py
├── 5_convolutional_net.py
├── LICENSE
├── README.md
├── download_mnist.sh
└── load.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | 
 5 | # C extensions
 6 | *.so
 7 | 
 8 | # Distribution / packaging
 9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | lib/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 | 
25 | # PyInstaller
26 | #  Usually these files are written by a python script from a template
27 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
28 | *.manifest
29 | *.spec
30 | 
31 | # Installer logs
32 | pip-log.txt
33 | pip-delete-this-directory.txt
34 | 
35 | # Unit test / coverage reports
36 | htmlcov/
37 | .tox/
38 | .coverage
39 | .cache
40 | nosetests.xml
41 | coverage.xml
42 | 
43 | # Translations
44 | *.mo
45 | *.pot
46 | 
47 | # Django stuff:
48 | *.log
49 | 
50 | # Sphinx documentation
51 | docs/_build/
52 | 
53 | # PyBuilder
54 | target/
55 | 


--------------------------------------------------------------------------------
/0_multiply.py:
--------------------------------------------------------------------------------
 1 | import theano
 2 | from theano import tensor as T
 3 | 
 4 | a = T.scalar()
 5 | b = T.scalar()
 6 | 
 7 | y = a * b
 8 | 
 9 | multiply = theano.function(inputs=[a, b], outputs=y)
10 | 
11 | print multiply(1, 2) #2
12 | print multiply(3, 3) #9
13 | 
14 | 


--------------------------------------------------------------------------------
/1_linear_regression.py:
--------------------------------------------------------------------------------
 1 | import theano
 2 | from theano import tensor as T
 3 | import numpy as np
 4 | 
 5 | trX = np.linspace(-1, 1, 101)
 6 | trY = 2 * trX + np.random.randn(*trX.shape) * 0.33
 7 | 
 8 | X = T.scalar()
 9 | Y = T.scalar()
10 | 
def model(X, w):
    """Linear model without a bias term: scale the input X by the weight w."""
    prediction = X * w
    return prediction
13 | 
# The single trainable weight, initialised to zero and stored as a Theano
# shared variable so the compiled function can update it in place.
w = theano.shared(np.asarray(0., dtype=theano.config.floatX))
y = model(X, w)

# Squared error between prediction and target, and its gradient w.r.t. w.
cost = T.mean(T.sqr(y - Y))
gradient = T.grad(cost=cost, wrt=w)
# One gradient-descent step with learning rate 0.01.
updates = [[w, w - gradient * 0.01]]

train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)

# 100 passes over the data, one (input, target) pair per update. The loop
# variables are named x_i / y_i so they do not clobber the symbolic `y`.
for i in range(100):
    for x_i, y_i in zip(trX, trY):
        train(x_i, y_i)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(w.get_value())  # something around 2
28 | 
29 | 


--------------------------------------------------------------------------------
/2_logistic_regression.py:
--------------------------------------------------------------------------------
 1 | import theano
 2 | from theano import tensor as T
 3 | import numpy as np
 4 | from load import mnist
 5 | 
 6 | def floatX(X):
 7 |     return np.asarray(X, dtype=theano.config.floatX)
 8 | 
 9 | def init_weights(shape):
10 |     return theano.shared(floatX(np.random.randn(*shape) * 0.01))
11 | 
def model(X, w):
    """Softmax regression: class probabilities from the linear map X . w."""
    logits = T.dot(X, w)
    return T.nnet.softmax(logits)
14 | 
trX, teX, trY, teY = mnist(onehot=True)

# Symbolic minibatch of inputs and of one-hot targets.
X = T.fmatrix()
Y = T.fmatrix()

# One weight column per class for each of the 784 input features.
w = init_weights((784, 10))

py_x = model(X, w)
y_pred = T.argmax(py_x, axis=1)

cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
gradient = T.grad(cost=cost, wrt=w)
# One gradient-descent step with learning rate 0.05.
update = [[w, w - gradient * 0.05]]

train = theano.function(inputs=[X, Y], outputs=cost, updates=update, allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True)

for i in range(100):
    # Iterate over batch starts only: slicing clamps the final batch to the
    # end of the data, so the last (possibly shorter) minibatch is trained
    # on too. Pairing starts with range(128, len(trX), 128) skipped it.
    for start in range(0, len(trX), 128):
        end = start + 128
        # The returned batch cost is not bound, so the symbolic `cost`
        # above is left intact.
        train(trX[start:end], trY[start:end])
    # Epoch index and test-set accuracy, formatted identically on
    # Python 2 and Python 3.
    print("%d %s" % (i, np.mean(np.argmax(teY, axis=1) == predict(teX))))
36 | 
37 | 


--------------------------------------------------------------------------------
/3_net.py:
--------------------------------------------------------------------------------
 1 | import theano
 2 | from theano import tensor as T
 3 | import numpy as np
 4 | from load import mnist
 5 | from foxhound.utils.vis import grayscale_grid_vis, unit_scale
 6 | from scipy.misc import imsave
 7 | 
 8 | def floatX(X):
 9 |     return np.asarray(X, dtype=theano.config.floatX)
10 | 
def init_weights(shape):
    """Return a Theano shared variable of `shape` holding 0.01-scaled standard-normal values."""
    values = floatX(np.random.randn(*shape) * 0.01)
    return theano.shared(values)
13 | 
def sgd(cost, params, lr=0.05):
    """Build plain gradient-descent updates for `params`.

    Returns a list of [parameter, new_value] pairs, one per entry of
    `params`, where each new value is the parameter minus `lr` times the
    gradient of `cost` with respect to it.
    """
    gradients = T.grad(cost=cost, wrt=params)
    return [[param, param - grad * lr] for param, grad in zip(params, gradients)]
20 | 
def model(X, w_h, w_o):
    """One sigmoid hidden layer followed by a softmax output layer."""
    hidden = T.nnet.sigmoid(T.dot(X, w_h))
    return T.nnet.softmax(T.dot(hidden, w_o))
25 | 
trX, teX, trY, teY = mnist(onehot=True)

# Symbolic minibatch of inputs and of one-hot targets.
X = T.fmatrix()
Y = T.fmatrix()

# 784 input features -> 625 hidden units -> 10 classes.
w_h = init_weights((784, 625))
w_o = init_weights((625, 10))

py_x = model(X, w_h, w_o)
y_x = T.argmax(py_x, axis=1)

cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
params = [w_h, w_o]
updates = sgd(cost, params)

train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)

for i in range(100):
    # Iterate over batch starts only: slicing clamps the final batch to the
    # end of the data, so the last (possibly shorter) minibatch is trained
    # on too. Pairing starts with range(128, len(trX), 128) skipped it.
    for start in range(0, len(trX), 128):
        end = start + 128
        # The returned batch cost is not bound, so the symbolic `cost`
        # above is left intact.
        train(trX[start:end], trY[start:end])
    # Test-set accuracy after each epoch; single-argument print(...) behaves
    # the same on Python 2 and Python 3.
    print(np.mean(np.argmax(teY, axis=1) == predict(teX)))
48 | 
49 | 


--------------------------------------------------------------------------------
/4_modern_net.py:
--------------------------------------------------------------------------------
 1 | import theano
 2 | from theano import tensor as T
 3 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
 4 | import numpy as np
 5 | from load import mnist
 6 | 
# Shared random stream; dropout() draws its binomial masks from it.
srng = RandomStreams()
 8 | 
 9 | def floatX(X):
10 |     return np.asarray(X, dtype=theano.config.floatX)
11 | 
def init_weights(shape):
    """Shared weight array of `shape`, initialised with standard-normal noise scaled by 0.01."""
    noise = np.random.randn(*shape)
    return theano.shared(floatX(noise * 0.01))
14 | 
def rectify(X):
    """Rectified linear unit: element-wise maximum of X and zero."""
    floor = 0.
    return T.maximum(X, floor)
17 | 
def softmax(X):
    """Row-wise softmax of a 2-D tensor.

    The row maximum is subtracted before exponentiating, which leaves the
    result unchanged but keeps the exponentials from overflowing.
    """
    row_max = X.max(axis=1).dimshuffle(0, 'x')
    exps = T.exp(X - row_max)
    row_sum = exps.sum(axis=1).dimshuffle(0, 'x')
    return exps / row_sum
21 | 
def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    """Build RMSprop updates for `params`.

    Each parameter gets a zero-initialised shared accumulator holding a
    running average (decay `rho`) of its squared gradient. The gradient is
    divided by sqrt(accumulator + epsilon) before the step of size `lr`.

    Returns a list of (shared_variable, new_value) pairs: for every
    parameter, first the accumulator update, then the parameter update.
    """
    gradients = T.grad(cost=cost, wrt=params)
    updates = []
    for param, grad in zip(params, gradients):
        # Multiplying the current value by 0. yields zeros of the same
        # shape and dtype as the parameter.
        mean_square = theano.shared(param.get_value() * 0.)
        mean_square_new = rho * mean_square + (1 - rho) * grad ** 2
        scaled_grad = grad / T.sqrt(mean_square_new + epsilon)
        updates.extend([
            (mean_square, mean_square_new),
            (param, param - lr * scaled_grad),
        ])
    return updates
33 | 
def dropout(X, p=0.):
    """Apply dropout with drop probability `p` to X.

    For p <= 0 the input is returned untouched. Otherwise X is multiplied
    by a binomial keep-mask and divided by the keep probability (1 - p).
    """
    if not p > 0:
        return X
    retain_prob = 1 - p
    mask = srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX)
    return X * mask / retain_prob
40 | 
def model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden):
    """Two rectified hidden layers with dropout, then a softmax output.

    Dropout is applied to the input with probability `p_drop_input` and to
    each hidden layer with probability `p_drop_hidden`.

    Returns (h, h2, py_x): both hidden activations after dropout and the
    class probabilities.
    """
    noisy_input = dropout(X, p_drop_input)
    h = dropout(rectify(T.dot(noisy_input, w_h)), p_drop_hidden)
    h2 = dropout(rectify(T.dot(h, w_h2)), p_drop_hidden)
    py_x = softmax(T.dot(h2, w_o))
    return h, h2, py_x
51 | 
trX, teX, trY, teY = mnist(onehot=True)

# Symbolic minibatch of inputs and of one-hot targets.
X = T.fmatrix()
Y = T.fmatrix()

# 784 input features -> 625 -> 625 hidden units -> 10 classes.
w_h = init_weights((784, 625))
w_h2 = init_weights((625, 625))
w_o = init_weights((625, 10))

# Two graphs over the same weights: one with dropout for the training cost,
# one without dropout for prediction.
noise_h, noise_h2, noise_py_x = model(X, w_h, w_h2, w_o, 0.2, 0.5)
h, h2, py_x = model(X, w_h, w_h2, w_o, 0., 0.)
y_x = T.argmax(py_x, axis=1)

cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
params = [w_h, w_h2, w_o]
updates = RMSprop(cost, params, lr=0.001)

train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)

for i in range(100):
    # Iterate over batch starts only: slicing clamps the final batch to the
    # end of the data, so the last (possibly shorter) minibatch is trained
    # on too. Pairing starts with range(128, len(trX), 128) skipped it.
    for start in range(0, len(trX), 128):
        end = start + 128
        # The returned batch cost is not bound, so the symbolic `cost`
        # above is left intact.
        train(trX[start:end], trY[start:end])
    # Test-set accuracy after each epoch; single-argument print(...) behaves
    # the same on Python 2 and Python 3.
    print(np.mean(np.argmax(teY, axis=1) == predict(teX)))
76 | 
77 | 


--------------------------------------------------------------------------------
/5_convolutional_net.py:
--------------------------------------------------------------------------------
 1 | import theano
 2 | from theano import tensor as T
 3 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
 4 | import numpy as np
 5 | from load import mnist
 6 | from theano.tensor.nnet.conv import conv2d
 7 | from theano.tensor.signal.downsample import max_pool_2d
 8 | 
# Shared random stream; dropout() draws its binomial masks from it.
srng = RandomStreams()
10 | 
def floatX(X):
    """Return X as a numpy array in the float dtype selected by theano.config.floatX."""
    converted = np.asarray(X, dtype=theano.config.floatX)
    return converted
13 | 
def init_weights(shape):
    """Wrap 0.01-scaled standard-normal values of `shape` in a Theano shared variable."""
    scaled = np.random.randn(*shape) * 0.01
    weights = floatX(scaled)
    return theano.shared(weights)
16 | 
def rectify(X):
    """ReLU activation: clamp negative entries of X to zero."""
    zero = 0.
    return T.maximum(X, zero)
19 | 
def softmax(X):
    """Softmax over axis 1 of a 2-D tensor, shifted by the row maximum before exponentiating."""
    shift = X.max(axis=1).dimshuffle(0, 'x')
    unnormalised = T.exp(X - shift)
    normaliser = unnormalised.sum(axis=1).dimshuffle(0, 'x')
    return unnormalised / normaliser
23 | 
def dropout(X, p=0.):
    """Randomly zero entries of X with probability `p` and rescale the rest.

    Returns X unchanged when p is not positive; otherwise the masked values
    are divided by the keep probability (1 - p).
    """
    if p > 0:
        retain_prob = 1 - p
        keep_mask = srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX)
        return (X * keep_mask) / retain_prob
    return X
30 | 
def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    """Return RMSprop update pairs for every parameter in `params`.

    A zero-initialised shared accumulator per parameter tracks a running
    average of the squared gradient with decay `rho`; the gradient is
    scaled by 1 / sqrt(accumulator + epsilon) and applied with step `lr`.
    The result lists, per parameter, the accumulator update followed by the
    parameter update.
    """
    all_grads = T.grad(cost=cost, wrt=params)
    updates = []
    for param, grad in zip(params, all_grads):
        # Zeros with the parameter's shape and dtype.
        running_sq = theano.shared(param.get_value() * 0.)
        running_sq_next = rho * running_sq + (1 - rho) * grad ** 2
        step = lr * (grad / T.sqrt(running_sq_next + epsilon))
        updates.append((running_sq, running_sq_next))
        updates.append((param, param - step))
    return updates
42 | 
def model(X, w, w2, w3, w4, w_o, p_drop_conv, p_drop_hidden):
    """Three conv/max-pool stages, one dense layer and a softmax output.

    Every weight tensor, including the output weights `w_o`, is passed in
    explicitly so the function does not depend on module-level state.
    Dropout with probability `p_drop_conv` follows each conv stage and with
    probability `p_drop_hidden` follows the dense layer.

    Returns (l1, l2, l3, l4, pyx): the four layer activations after dropout
    and the class probabilities.
    """
    # Only the first convolution uses border_mode='full'.
    l1a = rectify(conv2d(X, w, border_mode='full'))
    l1 = max_pool_2d(l1a, (2, 2))
    l1 = dropout(l1, p_drop_conv)

    l2a = rectify(conv2d(l1, w2))
    l2 = max_pool_2d(l2a, (2, 2))
    l2 = dropout(l2, p_drop_conv)

    l3a = rectify(conv2d(l2, w3))
    l3b = max_pool_2d(l3a, (2, 2))
    # Collapse everything but the leading axis so the dense layer can use
    # a plain matrix product.
    l3 = T.flatten(l3b, outdim=2)
    l3 = dropout(l3, p_drop_conv)

    l4 = rectify(T.dot(l3, w4))
    l4 = dropout(l4, p_drop_hidden)

    pyx = softmax(T.dot(l4, w_o))
    return l1, l2, l3, l4, pyx

trX, teX, trY, teY = mnist(onehot=True)

# Restore the flat 784-value rows to single-channel 28x28 images.
trX = trX.reshape(-1, 1, 28, 28)
teX = teX.reshape(-1, 1, 28, 28)

X = T.ftensor4()
Y = T.fmatrix()

w = init_weights((32, 1, 3, 3))
w2 = init_weights((64, 32, 3, 3))
w3 = init_weights((128, 64, 3, 3))
w4 = init_weights((128 * 3 * 3, 625))
w_o = init_weights((625, 10))

# Two graphs over the same weights: one with dropout for the training cost,
# one without dropout for prediction.
noise_l1, noise_l2, noise_l3, noise_l4, noise_py_x = model(X, w, w2, w3, w4, w_o, 0.2, 0.5)
l1, l2, l3, l4, py_x = model(X, w, w2, w3, w4, w_o, 0., 0.)
y_x = T.argmax(py_x, axis=1)


cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
params = [w, w2, w3, w4, w_o]
updates = RMSprop(cost, params, lr=0.001)

train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)

for i in range(100):
    # Iterate over batch starts only: slicing clamps the final batch to the
    # end of the data, so the last (possibly shorter) minibatch is trained
    # on too. Pairing starts with range(128, len(trX), 128) skipped it.
    for start in range(0, len(trX), 128):
        end = start + 128
        # The returned batch cost is not bound, so the symbolic `cost`
        # above is left intact.
        train(trX[start:end], trY[start:end])
    # Test-set accuracy after each epoch; single-argument print(...) behaves
    # the same on Python 2 and Python 3.
    print(np.mean(np.argmax(teY, axis=1) == predict(teX)))
93 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2014 Alec Radford
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Theano-Tutorials
 2 | ================
 3 | 
 4 | Bare bones introduction to machine learning from linear regression to convolutional neural networks using Theano.
 5 | 
 6 | ***Dataset***
 7 | These tutorials assume that you have the MNIST dataset. It is freely available from Yann LeCun's [personal website](http://yann.lecun.com/exdb/mnist/), and `load.py` reads the uncompressed files from `/media/datasets/mnist/`.
 8 | 
 9 | If you want to automate the download of the dataset, the included `download_mnist.sh` script fetches the files and unpacks them into `/media/datasets/mnist/`. Simply run the following:
10 | `sudo ./download_mnist.sh`
11 | 
12 | ***Known Issues***
13 | `Library not loaded: /usr/local/opt/openssl/lib/libssl.1.0.0.dylib`
14 | This error results from a broken OpenSSL installation on macOS. It can be fixed by uninstalling and reinstalling OpenSSL:
15 | `sudo brew remove openssl`
16 | `brew install openssl`
17 | 


--------------------------------------------------------------------------------
/download_mnist.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | mkdir -p /media/datasets/mnist
 4 | 
 5 | if ! [ -e /media/datasets/mnist/train-images-idx3-ubyte.gz ]
 6 | 	then
 7 | 		wget -P /media/datasets/mnist/ http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
 8 | fi
 9 | gzip -d /media/datasets/mnist/train-images-idx3-ubyte.gz
10 | 
11 | if ! [ -e /media/datasets/mnist/train-labels-idx1-ubyte.gz ]
12 | 	then
13 | 		wget -P /media/datasets/mnist/ http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
14 | fi
15 | gzip -d /media/datasets/mnist/train-labels-idx1-ubyte.gz
16 | 
17 | if ! [ -e /media/datasets/mnist/t10k-images-idx3-ubyte.gz ]
18 | 	then
19 | 		wget -P /media/datasets/mnist/ http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
20 | fi
21 | gzip -d /media/datasets/mnist/t10k-images-idx3-ubyte.gz
22 | 
23 | if ! [ -e /media/datasets/mnist/t10k-labels-idx1-ubyte.gz ]
24 | 	then
25 | 		wget -P /media/datasets/mnist/ http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
26 | fi
27 | gzip -d /media/datasets/mnist/t10k-labels-idx1-ubyte.gz
28 | 


--------------------------------------------------------------------------------
/load.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import os
 3 | 
# Root directory for datasets; mnist() reads from its 'mnist/' subdirectory.
datasets_dir = '/media/datasets/'
 5 | 
 6 | def one_hot(x,n):
 7 | 	if type(x) == list:
 8 | 		x = np.array(x)
 9 | 	x = x.flatten()
10 | 	o_h = np.zeros((len(x),n))
11 | 	o_h[np.arange(len(x)),x] = 1
12 | 	return o_h
13 | 
def _read_idx(path, header_bytes):
	"""Return the contents of a file as a flat uint8 array, minus its header.

	Given a file name, np.fromfile opens the file in binary mode and closes
	it again, so no handle is left open. The first `header_bytes` bytes are
	dropped.
	"""
	return np.fromfile(path, dtype=np.uint8)[header_bytes:]

def mnist(ntrain=60000,ntest=10000,onehot=True):
	"""Load MNIST from the uncompressed files under datasets_dir + 'mnist/'.

	ntrain / ntest keep only the first that many training / test examples.
	With onehot=True the labels are returned as one-hot rows of width 10,
	otherwise as arrays of class indices.

	Returns (trX, teX, trY, teY). Images are float rows of 28*28 values
	scaled to [0, 1].
	"""
	data_dir = os.path.join(datasets_dir,'mnist/')

	# Image files skip a 16-byte header, label files an 8-byte header.
	trX = _read_idx(os.path.join(data_dir,'train-images-idx3-ubyte'), 16)
	trX = trX.reshape((60000,28*28)).astype(float)
	trY = _read_idx(os.path.join(data_dir,'train-labels-idx1-ubyte'), 8)
	trY = trY.reshape((60000))

	teX = _read_idx(os.path.join(data_dir,'t10k-images-idx3-ubyte'), 16)
	teX = teX.reshape((10000,28*28)).astype(float)
	teY = _read_idx(os.path.join(data_dir,'t10k-labels-idx1-ubyte'), 8)
	teY = teY.reshape((10000))

	# Scale byte pixel values into [0, 1].
	trX = trX/255.
	teX = teX/255.

	trX = trX[:ntrain]
	trY = trY[:ntrain]

	teX = teX[:ntest]
	teY = teY[:ntest]

	if onehot:
		trY = one_hot(trY, 10)
		teY = one_hot(teY, 10)
	else:
		trY = np.asarray(trY)
		teY = np.asarray(teY)

	return trX,teX,trY,teY


--------------------------------------------------------------------------------