├── .gitignore ├── 1_multiply.py ├── 2_linear_regression.py ├── 3_logistic_regression.py ├── 4_net.py ├── 5_fancy_net.py ├── 6_convnet.py ├── 6_convnet_color.py ├── 6_convnet_dropout.py ├── 7_convnet_lasagne.py ├── LICENSE ├── README.md ├── data └── .dummy ├── download_cifar10.py ├── download_cifar100.py ├── load.py ├── plot_utils.py └── theano_tutorial.pdf /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Django stuff: 48 | *.log 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | # PyBuilder 54 | target/ 55 | -------------------------------------------------------------------------------- /1_multiply.py: -------------------------------------------------------------------------------- 1 | import theano 2 | import theano.tensor as T 3 | 4 | a = T.scalar() 5 | b = T.scalar() 6 | 7 | y = a * b 8 | 9 | f = theano.function([a, b], y) 10 | 11 | print f(1, 2) # 2 12 | print f(3, 3) # 9 -------------------------------------------------------------------------------- /2_linear_regression.py: -------------------------------------------------------------------------------- 1 | import theano 2 | import theano.tensor as T 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | plt.ion() 6 | 7 | # create artificial training data 8 | x_train = np.linspace(-1, 1, 101) 9 | t_train = 2 * x_train + np.random.randn(*x_train.shape) * 0.33 10 | 11 | 12 | # plot data 13 | plt.scatter(x_train, t_train) 14 | 15 | 16 | # define symbolic Theano variables 17 | x = T.scalar() 18 | t = T.scalar() 19 | 20 | 21 | # define model: linear regression 22 | def model(x, w): 23 | return x * w 24 | 25 | w = theano.shared(0.0) 26 | y = model(x, w) 27 | 28 | cost = T.mean((t - y) ** 2) 29 | g = T.grad(cost, w) 30 | updates = [(w, w - g * 0.01)] 31 | 32 | 33 | # compile theano function 34 | train = theano.function([x, t], cost, updates=updates) 35 | 36 | 37 | # train model 38 | for i in range(20): 39 | print "iteration %d" % (i + 1) 40 | for x, t in zip(x_train, t_train): 41 | train(x, t) 42 | 43 | print "w = %.8f" % w.get_value() 44 | print 45 | 46 | 47 | # plot fitted line 48 | plt.plot(x_train, w.get_value() * x_train) -------------------------------------------------------------------------------- /3_logistic_regression.py: -------------------------------------------------------------------------------- 1 | import theano 2 | import theano.tensor as T 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | plt.ion() 6 | 7 | import load 8 | 9 | 10 | # load data 11 | x_train, t_train, x_test, t_test = load.cifar10(dtype=theano.config.floatX) 12 | labels_test = np.argmax(t_test, axis=1) 13 | 14 | 15 | # visualize data 16 | plt.imshow(x_train[0].reshape(32, 32), cmap=plt.cm.gray) 17 | 18 | 19 | # define symbolic Theano variables 20 | x = T.matrix() 21 | t = T.matrix() 22 | 23 | 24 | # define model: logistic regression 25 | def floatX(x): 26 | return np.asarray(x, dtype=theano.config.floatX) 27 | 28 | def init_weights(shape): 29 | return theano.shared(floatX(np.random.randn(*shape) * 0.1)) 30 | 31 | def model(x, w): 32 | return T.nnet.softmax(T.dot(x, w)) 33 | 34 | w = init_weights((32 * 32, 10)) 35 | 36 | p_y_given_x = model(x, w) 37 | y = T.argmax(p_y_given_x, axis=1) 38 | 39 | cost = T.mean(T.nnet.categorical_crossentropy(p_y_given_x, t)) 40 | g = T.grad(cost, w) 41 | updates = [(w, w - g * 0.001)] 42 | 43 | 44 | # compile theano functions 45 | train = theano.function([x, t], cost, updates=updates) 46 | predict = theano.function([x], y) 47 | 48 | 49 | # train model 50 | batch_size = 50 51 | for i in range(100): 52 | print "iteration %d" % (i + 1) 53 | for start in range(0, len(x_train), batch_size): 54 | x_batch = x_train[start:start + batch_size] 55 | t_batch = t_train[start:start + batch_size] 56 | cost = train(x_batch, t_batch) 57 | 58 | predictions_test = predict(x_test) 59 | accuracy = np.mean(predictions_test == labels_test) 60 | print "accuracy: %.5f" % accuracy 61 | print -------------------------------------------------------------------------------- /4_net.py: -------------------------------------------------------------------------------- 1 | import theano 2 | import theano.tensor as T 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | plt.ion() 6 | 7 | import load 8 | 9 | 10 | # load data 11 | x_train, t_train, x_test, t_test = load.cifar10(dtype=theano.config.floatX) 12 | labels_test = np.argmax(t_test, axis=1) 13 | 14 | 15 | # define symbolic Theano variables 16 | x = T.matrix() 17 | t = T.matrix() 18 | 19 | 20 | # define model: neural network 21 | def floatX(x): 22 | return np.asarray(x, dtype=theano.config.floatX) 23 | 24 | def init_weights(shape): 25 | return theano.shared(floatX(np.random.randn(*shape) * 0.1)) 26 | 27 | def sgd(cost, params, learning_rate): 28 | grads = T.grad(cost, params) 29 | updates = [] 30 | for p, g in zip(params, grads): 31 | updates.append([p, p - g * learning_rate]) 32 | return updates 33 | 34 | def model(x, w_h, w_o): 35 | h = T.maximum(0, T.dot(x, w_h)) 36 | p_y_given_x = T.nnet.softmax(T.dot(h, w_o)) 37 | return p_y_given_x 38 | 39 | w_h = init_weights((32 * 32, 100)) 40 | w_o = init_weights((100, 10)) 41 | 42 | p_y_given_x = model(x, w_h, w_o) 43 | y = T.argmax(p_y_given_x, axis=1) 44 | 45 | cost = T.mean(T.nnet.categorical_crossentropy(p_y_given_x, t)) 46 | params = [w_h, w_o] 47 | updates = sgd(cost, params, learning_rate=0.01) 48 | 49 | 50 | # compile theano functions 51 | train = theano.function([x, t], cost, updates=updates) 52 | predict = theano.function([x], y) 53 | 54 | 55 | # train model 56 | batch_size = 50 57 | for i in range(50): 58 | print "iteration %d" % (i + 1) 59 | for start in range(0, len(x_train), batch_size): 60 | x_batch = x_train[start:start + batch_size] 61 | t_batch = t_train[start:start + batch_size] 62 | cost = train(x_batch, t_batch) 63 | 64 | predictions_test = predict(x_test) 65 | accuracy = np.mean(predictions_test == labels_test) 66 | print "accuracy: %.5f" % accuracy 67 | print 68 | 69 | -------------------------------------------------------------------------------- /5_fancy_net.py: -------------------------------------------------------------------------------- 1 | import theano 2 | import theano.tensor as T 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | plt.ion() 6 | 7 | import load 8 | 9 | 10 | # load data 11 | x_train, t_train, x_test, t_test = load.cifar10(dtype=theano.config.floatX) 12 | labels_test = np.argmax(t_test, axis=1) 13 | 14 | 15 | # define symbolic Theano variables 16 | x = T.matrix() 17 | t = T.matrix() 18 | 19 | 20 | # define model: neural network 21 | def floatX(x): 22 | return np.asarray(x, dtype=theano.config.floatX) 23 | 24 | def init_weights(shape): 25 | return theano.shared(floatX(np.random.randn(*shape) * 0.1)) 26 | 27 | def momentum(cost, params, learning_rate, momentum): 28 | grads = theano.grad(cost, params) 29 | updates = [] 30 | 31 | for p, g in zip(params, grads): 32 | mparam_i = theano.shared(np.zeros(p.get_value().shape, dtype=theano.config.floatX)) 33 | v = momentum * mparam_i - learning_rate * g 34 | updates.append((mparam_i, v)) 35 | updates.append((p, p + v)) 36 | 37 | return updates 38 | 39 | def model(x, w_h1, b_h1, w_h2, b_h2, w_o, b_o): 40 | h1 = T.maximum(0, T.dot(x, w_h1) + b_h1) 41 | h2 = T.maximum(0, T.dot(h1, w_h2) + b_h2) 42 | p_y_given_x = T.nnet.softmax(T.dot(h2, w_o) + b_o) 43 | return p_y_given_x 44 | 45 | w_h1 = init_weights((32 * 32, 100)) 46 | b_h1 = init_weights((100,)) 47 | w_h2 = init_weights((100, 100)) 48 | b_h2 = init_weights((100,)) 49 | w_o = init_weights((100, 10)) 50 | b_o = init_weights((10,)) 51 | 52 | params = [w_h1, b_h1, w_h2, b_h2, w_o, b_o] 53 | 54 | p_y_given_x = model(x, *params) 55 | y = T.argmax(p_y_given_x, axis=1) 56 | 57 | cost = T.mean(T.nnet.categorical_crossentropy(p_y_given_x, t)) 58 | 59 | updates = momentum(cost, params, learning_rate=0.001, momentum=0.9) 60 | 61 | 62 | # compile theano functions 63 | train = theano.function([x, t], cost, updates=updates) 64 | predict = theano.function([x], y) 65 | 66 | 67 | # train model 68 | batch_size = 50 69 | 70 | for i in range(50): 71 | print "iteration %d" % (i + 1) 72 | for start in range(0, len(x_train), batch_size): 73 | x_batch = x_train[start:start + batch_size] 74 | t_batch = t_train[start:start + batch_size] 75 | cost = train(x_batch, t_batch) 76 | 77 | predictions_test = predict(x_test) 78 | accuracy = np.mean(predictions_test == labels_test) 79 | print "accuracy: %.5f" % accuracy 80 | print 81 | 82 | -------------------------------------------------------------------------------- /6_convnet.py: -------------------------------------------------------------------------------- 1 | import theano 2 | import theano.tensor as T 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | plt.ion() 6 | 7 | import load 8 | 9 | from theano.tensor.nnet.conv import conv2d 10 | from theano.tensor.signal.downsample import max_pool_2d 11 | 12 | 13 | # load data 14 | x_train, t_train, x_test, t_test = load.cifar10(dtype=theano.config.floatX) 15 | labels_test = np.argmax(t_test, axis=1) 16 | 17 | 18 | # reshape data 19 | x_train = x_train.reshape((x_train.shape[0], 1, 32, 32)) 20 | x_test = x_test.reshape((x_test.shape[0], 1, 32, 32)) 21 | 22 | 23 | # define symbolic Theano variables 24 | x = T.tensor4() 25 | t = T.matrix() 26 | 27 | 28 | # define model: neural network 29 | def floatX(x): 30 | return np.asarray(x, dtype=theano.config.floatX) 31 | 32 | def init_weights(shape): 33 | return theano.shared(floatX(np.random.randn(*shape) * 0.1)) 34 | 35 | def momentum(cost, params, learning_rate, momentum): 36 | grads = theano.grad(cost, params) 37 | updates = [] 38 | 39 | for p, g in zip(params, grads): 40 | mparam_i = theano.shared(np.zeros(p.get_value().shape, dtype=theano.config.floatX)) 41 | v = momentum * mparam_i - learning_rate * g 42 | updates.append((mparam_i, v)) 43 | updates.append((p, p + v)) 44 | 45 | return updates 46 | 47 | def model(x, w_c1, b_c1, w_c2, b_c2, w_h3, b_h3, w_o, b_o): 48 | c1 = T.maximum(0, conv2d(x, w_c1) + b_c1.dimshuffle('x', 0, 'x', 'x')) 49 | p1 = max_pool_2d(c1, (3, 3)) 50 | 51 | c2 = T.maximum(0, conv2d(p1, w_c2) + b_c2.dimshuffle('x', 0, 'x', 'x')) 52 | p2 = max_pool_2d(c2, (2, 2)) 53 | 54 | p2_flat = p2.flatten(2) 55 | h3 = T.maximum(0, T.dot(p2_flat, w_h3) + b_h3) 56 | p_y_given_x = T.nnet.softmax(T.dot(h3, w_o) + b_o) 57 | return p_y_given_x 58 | 59 | w_c1 = init_weights((4, 1, 3, 3)) 60 | b_c1 = init_weights((4,)) 61 | w_c2 = init_weights((8, 4, 3, 3)) 62 | b_c2 = init_weights((8,)) 63 | w_h3 = init_weights((8 * 4 * 4, 100)) 64 | b_h3 = init_weights((100,)) 65 | w_o = init_weights((100, 10)) 66 | b_o = init_weights((10,)) 67 | 68 | params = [w_c1, b_c1, w_c2, b_c2, w_h3, b_h3, w_o, b_o] 69 | 70 | p_y_given_x = model(x, *params) 71 | y = T.argmax(p_y_given_x, axis=1) 72 | 73 | cost = T.mean(T.nnet.categorical_crossentropy(p_y_given_x, t)) 74 | 75 | updates = momentum(cost, params, learning_rate=0.01, momentum=0.9) 76 | 77 | 78 | # compile theano functions 79 | train = theano.function([x, t], cost, updates=updates) 80 | predict = theano.function([x], y) 81 | 82 | 83 | # train model 84 | batch_size = 50 85 | 86 | for i in range(50): 87 | print "iteration %d" % (i + 1) 88 | for start in range(0, len(x_train), batch_size): 89 | x_batch = x_train[start:start + batch_size] 90 | t_batch = t_train[start:start + batch_size] 91 | cost = train(x_batch, t_batch) 92 | 93 | predictions_test = predict(x_test) 94 | accuracy = np.mean(predictions_test == labels_test) 95 | print "accuracy: %.5f" % accuracy 96 | print 97 | 98 | -------------------------------------------------------------------------------- /6_convnet_color.py: -------------------------------------------------------------------------------- 1 | import theano 2 | import theano.tensor as T 3 | import numpy as np 4 | 5 | import load 6 | 7 | from theano.tensor.nnet.conv import conv2d 8 | from theano.tensor.signal.downsample import max_pool_2d 9 | 10 | 11 | # load data 12 | x_train, t_train, x_test, t_test = load.cifar10(dtype=theano.config.floatX, grayscale=False) 13 | labels_test = np.argmax(t_test, axis=1) 14 | 15 | 16 | # reshape data 17 | x_train = x_train.reshape((x_train.shape[0], 3, 32, 32)) 18 | x_test = x_test.reshape((x_test.shape[0], 3, 32, 32)) 19 | 20 | 21 | # define symbolic Theano variables 22 | x = T.tensor4() 23 | t = T.matrix() 24 | 25 | 26 | # define model: neural network 27 | def floatX(x): 28 | return np.asarray(x, dtype=theano.config.floatX) 29 | 30 | def init_weights(shape): 31 | return theano.shared(floatX(np.random.randn(*shape) * 0.1)) 32 | 33 | def momentum(cost, params, learning_rate, momentum): 34 | grads = theano.grad(cost, params) 35 | updates = [] 36 | 37 | for p, g in zip(params, grads): 38 | mparam_i = theano.shared(np.zeros(p.get_value().shape, dtype=theano.config.floatX)) 39 | v = momentum * mparam_i - learning_rate * g 40 | updates.append((mparam_i, v)) 41 | updates.append((p, p + v)) 42 | 43 | return updates 44 | 45 | def model(x, w_c1, b_c1, w_c2, b_c2, w_h3, b_h3, w_o, b_o): 46 | c1 = T.maximum(0, conv2d(x, w_c1) + b_c1.dimshuffle('x', 0, 'x', 'x')) 47 | p1 = max_pool_2d(c1, (3, 3)) 48 | 49 | c2 = T.maximum(0, conv2d(p1, w_c2) + b_c2.dimshuffle('x', 0, 'x', 'x')) 50 | p2 = max_pool_2d(c2, (2, 2)) 51 | 52 | p2_flat = p2.flatten(2) 53 | h3 = T.maximum(0, T.dot(p2_flat, w_h3) + b_h3) 54 | p_y_given_x = T.nnet.softmax(T.dot(h3, w_o) + b_o) 55 | return p_y_given_x 56 | 57 | w_c1 = init_weights((4, 3, 3, 3)) 58 | b_c1 = init_weights((4,)) 59 | w_c2 = init_weights((8, 4, 3, 3)) 60 | b_c2 = init_weights((8,)) 61 | w_h3 = init_weights((8 * 4 * 4, 100)) 62 | b_h3 = init_weights((100,)) 63 | w_o = init_weights((100, 10)) 64 | b_o = init_weights((10,)) 65 | 66 | params = [w_c1, b_c1, w_c2, b_c2, w_h3, b_h3, w_o, b_o] 67 | 68 | p_y_given_x = model(x, *params) 69 | y = T.argmax(p_y_given_x, axis=1) 70 | 71 | cost = T.mean(T.nnet.categorical_crossentropy(p_y_given_x, t)) 72 | 73 | updates = momentum(cost, params, learning_rate=0.01, momentum=0.9) 74 | 75 | 76 | # compile theano functions 77 | train = theano.function([x, t], cost, updates=updates) 78 | predict = theano.function([x], y) 79 | 80 | 81 | # train model 82 | batch_size = 50 83 | 84 | for i in range(50): 85 | print "iteration %d" % (i + 1) 86 | for start in range(0, len(x_train), batch_size): 87 | x_batch = x_train[start:start + batch_size] 88 | t_batch = t_train[start:start + batch_size] 89 | cost = train(x_batch, t_batch) 90 | 91 | predictions_test = predict(x_test) 92 | accuracy = np.mean(predictions_test == labels_test) 93 | print "accuracy: %.5f" % accuracy 94 | print 95 | 96 | -------------------------------------------------------------------------------- /6_convnet_dropout.py: -------------------------------------------------------------------------------- 1 | import theano 2 | import theano.tensor as T 3 | import numpy as np 4 | 5 | import load 6 | 7 | from theano.tensor.nnet.conv import conv2d 8 | from theano.tensor.signal.downsample import max_pool_2d 9 | 10 | # from theano.tensor.shared_randomstreams import RandomStreams 11 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams 12 | srng = RandomStreams() 13 | 14 | 15 | # load data 16 | x_train, t_train, x_test, t_test = load.cifar10(dtype=theano.config.floatX) 17 | labels_test = np.argmax(t_test, axis=1) 18 | 19 | 20 | # reshape data 21 | x_train = x_train.reshape((x_train.shape[0], 1, 32, 32)) 22 | x_test = x_test.reshape((x_test.shape[0], 1, 32, 32)) 23 | 24 | 25 | # define symbolic Theano variables 26 | x = T.tensor4() 27 | t = T.matrix() 28 | 29 | 30 | # define model: neural network 31 | def floatX(x): 32 | return np.asarray(x, dtype=theano.config.floatX) 33 | 34 | def init_weights(shape): 35 | return theano.shared(floatX(np.random.randn(*shape) * 0.1)) 36 | 37 | def momentum(cost, params, learning_rate, momentum): 38 | grads = theano.grad(cost, params) 39 | updates = [] 40 | 41 | for p, g in zip(params, grads): 42 | mparam_i = theano.shared(np.zeros(p.get_value().shape, dtype=theano.config.floatX)) 43 | v = momentum * mparam_i - learning_rate * g 44 | updates.append((mparam_i, v)) 45 | updates.append((p, p + v)) 46 | 47 | return updates 48 | 49 | def dropout(x, p=0.): 50 | if p > 0: 51 | retain_prob = 1 - p 52 | x *= srng.binomial(x.shape, p=retain_prob, dtype=theano.config.floatX) 53 | x /= retain_prob 54 | return x 55 | 56 | def model(x, w_c1, b_c1, w_c2, b_c2, w_h3, b_h3, w_o, b_o, p=0.0): 57 | c1 = T.maximum(0, conv2d(x, w_c1) + b_c1.dimshuffle('x', 0, 'x', 'x')) 58 | p1 = max_pool_2d(c1, (3, 3)) 59 | 60 | c2 = T.maximum(0, conv2d(p1, w_c2) + b_c2.dimshuffle('x', 0, 'x', 'x')) 61 | p2 = max_pool_2d(c2, (2, 2)) 62 | 63 | p2_flat = p2.flatten(2) 64 | p2_flat = dropout(p2_flat, p=p) 65 | h3 = T.maximum(0, T.dot(p2_flat, w_h3) + b_h3) 66 | p_y_given_x = T.nnet.softmax(T.dot(h3, w_o) + b_o) 67 | return p_y_given_x 68 | 69 | w_c1 = init_weights((4, 1, 3, 3)) 70 | b_c1 = init_weights((4,)) 71 | w_c2 = init_weights((8, 4, 3, 3)) 72 | b_c2 = init_weights((8,)) 73 | w_h3 = init_weights((8 * 4 * 4, 100)) 74 | b_h3 = init_weights((100,)) 75 | w_o = init_weights((100, 10)) 76 | b_o = init_weights((10,)) 77 | 78 | params = [w_c1, b_c1, w_c2, b_c2, w_h3, b_h3, w_o, b_o] 79 | 80 | p_y_given_x_train = model(x, *params, p=0.5) 81 | p_y_given_x_test = model(x, *params, p=0.0) 82 | y_train = T.argmax(p_y_given_x_train, axis=1) 83 | y_test = T.argmax(p_y_given_x_test, axis=1) 84 | 85 | cost_train = T.mean(T.nnet.categorical_crossentropy(p_y_given_x_train, t)) 86 | 87 | updates = momentum(cost_train, params, learning_rate=0.01, momentum=0.9) 88 | 89 | 90 | # compile theano functions 91 | train = theano.function([x, t], cost_train, updates=updates) 92 | predict = theano.function([x], y_test) 93 | 94 | 95 | # train model 96 | batch_size = 50 97 | 98 | for i in range(50): 99 | print "iteration %d" % (i + 1) 100 | for start in range(0, len(x_train), batch_size): 101 | x_batch = x_train[start:start + batch_size] 102 | t_batch = t_train[start:start + batch_size] 103 | cost = train(x_batch, t_batch) 104 | 105 | predictions_test = predict(x_test) 106 | accuracy = np.mean(predictions_test == labels_test) 107 | print "accuracy: %.5f" % accuracy 108 | print 109 | 110 | -------------------------------------------------------------------------------- /7_convnet_lasagne.py: -------------------------------------------------------------------------------- 1 | import theano 2 | import theano.tensor as T 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | plt.ion() 6 | 7 | import load 8 | 9 | import lasagne as nn 10 | 11 | # load data 12 | x_train, t_train, x_test, t_test = load.cifar10(dtype=theano.config.floatX) 13 | labels_test = np.argmax(t_test, axis=1) 14 | 15 | 16 | # reshape data 17 | x_train = x_train.reshape((x_train.shape[0], 1, 32, 32)) 18 | x_test = x_test.reshape((x_test.shape[0], 1, 32, 32)) 19 | 20 | 21 | # define model: neural network 22 | l_in = nn.layers.InputLayer((None, 1, 32, 32)) 23 | 24 | l_conv1 = nn.layers.Conv2DLayer(l_in, num_filters=4, filter_size=(3, 3)) 25 | l_pool1 = nn.layers.MaxPool2DLayer(l_conv1, ds=(3, 3)) 26 | 27 | l_conv2 = nn.layers.Conv2DLayer(l_pool1, num_filters=8, filter_size=(3, 3)) 28 | l_pool2 = nn.layers.MaxPool2DLayer(l_conv2, ds=(2, 2)) 29 | 30 | l3 = nn.layers.DenseLayer(nn.layers.dropout(l_pool2, p=0.5), num_units=100) 31 | 32 | l_out = nn.layers.DenseLayer(l3, num_units=10, nonlinearity=T.nnet.softmax) 33 | 34 | objective = nn.objectives.Objective(l_out, loss_function=nn.objectives.multinomial_nll) 35 | 36 | cost_train = objective.get_loss() 37 | p_y_given_x = l_out.get_output(deterministic=True) 38 | y = T.argmax(p_y_given_x, axis=1) 39 | 40 | 41 | params = nn.layers.get_all_params(l_out) 42 | updates = nn.updates.momentum(cost_train, params, learning_rate=0.01, momentum=0.9) 43 | 44 | 45 | # compile theano functions 46 | train = theano.function([l_in.input_var, objective.target_var], cost_train, updates=updates) 47 | predict = theano.function([l_in.input_var], y) 48 | 49 | 50 | # train model 51 | batch_size = 50 52 | 53 | for i in range(50): 54 | print "iteration %d" % (i + 1) 55 | for start in range(0, len(x_train), batch_size): 56 | x_batch = x_train[start:start + batch_size] 57 | t_batch = t_train[start:start + batch_size] 58 | cost = train(x_batch, t_batch) 59 | 60 | predictions_test = predict(x_test) 61 | accuracy = np.mean(predictions_test == labels_test) 62 | print "accuracy: %.5f" % accuracy 63 | print 64 | 65 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Sander Dieleman 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # theano-tutorial 2 | Reslab Theano tutorial (10 February 2015) 3 | 4 | This repository hosts the code for the Reslab tutorial on Theano and deep learning. During the tutorial, this repository will be updated with solutions. 5 | 6 | The code is heavily based on Alec Radford's Theano tutorial, which can be found at https://github.com/newmu/Theano-Tutorials 7 | -------------------------------------------------------------------------------- /data/.dummy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benanne/theano-tutorial/b6c4b1e443734a19eb734c314ca113be5da72f2b/data/.dummy -------------------------------------------------------------------------------- /download_cifar10.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib 3 | 4 | 5 | target_dir = "data" 6 | target_path = os.path.join(target_dir, "cifar-10-python.tar.gz") 7 | 8 | if not os.path.exists(target_dir): 9 | os.makedirs(target_dir) 10 | 11 | if not os.path.exists(target_path): 12 | print "Downloading..." 13 | urllib.urlretrieve("http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz", "data/cifar-10-python.tar.gz") 14 | 15 | print "Extracting..." 16 | os.system("tar xzvf data/cifar-10-python.tar.gz -C data") 17 | 18 | print "done." -------------------------------------------------------------------------------- /download_cifar100.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib 3 | 4 | 5 | target_dir = "data" 6 | target_path = os.path.join(target_dir, "cifar-100-python.tar.gz") 7 | 8 | if not os.path.exists(target_dir): 9 | os.makedirs(target_dir) 10 | 11 | if not os.path.exists(target_path): 12 | print "Downloading..." 13 | urllib.urlretrieve("http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz", "data/cifar-100-python.tar.gz") 14 | 15 | print "Extracting..." 16 | os.system("tar xzvf data/cifar-100-python.tar.gz -C data") 17 | 18 | print "done." -------------------------------------------------------------------------------- /load.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import cPickle as pickle 4 | import glob 5 | 6 | 7 | data_dir = "data" 8 | data_dir_cifar10 = os.path.join(data_dir, "cifar-10-batches-py") 9 | data_dir_cifar100 = os.path.join(data_dir, "cifar-100-python") 10 | 11 | class_names_cifar10 = np.load(os.path.join(data_dir_cifar10, "batches.meta")) 12 | class_names_cifar100 = np.load(os.path.join(data_dir_cifar100, "meta")) 13 | 14 | 15 | def one_hot(x, n): 16 | """ 17 | convert index representation to one-hot representation 18 | """ 19 | x = np.array(x) 20 | assert x.ndim == 1 21 | return np.eye(n)[x] 22 | 23 | 24 | def _load_batch_cifar10(filename, dtype='float64'): 25 | """ 26 | load a batch in the CIFAR-10 format 27 | """ 28 | path = os.path.join(data_dir_cifar10, filename) 29 | batch = np.load(path) 30 | data = batch['data'] / 255.0 # scale between [0, 1] 31 | labels = one_hot(batch['labels'], n=10) # convert labels to one-hot representation 32 | return data.astype(dtype), labels.astype(dtype) 33 | 34 | 35 | def _grayscale(a): 36 | return a.reshape(a.shape[0], 3, 32, 32).mean(1).reshape(a.shape[0], -1) 37 | 38 | 39 | def cifar10(dtype='float64', grayscale=True): 40 | # train 41 | x_train = [] 42 | t_train = [] 43 | for k in xrange(5): 44 | x, t = _load_batch_cifar10("data_batch_%d" % (k + 1), dtype=dtype) 45 | x_train.append(x) 46 | t_train.append(t) 47 | 48 | x_train = np.concatenate(x_train, axis=0) 49 | t_train = np.concatenate(t_train, axis=0) 50 | 51 | # test 52 | x_test, t_test = _load_batch_cifar10("test_batch", dtype=dtype) 53 | 54 | if grayscale: 55 | x_train = _grayscale(x_train) 56 | x_test = _grayscale(x_test) 57 | 58 | return x_train, t_train, x_test, t_test 59 | 60 | 61 | def _load_batch_cifar100(filename, dtype='float64'): 62 | """ 63 | load a batch in the CIFAR-100 format 64 | """ 65 | path = os.path.join(data_dir_cifar100, filename) 66 | batch = np.load(path) 67 | data = batch['data'] / 255.0 68 | labels = one_hot(batch['fine_labels'], n=100) 69 | return data.astype(dtype), labels.astype(dtype) 70 | 71 | 72 | def cifar100(dtype='float64', grayscale=True): 73 | x_train, t_train = _load_batch_cifar100("train", dtype=dtype) 74 | x_test, t_test = _load_batch_cifar100("test", dtype=dtype) 75 | 76 | if grayscale: 77 | x_train = _grayscale(x_train) 78 | x_test = _grayscale(x_test) 79 | 80 | return x_train, t_train, x_test, t_test 81 | -------------------------------------------------------------------------------- /plot_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def most_square_shape(num_blocks, blockshape=(1,1)): 6 | x, y = blockshape 7 | num_x = np.ceil(np.sqrt(num_blocks * y / float(x))) 8 | num_y = np.ceil(num_blocks / num_x) 9 | return (num_x, num_y) 10 | 11 | 12 | def visualize_grid(chunk, range=None): 13 | if chunk.ndim == 4 and chunk.shape[1] == 1: 14 | # this is a chunk with one input channel, select it 15 | chunk = chunk[:, 0] 16 | 17 | if chunk.ndim != 3: 18 | raise RuntimeError("Only 3D tensors or 4D tensors with one input channel are supported as input, input dimensionality is %d" % chunk.ndim) 19 | 20 | if range is None: 21 | range = chunk.min(), chunk.max() 22 | vmin, vmax = range 23 | 24 | patch_size = chunk.shape[1:] 25 | num_x, num_y = most_square_shape(chunk.shape[0], patch_size) 26 | 27 | #pad with zeros so that the number of filters equals num_x * num_y 28 | chunk_padded = np.zeros((num_x * num_y,) + patch_size) 29 | chunk_padded[:chunk.shape[0]] = chunk 30 | 31 | chunk_split = chunk_padded.reshape(num_x, num_y, patch_size[0], patch_size[1]) 32 | chunk_with_border = np.ones((num_x, num_y, patch_size[0] + 1, patch_size[1] + 1)) * vmax 33 | chunk_with_border[:, :, :patch_size[0], :patch_size[1]] = chunk_split 34 | 35 | grid = chunk_with_border.transpose(0, 2, 1, 3).reshape(num_x * (patch_size[0] + 1), num_y * (patch_size[1] + 1)) 36 | grid_with_left_border = np.ones((num_x * (patch_size[0] + 1) + 1, num_y * (patch_size[1] + 1) + 1)) * vmax 37 | grid_with_left_border[1:, 1:] = grid 38 | 39 | plt.imshow(grid_with_left_border, interpolation='nearest', cmap=plt.cm.binary, vmin=vmin, vmax=vmax) 40 | 41 | 42 | -------------------------------------------------------------------------------- /theano_tutorial.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benanne/theano-tutorial/b6c4b1e443734a19eb734c314ca113be5da72f2b/theano_tutorial.pdf --------------------------------------------------------------------------------