├── .gitignore ├── 01_building_expressions ├── 01_scalar.py ├── 01_scalar_soln.py ├── 02_vector_mat.py ├── 02_vector_mat_soln.py ├── 03_tensor.py └── 03_tensor_soln.py ├── 02_compiling_and_running ├── 01_function.py ├── 01_function_soln.py ├── 02_shared.py ├── 02_shared_soln.py ├── 03_bug.py └── 03_bug_soln.txt ├── 03_modifying ├── 01_grad.py ├── 01_grad_soln.py ├── 02_traverse.py └── 02_traverse_soln.py ├── 04_debugging ├── 02_compute_test_value.py └── 02_compute_test_value_soln.py ├── 05_tripleop ├── 01_tripleop.py └── 01_tripleop_soln.py ├── 06_scalmulop ├── 01_scalmulop.py └── 01_scalmulop_soln.py ├── 07_scalmulgrad ├── 01_scalmulop.py └── 01_scalmulop_soln.py ├── 08_scalmulc ├── 01_scalmulc.py └── 01_scalmulc_soln.py ├── 09_opt ├── 01_opt.py └── 01_opt_soln.py ├── LICENSE ├── Makefile ├── README.md ├── advanced.pdf ├── advanced.tex ├── apply_node.png ├── c.py ├── cop.py ├── doublec.py ├── doublecop.c ├── doublecop.py ├── doubleop.py ├── opt.py ├── presentation.pdf ├── presentation.tex ├── python.py ├── scalmulop.py ├── test_doubleop.py ├── test_opt.py ├── thunk.py └── tripleop.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | __pycache__ 21 | 22 | # Installer logs 23 | pip-log.txt 24 | 25 | # Unit test / coverage reports 26 | .coverage 27 | .tox 28 | nosetests.xml 29 | 30 | # Translations 31 | *.mo 32 | 33 | # Mr Developer 34 | .mr.developer.cfg 35 | .project 36 | .pydevproject 37 | -------------------------------------------------------------------------------- /01_building_expressions/01_scalar.py: -------------------------------------------------------------------------------- 1 | # Fill in the TODOs in this exercise, then run 2 | # python 01_scalar.py to see if your solution works! 3 | # 4 | # This exercice ask you to create Theano variable and do some 5 | # computation on them. 6 | import numpy as np 7 | from theano import function 8 | raise NotImplementedError("TODO: add any other imports you need") 9 | 10 | 11 | def make_scalar(): 12 | """ 13 | Returns a new Theano scalar. 14 | """ 15 | 16 | raise NotImplementedError("TODO: implement this function.") 17 | 18 | 19 | def log(x): 20 | """ 21 | Returns the logarithm of a Theano scalar x. 22 | """ 23 | 24 | raise NotImplementedError("TODO: implement this function.") 25 | 26 | 27 | def add(x, y): 28 | """ 29 | Adds two theano scalars together and returns the result. 30 | """ 31 | 32 | raise NotImplementedError("TODO: implement this function.") 33 | 34 | if __name__ == "__main__": 35 | a = make_scalar() 36 | b = make_scalar() 37 | c = log(b) 38 | d = add(a, c) 39 | f = function([a, b], d) 40 | a = np.cast[a.dtype](1.) 41 | b = np.cast[b.dtype](2.) 42 | actual = f(a, b) 43 | expected = 1. + np.log(2.) 44 | assert np.allclose(actual, expected) 45 | print "SUCCESS!" 46 | -------------------------------------------------------------------------------- /01_building_expressions/01_scalar_soln.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from theano import function 3 | import theano.tensor as T 4 | 5 | 6 | def make_scalar(): 7 | """ 8 | Returns a new Theano scalar. 9 | """ 10 | 11 | return T.scalar() 12 | 13 | 14 | def log(x): 15 | """ 16 | Returns the logarithm of a Theano scalar x. 
17 | """ 18 | 19 | return T.log(x) 20 | 21 | 22 | def add(x, y): 23 | """ 24 | Adds two theano scalars together and returns the result. 25 | """ 26 | 27 | return x + y 28 | 29 | if __name__ == "__main__": 30 | a = make_scalar() 31 | b = make_scalar() 32 | c = log(b) 33 | d = add(a, c) 34 | f = function([a, b], d) 35 | a = np.cast[a.dtype](1.) 36 | b = np.cast[b.dtype](2.) 37 | actual = f(a, b) 38 | expected = 1. + np.log(2.) 39 | assert np.allclose(actual, expected) 40 | print "SUCCESS!" 41 | -------------------------------------------------------------------------------- /01_building_expressions/02_vector_mat.py: -------------------------------------------------------------------------------- 1 | # Fill in the TODOs in this exercise, then run 2 | # python 02_vector_mat.py to see if your solution works! 3 | # 4 | # This exercices ask you to make Theano variable, elemwise 5 | # multiplication and matrix/vector dot product. 6 | import numpy as np 7 | from theano import function 8 | raise NotImplementedError("TODO: add any other imports you need") 9 | 10 | 11 | def make_vector(): 12 | """ 13 | Returns a new Theano vector. 14 | """ 15 | 16 | raise NotImplementedError("TODO: implement this function.") 17 | 18 | 19 | def make_matrix(): 20 | """ 21 | Returns a new Theano matrix. 22 | """ 23 | 24 | raise NotImplementedError("TODO: implement this function.") 25 | 26 | 27 | def elemwise_mul(a, b): 28 | """ 29 | a: A theano matrix 30 | b: A theano matrix 31 | Returns the elementwise product of a and b 32 | """ 33 | 34 | raise NotImplementedError("TODO: implement this function.") 35 | 36 | 37 | def matrix_vector_mul(a, b): 38 | """ 39 | a: A theano matrix 40 | b: A theano vector 41 | Returns the matrix-vector product of a and b 42 | """ 43 | 44 | raise NotImplementedError("TODO: implement this function.") 45 | 46 | if __name__ == "__main__": 47 | a = make_vector() 48 | b = make_vector() 49 | c = elemwise_mul(a, b) 50 | d = make_matrix() 51 | e = matrix_vector_mul(d, c) 52 | 53 | f = function([a, b, d], e) 54 | 55 | rng = np.random.RandomState([1, 2, 3]) 56 | a_value = rng.randn(5).astype(a.dtype) 57 | b_value = rng.rand(5).astype(b.dtype) 58 | c_value = a_value * b_value 59 | d_value = rng.randn(5, 5).astype(d.dtype) 60 | expected = np.dot(d_value, c_value) 61 | 62 | actual = f(a_value, b_value, d_value) 63 | 64 | assert np.allclose(actual, expected) 65 | print "SUCCESS!" 66 | -------------------------------------------------------------------------------- /01_building_expressions/02_vector_mat_soln.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from theano import function 3 | import theano.tensor as T 4 | 5 | 6 | def make_vector(): 7 | """ 8 | Returns a new Theano vector. 9 | """ 10 | 11 | return T.vector() 12 | 13 | 14 | def make_matrix(): 15 | """ 16 | Returns a new Theano matrix. 
17 | """ 18 | 19 | return T.matrix() 20 | 21 | 22 | def elemwise_mul(a, b): 23 | """ 24 | a: A theano matrix 25 | b: A theano matrix 26 | Returns the elementwise product of a and b 27 | """ 28 | 29 | return a * b 30 | 31 | 32 | def matrix_vector_mul(a, b): 33 | """ 34 | a: A theano matrix 35 | b: A theano vector 36 | Returns the matrix-vector product of a and b 37 | """ 38 | 39 | return T.dot(a, b) 40 | 41 | if __name__ == "__main__": 42 | a = make_vector() 43 | b = make_vector() 44 | c = elemwise_mul(a, b) 45 | d = make_matrix() 46 | e = matrix_vector_mul(d, c) 47 | 48 | f = function([a, b, d], e) 49 | 50 | rng = np.random.RandomState([1, 2, 3]) 51 | a_value = rng.randn(5).astype(a.dtype) 52 | b_value = rng.rand(5).astype(b.dtype) 53 | c_value = a_value * b_value 54 | d_value = rng.randn(5, 5).astype(d.dtype) 55 | expected = np.dot(d_value, c_value) 56 | 57 | actual = f(a_value, b_value, d_value) 58 | 59 | assert np.allclose(actual, expected) 60 | print "SUCCESS!" 61 | -------------------------------------------------------------------------------- /01_building_expressions/03_tensor.py: -------------------------------------------------------------------------------- 1 | # Fill in the TODOs in this exercise, then run 2 | # python 03_tensor.py to see if your solution works! 3 | # 4 | # This exercices ask you to create Theano tensor variable, do 5 | # broadcastable addition and to compute the max over part of a tensor. 6 | import numpy as np 7 | from theano import function 8 | raise NotImplementedError("TODO: add any other imports you need") 9 | 10 | 11 | def make_tensor(dim): 12 | """ 13 | Returns a new Theano tensor with no broadcastable dimensions. 14 | dim: the total number of dimensions of the tensor. 15 | (You can use any dtype you like) 16 | """ 17 | 18 | raise NotImplementedError("TODO: implement this function.") 19 | 20 | 21 | def broadcasted_add(a, b): 22 | """ 23 | a: a 3D theano tensor 24 | b: a 4D theano tensor 25 | Returns c, a 4D theano tensor, where 26 | 27 | c[i, j, k, l] = a[l, k, i] + b[i, j, k, l] 28 | 29 | for all i, j, k, l 30 | """ 31 | 32 | raise NotImplementedError("TODO: implement this function.") 33 | 34 | 35 | def partial_max(a): 36 | """ 37 | a: a 4D theano tensor 38 | 39 | Returns b, a theano matrix, where 40 | 41 | b[i, j] = max_{k,l} a[i, k, l, j] 42 | 43 | for all i, j 44 | """ 45 | 46 | raise NotImplementedError("TODO: implement this function.") 47 | 48 | if __name__ == "__main__": 49 | a = make_tensor(3) 50 | b = make_tensor(4) 51 | c = broadcasted_add(a, b) 52 | d = partial_max(c) 53 | 54 | f = function([a, b], d) 55 | 56 | rng = np.random.RandomState([1, 2, 3]) 57 | a_value = rng.randn(2, 2, 2).astype(a.dtype) 58 | b_value = rng.rand(2, 2, 2, 2).astype(b.dtype) 59 | c_value = np.transpose(a_value, (2, 1, 0))[:, None, :, :] + b_value 60 | expected = c_value.max(axis=1).max(axis=1) 61 | 62 | actual = f(a_value, b_value) 63 | 64 | assert np.allclose(actual, expected), (actual, expected) 65 | print "SUCCESS!" 66 | -------------------------------------------------------------------------------- /01_building_expressions/03_tensor_soln.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from theano import function 3 | import theano.tensor as T 4 | 5 | 6 | def make_tensor(dim): 7 | """ 8 | Returns a new Theano tensor with no broadcastable dimensions. 9 | dim: the total number of dimensions of the tensor. 
10 | """ 11 | 12 | return T.TensorType(broadcastable=tuple([False] * dim), dtype='float32')() 13 | 14 | 15 | def broadcasted_add(a, b): 16 | """ 17 | a: a 3D theano tensor 18 | b: a 4D theano tensor 19 | Returns c, a 4D theano tensor, where 20 | 21 | c[i, j, k, l] = a[l, k, i] + b[i, j, k, l] 22 | 23 | for all i, j, k, l 24 | """ 25 | 26 | return a.dimshuffle(2, 'x', 1, 0) + b 27 | 28 | 29 | def partial_max(a): 30 | """ 31 | a: a 4D theano tensor 32 | 33 | Returns b, a theano matrix, where 34 | 35 | b[i, j] = max_{k,l} a[i, k, l, j] 36 | 37 | for all i, j 38 | """ 39 | 40 | return a.max(axis=(1, 2)) 41 | 42 | if __name__ == "__main__": 43 | a = make_tensor(3) 44 | b = make_tensor(4) 45 | c = broadcasted_add(a, b) 46 | d = partial_max(c) 47 | 48 | f = function([a, b], d) 49 | 50 | rng = np.random.RandomState([1, 2, 3]) 51 | a_value = rng.randn(2, 2, 2).astype(a.dtype) 52 | b_value = rng.rand(2, 2, 2, 2).astype(b.dtype) 53 | c_value = np.transpose(a_value, (2, 1, 0))[:, None, :, :] + b_value 54 | expected = c_value.max(axis=1).max(axis=1) 55 | 56 | actual = f(a_value, b_value) 57 | 58 | assert np.allclose(actual, expected), (actual, expected) 59 | print "SUCCESS!" 60 | -------------------------------------------------------------------------------- /02_compiling_and_running/01_function.py: -------------------------------------------------------------------------------- 1 | # Fill in the TODOs in this exercise, then run 2 | # python 01_function.py to see if your solution works! 3 | # 4 | # This exercice ask you to compile a Theano functiont and call it to 5 | # execute "x + y". 6 | from theano import tensor as T 7 | raise NotImplementedError("TODO: add any other imports you need") 8 | 9 | 10 | def evaluate(x, y, expr, x_value, y_value): 11 | """ 12 | x: A theano variable 13 | y: A theano variable 14 | expr: A theano expression involving x and y 15 | x_value: A numpy value 16 | y_value: A numpy value 17 | 18 | Returns the value of expr when x_value is substituted for x 19 | and y_value is substituted for y 20 | """ 21 | 22 | raise NotImplementedError("TODO: implement this function.") 23 | 24 | 25 | if __name__ == "__main__": 26 | x = T.iscalar() 27 | y = T.iscalar() 28 | z = x + y 29 | assert evaluate(x, y, z, 1, 2) == 3 30 | print "SUCCESS!" 31 | -------------------------------------------------------------------------------- /02_compiling_and_running/01_function_soln.py: -------------------------------------------------------------------------------- 1 | from theano import tensor as T 2 | from theano import function 3 | 4 | 5 | def evaluate(x, y, expr, x_value, y_value): 6 | """ 7 | x: A theano variable 8 | y: A theano variable 9 | expr: A theano expression involving x and y 10 | x_value: A numpy value 11 | y_value: A numpy value 12 | 13 | Returns the value of expr when x_value is substituted for x 14 | and y_value is substituted for y 15 | """ 16 | 17 | return function([x, y], expr)(x_value, y_value) 18 | 19 | 20 | if __name__ == "__main__": 21 | x = T.iscalar() 22 | y = T.iscalar() 23 | z = x + y 24 | assert evaluate(x, y, z, 1, 2) == 3 25 | print "SUCCESS!" 26 | -------------------------------------------------------------------------------- /02_compiling_and_running/02_shared.py: -------------------------------------------------------------------------------- 1 | # Fill in the TODOs in this exercise, then run 2 | # python 01_function.py to see if your solution works! 3 | # 4 | # This exercice make you use shared variable. 
You must create them and 5 | # update them by swapping 2 shared variables values. 6 | import numpy as np 7 | raise NotImplementedError("TODO: add any other imports you need") 8 | 9 | 10 | def make_shared(shape): 11 | """ 12 | Returns a theano shared variable containing a tensor of the specified 13 | shape. 14 | You can use any value you want. 15 | """ 16 | raise NotImplementedError("TODO: implement the function") 17 | 18 | 19 | def exchange_shared(a, b): 20 | """ 21 | a: a theano shared variable 22 | b: a theano shared variable 23 | Uses get_value and set_value to swap the values stored in a and b 24 | """ 25 | raise NotImplementedError("TODO: implement the function") 26 | 27 | 28 | def make_exchange_func(a, b): 29 | """ 30 | a: a theano shared variable 31 | b: a theano shared variable 32 | Returns f 33 | where f is a theano function, that, when called, swaps the 34 | values in a and b 35 | f should not return anything 36 | """ 37 | raise NotImplementedError("TODO: implement the function") 38 | 39 | 40 | if __name__ == "__main__": 41 | a = make_shared((5, 4, 3)) 42 | assert a.get_value().shape == (5, 4, 3) 43 | b = make_shared((5, 4, 3)) 44 | assert a.get_value().shape == (5, 4, 3) 45 | a.set_value(np.zeros((5, 4, 3), dtype=a.dtype)) 46 | b.set_value(np.ones((5, 4, 3), dtype=b.dtype)) 47 | exchange_shared(a, b) 48 | assert np.all(a.get_value() == 1.) 49 | assert np.all(b.get_value() == 0.) 50 | f = make_exchange_func(a, b) 51 | rval = f() 52 | assert isinstance(rval, list) 53 | assert len(rval) == 0 54 | assert np.all(a.get_value() == 0.) 55 | assert np.all(b.get_value() == 1.) 56 | 57 | print "SUCCESS!" 58 | -------------------------------------------------------------------------------- /02_compiling_and_running/02_shared_soln.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from theano.compat.python2x import OrderedDict 3 | from theano import function 4 | from theano import shared 5 | 6 | 7 | def make_shared(shape): 8 | """ 9 | Returns a theano shared variable containing a tensor of the specified 10 | shape. 11 | You can use any value you want. 12 | """ 13 | return shared(np.zeros(shape)) 14 | 15 | 16 | def exchange_shared(a, b): 17 | """ 18 | a: a theano shared variable 19 | b: a theano shared variable 20 | Uses get_value and set_value to swap the values stored in a and b 21 | """ 22 | temp = a.get_value() 23 | a.set_value(b.get_value()) 24 | b.set_value(temp) 25 | 26 | 27 | def make_exchange_func(a, b): 28 | """ 29 | a: a theano shared variable 30 | b: a theano shared variable 31 | Returns f 32 | where f is a theano function, that, when called, swaps the 33 | values in a and b 34 | f should not return anything 35 | """ 36 | 37 | updates = OrderedDict() 38 | updates[a] = b 39 | updates[b] = a 40 | f = function([], updates=updates) 41 | return f 42 | 43 | 44 | if __name__ == "__main__": 45 | a = make_shared((5, 4, 3)) 46 | assert a.get_value().shape == (5, 4, 3) 47 | b = make_shared((5, 4, 3)) 48 | assert a.get_value().shape == (5, 4, 3) 49 | a.set_value(np.zeros((5, 4, 3), dtype=a.dtype)) 50 | b.set_value(np.ones((5, 4, 3), dtype=b.dtype)) 51 | exchange_shared(a, b) 52 | assert np.all(a.get_value() == 1.) 53 | assert np.all(b.get_value() == 0.) 54 | f = make_exchange_func(a, b) 55 | rval = f() 56 | assert isinstance(rval, list) 57 | assert len(rval) == 0 58 | assert np.all(a.get_value() == 0.) 59 | assert np.all(b.get_value() == 1.) 60 | 61 | print "SUCCESS!" 
62 | -------------------------------------------------------------------------------- /02_compiling_and_running/03_bug.py: -------------------------------------------------------------------------------- 1 | # Something weird happens when you run this code. 2 | # Find something that is not quite right. 3 | # Figure out which compilation modes make the problem more obvious. 4 | # Explain why what is happening. 5 | import numpy as np 6 | from theano import function 7 | from theano import tensor as T 8 | x = T.vector() 9 | y = T.vector() 10 | z = T.zeros_like(y) 11 | a = x + z 12 | f = function([x, y], a) 13 | output = f(np.zeros((1,), dtype=x.dtype), np.zeros((2,), dtype=y.dtype)) 14 | -------------------------------------------------------------------------------- /02_compiling_and_running/03_bug_soln.txt: -------------------------------------------------------------------------------- 1 | The weird thing is that if you think about how the function call is 2 | implemented, the two arguments have different shapes, and so should 3 | the resulting values of x and z. The line adding x and z should therefore 4 | result in a ValueError. However, when run in the default mode it does not. 5 | The reason is that the optimizations realize that z is always 0, so adding 6 | z to x has no effect. The optimizations thus remove the addition of z. 7 | However, this causes the function to fail to raise an error for bad values 8 | of x and y. To use fewer optimizations and see the bug, you can use 9 | THEANO_FLAGS="mode=FAST_COMPILE". DEBUG_MODE will also catch the bug. 10 | -------------------------------------------------------------------------------- /03_modifying/01_grad.py: -------------------------------------------------------------------------------- 1 | # Fill in the TODOs in this exercise, then run 2 | # python 01_grad.py to see if your solution works! 3 | # 4 | # This exercice ask you to use Theano automatic gradient system to 5 | # compute some derivative. 6 | from theano import tensor as T 7 | 8 | 9 | def grad_sum(x, y, z): 10 | """ 11 | x: A theano variable 12 | y: A theano variable 13 | z: A theano expression involving x and y 14 | 15 | Returns dz / dx + dz / dy 16 | """ 17 | 18 | raise NotImplementedError("TODO: implement this function.") 19 | 20 | 21 | if __name__ == "__main__": 22 | x = T.scalar() 23 | y = T.scalar() 24 | z = x + y 25 | s = grad_sum(x, y, z) 26 | assert s.eval({x: 0, y: 0}) == 2 27 | print "SUCCESS!" 28 | -------------------------------------------------------------------------------- /03_modifying/01_grad_soln.py: -------------------------------------------------------------------------------- 1 | # Fill in the TODOs in this exercise, then run 2 | # python 01_grad.py to see if your solution works! 3 | # 4 | from theano import tensor as T 5 | 6 | 7 | def grad_sum(x, y, z): 8 | """ 9 | x: A theano variable 10 | y: A theano variable 11 | z: A theano expression involving x and y 12 | 13 | Returns dz / dx + dz / dy 14 | """ 15 | 16 | return sum(T.grad(z, [x, y])) 17 | 18 | if __name__ == "__main__": 19 | x = T.scalar() 20 | y = T.scalar() 21 | z = x + y 22 | s = grad_sum(x, y, z) 23 | assert s.eval({x: 0, y: 0}) == 2 24 | print "SUCCESS!" 25 | -------------------------------------------------------------------------------- /03_modifying/02_traverse.py: -------------------------------------------------------------------------------- 1 | # Fill in the TODOs and run python 02_traverse.py to see if your solution 2 | # works! 
3 | # 4 | # This exercice is here to show you how to navigate a little in the 5 | # Theano graph. You will need to find the inputs used that produce 6 | # some computation. 7 | import numpy as np 8 | from theano import tensor as T 9 | raise NotImplementedError("Add any imports you need.") 10 | 11 | 12 | def arg_to_softmax(prob): 13 | """ 14 | Oh no! Someone has passed you the probability output, 15 | "prob", of a softmax function, and you want the unnormalized 16 | log probability--the argument to the softmax. 17 | 18 | Verify that prob really is the output of a softmax. Raise a 19 | TypeError if it is not. 20 | 21 | If it is, return the argument to the softmax. 22 | """ 23 | 24 | raise NotImplementedError("Implement this function.") 25 | 26 | if __name__ == "__main__": 27 | x = np.ones((5, 4)) 28 | try: 29 | arg_to_softmax(x) 30 | raise Exception("You should have raised an error.") 31 | except TypeError: 32 | pass 33 | 34 | x = T.matrix() 35 | try: 36 | arg_to_softmax(x) 37 | raise Exception("You should have raised an error.") 38 | except TypeError: 39 | pass 40 | 41 | y = T.nnet.sigmoid(x) 42 | try: 43 | arg_to_softmax(y) 44 | raise Exception("You should have raised an error.") 45 | except TypeError: 46 | pass 47 | 48 | y = T.nnet.softmax(x) 49 | rval = arg_to_softmax(y) 50 | assert rval is x 51 | 52 | print "SUCCESS!" 53 | -------------------------------------------------------------------------------- /03_modifying/02_traverse_soln.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from theano.gof import Variable 3 | from theano import tensor as T 4 | 5 | 6 | def arg_to_softmax(prob): 7 | """ 8 | Oh no! Someone has passed you the probability output, 9 | "prob", of a softmax function, and you want the unnormalized 10 | log probability--the argument to the softmax. 11 | 12 | Verify that prob really is the output of a softmax. Raise a 13 | TypeError if it is not. 14 | 15 | If it is, return the argument to the softmax. 16 | """ 17 | 18 | if not isinstance(prob, Variable): 19 | raise TypeError() 20 | 21 | if prob.owner is None: 22 | raise TypeError() 23 | 24 | owner = prob.owner 25 | 26 | if not isinstance(owner.op, T.nnet.Softmax): 27 | raise TypeError() 28 | 29 | rval, = owner.inputs 30 | 31 | return rval 32 | 33 | if __name__ == "__main__": 34 | x = np.ones((5, 4)) 35 | try: 36 | arg_to_softmax(x) 37 | raise Exception("You should have raised an error.") 38 | except TypeError: 39 | pass 40 | 41 | x = T.matrix() 42 | try: 43 | arg_to_softmax(x) 44 | raise Exception("You should have raised an error.") 45 | except TypeError: 46 | pass 47 | 48 | y = T.nnet.sigmoid(x) 49 | try: 50 | arg_to_softmax(y) 51 | raise Exception("You should have raised an error.") 52 | except TypeError: 53 | pass 54 | 55 | y = T.nnet.softmax(x) 56 | rval = arg_to_softmax(y) 57 | assert rval is x 58 | 59 | print "SUCCESS!" 60 | -------------------------------------------------------------------------------- /04_debugging/02_compute_test_value.py: -------------------------------------------------------------------------------- 1 | # Run 2 | # python 01_compute_test_value.py 3 | # It should raise an exception when it tries to execute the call to fn. 4 | # The exception doesn't make it easy to tell which line of the python script 5 | # first created an invalid expression though. 6 | # Modify the script to use compute_test_value to find the first bad line. 
7 | # 8 | # This show you another way then using Theano flags to find the line 9 | # in your code that build a bad graph. 10 | import numpy as np 11 | from theano import function 12 | from theano import tensor as T 13 | a = T.vector() 14 | b = T.log(a) 15 | c = T.nnet.sigmoid(b) 16 | d = T.sqrt(c) 17 | e = T.concatenate((d, c), axis=0) 18 | f = b * c * d 19 | g = e + f 20 | h = g / c 21 | fn = function([a], h) 22 | fn(np.ones((3,)).astype(a.dtype)) 23 | -------------------------------------------------------------------------------- /04_debugging/02_compute_test_value_soln.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from theano import function 3 | from theano import tensor as T 4 | from theano import config 5 | config.compute_test_value = 'raise' 6 | a = T.vector() 7 | a.tag.test_value = np.ones((3,)).astype(a.dtype) 8 | b = T.log(a) 9 | c = T.nnet.sigmoid(b) 10 | d = T.sqrt(c) 11 | e = T.concatenate((d, c), axis=0) 12 | f = b * c * d 13 | # This is the first bad line 14 | g = e + f 15 | h = g / c 16 | fn = function([a], h) 17 | fn(np.ones((3,)).astype(a.dtype)) 18 | -------------------------------------------------------------------------------- /05_tripleop/01_tripleop.py: -------------------------------------------------------------------------------- 1 | # Modify this file to get a new op TripleOp that multiplies the 2 | # elements of the array by 3 instead of 2. 3 | from theano import Op, Apply 4 | from theano.tensor import as_tensor_variable 5 | 6 | class DoubleOp(Op): 7 | __props__ = () 8 | 9 | def make_node(self, x): 10 | x = as_tensor_variable(x) 11 | return Apply(self, [x], [x.type()]) 12 | 13 | def perform(self, node, inputs, output_storage): 14 | x = inputs[0] 15 | z = output_storage[0] 16 | z[0] = x * 2 17 | 18 | def infer_shape(self, node, input_shapes): 19 | return input_shapes 20 | 21 | def grad(self, inputs, output_grads): 22 | return [output_grads[0] * 2] 23 | -------------------------------------------------------------------------------- /05_tripleop/01_tripleop_soln.py: -------------------------------------------------------------------------------- 1 | from theano import Op, Apply 2 | from theano.tensor import as_tensor_variable 3 | 4 | class TripleOp(Op): 5 | __props__ = () 6 | 7 | def make_node(self, x): 8 | x = as_tensor_variable(x) 9 | return Apply(self, [x], [x.type()]) 10 | 11 | def perform(self, node, inputs, output_storage): 12 | x = inputs[0] 13 | z = output_storage[0] 14 | z[0] = x * 3 15 | 16 | def infer_shape(self, node, i0_shapes): 17 | return i0_shapes 18 | 19 | def grad(self, inputs, output_grads): 20 | return [output_grads[0] * 3] 21 | -------------------------------------------------------------------------------- /06_scalmulop/01_scalmulop.py: -------------------------------------------------------------------------------- 1 | from theano import Op, Apply 2 | from theano.tensor import as_tensor_variable 3 | 4 | class DoubleOp(Op): 5 | __props__ = () 6 | 7 | def make_node(self, x): 8 | x = as_tensor_variable(x) 9 | return Apply(self, [x], [x.type()]) 10 | 11 | def perform(self, node, inputs, output_storage): 12 | x = inputs[0] 13 | z = output_storage[0] 14 | z[0] = x * 2 15 | -------------------------------------------------------------------------------- /06_scalmulop/01_scalmulop_soln.py: -------------------------------------------------------------------------------- 1 | from theano import Op, Apply 2 | from theano.tensor import as_tensor_variable 3 | from theano.scalar import 
as_scalar_variable 4 | 5 | class ScalMulV1(Op): 6 | __props__ = ('scal',) 7 | 8 | def __init__(self, scal): 9 | if not isinstance(scal, int): 10 | raise TypeError('expected an int') 11 | self.scal = scal 12 | 13 | def make_node(self, x): 14 | x = as_tensor_variable(x) 15 | return Apply(self, [x], [x.type()]) 16 | 17 | def perform(self, node, inputs, output_storage): 18 | x = inputs[0] 19 | z = output_storage[0] 20 | z[0] = x * self.scal 21 | 22 | 23 | class ScalMulV2(Op): 24 | __props__ = () 25 | 26 | def make_node(self, x, scal): 27 | x = as_tensor_variable(x) 28 | scal = as_scalar_variable(scal) 29 | return Apply(self, [x, scal], [x.type()]) 30 | 31 | def perform(self, node, inputs, output_storage): 32 | x = inputs[0] 33 | scal = inputs[1] 34 | z = output_storage[0] 35 | z[0] = x * scal 36 | -------------------------------------------------------------------------------- /07_scalmulgrad/01_scalmulop.py: -------------------------------------------------------------------------------- 1 | from theano import Op, Apply 2 | from theano.tensor import as_tensor_variable 3 | from theano.scalar import as_scalar_variable 4 | 5 | class ScalMul(Op): 6 | __props__ = ('scal',) 7 | 8 | def __init__(self, scal): 9 | if not isinstance(scal, int): 10 | raise TypeError('expected an int') 11 | self.scal = scal 12 | 13 | def make_node(self, x): 14 | x = as_tensor_variable(x) 15 | return Apply(self, [x], [x.type()]) 16 | 17 | def perform(self, node, inputs, output_storage): 18 | x = inputs[0] 19 | z = output_storage[0] 20 | z[0] = x * self.scal 21 | -------------------------------------------------------------------------------- /07_scalmulgrad/01_scalmulop_soln.py: -------------------------------------------------------------------------------- 1 | from theano import Op, Apply 2 | from theano.tensor import as_tensor_variable 3 | from theano.scalar import as_scalar_variable 4 | 5 | class ScalMul(Op): 6 | __props__ = ('scal',) 7 | 8 | def __init__(self, scal): 9 | if not isinstance(scal, int): 10 | raise TypeError('expected an int') 11 | self.scal = scal 12 | 13 | def make_node(self, x): 14 | x = as_tensor_variable(x) 15 | return Apply(self, [x], [x.type()]) 16 | 17 | def perform(self, node, inputs, output_storage): 18 | x = inputs[0] 19 | z = output_storage[0] 20 | z[0] = x * self.scal 21 | 22 | def infer_shape(self, node, input_shapes): 23 | return input_shapes 24 | 25 | def grad(self, inputs, output_grads): 26 | return [output_grads[0] * self.scal] 27 | -------------------------------------------------------------------------------- /08_scalmulc/01_scalmulc.py: -------------------------------------------------------------------------------- 1 | from theano import Op, Apply 2 | from theano.tensor import as_tensor_variable 3 | 4 | class DoubleC(Op): 5 | __props__ = () 6 | 7 | def make_node(self, x): 8 | x = as_tensor_variable(x) 9 | if x.ndim != 1: 10 | raise TypeError("DoubleC only works on 1D") 11 | return Apply(self, [x], [x.type()]) 12 | 13 | def c_code(self, node, name, input_names, 14 | output_names, sub): 15 | return """ 16 | Py_XDECREF(%(out)s); 17 | %(out)s = (PyArrayObject *)PyArray_NewLikeArray( 18 | %(inp)s, NPY_ANYORDER, NULL, 0); 19 | if (%(out)s == NULL) { 20 | %(fail)s 21 | } 22 | for (npy_intp i = 0; i < PyArray_DIM(%(inp)s, 0); i++) { 23 | *(dtype_%(out)s *)PyArray_GETPTR1(%(out)s, i) = 24 | (*(dtype_%(inp)s *)PyArray_GETPTR1(%(inp)s, i)) * 2; 25 | } 26 | """ % dict(inp=input_names[0], out=output_names[0], 27 | fail=sub["fail"]) 28 | 29 | def infer_shape(self, node, input_shapes): 30 | 
return input_shapes 31 | 32 | def grad(self, inputs, output_grads): 33 | return [output_grads[0] * 2] 34 | -------------------------------------------------------------------------------- /08_scalmulc/01_scalmulc_soln.py: -------------------------------------------------------------------------------- 1 | from theano import Op, Apply 2 | from theano.tensor import as_tensor_variable 3 | 4 | class ScalMulC(Op): 5 | __props__ = ('scal',) 6 | 7 | def __init__(self, scal): 8 | if not isinstance(scal, int): 9 | raise TypeError('expected an int') 10 | self.scal = scal 11 | 12 | def make_node(self, x): 13 | x = as_tensor_variable(x) 14 | if x.ndim != 1: 15 | raise TypeError("ScalMulC only works on 1D") 16 | return Apply(self, [x], [x.type()]) 17 | 18 | def c_code(self, node, name, input_names, 19 | output_names, sub): 20 | return """ 21 | Py_XDECREF(%(out)s); 22 | %(out)s = (PyArrayObject *)PyArray_NewLikeArray( 23 | %(inp)s, NPY_ANYORDER, NULL, 0); 24 | if (%(out)s == NULL) { 25 | %(fail)s 26 | } 27 | for (npy_intp i = 0; i < PyArray_DIM(%(inp)s, 0); i++) { 28 | *(dtype_%(out)s *)PyArray_GETPTR1(%(out)s, i) = 29 | (*(dtype_%(inp)s *)PyArray_GETPTR1(%(inp)s, i)) * %(scal)d; 30 | } 31 | """ % % dict(inp=input_names[0], out=output_names[0], 32 | fail=sub["fail"], scal=self.scal) 33 | 34 | def infer_shape(self, node, input_shapes): 35 | return input_shapes 36 | 37 | def grad(self, inputs, output_grads): 38 | return [output_grads[0] * self.scal] 39 | -------------------------------------------------------------------------------- /09_opt/01_opt.py: -------------------------------------------------------------------------------- 1 | from scalmulop import ScalMulV1 2 | from doubleop import DoubleOp 3 | 4 | from theano.gof import local_optimizer 5 | 6 | from theano.tensor.opt import register_specialize 7 | 8 | @register_specialize 9 | @local_optimizer([ScalMulV1]) 10 | def local_scalmul_double_v1(node): 11 | if not (isinstance(node.op, ScalMulV1) 12 | and node.op.scal == 2): 13 | return False 14 | 15 | return [DoubleOp()(node.inputs[0])] 16 | 17 | from theano.gof.opt import OpSub 18 | 19 | local_scalmul_double_v2 = OpSub(ScalMulV1(2), DoubleOp()) 20 | 21 | register_specialize(local_scalmul_double_v2, 22 | name='local_scalmul_double_v2') 23 | -------------------------------------------------------------------------------- /09_opt/01_opt_soln.py: -------------------------------------------------------------------------------- 1 | from doubleop import DoubleOp 2 | from doublec import DoubleC 3 | 4 | from theano.gof import local_optimizer 5 | 6 | from theano.tensor.opt import register_specialize 7 | 8 | @register_specialize 9 | @local_optimizer([DoubleOp]) 10 | def local_scalmul_double_v1(node): 11 | if not (isinstance(node.op, DoubleOp) 12 | and node.inputs[0].ndim == 1): 13 | return False 14 | 15 | return [DoubleC()(node.inputs[0])] 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Ian Goodfellow 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 
9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, this 11 | list of conditions and the following disclaimer in the documentation and/or 12 | other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 18 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: presentation.pdf 2 | 3 | clean: 4 | rm -f *~ *.toc *.vrb *.out *.nav *.snm *.aux *.log 5 | 6 | presentation.pdf: presentation.tex 7 | pdflatex presentation 8 | pdflatex presentation 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ccw_tutorial_theano 2 | =================== 3 | 4 | Common Code Workflow tutorial on Theano 5 | -------------------------------------------------------------------------------- /advanced.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lisa-lab/tutorial_theano/b22999e3d4fd609a98d901fea8e9839e77fbcdbe/advanced.pdf -------------------------------------------------------------------------------- /advanced.tex: -------------------------------------------------------------------------------- 1 | \documentclass[utf8x]{beamer} 2 | 3 | % \usepackage{beamerthemesplit} // Activate for custom appearance 4 | \usepackage[utf8x]{inputenc} 5 | \usepackage[OT1]{fontenc} 6 | \usepackage{graphicx} 7 | \usepackage{listings} 8 | \usepackage{hyperref} 9 | \usepackage{xcolor} 10 | 11 | \usetheme{Malmoe} 12 | \usecolortheme{beaver} 13 | 14 | \lstloadlanguages{Python,C,sh} 15 | 16 | \definecolor{darkgreen}{RGB}{0,93,21} 17 | \definecolor{greenblue}{RGB}{40,110,126} 18 | \definecolor{lightgray}{RGB}{246,246,246} 19 | \definecolor{bordergray}{RGB}{193,193,193} 20 | \definecolor{lightblue}{RGB}{0,114,168} 21 | \definecolor{methblue}{RGB}{0,31,108} 22 | 23 | 24 | \title{Extending Theano} 25 | \author{Arnaud Bergeron} 26 | \date{\today} 27 | 28 | \lstset{ 29 | language=Python, 30 | basicstyle=\fontfamily{pcr}\selectfont\footnotesize, 31 | keywordstyle=\color{darkgreen}\bfseries, 32 | commentstyle=\color{greenblue}\itshape, 33 | stringstyle=\color{violet}, 34 | showstringspaces=false, 35 | tabsize=4, 36 | backgroundcolor=\color{lightgray}, 37 | frame=single, 38 | %showlines=true, 39 | %emph={theano,MyOp,DoubleOp}, emphstyle=\color{lightblue}\bfseries, 40 | emph={[2]__init__,make_node,perform,infer_shape,c_code,make_thunk,grad,R_op},emphstyle={[2]\color{methblue}}, 41 | emph={[3]self},emphstyle={[3]\color{darkgreen}}, 42 | moredelim=**[is][{\color{red}}]{`}{`} 43 | } 44 | 45 | 
\newcommand{\code}[1]{\lstinline[emph={[2]}]|#1|} 46 | 47 | \begin{document} 48 | 49 | \frame[plain]{\titlepage} 50 | 51 | \section*{} 52 | 53 | \begin{frame}{Outline} 54 | \begin{enumerate} 55 | \item How to Make an Op (Python) (45 min) 56 | \item How to Make an Op (C) (30 min) 57 | \item How to Make a Complex Op (10 min) 58 | \item Optimizations (20 min) 59 | \end{enumerate} 60 | \end{frame} 61 | 62 | \section{How to Make an Op (Python)} 63 | 64 | \begin{frame}[plain]{} 65 | \begin{center} 66 | \Huge How to Make an Op (Python) 67 | \end{center} 68 | \end{frame} 69 | 70 | \begin{frame}[fragile]{Overview} 71 | \lstinputlisting[lastline=14]{python.py} 72 | \end{frame} 73 | 74 | \begin{frame}{\code{__init__}} 75 | \lstinputlisting[firstline=6,lastline=8]{python.py} 76 | \begin{itemize} 77 | \item Optional, a lot of Ops don't have one 78 | \item Serves to set up Op-level parameters 79 | \item Should also perform validation on those parameters 80 | \end{itemize} 81 | \end{frame} 82 | 83 | \begin{frame}{\code{__props__}} 84 | \lstinputlisting[firstline=4,lastline=5]{python.py} 85 | \begin{itemize} 86 | \item Optional (although very useful) 87 | \item Generates \code{__hash__}, \code{__eq__} and \code{__str__} methods if present 88 | \item Empty tuple signifies no properties that should take part in comparison 89 | \item If you have only one property, make sure you add a final comma: \code{('property',)} 90 | \end{itemize} 91 | \end{frame} 92 | 93 | \begin{frame}{\code{make_node}} 94 | \lstinputlisting[firstline=9,lastline=11]{python.py} 95 | \begin{itemize} 96 | \item This creates the node object that represents our computation in the graph 97 | \item The parameters are usually Theano variables, but can be python objects too 98 | \item The return value must be an \code{Apply} instance 99 | \end{itemize} 100 | \end{frame} 101 | 102 | \begin{frame}{What Is an Apply Node?} 103 | \begin{center} 104 | \includegraphics[width=\textwidth]{apply_node} 105 | \end{center} 106 | \end{frame} 107 | 108 | \begin{frame}{\code{perform}} 109 | \lstinputlisting[firstline=12,lastline=14]{python.py} 110 | \begin{itemize} 111 | \item This performs the computation on a set of values (hence the method name) 112 | \item The parameters are all python objects (not symbolic values) 113 | \item This method must not return its result, but rather store it in the 1-element lists (or cells) provided in \code{outputs_storage} 114 | \item The output storage may contain a pre-existing value from a previous run that may be reused for storage. 
115 | \end{itemize} 116 | \end{frame} 117 | 118 | \begin{frame}{DoubleOp} 119 | \lstinputlisting[lastline=15]{doubleop.py} 120 | \end{frame} 121 | 122 | \begin{frame}{Op Instances and Nodes} 123 | When you call an op class you get an instance of that Op: 124 | \vskip4mm 125 | \hskip3em\code{double_op = DoubleOp()} 126 | \vskip4mm 127 | But when you want to use that op as a node in a graph you need to call the \textit{instance}: 128 | \vskip4mm 129 | \hskip3em\code{node = double_op(x)} 130 | \vskip4mm 131 | You can do both steps at once with a double call like this: 132 | \vskip4mm 133 | \hskip3em\code{node = DoubleOp()(x)} 134 | \end{frame} 135 | 136 | \begin{frame}{Basic Tests} 137 | \lstinputlisting[linerange={1-5,8-18}]{test_doubleop.py} 138 | \end{frame} 139 | 140 | \begin{frame}[fragile]{Run Tests} 141 | The simplest way to run your tests is to use \texttt{nosetests} directly on your test file like this: 142 | 143 | \begin{lstlisting}[language={},backgroundcolor=\color{white},frame={}] 144 | $ nosetests test_doubleop.py 145 | . 146 | ------------------------------------------------------ 147 | Ran 1 test in 0.427s 148 | 149 | OK 150 | \end{lstlisting} 151 | 152 | You can also use \texttt{theano-nose} which is a wrapper around \texttt{nosetests} with some extra options. 153 | 154 | \end{frame} 155 | 156 | \begin{frame}{Exercise: TripleOp} 157 | What would need to be changed in the code below (DoubleOp) to make this Op triple the input instead of double? 158 | \lstinputlisting[lastline=15]{doubleop.py} 159 | \end{frame} 160 | 161 | \begin{frame}{Solution: TripleOp} 162 | You change the class name and the constant \code{2} for a constant \code{3}. \\ 163 | \ 164 | \lstinputlisting[lastline=15]{tripleop.py} 165 | \end{frame} 166 | 167 | \begin{frame}{Exercise: ScalMulOp} 168 | \begin{center} 169 | Work though the "06\_scalmulop" directory available at \url{https://github.com/abergeron/ccw_tutorial_theano.git}. 170 | \end{center} 171 | \begin{itemize} 172 | \item Take the \code{DoubleOp} code and make it work with an arbitrary scalar 173 | \item There are more than one solution possible, both have advantages and disadvantages 174 | \end{itemize} 175 | \end{frame} 176 | 177 | \begin{frame}{\code{infer_shape}} 178 | \lstinputlisting[firstline=15,lastline=17]{python.py} 179 | \begin{itemize} 180 | \item This functions is optional, although highly recommended 181 | \item It takes as input the symbolic shapes of the input variables 182 | \item \code{input_shapes} is of the form \code{[[i0_shp0, i0_shp1, ...], ...]} 183 | \item It must return a list with the symbolic shape of the output variables 184 | \end{itemize} 185 | \end{frame} 186 | 187 | \begin{frame}{Example} 188 | \lstinputlisting[firstline=16,lastline=18]{doubleop.py} 189 | \begin{itemize} 190 | \item Here the code is really simple since we don't change the shape in any way in our Op 191 | \item \code{input_shapes} would be an expression equivalent to \code{[x.shape]} 192 | \end{itemize} 193 | \end{frame} 194 | 195 | \begin{frame}{Tests} 196 | \lstinputlisting[linerange={5-5,20-34}]{test_doubleop.py} 197 | \end{frame} 198 | 199 | \begin{frame}{Gradient} 200 | \lstinputlisting[firstline=18,lastline=20]{python.py} 201 | \begin{itemize} 202 | \item This function is required for graphs including your op to work with \code{theano.grad()} 203 | \item Each item you return represents the gradient with respect to that input computed based on the gradient with respect to the outputs (which you get in \code{output_grads}). 
204 | \item It must return a list of symbolic graphs for each of your inputs 205 | \item Inputs that have no valid gradient should have a special \code{DisconnectedType} value 206 | \end{itemize} 207 | \end{frame} 208 | 209 | \begin{frame}{Example} 210 | \lstinputlisting[firstline=19,lastline=21]{doubleop.py} 211 | \begin{itemize} 212 | \item Here since the operation is simple the gradient is simple 213 | \item Note that we return a list 214 | \end{itemize} 215 | \end{frame} 216 | 217 | \begin{frame}{Tests} 218 | To test the gradient we use \code{verify_grad} 219 | \lstinputlisting[linerange={5-5,36-44}]{test_doubleop.py} 220 | It will compute the gradient numerically and symbolically (using our \code{grad()} method) and compare the two. 221 | \end{frame} 222 | 223 | \begin{frame}{Exercice: Add Special Methods to ScalMulOp} 224 | Work through the "07\_scalmulgrad" directory available at \url{https://github.com/abergeron/ccw_tutorial_theano.git} 225 | \begin{itemize} 226 | \item Take the ScalMulOp class you made and add the \code{infer_shape} and \code{grad} methods to it. 227 | \item Don't forget to make tests for your new class to make sure everything works correctly. 228 | \end{itemize} 229 | \end{frame} 230 | 231 | \section{How to Make an Op (C)} 232 | 233 | \begin{frame}[plain]{} 234 | \begin{center} 235 | \Huge How to Make an Op (C) 236 | \end{center} 237 | \end{frame} 238 | 239 | \begin{frame}{Overview} 240 | \lstinputlisting{c.py} 241 | \end{frame} 242 | 243 | \begin{frame}{\code{c_code}} 244 | \lstinputlisting[linerange={9-11}]{c.py} 245 | \begin{itemize} 246 | \item This method returns a python string containing C code 247 | \item \code{input_names} contains the variable names where the inputs are 248 | \item \code{output_names} contains the variable names where to place the outputs 249 | \item \code{sub} contains some code snippets to insert into our code (mostly to indicate failure) 250 | \item The variables in \code{output_names} may contain a reference to a pre-existing value from a previous run that may be reused for storage. 251 | \end{itemize} 252 | \end{frame} 253 | 254 | \begin{frame}{Support Code} 255 | \lstinputlisting[linerange={13-14}]{c.py} 256 | \begin{itemize} 257 | \item This method return a python string containing C code 258 | \item The code may be shared with multiple instances of the op 259 | \item It can contain things like helper functions 260 | \end{itemize} 261 | There are a number of similar methods to insert code at various points 262 | \end{frame} 263 | 264 | \begin{frame}{Headers, Libraries, Compilers} 265 | Some of the methods available to customize the compilation environment: 266 | \begin{description} 267 | \item[\texttt{c\_libraries}] Return a list of shared libraries the op needs 268 | \item[\texttt{c\_headers}] Return a list of included headers the op needs 269 | \item[\texttt{c\_compiler}] C compiler to use (if not the default) 270 | \end{description} 271 | Again others are available. Refer to the documentation for a complete list. 272 | \end{frame} 273 | 274 | \begin{frame}{Python C-API} 275 | \begin{description} 276 | \item[\texttt{void Py\_INCREF(PyObject *o)}] Increase the reference count of a python object. 277 | \item[\texttt{void Py\_DECREF(PyObject *o)}] Decrease the reference count of a python object. 278 | \item[\texttt{void Py\_XINCREF(PyObject *o)}] Increase the reference count of a (potentially NULL) python object. 279 | \item[\texttt{void Py\_XDECREF(PyObject *o)}] Decrease the reference count of a (potentially NULL) python object. 
280 | \end{description} 281 | \end{frame} 282 | 283 | \begin{frame}{Numpy C-API} 284 | \begin{description} 285 | \item[\texttt{int PyArray\_NDIM(PyArrayObject *a)}] Get the number of dimension of an array. 286 | \item[\texttt{npy\_intp *PyArray\_DIMS(PyArrayObject *a)}] Get the shape of an array. 287 | \item[\texttt{npy\_intp *PyArray\_STRIDES(PyArrayObject *a)}] Get the strides of an array. 288 | \item[\texttt{void * PyArray\_DATA(PyArrayObject *a)}] Get the data pointer (pointer to element 0) of an array. 289 | \end{description} 290 | \end{frame} 291 | 292 | \begin{frame}[allowframebreaks]{Example} 293 | \vskip5mm 294 | This is the C code equivalent to \code{perform} 295 | \vskip4mm 296 | \lstinputlisting[linerange={1-27}]{doublec.py} 297 | \end{frame} 298 | 299 | \begin{frame}{COp} 300 | \lstinputlisting{cop.py} 301 | \end{frame} 302 | 303 | \begin{frame}{Constructor Arguments} 304 | \begin{itemize} 305 | \item Basically you just pass two arguments to the constructor of COp 306 | \begin{itemize} 307 | \item Either by calling the constructor directly \code{COp.__init__(self, ...)} 308 | \item Or via the superclass \code{super(MyOp, self).__init__(...)} 309 | \end{itemize} 310 | \item The two arguments are: 311 | \begin{itemize} 312 | \item the name of the C code file 313 | \item the name of the function to call to make the computation 314 | \end{itemize} 315 | \end{itemize} 316 | \end{frame} 317 | 318 | \begin{frame}{COp: Example} 319 | \only<1>{\lstinputlisting[linerange={1-16}]{doublecop.py}} 320 | \only<2>{\lstinputlisting[language=C]{doublecop.c}} 321 | \end{frame} 322 | 323 | \begin{frame}{Tests} 324 | \begin{itemize} 325 | \item Testing ops with C code is done the same way as testing for python ops 326 | \item One thing to watch for is tests for ops which don't have python code 327 | \begin{itemize} 328 | \item You should skip the test in those cases 329 | \item Test for \code{theano.config.gxx == ""} 330 | \end{itemize} 331 | \item Using DebugMode will compare the output of the Python version to the output of the C version and raise an error if they don't match 332 | \end{itemize} 333 | \end{frame} 334 | 335 | \begin{frame}{Gradient and Other Concerns} 336 | \begin{itemize} 337 | \item The code for \code{grad()} and \code{infer_shape()} is done the same way as for a python Op 338 | \item In fact you can have the same Op with a python and a C version sharing the \code{grad()} and \code{infer_shape()} code 339 | \begin{itemize} 340 | \item That's how most Ops are implemented 341 | \end{itemize} 342 | \end{itemize} 343 | \end{frame} 344 | 345 | \begin{frame}{Exercice: Add C Code to ScalMulOp} 346 | Work through the "08\_scalmulc" directory available at \url{https://github.com/abergeron/ccw_tutorial_theano.git}. 347 | \begin{itemize} 348 | \item Take the ScalMulOp from before and write C code for it using either approach (only accept vectors). 349 | \item You can base yourself on the C code for DoubleOp. 350 | \item Don't forget to test your new implementation! Be sure to check for invalid inputs (matrices). 
351 | \end{itemize} 352 | \end{frame} 353 | 354 | \section{How to Make a Complex Op} 355 | 356 | \begin{frame}[plain]{} 357 | \begin{center} 358 | \Huge How to Make a Complex Op 359 | \end{center} 360 | \end{frame} 361 | 362 | \begin{frame}{\code{make_thunk}} 363 | \lstinputlisting[linerange={12-14}]{thunk.py} 364 | \begin{itemize} 365 | \item Define instead of \code{perform} or \code{c_code} 366 | \item Gives total freedom on how the computation is performed 367 | \item More complex to use and generally not needed 368 | \end{itemize} 369 | \end{frame} 370 | 371 | \section{Optimizations} 372 | 373 | \begin{frame}[plain]{} 374 | \begin{center} 375 | \Huge Optimizations 376 | \end{center} 377 | \end{frame} 378 | 379 | \begin{frame}{Purpose} 380 | \begin{itemize} 381 | \item End goal is to make code run faster 382 | \item Sometimes they look after stability or memory usage 383 | \item Most of the time you will make one to insert a new Op you wrote 384 | \end{itemize} 385 | \end{frame} 386 | 387 | \begin{frame}{Replace an Op (V1)} 388 | Here is code to use \code{DoubleOp()} instead of \code{ScalMul(2)}. 389 | \lstinputlisting[linerange={1-5,9-15}]{opt.py} 390 | \end{frame} 391 | 392 | \begin{frame}{Replace an Op (V2)} 393 | In this case since we are replacing one instance with another there is an easier way. 394 | \lstinputlisting[linerange={1-2,16-20}]{opt.py} 395 | \end{frame} 396 | 397 | \begin{frame}{Registering} 398 | In any case you need to register your optimization. 399 | \lstinputlisting[linerange={6-10}]{opt.py} 400 | \lstinputlisting[linerange={21-22}]{opt.py} 401 | \end{frame} 402 | 403 | \begin{frame}{Tests} 404 | \lstinputlisting{test_opt.py} 405 | \end{frame} 406 | 407 | \begin{frame}{Exercice 4} 408 | Work through the "09\_opt" directory available at \url{https://github.com/abergeron/ccw_tutorial_theano.git}. 
409 | \begin{itemize} 410 | \item Make an optimization that replace DoubleOp with DoubleC (or DoubleCOp) 411 | \item Write tests to make sure your optimization is applied correctly 412 | \end{itemize} 413 | \end{frame} 414 | 415 | \end{document} 416 | -------------------------------------------------------------------------------- /apply_node.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lisa-lab/tutorial_theano/b22999e3d4fd609a98d901fea8e9839e77fbcdbe/apply_node.png -------------------------------------------------------------------------------- /c.py: -------------------------------------------------------------------------------- 1 | from theano import Op 2 | 3 | class MyOp(Op): 4 | __props__ = () 5 | 6 | def make_node(self, ...): 7 | # return apply node 8 | 9 | def c_code(self, node, name, input_names, 10 | output_names, sub): 11 | # return C code string 12 | 13 | def c_support_code(self): 14 | # return C code string 15 | 16 | def c_code_cache_version(self): 17 | # return hashable object 18 | -------------------------------------------------------------------------------- /cop.py: -------------------------------------------------------------------------------- 1 | from theano.gof import COp 2 | 3 | class MyOp(COp): 4 | __props__ = () 5 | 6 | def __init__(self, ...): 7 | COp.__init__(self, c_file, func_name) 8 | # Other init code if needed 9 | 10 | def make_node(self, ...): 11 | # make the Apply node 12 | -------------------------------------------------------------------------------- /doublec.py: -------------------------------------------------------------------------------- 1 | from theano import Op, Apply 2 | from theano.tensor import as_tensor_variable 3 | 4 | class DoubleC(Op): 5 | __props__ = () 6 | 7 | def make_node(self, x): 8 | x = as_tensor_variable(x) 9 | if x.ndim != 1: 10 | raise TypeError("DoubleC only works on 1D") 11 | return Apply(self, [x], [x.type()]) 12 | 13 | def c_code(self, node, name, input_names, 14 | output_names, sub): 15 | return """ 16 | Py_XDECREF(%(out)s); 17 | %(out)s = (PyArrayObject *)PyArray_NewLikeArray( 18 | %(inp)s, NPY_ANYORDER, NULL, 0); 19 | if (%(out)s == NULL) { 20 | %(fail)s 21 | } 22 | for (npy_intp i = 0; i < PyArray_DIM(%(inp)s, 0); i++) { 23 | *(dtype_%(out)s *)PyArray_GETPTR1(%(out)s, i) = 24 | (*(dtype_%(inp)s *)PyArray_GETPTR1(%(inp)s, i)) * 2; 25 | } 26 | """ % dict(inp=input_names[0], out=output_names[0], 27 | fail=sub["fail"]) 28 | 29 | def infer_shape(self, node, input_shapes): 30 | return input_shapes 31 | 32 | def grad(self, inputs, output_grads): 33 | return [output_grads[0] * 2] 34 | -------------------------------------------------------------------------------- /doublecop.c: -------------------------------------------------------------------------------- 1 | #section support_code 2 | 3 | int APPLY_SPECIFIC(doublecop)(PyArrayObject *x, 4 | PyArrayObject **out) { 5 | Py_XDECREF(*out); 6 | *out = (PyArrayObject *)PyArray_NewLikeArray( 7 | inp, NPY_ANYORDER, NULL, 0); 8 | if (*out == NULL) 9 | return -1; 10 | 11 | for (npy_intp i = 0; i < PyArray_DIM(x, 0); i++) { 12 | *(DTYPE_OUTPUT_0 *)PyArray_GETPTR1(*out, i) = 13 | (*(DTYPE_INPUT_0 *)PyArray_GETPTR1(x, i)) * 2; 14 | } 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /doublecop.py: -------------------------------------------------------------------------------- 1 | from theano import Apply 2 | from theano.gof import COp 3 | from theano.tensor 
import as_tensor_variable 4 | 5 | class DoubleCOp(COp): 6 | __props__ = () 7 | 8 | def __init__(self): 9 | COp.__init__(self, "./doublecop.c", 10 | "APPLY_SPECIFIC(doublecop)") 11 | 12 | def make_node(self, x): 13 | x = as_tensor_variable(x) 14 | if x.ndim != 1: 15 | raise TypeError("DoubleCOp only works with 1D") 16 | return Apply(self, [x], [x.type()]) 17 | 18 | def infer_shape(self, input_shapes): 19 | return input_shapes 20 | 21 | def grad(self, inputs, g): 22 | return [g[0] * 2] 23 | -------------------------------------------------------------------------------- /doubleop.py: -------------------------------------------------------------------------------- 1 | from theano import Op, Apply 2 | from theano.tensor import as_tensor_variable 3 | 4 | class DoubleOp(Op): 5 | __props__ = () 6 | 7 | def make_node(self, x): 8 | x = as_tensor_variable(x) 9 | return Apply(self, [x], [x.type()]) 10 | 11 | def perform(self, node, inputs, output_storage): 12 | x = inputs[0] 13 | z = output_storage[0] 14 | z[0] = x * 2 15 | 16 | def infer_shape(self, node, input_shapes): 17 | return input_shapes 18 | 19 | def grad(self, inputs, output_grads): 20 | return [output_grads[0] * 2] 21 | 22 | def R_op(self, inputs, eval_points): 23 | if eval_points[0] is None: 24 | return eval_points 25 | return self.grad(inputs, eval_points) 26 | -------------------------------------------------------------------------------- /opt.py: -------------------------------------------------------------------------------- 1 | from scalmulop import ScalMulV1 2 | from doubleop import DoubleOp 3 | 4 | from theano.gof import local_optimizer 5 | 6 | from theano.tensor.opt import register_specialize 7 | 8 | @register_specialize 9 | @local_optimizer([ScalMulV1]) 10 | def local_scalmul_double_v1(node): 11 | if not (isinstance(node.op, ScalMulV1) 12 | and node.op.scal == 2): 13 | return False 14 | 15 | return [DoubleOp()(node.inputs[0])] 16 | 17 | from theano.gof.opt import OpSub 18 | 19 | local_scalmul_double_v2 = OpSub(ScalMulV1(2), DoubleOp()) 20 | 21 | register_specialize(local_scalmul_double_v2, 22 | name='local_scalmul_double_v2') 23 | -------------------------------------------------------------------------------- /presentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lisa-lab/tutorial_theano/b22999e3d4fd609a98d901fea8e9839e77fbcdbe/presentation.pdf -------------------------------------------------------------------------------- /presentation.tex: -------------------------------------------------------------------------------- 1 | \documentclass[utf8x,xcolor=pdftex,dvipsnames,table]{beamer} 2 | \usetheme{Malmoe} % Now it's a beamer presentation with the lisa theme! 3 | \setbeamertemplate{footline}[page number] 4 | \usecolortheme{beaver} 5 | \usepackage[T1]{fontenc} 6 | \usepackage{amsmath} 7 | \usepackage[utf8x]{inputenc} 8 | %\logo{\includegraphics[width=.8in]{UdeM_NoirBleu_logo_Marie_crop}} 9 | \usepackage{listings} 10 | 11 | \newcommand{\superscript}[1]{\ensuremath{^{\textrm{#1}}}} 12 | 13 | \mode 14 | 15 | \title{Introduction to Theano} 16 | 17 | \author{% 18 | \footnotesize 19 | Frédéric Bastien \newline 20 | (slides highly copied from previous tutorial by Ian G.) 
--------------------------------------------------------------------------------
/opt.py:
--------------------------------------------------------------------------------
from scalmulop import ScalMulV1
from doubleop import DoubleOp

from theano.gof import local_optimizer

from theano.tensor.opt import register_specialize

@register_specialize
@local_optimizer([ScalMulV1])
def local_scalmul_double_v1(node):
    if not (isinstance(node.op, ScalMulV1)
            and node.op.scal == 2):
        return False

    return [DoubleOp()(node.inputs[0])]

from theano.gof.opt import OpSub

local_scalmul_double_v2 = OpSub(ScalMulV1(2), DoubleOp())

register_specialize(local_scalmul_double_v2,
                    name='local_scalmul_double_v2')
--------------------------------------------------------------------------------
/presentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lisa-lab/tutorial_theano/b22999e3d4fd609a98d901fea8e9839e77fbcdbe/presentation.pdf
--------------------------------------------------------------------------------
/presentation.tex:
--------------------------------------------------------------------------------
\documentclass[utf8x,xcolor=pdftex,dvipsnames,table]{beamer}
\usetheme{Malmoe} % Now it's a beamer presentation with the lisa theme!
\setbeamertemplate{footline}[page number]
\usecolortheme{beaver}
\usepackage[T1]{fontenc}
\usepackage{amsmath}
\usepackage[utf8x]{inputenc}
%\logo{\includegraphics[width=.8in]{UdeM_NoirBleu_logo_Marie_crop}}
\usepackage{listings}

\newcommand{\superscript}[1]{\ensuremath{^{\textrm{#1}}}}

\mode<presentation>

\title{Introduction to Theano}

\author{%
\footnotesize
Frédéric Bastien \newline
(slides largely adapted from a previous tutorial by Ian G.) \newline
}

\date{September 25, 2014}

\setbeamertemplate{navigation symbols}{}

\begin{document}

\begin{frame}[plain]
 \titlepage
 \vspace{-5em}
% \includegraphics[width=1in]{../hpcs2011_tutorial/pics/lisabook_logo_text_3.png}
 \hfill
% \includegraphics[width=.8in]{../hpcs2011_tutorial/pics/UdeM_NoirBleu_logo_Marie_crop}
\end{frame}

\section{Outline}
\begin{frame}{High level}\setcounter{page}{1}
\begin{itemize}
\item Overview of library (3 min)
\item Building expressions (30 min)
\item Compiling and running expressions (30 min)
\item Modifying expressions (25 min)
\item Debugging (30 min)
\item Citing Theano (2 min)
\end{itemize}
\end{frame}


\begin{frame}{Overview of Library}
Theano is many things
\begin{itemize}
\item Language
\item Compiler
\item Python library
\end{itemize}
\end{frame}

\begin{frame}{Overview}
Theano language:
\begin{itemize}
\item Operations on scalar, vector, matrix, tensor, and sparse variables
\item Linear algebra
\item Element-wise nonlinearities
\item Convolution
\item Extensible
\end{itemize}
\end{frame}

\begin{frame}[fragile]
 \frametitle{Overview}
Using Theano:
\begin{itemize}
\item define expression $f(x,y) = x + y$
\item compile expression
\lstset{language=C,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
int f(int x, int y){
    return x + y;
}
\end{lstlisting}

\item execute expression
\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
>>> f(1, 2)
3
\end{lstlisting}
\end{itemize}
\end{frame}


\section{Building}
\begin{frame}{Building expressions}
\begin{itemize}
\item Scalars
\item Vectors
\item Matrices
\item Tensors
\item Reduction
\item Dimshuffle
\end{itemize}
\end{frame}

\begin{frame}[fragile]
 \frametitle{Scalar math}
Symbolic scalars support the usual math operators and functions:
\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
from theano import tensor as T
x = T.scalar()
y = T.scalar()
z = x+y
w = z*x
a = T.sqrt(w)
b = T.exp(a)
c = a ** b
d = T.log(c)
\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
 \frametitle{Vector math}

\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
from theano import tensor as T
x = T.vector()
y = T.vector()
# Scalar math applied elementwise
a = x * y
# Vector dot product
b = T.dot(x, y)
# Broadcasting
c = a + b
\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
 \frametitle{Matrix math}

\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
from theano import tensor as T
x = T.matrix()
y = T.matrix()
a = T.vector()
# Matrix-matrix product
b = T.dot(x, y)
# Matrix-vector product
c = T.dot(x, a)
\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
 \frametitle{Tensors}
\begin{itemize}
\item Dimensionality defined by length of ``broadcastable'' argument
\item Can add (or do other elemwise op) on two
      tensors with same dimensionality
\item Duplicate tensors along broadcastable axes to
      make size match
\end{itemize}
\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
from theano import tensor as T
tensor3 = T.TensorType(
    broadcastable=(False, False, False),
    dtype='float32')
x = tensor3()
\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
 \frametitle{Reductions}

\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
from theano import tensor as T
tensor3 = T.TensorType(
    broadcastable=(False, False, False),
    dtype='float32')
x = tensor3()
total = x.sum()
marginals = x.sum(axis=(0, 2))
mx = x.max(axis=1)
\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
 \frametitle{Dimshuffle}

\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
from theano import tensor as T
tensor3 = T.TensorType(
    broadcastable=(False, False, False),
    dtype='float32')
x = tensor3()
y = x.dimshuffle((2, 1, 0))
a = T.matrix()
b = a.T
# Same as b
c = a.dimshuffle((1, 0))
# Adding to larger tensor
d = a.dimshuffle((0, 1, 'x'))
e = a + d
\end{lstlisting}
\end{frame}

\begin{frame}{Exercises}
Work through the ``01\_building\_expressions'' directory now.
Clone it with ``git~clone~https://github.com/nouiz/ccw\_tutorial\_theano.git''.
\end{frame}

\section{Compiling/Running}
\begin{frame}{Compiling and running expressions}
\begin{itemize}
\item theano.function
\item shared variables and updates
\item compilation modes
\item compilation for GPU
\item optimizations
\end{itemize}
\end{frame}

\begin{frame}[fragile]
 \frametitle{theano.function}

\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
>>> from theano import tensor as T
>>> x = T.scalar()
>>> y = T.scalar()
>>> from theano import function
>>> # first arg is list of SYMBOLIC inputs
>>> # second arg is SYMBOLIC output
>>> f = function([x, y], x + y)
>>> # Call it with NUMERICAL values
>>> # Get a NUMERICAL output
>>> f(1., 2.)
array(3.0)
\end{lstlisting}
\end{frame}
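
% Added example frame (editor's sketch): theano.function also accepts a
% list of outputs; this is standard Theano usage.
\begin{frame}[fragile]
 \frametitle{theano.function: multiple outputs (sketch)}

\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
>>> a = T.scalar()
>>> b = T.scalar()
>>> # pass a list as the second argument
>>> g = function([a, b], [a + b, a * b])
>>> g(2., 3.)
[array(5.0), array(6.0)]
\end{lstlisting}
\end{frame}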

\begin{frame}{Shared variables}
\begin{itemize}
\item It's hard to do much with purely functional programming
\item ``shared variables'' add just a little bit of imperative programming
\item A ``shared variable'' is a buffer that stores a numerical value for a Theano variable
\item A function can write to as many shared variables as you want, once each, at the end of a call
\item Modify them outside Theano functions with the get\_value() and set\_value() methods
\end{itemize}
\end{frame}

\begin{frame}[fragile]
 \frametitle{Shared variable example}

\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
>>> from theano import shared, function
>>> x = shared(0.)
>>> from theano.compat.python2x import OrderedDict
>>> updates = OrderedDict()
>>> updates[x] = x + 1
>>> f = function([], updates=updates)
>>> f()
>>> x.get_value()
1.0
>>> x.set_value(100.)
>>> f()
>>> x.get_value()
101.0
\end{lstlisting}
\end{frame}

\begin{frame}{Which dict?}
\begin{itemize}
\item Use theano.compat.python2x.OrderedDict
\item Not collections.OrderedDict
\begin{itemize}
\item This isn't available in older versions of Python,
      and will limit the portability of your code
\end{itemize}
\item Not \{\} aka dict
\begin{itemize}
\item The iteration order of this built-in class is not
      deterministic (thanks, Python!) so if Theano
      accepted this, the same script could compile
      different C programs each time you run it
\end{itemize}
\end{itemize}
\end{frame}

\begin{frame}{Compilation modes}
\begin{itemize}
\item Can compile in different modes to get different kinds of programs
\item Can specify these modes very precisely with arguments to theano.function
\item Can use a few quick presets with environment variable flags
\end{itemize}
\end{frame}

\begin{frame}{Example preset compilation modes}
\begin{itemize}
\item FAST\_RUN: default. Spends a lot of time on
      compilation to get an executable that runs
      fast.
\item FAST\_COMPILE: Doesn't spend much time
      compiling. The executable usually uses Python
      instead of compiled C code. Runs slowly.
\item DEBUG\_MODE: Adds lots of checks.
      Raises error messages in situations other
      modes regard as fine.
\end{itemize}
\end{frame}
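
% Added example frame (editor's sketch): the two standard ways to pick a
% preset mode, per function or via the THEANO_FLAGS environment variable.
\begin{frame}[fragile]
 \frametitle{Selecting a mode (sketch)}

\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
>>> from theano import tensor as T, function
>>> x = T.scalar()
>>> # per-function mode selection
>>> f = function([x], 2 * x, mode='FAST_COMPILE')

# or for a whole script, from the shell:
# THEANO_FLAGS=mode=DEBUG_MODE python script.py
\end{lstlisting}
\end{frame}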

\begin{frame}{Compilation for GPU}
\begin{itemize}
\item Theano's current back-end only supports 32-bit floats on the GPU
\item CUDA supports 64-bit floats, but they are slow on gamer cards
\item T.fscalar, T.fvector, T.fmatrix are all 32 bit
\item T.scalar, T.vector, T.matrix resolve to 32 bit or 64 bit depending on Theano's floatX flag
\item floatX is float64 by default; set it to float32
\item Set the device flag to gpu (or a specific gpu, like gpu0)
\end{itemize}
\end{frame}

\begin{frame}{Optimizations}
\begin{itemize}
\item Theano changes the symbolic expressions
      you write before converting them to C code
\item It makes them faster
\begin{itemize}
\item (x+y)+(x+y) -> 2 * (x + y)
\end{itemize}
\item It makes them more stable
\begin{itemize}
\item exp(a)/exp(a).sum() -> softmax(a)
\end{itemize}
\end{itemize}
\end{frame}

\begin{frame}[fragile]
 \frametitle{Optimizations}

\begin{itemize}
\item Sometimes optimizations discard error
      checking and produce incorrect output
      rather than an exception
\end{itemize}
\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
>>> x = T.scalar()
>>> f = function([x], x/x)
>>> f(0.)
array(1.0)
\end{lstlisting}

\end{frame}

\begin{frame}{Exercises}
Work through the ``02\_compiling\_and\_running'' directory now
\end{frame}

\section{Modifying expressions}
\begin{frame}{Modifying expressions}
\begin{itemize}
\item The grad method
\item Variable nodes
\item Types
\item Ops
\item Apply nodes
\end{itemize}
\end{frame}

\begin{frame}[fragile]
 \frametitle{The grad method}

\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
>>> x = T.scalar('x')
>>> y = 2. * x
>>> g = T.grad(y, x)
>>> from theano.printing import min_informative_str
>>> print min_informative_str(g)
A. Elemwise{mul}
 B. Elemwise{second,no_inplace}
  C. Elemwise{mul,no_inplace}
   D. TensorConstant{2.0}
   E. x
  F. TensorConstant{1.0}

\end{lstlisting}
\end{frame}

\begin{frame}{Theano Variables}
\begin{itemize}
\item A Variable is a Theano expression
\item Can come from T.scalar, T.matrix, etc.
\item Can come from doing operations on other Variables
\item Every Variable has a type field, identifying its Type \newline
      e.g. TensorType((True, False), 'float32')
\item Variables can be thought of as nodes in a graph
\end{itemize}
\end{frame}
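
% Added example frame (editor's sketch): inspecting a Variable's type and
% owner from the interpreter; exact reprs may vary across Theano versions.
\begin{frame}[fragile]
 \frametitle{Inspecting Variables (sketch)}

\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
>>> from theano import tensor as T
>>> x = T.matrix('x')
>>> y = x + x
>>> print y.type
TensorType(float64, matrix)
>>> print y.owner.op
Elemwise{add,no_inplace}
>>> print y.owner.inputs
[x, x]
\end{lstlisting}
\end{frame}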

\begin{frame}{Ops}

\begin{itemize}
\item An Op is any class that describes a
      mathematical function of some variables
\item Can call the op on some variables to get a
      new variable or variables
\item An Op class can supply other forms of
      information about the function, such as its
      derivatives
\end{itemize}
\end{frame}

\begin{frame}{Apply nodes}
\begin{itemize}
\item The Apply class is a specific instance of an application of an Op
\item Notable fields:
\begin{itemize}
\item op: The Op to be applied
\item inputs: The Variables to be used as input
\item outputs: The Variables produced
\end{itemize}
\item Variable.owner identifies the Apply that created the variable
\item Variable and Apply instances are nodes and owner/inputs/outputs
      identify edges in a Theano graph
\end{itemize}
\end{frame}

\begin{frame}{Exercises}
Work through the ``03\_modifying'' directory now
\end{frame}

\section{Debugging}
\begin{frame}{Debugging}
\begin{itemize}
\item DEBUG\_MODE
\item Error messages
\item theano.printing.debugprint
\item min\_informative\_str
\item compute\_test\_value
\item Accessing the FunctionGraph
\end{itemize}
\end{frame}

\begin{frame}[fragile]
 \frametitle{Error message: code}
\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
import numpy as np
import theano
import theano.tensor as T
x = T.vector()
y = T.vector()
z = x + x
z = z + y
f = theano.function([x, y], z)
f(np.ones((2,)), np.ones((3,)))
\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
 \frametitle{Error message: 1st part}

\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
Traceback (most recent call last):
[...]
ValueError: Input dimension mis-match.
(input[0].shape[0] = 3, input[1].shape[0] = 2)
Apply node that caused the error:
 Elemwise{add,no_inplace}(<TensorType(float64, vector)>,
  <TensorType(float64, vector)>,
  <TensorType(float64, vector)>)
Inputs types: [TensorType(float64, vector),
               TensorType(float64, vector),
               TensorType(float64, vector)]
Inputs shapes: [(3,), (2,), (2,)]
Inputs strides: [(8,), (8,), (8,)]
Inputs scalar values: ['not scalar', 'not scalar', 'not scalar']
\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
 \frametitle{Error message: 2nd part}

\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
HINT: Re-running with most Theano optimization
disabled could give you a back-traces when this
node was created. This can be done with by setting
the Theano flags optimizer=fast_compile
HINT: Use the Theano flag 'exception_verbosity=high'
for a debugprint of this apply node.
\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
 \frametitle{Error message: exception\_verbosity=high}

\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
Debugprint of the apply node:
Elemwise{add,no_inplace} [@A] ''
 |<TensorType(float64, vector)> [@B]
 |<TensorType(float64, vector)> [@C]
 |<TensorType(float64, vector)> [@C]

\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
 \frametitle{Error message: optimizer=fast\_compile}

\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
Backtrace when the node is created:
File "test.py", line 7, in <module>
  z = z + y
File "/home/nouiz/src/Theano/theano/tensor/var.py",
     line 122, in __add__
  return theano.tensor.basic.add(self, other)

\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
 \frametitle{Error message: Traceback}

\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
Traceback (most recent call last):
File "test.py", line 9, in <module>
  f(np.ones((2,)), np.ones((3,)))
File "/u/bastienf/repos/theano/compile/function_module.py",
     line 589, in __call__
  self.fn.thunks[self.fn.position_of_error])
File "/u/bastienf/repos/theano/compile/function_module.py",
     line 579, in __call__
  outputs = self.fn()

\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
 \frametitle{debugprint}

\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
>>> from theano.printing import debugprint
>>> debugprint(a)
Elemwise{mul,no_inplace} [@A] ''
 |TensorConstant{2.0} [@B]
 |Elemwise{add,no_inplace} [@C] 'z'
   |<TensorType(float64, scalar)> [@D]
   |<TensorType(float64, scalar)> [@E]
\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
 \frametitle{min\_informative\_str}

\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
>>> x = T.scalar()
>>> y = T.scalar()
>>> z = x + y
>>> z.name = 'z'
>>> a = 2. * z
>>> from theano.printing import min_informative_str
>>> print min_informative_str(a)
A. Elemwise{mul,no_inplace}
 B. TensorConstant{2.0}
 C. z
\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
 \frametitle{compute\_test\_value}

\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
>>> from theano import config
>>> config.compute_test_value = 'raise'
>>> x = T.vector()
>>> import numpy as np
>>> x.tag.test_value = np.ones((2,))
>>> y = T.vector()
>>> y.tag.test_value = np.ones((3,))
>>> x + y
...
ValueError: Input dimension mis-match.
(input[0].shape[0] = 2, input[1].shape[0] = 3)
\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
 \frametitle{Accessing a function's fgraph}

\lstset{language=Python,
        commentstyle=\itshape\color{blue},
        stringstyle=\color{violet},
        }
\begin{lstlisting}
>>> x = T.scalar()
>>> y = x / x
>>> f = function([x], y)
>>> debugprint(f.maker.fgraph.outputs[0])
DeepCopyOp [@A] ''
 |TensorConstant{1.0} [@B]
\end{lstlisting}
\end{frame}

\begin{frame}{Exercises}
Work through the ``04\_debugging'' directory now
\end{frame}

\section{Citing}
\begin{frame}{Citing Theano}
\begin{itemize}
\item Please cite both of the following papers in
      all work that uses Theano:
\item Bastien, Frédéric, Lamblin, Pascal, Pascanu, Razvan, Bergstra, James,
      Goodfellow, Ian, Bergeron, Arnaud, Bouchard, Nicolas, and Bengio, Yoshua.
      Theano: new features and speed improvements. Deep Learning and
      Unsupervised Feature Learning NIPS 2012 Workshop, 2012.
\item Bergstra, James, Breuleux, Olivier, Bastien, Frédéric, Lamblin, Pascal,
      Pascanu, Razvan, Desjardins, Guillaume, Turian, Joseph,
      Warde-Farley, David, and Bengio, Yoshua. Theano: a CPU and GPU math
      expression compiler. In Proceedings of the Python for Scientific
      Computing Conference (SciPy), June 2010. Oral Presentation.
\end{itemize}
\end{frame}

\begin{frame}{Example acknowledgments}
We would like to thank the developers of
Theano~\textbackslash citep\{bergstra+al:2010-scipy,Bastien-Theano-2012\},
Pylearn2~\textbackslash citep\{pylearn2\_arxiv\_2013\}. We would also like
to thank NSERC, Compute Canada, and Calcul Qu\'ebec
for providing computational resources.
\end{frame}


\begin{frame}
 \begin{center}
  \bibliography{strings,strings-short,ml,aigaion-shorter}
  \Huge
  Questions?
 \end{center}
\end{frame}


\end{document}
--------------------------------------------------------------------------------
/python.py:
--------------------------------------------------------------------------------
from theano import Op

class MyOp(Op):
    __props__ = ()

    def __init__(self, *args):
        # set up parameters
        raise NotImplementedError

    def make_node(self, *inputs):
        # create an Apply node
        raise NotImplementedError

    def perform(self, node, inputs, output_storage):
        # do the computation
        raise NotImplementedError

    def infer_shape(self, node, input_shapes):
        # return output shapes
        raise NotImplementedError

    def grad(self, inputs, output_grads):
        # return gradient graph for each input
        raise NotImplementedError

    def R_op(self, inputs, eval_points):
        # return R_op graph for each input
        raise NotImplementedError
--------------------------------------------------------------------------------
/scalmulop.py:
--------------------------------------------------------------------------------
from theano import Op, Apply
from theano.tensor import as_tensor_variable
from theano.scalar import as_scalar

class ScalMulV1(Op):
    __props__ = ('scal',)

    def __init__(self, scal):
        if not isinstance(scal, int):
            raise TypeError('expected an int')
        self.scal = scal

    def make_node(self, x):
        x = as_tensor_variable(x)
        return Apply(self, [x], [x.type()])

    def perform(self, node, inputs, output_storage):
        x = inputs[0]
        z = output_storage[0]
        z[0] = x * self.scal

    def infer_shape(self, node, input_shapes):
        return input_shapes

    def grad(self, inputs, output_grads):
        return [output_grads[0] * self.scal]

    def R_op(self, inputs, eval_points):
        if eval_points[0] is None:
            return eval_points
        return self.grad(inputs, eval_points)


class ScalMulV2(Op):
    __props__ = ()

    def make_node(self, x, scal):
        x = as_tensor_variable(x)
        scal = as_scalar(scal)
        return Apply(self, [x, scal], [x.type()])

    def perform(self, node, inputs, output_storage):
        x = inputs[0]
        scal = inputs[1]
        z = output_storage[0]
        z[0] = x * scal

    def infer_shape(self, node, input_shapes):
        return [input_shapes[0]]

    def grad(self, inputs, output_grads):
        # gradient w.r.t. x, then w.r.t. the scalar
        return [output_grads[0] * inputs[1],
                (inputs[0] * output_grads[0]).sum()]

    # def R_op(self, inputs, eval_points):
    #     if eval_points[0] is None:
    #         return eval_points
    #     return self.grad(inputs, eval_points)
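
# --- Added usage sketch (not part of the original file) ---
# Quick check of both versions; assumes a working Theano + NumPy install.
if __name__ == "__main__":
    import numpy
    import theano

    x = theano.tensor.matrix()
    f1 = theano.function([x], ScalMulV1(3)(x))    # multiplier fixed at build time
    f2 = theano.function([x], ScalMulV2()(x, 3))  # multiplier is a graph input
    inp = numpy.ones((2, 2))
    print f1(inp)  # expect all 3.0
    print f2(inp)  # expect all 3.0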
--------------------------------------------------------------------------------
/test_doubleop.py:
--------------------------------------------------------------------------------
import numpy

from theano import function, config
from theano.tensor import matrix
from theano.tests import unittest_tools as utt
from theano.tests.test_rop import RopLop_checker

from doubleop import DoubleOp


def test_doubleop():
    utt.seed_rng()
    x = matrix()
    f = function([x], DoubleOp()(x))
    inp = numpy.asarray(numpy.random.rand(5, 4),
                        dtype=config.floatX)
    out = f(inp)
    utt.assert_allclose(inp * 2, out)


class test_Double(utt.InferShapeTester):
    def test_infer_shape(self):
        utt.seed_rng()
        x = matrix()
        self._compile_and_check(
            # function inputs (symbolic)
            [x],
            # Op instance
            [DoubleOp()(x)],
            # numeric input
            [numpy.asarray(numpy.random.rand(5, 4),
                           dtype=config.floatX)],
            # Op class that should disappear
            DoubleOp)


def test_doubleop_grad():
    utt.seed_rng()
    utt.verify_grad(
        # Op instance
        DoubleOp(),
        # Numeric inputs
        [numpy.random.rand(5, 7, 2)]
    )
--------------------------------------------------------------------------------
/test_opt.py:
--------------------------------------------------------------------------------
import theano

from scalmulop import ScalMulV1
from doubleop import DoubleOp
import opt

def test_scalmul_double():
    x = theano.tensor.matrix()
    y = ScalMulV1(2)(x)
    f = theano.function([x], y)

    assert not any(isinstance(n.op, ScalMulV1)
                   for n in f.maker.fgraph.toposort())
    assert any(isinstance(n.op, DoubleOp)
               for n in f.maker.fgraph.toposort())
--------------------------------------------------------------------------------
/thunk.py:
--------------------------------------------------------------------------------
from theano import Op

class MyOp(Op):
    __props__ = ()

    def __init__(self, *args):
        # set up parameters
        raise NotImplementedError

    def make_node(self, *inputs):
        # create an Apply node
        raise NotImplementedError

    def make_thunk(self, node, storage_map,
                   compute_map, no_recycling):
        # return a thunk
        raise NotImplementedError

    def infer_shape(self, node, input_shapes):
        # return output shapes
        raise NotImplementedError

    def grad(self, inputs, output_grads):
        # return gradient graph for each input
        raise NotImplementedError
--------------------------------------------------------------------------------
/tripleop.py:
--------------------------------------------------------------------------------
from theano import Op, Apply
from theano.tensor import as_tensor_variable

class TripleOp(Op):
    __props__ = ()

    def make_node(self, x):
        x = as_tensor_variable(x)
        return Apply(self, [x], [x.type()])

    def perform(self, node, inputs, output_storage):
        x = inputs[0]
        z = output_storage[0]
        z[0] = x * 3

    def infer_shape(self, node, i0_shapes):
        return i0_shapes

    def grad(self, inputs, output_grads):
        return [output_grads[0] * 3]
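
# --- Added usage sketch (not part of the original file) ---
# TripleOp is DoubleOp with the constant changed; assumes Theano + NumPy.
if __name__ == "__main__":
    import numpy
    import theano

    x = theano.tensor.vector()
    f = theano.function([x], TripleOp()(x))
    print f(numpy.arange(3.0))  # expect [ 0.  3.  6.]
--------------------------------------------------------------------------------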