├── .gitignore ├── 01_building_expressions ├── 01_scalar.py ├── 01_scalar_soln.py ├── 02_vector_mat.py ├── 02_vector_mat_soln.py ├── 03_tensor.py └── 03_tensor_soln.py ├── 02_compiling_and_running ├── 01_function.py ├── 01_function_soln.py ├── 02_shared.py ├── 02_shared_soln.py ├── 03_bug.py └── 03_bug_soln.txt ├── 03_modifying ├── 01_grad.py ├── 01_grad_soln.py ├── 02_traverse.py └── 02_traverse_soln.py ├── 04_debugging ├── 02_compute_test_value.py └── 02_compute_test_value_soln.py ├── 05_tripleop ├── 01_tripleop.py └── 01_tripleop_soln.py ├── 06_scalmulop ├── 01_scalmulop.py └── 01_scalmulop_soln.py ├── 07_scalmulgrad ├── 01_scalmulop.py └── 01_scalmulop_soln.py ├── 08_scalmulc ├── 01_scalmulc.py └── 01_scalmulc_soln.py ├── 09_opt ├── 01_opt.py └── 01_opt_soln.py ├── LICENSE ├── Makefile ├── README.md ├── advanced.pdf ├── advanced.tex ├── apply_node.png ├── c.py ├── cop.py ├── doublec.py ├── doublecgpu.c ├── doublecgpu.py ├── doublecop.c ├── doublecop.py ├── doublegpu.py ├── doubleop.py ├── gpu.py ├── ipnb ├── 01_scalar_soln.py ├── 02_vector_mat_soln.py ├── 03_tensor_soln.py ├── 11_function_soln.py ├── 12_shared_soln.py ├── 13_bug_soln.py ├── 21_grad_soln.py ├── 22_traverse_soln.py ├── 31_debug_soln.py └── Theano-basic.ipynb ├── opt.py ├── params.py ├── presentation.pdf ├── presentation.tex ├── python.py ├── scalmulop.py ├── test_doubleop.py ├── test_opt.py ├── thunk.py └── tripleop.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | __pycache__ 21 | 22 | # Installer logs 23 | pip-log.txt 24 | 25 | # Unit test / coverage reports 26 | .coverage 27 | .tox 28 | nosetests.xml 29 | 30 | # Translations 31 | *.mo 32 | 33 | # Mr Developer 34 | .mr.developer.cfg 35 | .project 36 | .pydevproject 37 | 38 | # Latex stuff 39 | *.aux 40 | *.log 41 | *.nav 42 | *.out 43 | *.snm 44 | *.synctex.gz 45 | *.toc 46 | *.vrb 47 | -------------------------------------------------------------------------------- /01_building_expressions/01_scalar.py: -------------------------------------------------------------------------------- 1 | # Fill in the TODOs in this exercise, then run 2 | # python 01_scalar.py to see if your solution works! 3 | # 4 | # This exercise asks you to create Theano variables and do some 5 | # computation on them. 6 | import numpy as np 7 | from theano import function 8 | raise NotImplementedError("TODO: add any other imports you need") 9 | 10 | 11 | def make_scalar(): 12 | """ 13 | Returns a new Theano scalar. 14 | """ 15 | 16 | raise NotImplementedError("TODO: implement this function.") 17 | 18 | 19 | def log(x): 20 | """ 21 | Returns the logarithm of a Theano scalar x. 22 | """ 23 | 24 | raise NotImplementedError("TODO: implement this function.") 25 | 26 | 27 | def add(x, y): 28 | """ 29 | Adds two theano scalars together and returns the result. 30 | """ 31 | 32 | raise NotImplementedError("TODO: implement this function.") 33 | 34 | if __name__ == "__main__": 35 | a = make_scalar() 36 | b = make_scalar() 37 | c = log(b) 38 | d = add(a, c) 39 | f = function([a, b], d) 40 | a = np.cast[a.dtype](1.) 41 | b = np.cast[b.dtype](2.) 42 | actual = f(a, b) 43 | expected = 1. + np.log(2.) 44 | assert np.allclose(actual, expected) 45 | print "SUCCESS!" 
46 | -------------------------------------------------------------------------------- /01_building_expressions/01_scalar_soln.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from theano import function 3 | import theano.tensor as T 4 | 5 | 6 | def make_scalar(): 7 | """ 8 | Returns a new Theano scalar. 9 | """ 10 | 11 | return T.scalar() 12 | 13 | 14 | def log(x): 15 | """ 16 | Returns the logarithm of a Theano scalar x. 17 | """ 18 | 19 | return T.log(x) 20 | 21 | 22 | def add(x, y): 23 | """ 24 | Adds two theano scalars together and returns the result. 25 | """ 26 | 27 | return x + y 28 | 29 | if __name__ == "__main__": 30 | a = make_scalar() 31 | b = make_scalar() 32 | c = log(b) 33 | d = add(a, c) 34 | f = function([a, b], d) 35 | a = np.cast[a.dtype](1.) 36 | b = np.cast[b.dtype](2.) 37 | actual = f(a, b) 38 | expected = 1. + np.log(2.) 39 | assert np.allclose(actual, expected) 40 | print "SUCCESS!" 41 | -------------------------------------------------------------------------------- /01_building_expressions/02_vector_mat.py: -------------------------------------------------------------------------------- 1 | # Fill in the TODOs in this exercise, then run 2 | # python 02_vector_mat.py to see if your solution works! 3 | # 4 | # This exercise asks you to create Theano variables and do elemwise 5 | # multiplication and a matrix/vector dot product. 6 | import numpy as np 7 | from theano import function 8 | raise NotImplementedError("TODO: add any other imports you need") 9 | 10 | 11 | def make_vector(): 12 | """ 13 | Returns a new Theano vector. 14 | """ 15 | 16 | raise NotImplementedError("TODO: implement this function.") 17 | 18 | 19 | def make_matrix(): 20 | """ 21 | Returns a new Theano matrix. 22 | """ 23 | 24 | raise NotImplementedError("TODO: implement this function.") 25 | 26 | 27 | def elemwise_mul(a, b): 28 | """ 29 | a: A theano matrix 30 | b: A theano matrix 31 | Returns the elementwise product of a and b 32 | """ 33 | 34 | raise NotImplementedError("TODO: implement this function.") 35 | 36 | 37 | def matrix_vector_mul(a, b): 38 | """ 39 | a: A theano matrix 40 | b: A theano vector 41 | Returns the matrix-vector product of a and b 42 | """ 43 | 44 | raise NotImplementedError("TODO: implement this function.") 45 | 46 | if __name__ == "__main__": 47 | a = make_vector() 48 | b = make_vector() 49 | c = elemwise_mul(a, b) 50 | d = make_matrix() 51 | e = matrix_vector_mul(d, c) 52 | 53 | f = function([a, b, d], e) 54 | 55 | rng = np.random.RandomState([1, 2, 3]) 56 | a_value = rng.randn(5).astype(a.dtype) 57 | b_value = rng.rand(5).astype(b.dtype) 58 | c_value = a_value * b_value 59 | d_value = rng.randn(5, 5).astype(d.dtype) 60 | expected = np.dot(d_value, c_value) 61 | 62 | actual = f(a_value, b_value, d_value) 63 | 64 | assert np.allclose(actual, expected) 65 | print "SUCCESS!" 66 | -------------------------------------------------------------------------------- /01_building_expressions/02_vector_mat_soln.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from theano import function 3 | import theano.tensor as T 4 | 5 | 6 | def make_vector(): 7 | """ 8 | Returns a new Theano vector. 9 | """ 10 | 11 | return T.vector() 12 | 13 | 14 | def make_matrix(): 15 | """ 16 | Returns a new Theano matrix. 
17 | """ 18 | 19 | return T.matrix() 20 | 21 | 22 | def elemwise_mul(a, b): 23 | """ 24 | a: A theano matrix 25 | b: A theano matrix 26 | Returns the elementwise product of a and b 27 | """ 28 | 29 | return a * b 30 | 31 | 32 | def matrix_vector_mul(a, b): 33 | """ 34 | a: A theano matrix 35 | b: A theano vector 36 | Returns the matrix-vector product of a and b 37 | """ 38 | 39 | return T.dot(a, b) 40 | 41 | if __name__ == "__main__": 42 | a = make_vector() 43 | b = make_vector() 44 | c = elemwise_mul(a, b) 45 | d = make_matrix() 46 | e = matrix_vector_mul(d, c) 47 | 48 | f = function([a, b, d], e) 49 | 50 | rng = np.random.RandomState([1, 2, 3]) 51 | a_value = rng.randn(5).astype(a.dtype) 52 | b_value = rng.rand(5).astype(b.dtype) 53 | c_value = a_value * b_value 54 | d_value = rng.randn(5, 5).astype(d.dtype) 55 | expected = np.dot(d_value, c_value) 56 | 57 | actual = f(a_value, b_value, d_value) 58 | 59 | assert np.allclose(actual, expected) 60 | print "SUCCESS!" 61 | -------------------------------------------------------------------------------- /01_building_expressions/03_tensor.py: -------------------------------------------------------------------------------- 1 | # Fill in the TODOs in this exercise, then run 2 | # python 03_tensor.py to see if your solution works! 3 | # 4 | # This exercise asks you to create Theano tensor variables, do 5 | # broadcastable addition and compute the max over part of a tensor. 6 | import numpy as np 7 | from theano import function 8 | raise NotImplementedError("TODO: add any other imports you need") 9 | 10 | 11 | def make_tensor(dim): 12 | """ 13 | Returns a new Theano tensor with no broadcastable dimensions. 14 | dim: the total number of dimensions of the tensor. 15 | (You can use any dtype you like) 16 | """ 17 | 18 | raise NotImplementedError("TODO: implement this function.") 19 | 20 | 21 | def broadcasted_add(a, b): 22 | """ 23 | a: a 3D theano tensor 24 | b: a 4D theano tensor 25 | Returns c, a 4D theano tensor, where 26 | 27 | c[i, j, k, l] = a[l, k, i] + b[i, j, k, l] 28 | 29 | for all i, j, k, l 30 | """ 31 | 32 | raise NotImplementedError("TODO: implement this function.") 33 | 34 | 35 | def partial_max(a): 36 | """ 37 | a: a 4D theano tensor 38 | 39 | Returns b, a theano matrix, where 40 | 41 | b[i, j] = max_{k,l} a[i, k, l, j] 42 | 43 | for all i, j 44 | """ 45 | 46 | raise NotImplementedError("TODO: implement this function.") 47 | 48 | if __name__ == "__main__": 49 | a = make_tensor(3) 50 | b = make_tensor(4) 51 | c = broadcasted_add(a, b) 52 | d = partial_max(c) 53 | 54 | f = function([a, b], d) 55 | 56 | rng = np.random.RandomState([1, 2, 3]) 57 | a_value = rng.randn(2, 2, 2).astype(a.dtype) 58 | b_value = rng.rand(2, 2, 2, 2).astype(b.dtype) 59 | c_value = np.transpose(a_value, (2, 1, 0))[:, None, :, :] + b_value 60 | expected = c_value.max(axis=1).max(axis=1) 61 | 62 | actual = f(a_value, b_value) 63 | 64 | assert np.allclose(actual, expected), (actual, expected) 65 | print "SUCCESS!" 66 | -------------------------------------------------------------------------------- /01_building_expressions/03_tensor_soln.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from theano import function 3 | import theano.tensor as T 4 | 5 | 6 | def make_tensor(dim): 7 | """ 8 | Returns a new Theano tensor with no broadcastable dimensions. 9 | dim: the total number of dimensions of the tensor. 
10 | """ 11 | 12 | return T.TensorType(broadcastable=tuple([False] * dim), dtype='float32')() 13 | 14 | 15 | def broadcasted_add(a, b): 16 | """ 17 | a: a 3D theano tensor 18 | b: a 4D theano tensor 19 | Returns c, a 4D theano tensor, where 20 | 21 | c[i, j, k, l] = a[l, k, i] + b[i, j, k, l] 22 | 23 | for all i, j, k, l 24 | """ 25 | 26 | return a.dimshuffle(2, 'x', 1, 0) + b 27 | 28 | 29 | def partial_max(a): 30 | """ 31 | a: a 4D theano tensor 32 | 33 | Returns b, a theano matrix, where 34 | 35 | b[i, j] = max_{k,l} a[i, k, l, j] 36 | 37 | for all i, j 38 | """ 39 | 40 | return a.max(axis=(1, 2)) 41 | 42 | if __name__ == "__main__": 43 | a = make_tensor(3) 44 | b = make_tensor(4) 45 | c = broadcasted_add(a, b) 46 | d = partial_max(c) 47 | 48 | f = function([a, b], d) 49 | 50 | rng = np.random.RandomState([1, 2, 3]) 51 | a_value = rng.randn(2, 2, 2).astype(a.dtype) 52 | b_value = rng.rand(2, 2, 2, 2).astype(b.dtype) 53 | c_value = np.transpose(a_value, (2, 1, 0))[:, None, :, :] + b_value 54 | expected = c_value.max(axis=1).max(axis=1) 55 | 56 | actual = f(a_value, b_value) 57 | 58 | assert np.allclose(actual, expected), (actual, expected) 59 | print "SUCCESS!" 60 | -------------------------------------------------------------------------------- /02_compiling_and_running/01_function.py: -------------------------------------------------------------------------------- 1 | # Fill in the TODOs in this exercise, then run 2 | # python 01_function.py to see if your solution works! 3 | # 4 | # This exercise asks you to compile a Theano function and call it to 5 | # execute "x + y". 6 | from theano import tensor as T 7 | raise NotImplementedError("TODO: add any other imports you need") 8 | 9 | 10 | def evaluate(x, y, expr, x_value, y_value): 11 | """ 12 | x: A theano variable 13 | y: A theano variable 14 | expr: A theano expression involving x and y 15 | x_value: A numpy value 16 | y_value: A numpy value 17 | 18 | Returns the value of expr when x_value is substituted for x 19 | and y_value is substituted for y 20 | """ 21 | 22 | raise NotImplementedError("TODO: implement this function.") 23 | 24 | 25 | if __name__ == "__main__": 26 | x = T.iscalar() 27 | y = T.iscalar() 28 | z = x + y 29 | assert evaluate(x, y, z, 1, 2) == 3 30 | print "SUCCESS!" 31 | -------------------------------------------------------------------------------- /02_compiling_and_running/01_function_soln.py: -------------------------------------------------------------------------------- 1 | from theano import tensor as T 2 | from theano import function 3 | 4 | 5 | def evaluate(x, y, expr, x_value, y_value): 6 | """ 7 | x: A theano variable 8 | y: A theano variable 9 | expr: A theano expression involving x and y 10 | x_value: A numpy value 11 | y_value: A numpy value 12 | 13 | Returns the value of expr when x_value is substituted for x 14 | and y_value is substituted for y 15 | """ 16 | 17 | return function([x, y], expr)(x_value, y_value) 18 | 19 | 20 | if __name__ == "__main__": 21 | x = T.iscalar() 22 | y = T.iscalar() 23 | z = x + y 24 | assert evaluate(x, y, z, 1, 2) == 3 25 | print "SUCCESS!" 26 | -------------------------------------------------------------------------------- /02_compiling_and_running/02_shared.py: -------------------------------------------------------------------------------- 1 | # Fill in the TODOs in this exercise, then run 2 | # python 02_shared.py to see if your solution works! 3 | # 4 | # This exercise makes you use shared variables. 
You must create them and 5 | # update them by swapping the values of 2 shared variables. 6 | import numpy as np 7 | raise NotImplementedError("TODO: add any other imports you need") 8 | 9 | 10 | def make_shared(shape): 11 | """ 12 | Returns a theano shared variable containing a tensor of the specified 13 | shape. 14 | You can use any value you want. 15 | """ 16 | raise NotImplementedError("TODO: implement the function") 17 | 18 | 19 | def exchange_shared(a, b): 20 | """ 21 | a: a theano shared variable 22 | b: a theano shared variable 23 | Uses get_value and set_value to swap the values stored in a and b 24 | """ 25 | raise NotImplementedError("TODO: implement the function") 26 | 27 | 28 | def make_exchange_func(a, b): 29 | """ 30 | a: a theano shared variable 31 | b: a theano shared variable 32 | Returns f 33 | where f is a theano function, that, when called, swaps the 34 | values in a and b 35 | f should not return anything 36 | """ 37 | raise NotImplementedError("TODO: implement the function") 38 | 39 | 40 | if __name__ == "__main__": 41 | a = make_shared((5, 4, 3)) 42 | assert a.get_value().shape == (5, 4, 3) 43 | b = make_shared((5, 4, 3)) 44 | assert b.get_value().shape == (5, 4, 3) 45 | a.set_value(np.zeros((5, 4, 3), dtype=a.dtype)) 46 | b.set_value(np.ones((5, 4, 3), dtype=b.dtype)) 47 | exchange_shared(a, b) 48 | assert np.all(a.get_value() == 1.) 49 | assert np.all(b.get_value() == 0.) 50 | f = make_exchange_func(a, b) 51 | rval = f() 52 | assert isinstance(rval, list) 53 | assert len(rval) == 0 54 | assert np.all(a.get_value() == 0.) 55 | assert np.all(b.get_value() == 1.) 56 | 57 | print "SUCCESS!" 58 | -------------------------------------------------------------------------------- /02_compiling_and_running/02_shared_soln.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from theano.compat.python2x import OrderedDict 3 | from theano import function 4 | from theano import shared 5 | 6 | 7 | def make_shared(shape): 8 | """ 9 | Returns a theano shared variable containing a tensor of the specified 10 | shape. 11 | You can use any value you want. 12 | """ 13 | return shared(np.zeros(shape)) 14 | 15 | 16 | def exchange_shared(a, b): 17 | """ 18 | a: a theano shared variable 19 | b: a theano shared variable 20 | Uses get_value and set_value to swap the values stored in a and b 21 | """ 22 | temp = a.get_value() 23 | a.set_value(b.get_value()) 24 | b.set_value(temp) 25 | 26 | 27 | def make_exchange_func(a, b): 28 | """ 29 | a: a theano shared variable 30 | b: a theano shared variable 31 | Returns f 32 | where f is a theano function, that, when called, swaps the 33 | values in a and b 34 | f should not return anything 35 | """ 36 | 37 | updates = OrderedDict() 38 | updates[a] = b 39 | updates[b] = a 40 | f = function([], updates=updates) 41 | return f 42 | 43 | 44 | if __name__ == "__main__": 45 | a = make_shared((5, 4, 3)) 46 | assert a.get_value().shape == (5, 4, 3) 47 | b = make_shared((5, 4, 3)) 48 | assert b.get_value().shape == (5, 4, 3) 49 | a.set_value(np.zeros((5, 4, 3), dtype=a.dtype)) 50 | b.set_value(np.ones((5, 4, 3), dtype=b.dtype)) 51 | exchange_shared(a, b) 52 | assert np.all(a.get_value() == 1.) 53 | assert np.all(b.get_value() == 0.) 54 | f = make_exchange_func(a, b) 55 | rval = f() 56 | assert isinstance(rval, list) 57 | assert len(rval) == 0 58 | assert np.all(a.get_value() == 0.) 59 | assert np.all(b.get_value() == 1.) 60 | 61 | print "SUCCESS!" 
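62 | # Note (added for clarity): the two updates above are applied 63 | # simultaneously -- both update expressions are evaluated with the old 64 | # values of a and b -- which is why one call to f() swaps them without 65 | # needing an explicit temporary.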
66 | -------------------------------------------------------------------------------- /02_compiling_and_running/03_bug.py: -------------------------------------------------------------------------------- 1 | # Something weird happens when you run this code. 2 | # Find something that is not quite right. 3 | # Figure out which compilation modes make the problem more obvious. 4 | # Explain what is happening and why. 5 | import numpy as np 6 | from theano import function 7 | from theano import tensor as T 8 | x = T.vector() 9 | y = T.vector() 10 | z = T.zeros_like(y) 11 | a = x + z 12 | f = function([x, y], a) 13 | output = f(np.zeros((1,), dtype=x.dtype), np.zeros((2,), dtype=y.dtype)) 14 | -------------------------------------------------------------------------------- /02_compiling_and_running/03_bug_soln.txt: -------------------------------------------------------------------------------- 1 | The weird thing is that the two arguments have different shapes, so the 2 | values taken by x and z have different shapes as well. The line adding x 3 | and z should therefore result in a ValueError. However, when run in the 4 | default mode it does not. The reason is that the optimizations realize 5 | that z is always 0, so adding z to x has no effect. The optimizations 6 | thus remove the addition of z. However, this causes the function to fail 7 | to raise an error for bad values of x and y. To use fewer optimizations 8 | and see the bug, you can use THEANO_FLAGS="mode=FAST_COMPILE". 9 | DEBUG_MODE will also catch the bug.
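10 | 11 | For example (a sketch; the exact message depends on your Theano version), 12 | running THEANO_FLAGS="mode=FAST_COMPILE" python 03_bug.py keeps the addition 13 | in the graph, so the call should fail with something like 14 | "ValueError: Input dimension mis-match" instead of silently returning.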
15 | -------------------------------------------------------------------------------- /03_modifying/01_grad.py: -------------------------------------------------------------------------------- 1 | # Fill in the TODOs in this exercise, then run 2 | # python 01_grad.py to see if your solution works! 3 | # 4 | # This exercise asks you to use Theano's automatic differentiation to 5 | # compute some derivatives. 6 | from theano import tensor as T 7 | 8 | 9 | def grad_sum(x, y, z): 10 | """ 11 | x: A theano variable 12 | y: A theano variable 13 | z: A theano expression involving x and y 14 | 15 | Returns dz / dx + dz / dy 16 | """ 17 | 18 | raise NotImplementedError("TODO: implement this function.") 19 | 20 | 21 | if __name__ == "__main__": 22 | x = T.scalar() 23 | y = T.scalar() 24 | z = x + y 25 | s = grad_sum(x, y, z) 26 | assert s.eval({x: 0, y: 0}) == 2 27 | print "SUCCESS!" 28 | -------------------------------------------------------------------------------- /03_modifying/01_grad_soln.py: -------------------------------------------------------------------------------- 1 | # Fill in the TODOs in this exercise, then run 2 | # python 01_grad.py to see if your solution works! 3 | # 4 | from theano import tensor as T 5 | 6 | 7 | def grad_sum(x, y, z): 8 | """ 9 | x: A theano variable 10 | y: A theano variable 11 | z: A theano expression involving x and y 12 | 13 | Returns dz / dx + dz / dy 14 | """ 15 | 16 | return sum(T.grad(z, [x, y])) 17 | 18 | if __name__ == "__main__": 19 | x = T.scalar() 20 | y = T.scalar() 21 | z = x + y 22 | s = grad_sum(x, y, z) 23 | assert s.eval({x: 0, y: 0}) == 2 24 | print "SUCCESS!" 25 | -------------------------------------------------------------------------------- /03_modifying/02_traverse.py: -------------------------------------------------------------------------------- 1 | # Fill in the TODOs and run python 02_traverse.py to see if your solution 2 | # works! 3 | # 4 | # This exercise shows you how to navigate a little in the 5 | # Theano graph. You will need to find the inputs used to produce 6 | # some computation. 7 | import numpy as np 8 | from theano import tensor as T 9 | raise NotImplementedError("Add any imports you need.") 10 | 11 | 12 | def arg_to_softmax(prob): 13 | """ 14 | Oh no! Someone has passed you the probability output, 15 | "prob", of a softmax function, and you want the unnormalized 16 | log probability--the argument to the softmax. 17 | 18 | Verify that prob really is the output of a softmax. Raise a 19 | TypeError if it is not. 20 | 21 | If it is, return the argument to the softmax. 22 | """ 23 | 24 | raise NotImplementedError("Implement this function.") 25 | 26 | if __name__ == "__main__": 27 | x = np.ones((5, 4)) 28 | try: 29 | arg_to_softmax(x) 30 | raise Exception("You should have raised an error.") 31 | except TypeError: 32 | pass 33 | 34 | x = T.matrix() 35 | try: 36 | arg_to_softmax(x) 37 | raise Exception("You should have raised an error.") 38 | except TypeError: 39 | pass 40 | 41 | y = T.nnet.sigmoid(x) 42 | try: 43 | arg_to_softmax(y) 44 | raise Exception("You should have raised an error.") 45 | except TypeError: 46 | pass 47 | 48 | y = T.nnet.softmax(x) 49 | rval = arg_to_softmax(y) 50 | assert rval is x 51 | 52 | print "SUCCESS!" 53 | -------------------------------------------------------------------------------- /03_modifying/02_traverse_soln.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from theano.gof import Variable 3 | from theano import tensor as T 4 | 5 | 6 | def arg_to_softmax(prob): 7 | """ 8 | Oh no! Someone has passed you the probability output, 9 | "prob", of a softmax function, and you want the unnormalized 10 | log probability--the argument to the softmax. 11 | 12 | Verify that prob really is the output of a softmax. Raise a 13 | TypeError if it is not. 14 | 15 | If it is, return the argument to the softmax. 16 | """ 17 | 18 | if not isinstance(prob, Variable): 19 | raise TypeError() 20 | 21 | if prob.owner is None: 22 | raise TypeError() 23 | 24 | owner = prob.owner 25 | 26 | if not isinstance(owner.op, T.nnet.Softmax): 27 | raise TypeError() 28 | 29 | rval, = owner.inputs 30 | 31 | return rval 32 | 33 | if __name__ == "__main__": 34 | x = np.ones((5, 4)) 35 | try: 36 | arg_to_softmax(x) 37 | raise Exception("You should have raised an error.") 38 | except TypeError: 39 | pass 40 | 41 | x = T.matrix() 42 | try: 43 | arg_to_softmax(x) 44 | raise Exception("You should have raised an error.") 45 | except TypeError: 46 | pass 47 | 48 | y = T.nnet.sigmoid(x) 49 | try: 50 | arg_to_softmax(y) 51 | raise Exception("You should have raised an error.") 52 | except TypeError: 53 | pass 54 | 55 | y = T.nnet.softmax(x) 56 | rval = arg_to_softmax(y) 57 | assert rval is x 58 | 59 | print "SUCCESS!" 60 | -------------------------------------------------------------------------------- /04_debugging/02_compute_test_value.py: -------------------------------------------------------------------------------- 1 | # Run 2 | # python 02_compute_test_value.py 3 | # It should raise an exception when it tries to execute the call to fn. 4 | # The exception doesn't make it easy to tell which line of the python script 5 | # first created an invalid expression though. 6 | # Modify the script to use compute_test_value to find the first bad line. 
7 | # 8 | # This shows you another way, besides Theano flags, to find the line 9 | # in your code that builds a bad graph. 10 | import numpy as np 11 | from theano import function 12 | from theano import tensor as T 13 | a = T.vector() 14 | b = T.log(a) 15 | c = T.nnet.sigmoid(b) 16 | d = T.sqrt(c) 17 | e = T.concatenate((d, c), axis=0) 18 | f = b * c * d 19 | g = e + f 20 | h = g / c 21 | fn = function([a], h) 22 | fn(np.ones((3,)).astype(a.dtype)) 23 | -------------------------------------------------------------------------------- /04_debugging/02_compute_test_value_soln.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from theano import function 3 | from theano import tensor as T 4 | from theano import config 5 | config.compute_test_value = 'raise' 6 | a = T.vector() 7 | a.tag.test_value = np.ones((3,)).astype(a.dtype) 8 | b = T.log(a) 9 | c = T.nnet.sigmoid(b) 10 | d = T.sqrt(c) 11 | e = T.concatenate((d, c), axis=0) 12 | f = b * c * d 13 | # This is the first bad line 14 | g = e + f 15 | h = g / c 16 | fn = function([a], h) 17 | fn(np.ones((3,)).astype(a.dtype)) 18 | -------------------------------------------------------------------------------- /05_tripleop/01_tripleop.py: -------------------------------------------------------------------------------- 1 | # Modify this file to get a new op TripleOp that multiplies the 2 | # elements of the array by 3 instead of 2. 3 | from theano import Op, Apply 4 | from theano.tensor import as_tensor_variable 5 | 6 | class DoubleOp(Op): 7 | __props__ = () 8 | 9 | def make_node(self, x): 10 | x = as_tensor_variable(x) 11 | return Apply(self, [x], [x.type()]) 12 | 13 | def perform(self, node, inputs, output_storage): 14 | x = inputs[0] 15 | z = output_storage[0] 16 | z[0] = x * 2 17 | 18 | def infer_shape(self, node, input_shapes): 19 | return input_shapes 20 | 21 | def grad(self, inputs, output_grads): 22 | return [output_grads[0] * 2] 23 | -------------------------------------------------------------------------------- /05_tripleop/01_tripleop_soln.py: -------------------------------------------------------------------------------- 1 | from theano import Op, Apply 2 | from theano.tensor import as_tensor_variable 3 | 4 | class TripleOp(Op): 5 | __props__ = () 6 | 7 | def make_node(self, x): 8 | x = as_tensor_variable(x) 9 | return Apply(self, [x], [x.type()]) 10 | 11 | def perform(self, node, inputs, output_storage): 12 | x = inputs[0] 13 | z = output_storage[0] 14 | z[0] = x * 3 15 | 16 | def infer_shape(self, node, i0_shapes): 17 | return i0_shapes 18 | 19 | def grad(self, inputs, output_grads): 20 | return [output_grads[0] * 3] 21 | -------------------------------------------------------------------------------- /06_scalmulop/01_scalmulop.py: -------------------------------------------------------------------------------- 1 | from theano import Op, Apply 2 | from theano.tensor import as_tensor_variable 3 | 4 | class DoubleOp(Op): 5 | __props__ = () 6 | 7 | def make_node(self, x): 8 | x = as_tensor_variable(x) 9 | return Apply(self, [x], [x.type()]) 10 | 11 | def perform(self, node, inputs, output_storage): 12 | x = inputs[0] 13 | z = output_storage[0] 14 | z[0] = x * 2 15 | -------------------------------------------------------------------------------- /06_scalmulop/01_scalmulop_soln.py: -------------------------------------------------------------------------------- 1 | from theano import Op, Apply 2 | from theano.tensor import as_tensor_variable 3 | from theano.scalar import 
as_scalar 4 | 5 | class ScalMulV1(Op): 6 | __props__ = ('scal',) 7 | 8 | def __init__(self, scal): 9 | if not isinstance(scal, int): 10 | raise TypeError('expected an int') 11 | self.scal = scal 12 | 13 | def make_node(self, x): 14 | x = as_tensor_variable(x) 15 | return Apply(self, [x], [x.type()]) 16 | 17 | def perform(self, node, inputs, output_storage): 18 | x = inputs[0] 19 | z = output_storage[0] 20 | z[0] = x * self.scal 21 | 22 | 23 | class ScalMulV2(Op): 24 | __props__ = () 25 | 26 | def make_node(self, x, scal): 27 | x = as_tensor_variable(x) 28 | scal = as_scalar(scal) 29 | return Apply(self, [x, scal], [x.type()]) 30 | 31 | def perform(self, node, inputs, output_storage): 32 | x = inputs[0] 33 | scal = inputs[1] 34 | z = output_storage[0] 35 | z[0] = x * scal 36 | -------------------------------------------------------------------------------- /07_scalmulgrad/01_scalmulop.py: -------------------------------------------------------------------------------- 1 | from theano import Op, Apply 2 | from theano.tensor import as_tensor_variable 3 | 4 | class ScalMul(Op): 5 | __props__ = ('scal',) 6 | 7 | def __init__(self, scal): 8 | if not isinstance(scal, int): 9 | raise TypeError('expected an int') 10 | self.scal = scal 11 | 12 | def make_node(self, x): 13 | x = as_tensor_variable(x) 14 | return Apply(self, [x], [x.type()]) 15 | 16 | def perform(self, node, inputs, output_storage): 17 | x = inputs[0] 18 | z = output_storage[0] 19 | z[0] = x * self.scal 20 | -------------------------------------------------------------------------------- /07_scalmulgrad/01_scalmulop_soln.py: -------------------------------------------------------------------------------- 1 | from theano import Op, Apply 2 | from theano.tensor import as_tensor_variable 3 | 4 | class ScalMul(Op): 5 | __props__ = ('scal',) 6 | 7 | def __init__(self, scal): 8 | if not isinstance(scal, int): 9 | raise TypeError('expected an int') 10 | self.scal = scal 11 | 12 | def make_node(self, x): 13 | x = as_tensor_variable(x) 14 | return Apply(self, [x], [x.type()]) 15 | 16 | def perform(self, node, inputs, output_storage): 17 | x = inputs[0] 18 | z = output_storage[0] 19 | z[0] = x * self.scal 20 | 21 | def infer_shape(self, node, input_shapes): 22 | return input_shapes 23 | 24 | def grad(self, inputs, output_grads): 25 | return [output_grads[0] * self.scal] 26 | -------------------------------------------------------------------------------- /08_scalmulc/01_scalmulc.py: -------------------------------------------------------------------------------- 1 | from theano import Op, Apply 2 | from theano.tensor import as_tensor_variable 3 | 4 | class DoubleC(Op): 5 | __props__ = () 6 | 7 | def make_node(self, x): 8 | x = as_tensor_variable(x) 9 | if x.ndim != 1: 10 | raise TypeError("DoubleC only works on 1D") 11 | return Apply(self, [x], [x.type()]) 12 | 13 | def c_code(self, node, name, input_names, 14 | output_names, sub): 15 | return """ 16 | Py_XDECREF(%(out)s); 17 | %(out)s = (PyArrayObject *)PyArray_NewLikeArray( 18 | %(inp)s, NPY_ANYORDER, NULL, 0); 19 | if (%(out)s == NULL) { 20 | %(fail)s 21 | } 22 | for (npy_intp i = 0; i < PyArray_DIM(%(inp)s, 0); i++) { 23 | *(dtype_%(out)s *)PyArray_GETPTR1(%(out)s, i) = 24 | (*(dtype_%(inp)s *)PyArray_GETPTR1(%(inp)s, i)) * 2; 25 | } 26 | """ % dict(inp=input_names[0], out=output_names[0], 27 | fail=sub["fail"]) 28 | 29 | def infer_shape(self, node, input_shapes): 30 | return input_shapes 31 | 32 | def grad(self, inputs, output_grads): 33 | return [output_grads[0] * 2] 34 | 
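35 | # Hint (added): the C source above is built with Python %-formatting, so a 36 | # per-instance constant such as self.scal can be spliced into the generated 37 | # code the same way (the solution below uses a %(scal)d substitution).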
-------------------------------------------------------------------------------- /08_scalmulc/01_scalmulc_soln.py: -------------------------------------------------------------------------------- 1 | from theano import Op, Apply 2 | from theano.tensor import as_tensor_variable 3 | 4 | class ScalMulC(Op): 5 | __props__ = ('scal',) 6 | 7 | def __init__(self, scal): 8 | if not isinstance(scal, int): 9 | raise TypeError('expected an int') 10 | self.scal = scal 11 | 12 | def make_node(self, x): 13 | x = as_tensor_variable(x) 14 | if x.ndim != 1: 15 | raise TypeError("ScalMulC only works on 1D") 16 | return Apply(self, [x], [x.type()]) 17 | 18 | def c_code(self, node, name, input_names, 19 | output_names, sub): 20 | return """ 21 | Py_XDECREF(%(out)s); 22 | %(out)s = (PyArrayObject *)PyArray_NewLikeArray( 23 | %(inp)s, NPY_ANYORDER, NULL, 0); 24 | if (%(out)s == NULL) { 25 | %(fail)s 26 | } 27 | for (npy_intp i = 0; i < PyArray_DIM(%(inp)s, 0); i++) { 28 | *(dtype_%(out)s *)PyArray_GETPTR1(%(out)s, i) = 29 | (*(dtype_%(inp)s *)PyArray_GETPTR1(%(inp)s, i)) * %(scal)d; 30 | } 31 | """ % dict(inp=input_names[0], out=output_names[0], 32 | fail=sub["fail"], scal=self.scal) 33 | 34 | def infer_shape(self, node, input_shapes): 35 | return input_shapes 36 | 37 | def grad(self, inputs, output_grads): 38 | return [output_grads[0] * self.scal] 39 | -------------------------------------------------------------------------------- /09_opt/01_opt.py: -------------------------------------------------------------------------------- 1 | from scalmulop import ScalMulV1 2 | from doubleop import DoubleOp 3 | 4 | from theano.gof import local_optimizer 5 | 6 | from theano.tensor.opt import register_specialize 7 | 8 | @register_specialize 9 | @local_optimizer([ScalMulV1]) 10 | def local_scalmul_double_v1(node): 11 | if not (isinstance(node.op, ScalMulV1) 12 | and node.op.scal == 2): 13 | return False 14 | 15 | return [DoubleOp()(node.inputs[0])] 16 | 17 | from theano.gof.opt import OpSub 18 | 19 | local_scalmul_double_v2 = OpSub(ScalMulV1(2), DoubleOp()) 20 | 21 | register_specialize(local_scalmul_double_v2, 22 | name='local_scalmul_double_v2') 23 | -------------------------------------------------------------------------------- /09_opt/01_opt_soln.py: -------------------------------------------------------------------------------- 1 | from doubleop import DoubleOp 2 | from doublec import DoubleC 3 | 4 | from theano.gof import local_optimizer 5 | 6 | from theano.tensor.opt import register_specialize 7 | 8 | @register_specialize 9 | @local_optimizer([DoubleOp]) 10 | def local_scalmul_double_v1(node): 11 | if not (isinstance(node.op, DoubleOp) 12 | and node.inputs[0].ndim == 1): 13 | return False 14 | 15 | return [DoubleC()(node.inputs[0])] 16 | 
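17 | # A quick check that this rewrite fired (added sketch, not part of the 18 | # original repo; assumes doubleop.py and doublec.py are on the import path 19 | # and this module has been imported): 20 | # import theano, theano.tensor as T 21 | # x = T.vector() 22 | # f = theano.function([x], DoubleOp()(x)) 23 | # assert any(isinstance(n.op, DoubleC) 24 | # for n in f.maker.fgraph.toposort())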
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Ian Goodfellow 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, this 11 | list of conditions and the following disclaimer in the documentation and/or 12 | other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 18 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: presentation.pdf 2 | 3 | clean: 4 | rm -f *~ *.toc *.vrb *.out *.nav *.snm *.aux *.log 5 | 6 | presentation.pdf: presentation.tex 7 | pdflatex presentation 8 | pdflatex presentation 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ccw_tutorial_theano 2 | =================== 3 | 4 | This repo contains two theano tutorials. 5 | The first one covers the basics of running and debugging theano code. 6 | The second one covers extending theano in python and C. 7 | 8 | Basic tutorial 9 | -------------- 10 | 11 | This tutorial covers: 12 | 13 | * Overview of library (3 min) 14 | * Building expressions (30 min) 15 | * Compiling and running expressions (30 min) 16 | * Modifying expressions (25 min) 17 | * Debugging (30 min) 18 | * Citing Theano (2 min) 19 | 20 | In order to follow this tutorial you will need the ipython-notebook 21 | python package on your computer and a clone of this repo to get the 22 | notebook with exercises. 23 | 24 | The following commands should perform the correct installation on most 25 | unix-like machines: 26 | 27 | pip install ipython-notebook 28 | git clone https://github.com/abergeron/ccw_tutorial_theano.git 29 | cd ccw_tutorial_theano/ipnb 30 | ipython notebook Theano-basic.ipynb 31 | 32 | This should open your browser to the notebook page. 
33 | 34 | Advanced tutorial 35 | ----------------- 36 | 37 | COMING SOON 38 | -------------------------------------------------------------------------------- /advanced.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abergeron/ccw_tutorial_theano/f92aa8edbb567c9ac09149a382858f841a4a7749/advanced.pdf -------------------------------------------------------------------------------- /advanced.tex: -------------------------------------------------------------------------------- 1 | \documentclass[utf8x]{beamer} 2 | 3 | \usepackage[utf8x]{inputenc} 4 | \usepackage[OT1]{fontenc} 5 | \usepackage{graphicx} 6 | \usepackage{listings} 7 | \usepackage{hyperref} 8 | \usepackage{xcolor} 9 | 10 | \usetheme{Malmoe} 11 | \usecolortheme{beaver} 12 | 13 | \lstloadlanguages{Python,C,sh} 14 | 15 | \definecolor{darkgreen}{RGB}{0,93,21} 16 | \definecolor{greenblue}{RGB}{40,110,126} 17 | \definecolor{lightgray}{RGB}{246,246,246} 18 | \definecolor{bordergray}{RGB}{193,193,193} 19 | \definecolor{lightblue}{RGB}{0,114,168} 20 | \definecolor{methblue}{RGB}{0,31,108} 21 | 22 | 23 | \title{Extending Theano} 24 | \author{Arnaud Bergeron} 25 | \date{\today} 26 | 27 | \lstset{ 28 | language=Python, 29 | basicstyle=\fontfamily{pcr}\selectfont\footnotesize, 30 | keywordstyle=\color{darkgreen}\bfseries, 31 | commentstyle=\color{greenblue}\itshape, 32 | stringstyle=\color{violet}, 33 | showstringspaces=false, 34 | tabsize=4, 35 | backgroundcolor=\color{lightgray}, 36 | frame=single, 37 | emph={[2]__init__,make_node,perform,infer_shape,c_code,make_thunk,grad,R_op},emphstyle={[2]\color{methblue}}, 38 | emph={[3]self},emphstyle={[3]\color{darkgreen}}, 39 | moredelim=**[is][{\color{red}}]{`}{`} 40 | } 41 | 42 | \newcommand{\code}[1]{\lstinline[emph={[2]}]|#1|} 43 | 44 | \begin{document} 45 | 46 | \frame[plain]{\titlepage} 47 | 48 | \section*{} 49 | 50 | \begin{frame}{Outline} 51 | \begin{enumerate} 52 | \item How to Make an Op (Python) (45 min) 53 | \item How to Make an Op (C) (30 min) 54 | \item Op Params (10 min) 55 | \item Optimizations (20 min) 56 | \end{enumerate} 57 | \end{frame} 58 | 59 | \section{How to Make an Op (Python)} 60 | 61 | \begin{frame}[plain]{} 62 | \begin{center} 63 | \Huge How to Make an Op (Python) 64 | \end{center} 65 | \end{frame} 66 | 67 | \begin{frame}[fragile]{Overview} 68 | \lstinputlisting[lastline=14]{python.py} 69 | \end{frame} 70 | 71 | \begin{frame}{\code{__init__}} 72 | \lstinputlisting[firstline=6,lastline=8]{python.py} 73 | \begin{itemize} 74 | \item Optional, a lot of Ops don't have one 75 | \item Serves to set up Op-level parameters 76 | \item Should also perform validation on those parameters 77 | \end{itemize} 78 | \end{frame} 79 | 80 | \begin{frame}{\code{__props__}} 81 | \lstinputlisting[firstline=4,lastline=5]{python.py} 82 | \begin{itemize} 83 | \item Optional (although very useful) 84 | \item Generates \code{__hash__}, \code{__eq__} and \code{__str__} methods if present 85 | \item Empty tuple signifies no properties that should take part in comparison 86 | \item If you have only one property, make sure you add a final comma: \code{('property',)} 87 | \end{itemize} 88 | \end{frame} 89 | 90 | \begin{frame}{\code{make_node}} 91 | \lstinputlisting[firstline=9,lastline=11]{python.py} 92 | \begin{itemize} 93 | \item This creates the node object that represents our computation in the graph 94 | \item The parameters are usually Theano variables, but can be python objects too 95 | \item The return value must be an 
\code{Apply} instance 96 | \end{itemize} 97 | \end{frame} 98 | 99 | \begin{frame}{What Is an Apply Node?} 100 | \begin{center} 101 | \includegraphics[width=\textwidth]{apply_node} 102 | \end{center} 103 | \end{frame} 104 | 105 | \begin{frame}{\code{perform}} 106 | \lstinputlisting[firstline=12,lastline=14]{python.py} 107 | \begin{itemize} 108 | \item This performs the computation on a set of values (hence the method name) 109 | \item The parameters are all python objects (not symbolic values) 110 | \item This method must not return its result, but rather store it in the 1-element lists (or cells) provided in \code{output_storage} 111 | \item The output storage may contain a pre-existing value from a previous run that may be reused for storage. 112 | \end{itemize} 113 | \end{frame} 114 | 115 | \begin{frame}{DoubleOp} 116 | \lstinputlisting[lastline=15]{doubleop.py} 117 | \end{frame} 118 | 119 | \begin{frame}{Op Instances and Nodes} 120 | When you call an op class you get an instance of that Op: 121 | \vskip4mm 122 | \hskip3em\code{double_op = DoubleOp()} 123 | \vskip4mm 124 | But when you want to use that op as a node in a graph you need to call the \textit{instance}: 125 | \vskip4mm 126 | \hskip3em\code{node = double_op(x)} 127 | \vskip4mm 128 | You can do both steps at once with a double call like this: 129 | \vskip4mm 130 | \hskip3em\code{node = DoubleOp()(x)} 131 | \end{frame} 132 | 133 | \begin{frame}{Basic Tests} 134 | \lstinputlisting[linerange={1-5,8-18}]{test_doubleop.py} 135 | \end{frame} 136 | 137 | \begin{frame}[fragile]{Run Tests} 138 | The simplest way to run your tests is to use \texttt{nosetests} directly on your test file like this: 139 | 140 | \begin{lstlisting}[language={},backgroundcolor=\color{white},frame={}] 141 | $ nosetests test_doubleop.py 142 | . 143 | ------------------------------------------------------ 144 | Ran 1 test in 0.427s 145 | 146 | OK 147 | \end{lstlisting} 148 | 149 | You can also use \texttt{theano-nose} which is a wrapper around \texttt{nosetests} with some extra options. 150 | \end{frame} 151 | 152 | \begin{frame}{\code{infer_shape}} 153 | \lstinputlisting[firstline=15,lastline=17]{python.py} 154 | \begin{itemize} 155 | \item This function is optional, although highly recommended 156 | \item It takes as input the symbolic shapes of the input variables 157 | \item \code{input_shapes} is of the form \code{[[i0_shp0, i0_shp1, ...], ...]} 158 | \item It must return a list with the symbolic shape of the output variables 159 | \end{itemize} 160 | \end{frame} 161 | 162 | \begin{frame}{Example} 163 | \lstinputlisting[firstline=16,lastline=18]{doubleop.py} 164 | \begin{itemize} 165 | \item Here the code is really simple since we don't change the shape in any way in our Op 166 | \item \code{input_shapes} would be an expression equivalent to \code{[x.shape]} 167 | \end{itemize} 168 | \end{frame} 169 | 170 | \begin{frame}{Tests} 171 | \lstinputlisting[linerange={5-5,20-34}]{test_doubleop.py} 172 | \end{frame} 173 | 174 | \begin{frame}{Gradient} 175 | \lstinputlisting[firstline=18,lastline=20]{python.py} 176 | \begin{itemize} 177 | \item This function is required for graphs including your op to work with \code{theano.grad()} 178 | \item Each item you return represents the gradient with respect to that input, computed from the gradient with respect to the outputs (which you get in \code{output_grads}). 
179 | \item It must return a list of symbolic graphs for each of your inputs 180 | \item Inputs that have no valid gradient should have a special \code{DisconnectedType} value 181 | \end{itemize} 182 | \end{frame} 183 | 184 | \begin{frame}{Example} 185 | \lstinputlisting[firstline=19,lastline=21]{doubleop.py} 186 | \begin{itemize} 187 | \item Here since the operation is simple the gradient is simple 188 | \item Note that we return a list 189 | \end{itemize} 190 | \end{frame} 191 | 192 | \begin{frame}{Tests} 193 | To test the gradient we use \code{verify_grad}: 194 | \lstinputlisting[linerange={5-5,36-44}]{test_doubleop.py} 195 | It will compute the gradient numerically and symbolically (using our \code{L_op()} method) and compare the two. 196 | \end{frame} 197 | 198 | \section{How to Make an Op (C)} 199 | 200 | \begin{frame}[plain]{} 201 | \begin{center} 202 | \Huge How to Make an Op (C) 203 | \end{center} 204 | \end{frame} 205 | 206 | \begin{frame}{Overview} 207 | \lstinputlisting{c.py} 208 | \end{frame} 209 | 210 | \begin{frame}{\code{c_code}} 211 | \lstinputlisting[linerange={9-11}]{c.py} 212 | \begin{itemize} 213 | \item This method returns a python string containing C code 214 | \item \code{input_names} contains the variable names where the inputs are 215 | \item \code{output_names} contains the variable names in which to place the outputs 216 | \item \code{sub} contains some code snippets to insert into our code (mostly to indicate failure) 217 | \item The variables in \code{output_names} may contain a reference to a pre-existing value from a previous run that may be reused for storage. 218 | \end{itemize} 219 | \end{frame} 220 | 221 | \begin{frame}{Support Code} 222 | \lstinputlisting[linerange={13-14}]{c.py} 223 | \begin{itemize} 224 | \item This method returns a python string containing C code 225 | \item The code may be shared with multiple instances of the op 226 | \item It can contain things like helper functions 227 | \end{itemize} 228 | There are a number of similar methods to insert code at various points. 229 | \end{frame} 230 | 231 | \begin{frame}{Headers, Libraries, Compilers} 232 | Some of the methods available to customize the compilation environment: 233 | \begin{description} 234 | \item[\texttt{c\_libraries}] Return a list of shared libraries the op needs 235 | \item[\texttt{c\_headers}] Return a list of included headers the op needs 236 | \item[\texttt{c\_compiler}] C compiler to use (if not the default) 237 | \end{description} 238 | Again others are available. Refer to the documentation for a complete list. 239 | \end{frame} 240 | 241 | \begin{frame}{Python C-API} 242 | \begin{description} 243 | \item[\texttt{void Py\_INCREF(PyObject *o)}] Increase the reference count of a python object. 244 | \item[\texttt{void Py\_DECREF(PyObject *o)}] Decrease the reference count of a python object. 245 | \item[\texttt{void Py\_XINCREF(PyObject *o)}] Increase the reference count of a (potentially NULL) python object. 246 | \item[\texttt{void Py\_XDECREF(PyObject *o)}] Decrease the reference count of a (potentially NULL) python object. 247 | \end{description} 248 | \end{frame} 249 | 250 | \begin{frame}{Numpy C-API} 251 | \begin{description} 252 | \item[\texttt{int PyArray\_NDIM(PyArrayObject *a)}] Get the number of dimensions of an array. 253 | \item[\texttt{npy\_intp *PyArray\_DIMS(PyArrayObject *a)}] Get the shape of an array. 254 | \item[\texttt{npy\_intp *PyArray\_STRIDES(PyArrayObject *a)}] Get the strides of an array. 
255 | \item[\texttt{void * PyArray\_DATA(PyArrayObject *a)}] Get the data pointer (pointer to element 0) of an array. 256 | \end{description} 257 | \end{frame} 258 | 259 | \begin{frame}[allowframebreaks]{Example} 260 | \vskip5mm 261 | This is the C code equivalent to \code{perform} 262 | \vskip4mm 263 | \lstinputlisting[linerange={1-27}]{doublec.py} 264 | \end{frame} 265 | 266 | \begin{frame}{COp} 267 | \lstinputlisting{cop.py} 268 | \end{frame} 269 | 270 | \begin{frame}{Constructor Arguments} 271 | \begin{itemize} 272 | \item Basically you just pass arguments to the constructor of COp 273 | \begin{itemize} 274 | \item Either by calling the constructor directly \code{COp.__init__(self, ...)} 275 | \item Or via the superclass \code{super(MyOp, self).__init__(...)} 276 | \end{itemize} 277 | \item The arguments are: 278 | \begin{itemize} 279 | \item a list of file names with code sections (relative to the location of the op class) 280 | \item the name of a function to call to make the computation (optional) 281 | \end{itemize} 282 | \end{itemize} 283 | \end{frame} 284 | 285 | \begin{frame}{COp: Example} 286 | \only<1>{\lstinputlisting[linerange={1-16}]{doublecop.py}} 287 | \only<2>{\lstinputlisting[language=C]{doublecop.c}} 288 | \end{frame} 289 | 290 | \begin{frame}{Tests} 291 | \begin{itemize} 292 | \item Testing ops with C code is done the same way as testing for python ops 293 | \item One thing to watch for is tests for ops which don't have python code 294 | \begin{itemize} 295 | \item You should skip the test in those cases 296 | \item Test for \code{theano.config.gxx == ""} 297 | \end{itemize} 298 | \item Using DebugMode will compare the output of the Python version to the output of the C version and raise an error if they don't match 299 | \end{itemize} 300 | \end{frame} 301 | 302 | \begin{frame}{Gradient and Other Concerns} 303 | \begin{itemize} 304 | \item The code for \code{grad()} and \code{infer_shape()} is done the same way as for a python Op 305 | \item In fact you can have the same Op with a python and a C version sharing the \code{grad()} and \code{infer_shape()} code 306 | \begin{itemize} 307 | \item That's how most Ops are implemented 308 | \end{itemize} 309 | \end{itemize} 310 | \end{frame} 311 | 312 | \section{Op Params} 313 | 314 | \begin{frame}[plain]{} 315 | \begin{center} 316 | \Huge Op Params 317 | \end{center} 318 | \end{frame} 319 | 320 | \begin{frame}{Purpose} 321 | \begin{itemize} 322 | \item Used to pass information to the C code 323 | \item Can reduce the amount of compiled C code 324 | \item Required for things that can change from one script run to the other. 325 | \end{itemize} 326 | \end{frame} 327 | 328 | \begin{frame}{Usage} 329 | \lstinputlisting{params.py} 330 | \end{frame} 331 | 332 | \section{GPU Ops} 333 | 334 | \begin{frame}[plain]{} 335 | \begin{center} 336 | \Huge GPU Ops 337 | \end{center} 338 | \end{frame} 339 | 340 | \begin{frame}{Overview} 341 | \only<1>{\lstinputlisting[linerange=1-12]{gpu.py}} 342 | \only<2>{\lstinputlisting[linerange=14-20]{gpu.py} 343 | \begin{itemize} 344 | \item \texttt{params\_type} is new. 345 | \item \texttt{get\_params} is new. 346 | \end{itemize}} 347 | \end{frame} 348 | 349 | \begin{frame}{Context and Context Name} 350 | \begin{itemize} 351 | \item Context is what is used to refer to the chosen GPU. 352 | 353 | It is a C object that can't be serialized. 354 | \item Context Name is a name internal to Theano to refer to a given context object. It is a python string. 
355 | \item Context Names are used whenever you need a symbolic object. 356 | \end{itemize} 357 | \end{frame} 358 | 359 | \begin{frame}{Double on GPU} 360 | \only<1>{\lstinputlisting[linerange=5-21]{doublegpu.py}} 361 | \only<2>{\lstinputlisting[linerange=22-37]{doublegpu.py}} 362 | \only<3>{\lstinputlisting[linerange=39-55]{doublegpu.py}} 363 | \end{frame} 364 | 365 | \begin{frame}{GpuKernelBase} 366 | \only<1>{\lstinputlisting[linerange=6-20]{doublecgpu.py}} 367 | \only<2>{\lstinputlisting[linerange=1-10]{doublecgpu.c}} 368 | \only<3>{\lstinputlisting[linerange=12-28]{doublecgpu.c}} 369 | \end{frame} 370 | 371 | \section{Optimizations} 372 | 373 | \begin{frame}[plain]{} 374 | \begin{center} 375 | \Huge Optimizations 376 | \end{center} 377 | \end{frame} 378 | 379 | \begin{frame}{Purpose} 380 | \begin{itemize} 381 | \item End goal is to make code run faster 382 | \item Sometimes they improve stability or memory usage instead 383 | \item Most of the time you will make one to insert a new Op you wrote 384 | \end{itemize} 385 | \end{frame} 386 | 387 | \begin{frame}{Replace an Op} 388 | Here is code to use \code{DoubleOp()} instead of \code{ScalMul(2)}. 389 | \lstinputlisting[linerange={1-2,7-8,11-20}]{opt.py} 390 | \end{frame} 391 | 392 | \begin{frame}{Replace an Op for GPU} 393 | Here is code to move the Double op to GPU. 394 | \lstinputlisting[linerange={1-5,9-10,22-30}]{opt.py} 395 | \end{frame} 396 | 397 | \begin{frame}{Tests} 398 | \lstinputlisting{test_opt.py} 399 | \end{frame} 400 | 401 | \begin{frame}{Exercise} 402 | \begin{itemize} 403 | \item Implement a ScalMulOp that multiplies its input by an arbitrary scalar value. Start with a python implementation. 404 | \item Add C code to your implementation. 405 | \item Create a GPU version of your op. 406 | \item Create an optimization that replaces the CPU version with a GPU version when appropriate. 407 | \end{itemize} 408 | Clone the repo at \url{https://github.com/abergeron/ccw_tutorial_theano.git}. 
409 | \end{frame} 410 | 411 | \end{document} 412 | -------------------------------------------------------------------------------- /apply_node.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abergeron/ccw_tutorial_theano/f92aa8edbb567c9ac09149a382858f841a4a7749/apply_node.png -------------------------------------------------------------------------------- /c.py: -------------------------------------------------------------------------------- 1 | from theano import Op 2 | 3 | class MyOp(Op): 4 | __props__ = () 5 | 6 | def make_node(self, ...): 7 | # return apply node 8 | 9 | def c_code(self, node, name, input_names, 10 | output_names, sub): 11 | # return C code string 12 | 13 | def c_support_code(self): 14 | # return C code string 15 | 16 | def c_code_cache_version(self): 17 | # return hashable object 18 | -------------------------------------------------------------------------------- /cop.py: -------------------------------------------------------------------------------- 1 | from theano.gof import COp 2 | 3 | class MyOp(COp): 4 | __props__ = () 5 | 6 | def __init__(self, ...): 7 | COp.__init__(self, c_files, func_name) 8 | # Other init code if needed 9 | 10 | def make_node(self, ...): 11 | # make the Apply node 12 | -------------------------------------------------------------------------------- /doublec.py: -------------------------------------------------------------------------------- 1 | from theano import Op, Apply 2 | from theano.tensor import as_tensor_variable 3 | 4 | class DoubleC(Op): 5 | __props__ = () 6 | 7 | def make_node(self, x): 8 | x = as_tensor_variable(x) 9 | if x.ndim != 1: 10 | raise TypeError("DoubleC only works on 1D") 11 | return Apply(self, [x], [x.type()]) 12 | 13 | def c_code(self, node, name, input_names, 14 | output_names, sub): 15 | return """ 16 | Py_XDECREF(%(out)s); 17 | %(out)s = (PyArrayObject *)PyArray_NewLikeArray( 18 | %(inp)s, NPY_ANYORDER, NULL, 0); 19 | if (%(out)s == NULL) { 20 | %(fail)s 21 | } 22 | for (npy_intp i = 0; i < PyArray_DIM(%(inp)s, 0); i++) { 23 | *(dtype_%(out)s *)PyArray_GETPTR1(%(out)s, i) = 24 | (*(dtype_%(inp)s *)PyArray_GETPTR1(%(inp)s, i)) * 2; 25 | } 26 | """ % dict(inp=input_names[0], out=output_names[0], 27 | fail=sub["fail"]) 28 | 29 | def infer_shape(self, node, input_shapes): 30 | return input_shapes 31 | 32 | def grad(self, inputs, output_grads): 33 | return [output_grads[0] * 2] 34 | -------------------------------------------------------------------------------- /doublecgpu.c: -------------------------------------------------------------------------------- 1 | #section kernels 2 | #kernel doublek : *, *, size : 3 | 4 | KERNEL void doublek(GLOBAL_MEM DTYPE_o0 *out, 5 | GLOBAL_MEM DTYPE_i0 *a, 6 | ga_size n) { 7 | for (ga_size i = LID_0; i < n; i += LDIM_0) { 8 | out[i] = 2 * a[i]; 9 | } 10 | } 11 | 12 | #section support_code_struct 13 | int double_fn(PyGpuArrayObject *inp, 14 | PyGpuArrayObject **out, 15 | PyGpuContextObject *ctx) { 16 | size_t n = 1; 17 | Py_XDECREF(*out); 18 | *out = pygpu_empty(PyGpuArray_NDIM(inp), 19 | PyGpuArray_DIMS(inp), 20 | GA_C_ORDER, ctx, Py_None); 21 | if (*out == NULL) return -1; 22 | for (unsigned int i = 0; i < inp->ga.nd; i++) 23 | n *= PyGpuArray_DIM(inp, i); 24 | if (doublek_scall(1, &n, 0, *out, inp, n)) { 25 | PyErr_SetString(PyExc_RuntimeError, 26 | "Error calling kernel"); 27 | return -1; 28 | } 29 | return 0; 30 | } 31 | -------------------------------------------------------------------------------- /doublecgpu.py: 
-------------------------------------------------------------------------------- 1 | from theano import Apply 2 | from theano.gpuarray.basic_ops import (as_gpuarray_variable, 3 | infer_context_name, CGpuKernelBase) 4 | 5 | 6 | class DoubleCGpu(CGpuKernelBase): 7 | __props__ = () 8 | 9 | def __init__(self): 10 | CGpuKernelBase.__init__(self, ["doublecgpu.c"], 11 | "double_fn") 12 | 13 | def make_node(self, x): 14 | ctx_name = infer_context_name(x) 15 | x = as_gpuarray_variable(x, ctx_name) 16 | return Apply(self, [x], [x.type()]) 17 | 18 | def get_params(self, node): 19 | return node.outputs[0].type.context 20 | 21 | def infer_shape(self, node, input_shapes): 22 | return input_shapes 23 | 24 | def grad(self, inputs, output_grads): 25 | return [output_grads[0] * 2] 26 | -------------------------------------------------------------------------------- /doublecop.c: -------------------------------------------------------------------------------- 1 | #section support_code 2 | 3 | int APPLY_SPECIFIC(doublecop)(PyArrayObject *x, 4 | PyArrayObject **out) { 5 | Py_XDECREF(*out); 6 | *out = (PyArrayObject *)PyArray_NewLikeArray( 7 | x, NPY_ANYORDER, NULL, 0); 8 | if (*out == NULL) 9 | return -1; 10 | 11 | for (npy_intp i = 0; i < PyArray_DIM(x, 0); i++) { 12 | *(DTYPE_OUTPUT_0 *)PyArray_GETPTR1(*out, i) = 13 | (*(DTYPE_INPUT_0 *)PyArray_GETPTR1(x, i)) * 2; 14 | } 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /doublecop.py: -------------------------------------------------------------------------------- 1 | from theano import Apply 2 | from theano.gof import COp 3 | from theano.tensor import as_tensor_variable 4 | 5 | class DoubleCOp(COp): 6 | __props__ = () 7 | 8 | def __init__(self): 9 | COp.__init__(self, ["doublecop.c"], 10 | "APPLY_SPECIFIC(doublecop)") 11 | 12 | def make_node(self, x): 13 | x = as_tensor_variable(x) 14 | if x.ndim != 1: 15 | raise TypeError("DoubleCOp only works with 1D") 16 | return Apply(self, [x], [x.type()]) 17 | 18 | def infer_shape(self, node, input_shapes): 19 | return input_shapes 20 | 21 | def grad(self, inputs, g): 22 | return [g[0] * 2] 23 | 
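24 | # Usage sketch (added; assumes Theano is configured with a working C 25 | # compiler, since DoubleCOp has no Python perform()): 26 | # import numpy as np, theano, theano.tensor as T 27 | # x = T.vector() 28 | # f = theano.function([x], DoubleCOp()(x)) 29 | # print f(np.arange(3, dtype=x.dtype)) # -> [ 0. 2. 4.]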
44 |                       PyGpuArray_DIMS(%(inp)s),
45 |                       GA_C_ORDER, %(ctx)s, Py_None);
46 | if (%(out)s == NULL) %(fail)s
47 | for (unsigned int i = 0; i < %(inp)s->ga.nd; i++)
48 |     n *= PyGpuArray_DIM(%(inp)s, i);
49 | if (doublek_scall(1, &n, 0, %(out)s, %(inp)s, n)) {
50 |     PyErr_SetString(PyExc_RuntimeError,
51 |                     "Error calling kernel");
52 |     %(fail)s;
53 | }
54 | """ % dict(inp=inn[0], out=outn[0], fail=sub["fail"],
55 |            ctx=sub["params"])
56 | 
57 |     def infer_shape(self, node, input_shapes):
58 |         return input_shapes
59 | 
60 |     def grad(self, inputs, output_grads):
61 |         return [output_grads[0] * 2]
--------------------------------------------------------------------------------
/doubleop.py:
--------------------------------------------------------------------------------
1 | from theano import Op, Apply
2 | from theano.tensor import as_tensor_variable
3 | 
4 | class DoubleOp(Op):
5 |     __props__ = ()
6 | 
7 |     def make_node(self, x):
8 |         x = as_tensor_variable(x)
9 |         return Apply(self, [x], [x.type()])
10 | 
11 |     def perform(self, node, inputs, output_storage):
12 |         x = inputs[0]
13 |         z = output_storage[0]
14 |         z[0] = x * 2
15 | 
16 |     def infer_shape(self, node, input_shapes):
17 |         return input_shapes
18 | 
19 |     def L_op(self, inputs, outputs, output_grads):
20 |         return [output_grads[0] * 2]
21 | 
22 |     def R_op(self, inputs, eval_points):
23 |         if eval_points[0] is None:
24 |             return eval_points
25 |         return self.grad(inputs, eval_points)
--------------------------------------------------------------------------------
/gpu.py:
--------------------------------------------------------------------------------
1 | from theano import Op
2 | from theano.gpuarray.type import gpu_context_type
3 | 
4 | class GpuOp(Op):
5 |     __props__ = ()
6 |     params_type = gpu_context_type
7 | 
8 |     def make_node(self, *inputs):
9 |         # build and return the Apply node
10 |         raise NotImplementedError
11 | 
12 |     def get_params(self, node):
13 |         return node.outputs[0].type.context
14 | 
15 |     def perform(self, node, inputs, output_storage):
16 |         # python code
17 |         raise NotImplementedError
18 | 
19 |     def c_code(self, node, name, input_names,
20 |                output_names, sub):
21 |         # return C code string
22 |         raise NotImplementedError
--------------------------------------------------------------------------------
/ipnb/01_scalar_soln.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from theano import function
3 | import theano.tensor as T
4 | 
5 | 
6 | def make_scalar():
7 |     """
8 |     Returns a new Theano scalar.
9 |     """
10 | 
11 |     return T.scalar()
12 | 
13 | 
14 | def log(x):
15 |     """
16 |     Returns the logarithm of a Theano scalar x.
17 |     """
18 | 
19 |     return T.log(x)
20 | 
21 | 
22 | def add(x, y):
23 |     """
24 |     Adds two theano scalars together and returns the result.
25 |     """
26 | 
27 |     return x + y
28 | 
29 | a = make_scalar()
30 | b = make_scalar()
31 | c = log(b)
32 | d = add(a, c)
33 | f = function([a, b], d)
34 | a = np.cast[a.dtype](1.)
35 | b = np.cast[b.dtype](2.)
36 | actual = f(a, b)
37 | expected = 1. + np.log(2.)
38 | assert np.allclose(actual, expected)
39 | print "SUCCESS!"
40 | 
--------------------------------------------------------------------------------
/ipnb/02_vector_mat_soln.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from theano import function
3 | import theano.tensor as T
4 | 
5 | 
6 | def make_vector():
7 |     """
8 |     Returns a new Theano vector.
9 |     """
10 | 
11 |     return T.vector()
12 | 
13 | 
14 | def make_matrix():
15 |     """
16 |     Returns a new Theano matrix.
17 |     """
18 | 
19 |     return T.matrix()
20 | 
21 | 
22 | def elemwise_mul(a, b):
23 |     """
24 |     a: A theano matrix
25 |     b: A theano matrix
26 |     Returns the elementwise product of a and b
27 |     """
28 | 
29 |     return a * b
30 | 
31 | 
32 | def matrix_vector_mul(a, b):
33 |     """
34 |     a: A theano matrix
35 |     b: A theano vector
36 |     Returns the matrix-vector product of a and b
37 |     """
38 | 
39 |     return T.dot(a, b)
40 | 
41 | a = make_vector()
42 | b = make_vector()
43 | c = elemwise_mul(a, b)
44 | d = make_matrix()
45 | e = matrix_vector_mul(d, c)
46 | 
47 | f = function([a, b, d], e)
48 | 
49 | rng = np.random.RandomState([1, 2, 3])
50 | a_value = rng.randn(5).astype(a.dtype)
51 | b_value = rng.rand(5).astype(b.dtype)
52 | c_value = a_value * b_value
53 | d_value = rng.randn(5, 5).astype(d.dtype)
54 | expected = np.dot(d_value, c_value)
55 | 
56 | actual = f(a_value, b_value, d_value)
57 | 
58 | assert np.allclose(actual, expected)
59 | print "SUCCESS!"
60 | 
--------------------------------------------------------------------------------
/ipnb/03_tensor_soln.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from theano import function
3 | import theano.tensor as T
4 | 
5 | 
6 | def make_tensor(dim):
7 |     """
8 |     Returns a new Theano tensor with no broadcastable dimensions.
9 |     dim: the total number of dimensions of the tensor.
10 |     """
11 | 
12 |     return T.TensorType(broadcastable=tuple([False] * dim), dtype='float32')()
13 | 
14 | 
15 | def broadcasted_add(a, b):
16 |     """
17 |     a: a 3D theano tensor
18 |     b: a 4D theano tensor
19 |     Returns c, a 4D theano tensor, where
20 | 
21 |     c[i, j, k, l] = a[l, k, i] + b[i, j, k, l]
22 | 
23 |     for all i, j, k, l
24 |     """
25 | 
26 |     return a.dimshuffle(2, 'x', 1, 0) + b
27 | 
28 | 
29 | def partial_max(a):
30 |     """
31 |     a: a 4D theano tensor
32 | 
33 |     Returns b, a theano matrix, where
34 | 
35 |     b[i, j] = max_{k,l} a[i, k, l, j]
36 | 
37 |     for all i, j
38 |     """
39 | 
40 |     return a.max(axis=(1, 2))
41 | 
42 | a = make_tensor(3)
43 | b = make_tensor(4)
44 | c = broadcasted_add(a, b)
45 | d = partial_max(c)
46 | 
47 | f = function([a, b], d)
48 | 
49 | rng = np.random.RandomState([1, 2, 3])
50 | a_value = rng.randn(2, 2, 2).astype(a.dtype)
51 | b_value = rng.rand(2, 2, 2, 2).astype(b.dtype)
52 | c_value = np.transpose(a_value, (2, 1, 0))[:, None, :, :] + b_value
53 | expected = c_value.max(axis=1).max(axis=1)
54 | 
55 | actual = f(a_value, b_value)
56 | 
57 | assert np.allclose(actual, expected), (actual, expected)
58 | print "SUCCESS!"
59 | 
--------------------------------------------------------------------------------
/ipnb/11_function_soln.py:
--------------------------------------------------------------------------------
1 | from theano import tensor as T
2 | from theano import function
3 | 
4 | 
5 | def evaluate(x, y, expr, x_value, y_value):
6 |     """
7 |     x: A theano variable
8 |     y: A theano variable
9 |     expr: A theano expression involving x and y
10 |     x_value: A numpy value
11 |     y_value: A numpy value
12 | 
13 |     Returns the value of expr when x_value is substituted for x
14 |     and y_value is substituted for y
15 |     """
16 | 
17 |     return function([x, y], expr)(x_value, y_value)
18 | 
19 | 
20 | x = T.iscalar()
21 | y = T.iscalar()
22 | z = x + y
23 | assert evaluate(x, y, z, 1, 2) == 3
24 | print "SUCCESS!"
25 | 
--------------------------------------------------------------------------------
/ipnb/12_shared_soln.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from theano.compat.python2x import OrderedDict
3 | from theano import function
4 | from theano import shared
5 | 
6 | 
7 | def make_shared(shape):
8 |     """
9 |     Returns a theano shared variable containing a tensor of the specified
10 |     shape.
11 |     You can use any value you want.
12 |     """
13 |     return shared(np.zeros(shape))
14 | 
15 | 
16 | def exchange_shared(a, b):
17 |     """
18 |     a: a theano shared variable
19 |     b: a theano shared variable
20 |     Uses get_value and set_value to swap the values stored in a and b
21 |     """
22 |     temp = a.get_value()
23 |     a.set_value(b.get_value())
24 |     b.set_value(temp)
25 | 
26 | 
27 | def make_exchange_func(a, b):
28 |     """
29 |     a: a theano shared variable
30 |     b: a theano shared variable
31 |     Returns f
32 |     where f is a theano function, that, when called, swaps the
33 |     values in a and b
34 |     f should not return anything
35 |     """
36 | 
37 |     updates = OrderedDict()
38 |     updates[a] = b
39 |     updates[b] = a
40 |     f = function([], updates=updates)
41 |     return f
42 | 
43 | 
44 | a = make_shared((5, 4, 3))
45 | assert a.get_value().shape == (5, 4, 3)
46 | b = make_shared((5, 4, 3))
47 | assert a.get_value().shape == (5, 4, 3)
48 | a.set_value(np.zeros((5, 4, 3), dtype=a.dtype))
49 | b.set_value(np.ones((5, 4, 3), dtype=b.dtype))
50 | exchange_shared(a, b)
51 | assert np.all(a.get_value() == 1.)
52 | assert np.all(b.get_value() == 0.)
53 | f = make_exchange_func(a, b)
54 | rval = f()
55 | assert isinstance(rval, list)
56 | assert len(rval) == 0
57 | assert np.all(a.get_value() == 0.)
58 | assert np.all(b.get_value() == 1.)
59 | 
60 | print "SUCCESS!"
61 | 
--------------------------------------------------------------------------------
/ipnb/13_bug_soln.py:
--------------------------------------------------------------------------------
1 | # The weird thing is that the function succeeds.
2 | #
3 | # This is weird because the two values passed in for x and y do not
4 | # have the same shape, yet x is added with something that has the same
5 | # shape as y (z).
6 | #
7 | # This happens because optimizations realize that z is always zero and
8 | # therefore remove the addition, which removes the error.
9 | #
10 | # The problem is more evident if FAST_COMPILE or DEBUG_MODE is used.
11 | 
--------------------------------------------------------------------------------
/ipnb/21_grad_soln.py:
--------------------------------------------------------------------------------
1 | # Fill in the TODOs in this exercise, then run
2 | # python 01_grad.py to see if your solution works!
3 | #
4 | from theano import tensor as T
5 | 
6 | 
7 | def grad_sum(x, y, z):
8 |     """
9 |     x: A theano variable
10 |     y: A theano variable
11 |     z: A theano expression involving x and y
12 | 
13 |     Returns dz / dx + dz / dy
14 |     """
15 | 
16 |     return sum(T.grad(z, [x, y]))
17 | 
18 | x = T.scalar()
19 | y = T.scalar()
20 | z = x + y
21 | s = grad_sum(x, y, z)
22 | assert s.eval({x: 0, y: 0}) == 2
23 | print "SUCCESS!"
24 | 
--------------------------------------------------------------------------------
/ipnb/22_traverse_soln.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from theano.gof import Variable
3 | from theano import tensor as T
4 | 
5 | 
6 | def arg_to_softmax(prob):
7 |     """
8 |     Oh no! Someone has passed you the probability output,
9 |     "prob", of a softmax function, and you want the unnormalized
10 |     log probability--the argument to the softmax.
11 | 
12 |     Verify that prob really is the output of a softmax. Raise a
13 |     TypeError if it is not.
14 | 
15 |     If it is, return the argument to the softmax.
16 |     """
17 | 
18 |     if not isinstance(prob, Variable):
19 |         raise TypeError()
20 | 
21 |     if prob.owner is None:
22 |         raise TypeError()
23 | 
24 |     owner = prob.owner
25 | 
26 |     if not isinstance(owner.op, T.nnet.Softmax):
27 |         raise TypeError()
28 | 
29 |     rval, = owner.inputs
30 | 
31 |     return rval
32 | 
33 | if __name__ == "__main__":
34 |     x = np.ones((5, 4))
35 |     try:
36 |         arg_to_softmax(x)
37 |         raise Exception("You should have raised an error.")
38 |     except TypeError:
39 |         pass
40 | 
41 |     x = T.matrix()
42 |     try:
43 |         arg_to_softmax(x)
44 |         raise Exception("You should have raised an error.")
45 |     except TypeError:
46 |         pass
47 | 
48 |     y = T.nnet.sigmoid(x)
49 |     try:
50 |         arg_to_softmax(y)
51 |         raise Exception("You should have raised an error.")
52 |     except TypeError:
53 |         pass
54 | 
55 |     y = T.nnet.softmax(x)
56 |     rval = arg_to_softmax(y)
57 |     assert rval is x
58 | 
59 |     print "SUCCESS!"
60 | 
--------------------------------------------------------------------------------
/ipnb/31_debug_soln.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from theano import function
3 | from theano import tensor as T
4 | from theano import config
5 | config.compute_test_value = 'raise'
6 | a = T.vector()
7 | a.tag.test_value = np.ones((3,)).astype(a.dtype)
8 | b = T.log(a)
9 | c = T.nnet.sigmoid(b)
10 | d = T.sqrt(c)
11 | e = T.concatenate((d, c), axis=0)
12 | f = b * c * d
13 | # This is the first bad line
14 | g = e + f
15 | h = g / c
16 | fn = function([a], h)
17 | fn(np.ones((3,)).astype(a.dtype))
18 | 
--------------------------------------------------------------------------------
/ipnb/Theano-basic.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "metadata": {
3 |   "name": "",
4 |   "signature": "sha256:33e931e54d686a2ab2c44bfcdc99a4383aecf754e0c80fef2e0ada6858e9b48c"
5 |  },
6 |  "nbformat": 3,
7 |  "nbformat_minor": 0,
8 |  "worksheets": [
9 |   {
10 |    "cells": [
11 |     {
12 |      "cell_type": "markdown",
13 |      "metadata": {},
14 |      "source": [
15 |       "All the exercises on this sheet work this way:\n",
16 |       "\n",
17 |       " 1. You have a cell with TODOs that raise errors with a description of what is needed. Do that.\n",
18 |       " 2. Then run the cell (ctrl-enter) to execute it.\n",
19 |       " 3. It should print \"SUCCESS!\" at the end (there is validation code in the cell). If not, try again.\n",
20 |       " 4. If you want to see the solution, execute the cell that starts with \"%load\" after the exercise."
21 |      ]
22 |     },
23 |     {
24 |      "cell_type": "markdown",
25 |      "metadata": {},
26 |      "source": [
27 |       "## 1. Building expressions\n",
28 |       "\n",
29 |       "#### Exercise 1.1\n",
30 |       "\n",
31 |       "This exercise walks you through creating Theano variables and doing some computation with them."
32 |      ]
33 |     },
34 |     {
35 |      "cell_type": "code",
36 |      "collapsed": false,
37 |      "input": [
38 |       "import numpy as np\n",
39 |       "from theano import function\n",
40 |       "raise NotImplementedError(\"TODO: add any other imports you need\")\n",
41 |       "\n",
42 |       "\n",
43 |       "def make_scalar():\n",
44 |       "    \"\"\"\n",
45 |       "    Returns a new Theano scalar.\n",
46 |       "    \"\"\"\n",
47 |       "\n",
48 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
49 |       "\n",
50 |       "\n",
51 |       "def log(x):\n",
52 |       "    \"\"\"\n",
53 |       "    Returns the logarithm of a Theano scalar x.\n",
54 |       "    \"\"\"\n",
55 |       "\n",
56 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
57 |       "\n",
58 |       "\n",
59 |       "def add(x, y):\n",
60 |       "    \"\"\"\n",
61 |       "    Adds two theano scalars together and returns the result.\n",
62 |       "    \"\"\"\n",
63 |       "\n",
64 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
65 |       "    \n",
66 |       "# The following code uses your code and tests it.\n",
67 |       "a = make_scalar()\n",
68 |       "b = make_scalar()\n",
69 |       "c = log(b)\n",
70 |       "d = add(a, c)\n",
71 |       "f = function([a, b], d)\n",
72 |       "a = np.cast[a.dtype](1.)\n",
73 |       "b = np.cast[b.dtype](2.)\n",
74 |       "actual = f(a, b)\n",
75 |       "expected = 1. + np.log(2.)\n",
76 |       "assert np.allclose(actual, expected)\n",
77 |       "print \"SUCCESS!\"\n"
78 |      ],
79 |      "language": "python",
80 |      "metadata": {},
81 |      "outputs": []
82 |     },
83 |     {
84 |      "cell_type": "code",
85 |      "collapsed": false,
86 |      "input": [
87 |       "%load 01_scalar_soln.py"
88 |      ],
89 |      "language": "python",
90 |      "metadata": {},
91 |      "outputs": []
92 |     },
93 |     {
94 |      "cell_type": "markdown",
95 |      "metadata": {},
96 |      "source": [
97 |       "#### Exercise 1.2\n",
98 |       "\n",
99 |       "This exercise asks you to make Theano variables and do elementwise multiplication and a matrix/vector dot product.\n"
100 |      ]
101 |     },
102 |     {
103 |      "cell_type": "code",
104 |      "collapsed": false,
105 |      "input": [
106 |       "import numpy as np\n",
107 |       "from theano import function\n",
108 |       "raise NotImplementedError(\"TODO: add any other imports you need\")\n",
109 |       "\n",
110 |       "\n",
111 |       "def make_vector():\n",
112 |       "    \"\"\"\n",
113 |       "    Returns a new Theano vector.\n",
114 |       "    \"\"\"\n",
115 |       "\n",
116 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
117 |       "\n",
118 |       "\n",
119 |       "def make_matrix():\n",
120 |       "    \"\"\"\n",
121 |       "    Returns a new Theano matrix.\n",
122 |       "    \"\"\"\n",
123 |       "\n",
124 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
125 |       "\n",
126 |       "def elemwise_mul(a, b):\n",
127 |       "    \"\"\"\n",
128 |       "    a: A theano matrix\n",
129 |       "    b: A theano matrix\n",
130 |       "    Returns the elementwise product of a and b\n",
131 |       "    \"\"\"\n",
132 |       "\n",
133 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
134 |       "\n",
135 |       "\n",
136 |       "def matrix_vector_mul(a, b):\n",
137 |       "    \"\"\"\n",
138 |       "    a: A theano matrix\n",
139 |       "    b: A theano vector\n",
140 |       "    Returns the matrix-vector product of a and b\n",
141 |       "    \"\"\"\n",
142 |       "\n",
143 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
144 |       "\n",
145 |       "# The following code uses your code and tests it.\n",
146 |       "a = make_vector()\n",
147 |       "b = make_vector()\n",
148 |       "c = elemwise_mul(a, b)\n",
149 |       "d = make_matrix()\n",
150 |       "e = matrix_vector_mul(d, c)\n",
151 |       "\n",
152 |       "f = function([a, b, d], e)\n",
153 |       "\n",
154 |       "rng = np.random.RandomState([1, 2, 3])\n",
155 |       "a_value = rng.randn(5).astype(a.dtype)\n",
156 |       "b_value = rng.rand(5).astype(b.dtype)\n",
157 |       "c_value = a_value * b_value\n",
158 |       "d_value = rng.randn(5, 5).astype(d.dtype)\n",
159 |       "expected = np.dot(d_value, c_value)\n",
160 |       "\n",
161 |       "actual = f(a_value, b_value, d_value)\n",
162 |       "assert np.allclose(actual, expected)\n",
163 |       "print \"SUCCESS!\""
164 |      ],
165 |      "language": "python",
166 |      "metadata": {},
167 |      "outputs": []
168 |     },
169 |     {
170 |      "cell_type": "code",
171 |      "collapsed": false,
172 |      "input": [
173 |       "%load 02_vector_mat_soln.py"
174 |      ],
175 |      "language": "python",
176 |      "metadata": {},
177 |      "outputs": []
178 |     },
179 |     {
180 |      "cell_type": "markdown",
181 |      "metadata": {},
182 |      "source": [
183 |       "#### Exercise 1.3\n",
184 |       "\n",
185 |       "This exercise asks you to create a tensor variable, do broadcasted additions and compute the max over part of a tensor."
186 |      ]
187 |     },
188 |     {
189 |      "cell_type": "code",
190 |      "collapsed": false,
191 |      "input": [
192 |       "import numpy as np\n",
193 |       "from theano import function\n",
194 |       "raise NotImplementedError(\"TODO: add any other imports you need\")\n",
195 |       "\n",
196 |       "\n",
197 |       "def make_tensor(dim):\n",
198 |       "    \"\"\"\n",
199 |       "    Returns a new Theano tensor with no broadcastable dimensions.\n",
200 |       "    dim: the total number of dimensions of the tensor.\n",
201 |       "    (You can use any dtype you like)\n",
202 |       "    \"\"\"\n",
203 |       "\n",
204 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
205 |       "\n",
206 |       "\n",
207 |       "def broadcasted_add(a, b):\n",
208 |       "    \"\"\"\n",
209 |       "    a: a 3D theano tensor\n",
210 |       "    b: a 4D theano tensor\n",
211 |       "    Returns c, a 4D theano tensor, where\n",
212 |       "\n",
213 |       "    c[i, j, k, l] = a[l, k, i] + b[i, j, k, l]\n",
214 |       "\n",
215 |       "    for all i, j, k, l\n",
216 |       "    \"\"\"\n",
217 |       "\n",
218 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
219 |       "\n",
220 |       "def partial_max(a):\n",
221 |       "    \"\"\"\n",
222 |       "    a: a 4D theano tensor\n",
223 |       "\n",
224 |       "    Returns b, a theano matrix, where\n",
225 |       "\n",
226 |       "    b[i, j] = max_{k,l} a[i, k, l, j]\n",
227 |       "\n",
228 |       "    for all i, j\n",
229 |       "    \"\"\"\n",
230 |       "\n",
231 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
232 |       "\n",
233 |       "# The following code uses your code and tests it.\n",
234 |       "a = make_tensor(3)\n",
235 |       "b = make_tensor(4)\n",
236 |       "c = broadcasted_add(a, b)\n",
237 |       "d = partial_max(c)\n",
238 |       "\n",
239 |       "f = function([a, b], d)\n",
240 |       "\n",
241 |       "rng = np.random.RandomState([1, 2, 3])\n",
242 |       "a_value = rng.randn(2, 2, 2).astype(a.dtype)\n",
243 |       "b_value = rng.rand(2, 2, 2, 2).astype(b.dtype)\n",
244 |       "c_value = np.transpose(a_value, (2, 1, 0))[:, None, :, :] + b_value\n",
245 |       "expected = c_value.max(axis=1).max(axis=1)\n",
246 |       "\n",
247 |       "actual = f(a_value, b_value)\n",
248 |       "\n",
249 |       "assert np.allclose(actual, expected), (actual, expected)\n",
250 |       "print \"SUCCESS!\""
251 |      ],
252 |      "language": "python",
253 |      "metadata": {},
254 |      "outputs": []
255 |     },
256 |     {
257 |      "cell_type": "code",
258 |      "collapsed": false,
259 |      "input": [
260 |       "%load 03_tensor_soln.py"
261 |      ],
262 |      "language": "python",
263 |      "metadata": {},
264 |      "outputs": []
265 |     },
266 |     {
267 |      "cell_type": "markdown",
268 |      "metadata": {},
269 |      "source": [
270 |       "## 2. Compiling and Running\n",
271 |       "\n",
272 |       "#### Exercise 2.1\n",
273 |       "\n",
274 |       "This exercise asks you to compile a Theano function and call it."
275 |      ]
276 |     },
277 |     {
278 |      "cell_type": "code",
279 |      "collapsed": false,
280 |      "input": [
281 |       "from theano import tensor as T\n",
282 |       "raise NotImplementedError(\"TODO: add any other imports you need\")\n",
283 |       "\n",
284 |       "\n",
285 |       "def evaluate(x, y, expr, x_value, y_value):\n",
286 |       "    \"\"\"\n",
287 |       "    x: A theano variable\n",
288 |       "    y: A theano variable\n",
289 |       "    expr: A theano expression involving x and y\n",
290 |       "    x_value: A numpy value\n",
291 |       "    y_value: A numpy value\n",
292 |       "\n",
293 |       "    Returns the value of expr when x_value is substituted for x\n",
294 |       "    and y_value is substituted for y\n",
295 |       "    \"\"\"\n",
296 |       "\n",
297 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
298 |       "\n",
299 |       "\n",
300 |       "# The following code uses your code and tests it.\n",
301 |       "x = T.iscalar()\n",
302 |       "y = T.iscalar()\n",
303 |       "z = x + y\n",
304 |       "assert evaluate(x, y, z, 1, 2) == 3\n",
305 |       "print \"SUCCESS!\""
306 |      ],
307 |      "language": "python",
308 |      "metadata": {},
309 |      "outputs": []
310 |     },
311 |     {
312 |      "cell_type": "code",
313 |      "collapsed": false,
314 |      "input": [
315 |       "%load 11_function_soln.py"
316 |      ],
317 |      "language": "python",
318 |      "metadata": {},
319 |      "outputs": []
320 |     },
321 |     {
322 |      "cell_type": "markdown",
323 |      "metadata": {},
324 |      "source": [
325 |       "#### Exercise 2.2\n",
326 |       "\n",
327 |       "This exercise makes you use shared variables. You must create some and update them by swapping the values of two shared variables."
328 |      ]
329 |     },
330 |     {
331 |      "cell_type": "code",
332 |      "collapsed": false,
333 |      "input": [
334 |       "import numpy as np\n",
335 |       "raise NotImplementedError(\"TODO: add any other imports you need\")\n",
336 |       "\n",
337 |       "\n",
338 |       "def make_shared(shape):\n",
339 |       "    \"\"\"\n",
340 |       "    Returns a theano shared variable containing a tensor of the specified\n",
341 |       "    shape.\n",
342 |       "    You can use any value you want.\n",
343 |       "    \"\"\"\n",
344 |       "    raise NotImplementedError(\"TODO: implement the function\")\n",
345 |       "\n",
346 |       "\n",
347 |       "def exchange_shared(a, b):\n",
348 |       "    \"\"\"\n",
349 |       "    a: a theano shared variable\n",
350 |       "    b: a theano shared variable\n",
351 |       "    Uses get_value and set_value to swap the values stored in a and b\n",
352 |       "    \"\"\"\n",
353 |       "    raise NotImplementedError(\"TODO: implement the function\")\n",
354 |       "\n",
355 |       "\n",
356 |       "def make_exchange_func(a, b):\n",
357 |       "    \"\"\"\n",
358 |       "    a: a theano shared variable\n",
359 |       "    b: a theano shared variable\n",
360 |       "    Returns f\n",
361 |       "    where f is a theano function, that, when called, swaps the\n",
362 |       "    values in a and b\n",
363 |       "    f should not return anything\n",
364 |       "    \"\"\"\n",
365 |       "    raise NotImplementedError(\"TODO: implement the function\")\n",
366 |       "\n",
367 |       "\n",
368 |       "# The following code uses your code and tests it.\n",
369 |       "a = make_shared((5, 4, 3))\n",
370 |       "assert a.get_value().shape == (5, 4, 3)\n",
371 |       "b = make_shared((5, 4, 3))\n",
372 |       "assert a.get_value().shape == (5, 4, 3)\n",
373 |       "a.set_value(np.zeros((5, 4, 3), dtype=a.dtype))\n",
374 |       "b.set_value(np.ones((5, 4, 3), dtype=b.dtype))\n",
375 |       "exchange_shared(a, b)\n",
376 |       "assert np.all(a.get_value() == 1.)\n",
377 |       "assert np.all(b.get_value() == 0.)\n",
378 |       "f = make_exchange_func(a, b)\n",
379 |       "rval = f()\n",
380 |       "assert isinstance(rval, list)\n",
381 |       "assert len(rval) == 0\n",
382 |       "assert np.all(a.get_value() == 0.)\n",
383 |       "assert np.all(b.get_value() == 1.)\n",
384 |       "\n",
385 |       "print \"SUCCESS!\""
386 |      ],
387 |      "language": "python",
388 |      "metadata": {},
389 |      "outputs": []
390 |     },
391 |     {
392 |      "cell_type": "code",
393 |      "collapsed": false,
394 |      "input": [
395 |       "%load 12_shared_soln.py"
396 |      ],
397 |      "language": "python",
398 |      "metadata": {},
399 |      "outputs": []
400 |     },
401 |     {
402 |      "cell_type": "markdown",
403 |      "metadata": {},
404 |      "source": [
405 |       "#### Exercise 2.3\n",
406 |       "\n",
407 |       "Something weird happens when you run this code; find the problem and explain what is happening.\n",
408 |       "\n",
409 |       "Hint: some compilation modes make the problem more obvious than others."
410 |      ]
411 |     },
412 |     {
413 |      "cell_type": "code",
414 |      "collapsed": false,
415 |      "input": [
416 |       "import numpy as np\n",
417 |       "from theano import function\n",
418 |       "from theano import tensor as T\n",
419 |       "x = T.vector()\n",
420 |       "y = T.vector()\n",
421 |       "z = T.zeros_like(y)\n",
422 |       "a = x + z\n",
423 |       "f = function([x, y], a)\n",
424 |       "output = f(np.zeros((1,), dtype=x.dtype), np.zeros((2,), dtype=y.dtype))"
425 |      ],
426 |      "language": "python",
427 |      "metadata": {},
428 |      "outputs": []
429 |     },
430 |     {
431 |      "cell_type": "code",
432 |      "collapsed": false,
433 |      "input": [
434 |       "%load 13_bug_soln.py"
435 |      ],
436 |      "language": "python",
437 |      "metadata": {},
438 |      "outputs": []
439 |     },
440 |     {
441 |      "cell_type": "markdown",
442 |      "metadata": {},
443 |      "source": [
444 |       "## 3. Modifying Graphs\n",
445 |       "\n",
446 |       "#### Exercise 3.1\n",
447 |       "\n",
448 |       "This exercise makes you use Theano's symbolic grad."
449 |      ]
450 |     },
451 |     {
452 |      "cell_type": "code",
453 |      "collapsed": false,
454 |      "input": [
455 |       "from theano import tensor as T\n",
456 |       "\n",
457 |       "\n",
458 |       "def grad_sum(x, y, z):\n",
459 |       "    \"\"\"\n",
460 |       "    x: A theano variable\n",
461 |       "    y: A theano variable\n",
462 |       "    z: A theano expression involving x and y\n",
463 |       "\n",
464 |       "    Returns dz / dx + dz / dy\n",
465 |       "    \"\"\"\n",
466 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
467 |       "\n",
468 |       "\n",
469 |       "# The following code uses your code and tests it.\n",
470 |       "x = T.scalar()\n",
471 |       "y = T.scalar()\n",
472 |       "z = x + y\n",
473 |       "s = grad_sum(x, y, z)\n",
474 |       "assert s.eval({x: 0, y: 0}) == 2\n",
475 |       "print \"SUCCESS!\""
476 |      ],
477 |      "language": "python",
478 |      "metadata": {},
479 |      "outputs": []
480 |     },
481 |     {
482 |      "cell_type": "code",
483 |      "collapsed": false,
484 |      "input": [
485 |       "%load 21_grad_soln.py"
486 |      ],
487 |      "language": "python",
488 |      "metadata": {},
489 |      "outputs": []
490 |     },
491 |     {
492 |      "cell_type": "markdown",
493 |      "metadata": {},
494 |      "source": [
495 |       "#### Exercise 3.2\n",
496 |       "\n",
497 |       "This exercise shows you how to navigate a Theano graph. You will need to find the inputs used to produce\n",
498 |       "some computation."
499 |      ]
500 |     },
501 |     {
502 |      "cell_type": "code",
503 |      "collapsed": false,
504 |      "input": [
505 |       "import numpy as np\n",
506 |       "from theano import tensor as T\n",
507 |       "raise NotImplementedError(\"Add any imports you need.\")\n",
508 |       "\n",
509 |       "\n",
510 |       "def arg_to_softmax(prob):\n",
511 |       "    \"\"\"\n",
512 |       "    Oh no! Someone has passed you the probability output,\n",
513 |       "    \"prob\", of a softmax function, and you want the unnormalized\n",
514 |       "    log probability--the argument to the softmax.\n",
515 |       "\n",
516 |       "    Verify that prob really is the output of a softmax. Raise a\n",
517 |       "    TypeError if it is not.\n",
518 |       "\n",
519 |       "    If it is, return the argument to the softmax.\n",
520 |       "    \"\"\"\n",
521 |       "\n",
522 |       "    raise NotImplementedError(\"Implement this function.\")\n",
523 |       "\n",
524 |       "\n",
525 |       "x = np.ones((5, 4))\n",
526 |       "try:\n",
527 |       "    arg_to_softmax(x)\n",
528 |       "    raise Exception(\"You should have raised an error.\")\n",
529 |       "except TypeError:\n",
530 |       "    pass\n",
531 |       "\n",
532 |       "x = T.matrix()\n",
533 |       "try:\n",
534 |       "    arg_to_softmax(x)\n",
535 |       "    raise Exception(\"You should have raised an error.\")\n",
536 |       "except TypeError:\n",
537 |       "    pass\n",
538 |       "\n",
539 |       "y = T.nnet.sigmoid(x)\n",
540 |       "try:\n",
541 |       "    arg_to_softmax(y)\n",
542 |       "    raise Exception(\"You should have raised an error.\")\n",
543 |       "except TypeError:\n",
544 |       "    pass\n",
545 |       "\n",
546 |       "y = T.nnet.softmax(x)\n",
547 |       "rval = arg_to_softmax(y)\n",
548 |       "assert rval is x\n",
549 |       "\n",
550 |       "print \"SUCCESS!\""
551 |      ],
552 |      "language": "python",
553 |      "metadata": {},
554 |      "outputs": []
555 |     },
556 |     {
557 |      "cell_type": "code",
558 |      "collapsed": false,
559 |      "input": [
560 |       "%load 22_traverse_soln.py"
561 |      ],
562 |      "language": "python",
563 |      "metadata": {},
564 |      "outputs": []
565 |     },
566 |     {
567 |      "cell_type": "markdown",
568 |      "metadata": {},
569 |      "source": [
570 |       "## 4. Debugging\n",
571 |       "\n",
572 |       "#### Exercise 4.1\n",
573 |       "\n",
574 |       "The code in the next cell has a bug. Run the cell to see it.\n",
575 |       "\n",
576 |       "Use Theano flags or extra parameters to function() to find the cause.\n",
577 |       "\n",
578 |       "Don't try to find the bug by inspecting prints; the point of the exercise is to get you to work with the Theano debugging tools that will be required for more complex code.\n",
579 |       "\n",
580 |       "To modify the environment for a cell use the `%env` magic command like this:\n",
581 |       "\n",
582 |       "    %env THEANO_FLAGS=floatX=float32\n",
583 |       "\n",
584 |       "You will have to restart the ipython kernel from the Kernel menu above to get the environment changes to work."
585 |      ]
586 |     },
587 |     {
588 |      "cell_type": "code",
589 |      "collapsed": false,
590 |      "input": [
591 |       "import numpy as np\n",
592 |       "from theano import function\n",
593 |       "from theano import tensor as T\n",
594 |       "a = T.vector()\n",
595 |       "b = T.log(a)\n",
596 |       "c = T.nnet.sigmoid(b)\n",
597 |       "d = T.sqrt(c)\n",
598 |       "e = T.concatenate((d, c), axis=0)\n",
599 |       "f = b * c * d\n",
600 |       "g = e + f\n",
601 |       "h = g / c\n",
602 |       "fn = function([a], h)\n",
603 |       "fn(np.ones((3,)).astype(a.dtype))"
604 |      ],
605 |      "language": "python",
606 |      "metadata": {},
607 |      "outputs": []
608 |     },
609 |     {
610 |      "cell_type": "code",
611 |      "collapsed": false,
612 |      "input": [
613 |       "%load 31_debug_soln.py"
614 |      ],
615 |      "language": "python",
616 |      "metadata": {},
617 |      "outputs": []
618 |     }
619 |    ],
620 |    "metadata": {}
621 |   }
622 |  ]
623 | }
--------------------------------------------------------------------------------
/opt.py:
--------------------------------------------------------------------------------
1 | from scalmulop import ScalMulV1
2 | from doubleop import DoubleOp
3 | from doublecop import DoubleCOp
4 | from doublec import DoubleC
5 | from doublecgpu import DoubleCGpu
6 | 
7 | from theano.gof import local_optimizer
8 | from theano.tensor.opt import register_specialize
9 | from theano.gpuarray.opt import (register_opt, op_lifter,
10 |                                  register_opt2)
11 | 
12 | 
13 | @register_specialize
14 | @local_optimizer([ScalMulV1])
15 | def local_scalmul_double(node):
16 |     if not (isinstance(node.op, ScalMulV1) and
17 |             node.op.scal == 2):
18 |         return False
19 | 
20 |     return [DoubleOp()(node.inputs[0])]
21 | 
22 | 
23 | @register_opt('fast_compile')
24 | @op_lifter([DoubleOp, DoubleC, DoubleCOp])
25 | @register_opt2([DoubleOp, DoubleC, DoubleCOp],
26 |                'fast_compile')
27 | def local_scalmul_double_gpu(op, context_name, inputs,
28 |                              outputs):
29 |     return DoubleCGpu
--------------------------------------------------------------------------------
/params.py:
--------------------------------------------------------------------------------
1 | from theano import Op
2 | 
3 | class MyOp(Op):
4 |     params_type = None  # set a params type here
5 | 
6 |     def __init__(self, *args):
7 |         # Get some params
8 |         raise NotImplementedError
9 | 
10 |     # Note the signature change: perform receives the params object
11 |     def perform(self, node, inputs, out_storage, params):
12 |         # do something
13 |         raise NotImplementedError
14 | 
15 |     def get_params(self, node):
16 |         # Return a params object
17 |         raise NotImplementedError
--------------------------------------------------------------------------------
/presentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/abergeron/ccw_tutorial_theano/f92aa8edbb567c9ac09149a382858f841a4a7749/presentation.pdf
--------------------------------------------------------------------------------
/presentation.tex:
--------------------------------------------------------------------------------
1 | \documentclass[utf8x,hyperref={pdfpagelabels=false}]{beamer}
2 | 
3 | \usepackage[utf8x]{inputenc}
4 | \usepackage[OT1]{fontenc}
5 | \usepackage{graphicx}
6 | \usepackage{amsmath}
7 | \usepackage{listings}
8 | \usepackage{hyperref}
9 | \usepackage{xcolor}
10 | \usepackage{tikz}
11 | \usetikzlibrary{shapes.arrows}
12 | %\logo{\includegraphics[width=.8in]{UdeM_NoirBleu_logo_Marie_crop}}
13 | 
14 | 
15 | \usetheme{Malmoe} % Now it's a beamer presentation with the lisa theme!
16 | \usecolortheme{beaver}
17 | \setbeamertemplate{footline}[page number]
18 | \setbeamertemplate{navigation symbols}{}
19 | 
20 | \lstloadlanguages{Python}
21 | 
22 | \definecolor{darkgreen}{RGB}{0,93,21}
23 | \definecolor{greenblue}{RGB}{40,110,126}
24 | \definecolor{lightgray}{RGB}{246,246,246}
25 | \definecolor{bordergray}{RGB}{193,193,193}
26 | \definecolor{lightblue}{RGB}{0,114,168}
27 | \definecolor{methblue}{RGB}{0,31,108}
28 | 
29 | \newcommand{\superscript}[1]{\ensuremath{^{\textrm{#1}}}}
30 | 
31 | \mode<presentation>
32 | 
33 | \title{Introduction to Theano}
34 | \author{%
35 | \footnotesize
36 | Arnaud Bergeron \newline
37 | (slides adapted by Frédéric Bastien from slides by Ian G.) \newline
38 | (further adapted by Arnaud Bergeron)
39 | }
40 | \date{February 26, 2015}
41 | 
42 | \lstdefinestyle{theano}{
43 | language=Python,
44 | basicstyle=\fontfamily{pcr}\selectfont\footnotesize,
45 | keywordstyle=\color{darkgreen}\bfseries,
46 | commentstyle=\color{greenblue}\itshape,
47 | %commentstyle=\color{blue}\itshape,
48 | stringstyle=\color{violet},
49 | showstringspaces=false,
50 | tabsize=4,
51 | backgroundcolor=\color{lightgray},
52 | frame=single,
53 | emph={[2]__init__,make_node,perform,infer_shape,c_code,make_thunk,grad,R_op},emphstyle={[2]\color{methblue}},
54 | emph={[3]self},emphstyle={[3]\color{darkgreen}},
55 | moredelim=**[is][{\color{red}}]{`}{`}
56 | }
57 | 
58 | % We don't have code till the end of the file.
59 | \lstdefinestyle{output}{
60 | language={},
61 | basicstyle=\ttfamily\footnotesize,
62 | backgroundcolor=\color{white},
63 | frame={},
64 | breaklines=true,
65 | emph={[2]},
66 | emph={[3]},
67 | }
68 | 
69 | \lstset{style=theano}
70 | 
71 | \newcommand{\code}[1]{\lstinline[emph={[2]}]|#1|}
72 | 
73 | \begin{document}
74 | 
75 | \begin{frame}[plain]
76 | \titlepage
77 | % \vspace{-5em}
78 | % \includegraphics[width=1in]{../hpcs2011_tutorial/pics/lisabook_logo_text_3.png}
79 | % \hfill
80 | % \includegraphics[width=.8in]{../hpcs2011_tutorial/pics/UdeM_NoirBleu_logo_Marie_crop}
81 | \end{frame}
82 | 
83 | \section{Outline}
84 | \begin{frame}{High level}\setcounter{page}{1}
85 | \begin{itemize}
86 | \item Overview of library (3 min)
87 | \item Building expressions (30 min)
88 | \item Compiling and running expressions (30 min)
89 | \item Modifying expressions (25 min)
90 | \item Debugging (30 min)
91 | \item Citing Theano (2 min)
92 | \end{itemize}
93 | \end{frame}
94 | 
95 | 
96 | \begin{frame}{Overview of Library}
97 | Theano is many things
98 | \begin{itemize}
99 | \item Language
100 | \item Compiler
101 | \item Python library
102 | \end{itemize}
103 | \end{frame}
104 | 
105 | \begin{frame}{Overview}
106 | Theano language:
107 | \begin{itemize}
108 | \item Operations on scalar, vector, matrix, tensor, and sparse variables
109 | \item Linear algebra
110 | \item Element-wise nonlinearities
111 | \item Convolution
112 | \item Extensible
113 | \end{itemize}
114 | \end{frame}
115 | 
116 | \begin{frame}[fragile]{Overview}
117 | Using Theano:
118 | \begin{itemize}
119 | \item define expression $f(x,y) = x + y$
120 | \begin{lstlisting}
121 | >>> z = x + y
122 | \end{lstlisting}
123 | \item compile expression
124 | \begin{lstlisting}
125 | >>> f = theano.function([x, y], z)
126 | \end{lstlisting}
127 | \item execute expression
128 | \begin{lstlisting}
129 | >>> f(1, 2)
130 | 3
131 | \end{lstlisting}
132 | \end{itemize}
133 | \end{frame}
134 | 
135 | \section{Building}
136 | 
137 | \begin{frame}{Building expressions}
138 | \begin{itemize}
139 | \item Scalars
140 | \item Vectors
141 | \item Matrices
142 | \item Tensors
143 | \item Broadcasting
144 | \item Reduction
145 | \item Dimshuffle
146 | \end{itemize}
147 | \end{frame}
148 | 
149 | \begin{frame}[fragile]{Scalar math}
150 | \begin{lstlisting}
151 | from theano import tensor as T
152 | x = T.scalar()
153 | y = T.scalar()
154 | z = x+y
155 | w = z*x
156 | a = T.sqrt(w)
157 | b = T.exp(a)
158 | c = a ** b
159 | d = T.log(c)
160 | \end{lstlisting}
161 | \end{frame}
162 | 
163 | \begin{frame}[fragile]{Vector math}
164 | \begin{lstlisting}
165 | from theano import tensor as T
166 | x = T.vector()
167 | y = T.vector()
168 | # Scalar math applied elementwise
169 | a = x * y
170 | # Vector dot product
171 | b = T.dot(x, y)
172 | # Broadcasting
173 | c = a + b
174 | \end{lstlisting}
175 | \end{frame}
176 | 
177 | \begin{frame}[fragile]{Matrix math}
178 | \begin{lstlisting}
179 | from theano import tensor as T
180 | x = T.matrix()
181 | y = T.matrix()
182 | a = T.vector()
183 | # Matrix-matrix product
184 | b = T.dot(x, y)
185 | # Matrix-vector product
186 | c = T.dot(x, a)
187 | \end{lstlisting}
188 | \end{frame}
189 | 
190 | \begin{frame}[fragile]{Tensors}
191 | \begin{itemize}
192 | \item Dimensionality defined by length of ``broadcastable'' argument
193 | \item Can add (or do other elemwise op) on two
194 | tensors with same dimensionality
195 | \item Duplicate tensors along broadcastable axes to
196 | make size match
197 | \end{itemize}
198 | \begin{lstlisting}
199 | from theano import tensor as T
200 | tensor3 = T.TensorType(
201 |     broadcastable=(False, False, False),
202 |     dtype='float32')
203 | x = tensor3()
204 | \end{lstlisting}
205 | \end{frame}
206 | 
207 | \begin{frame}{Broadcasting}
208 | \begin{tabular}{lcccccccl}
209 | &
210 | \begin{tabular}{cc}
211 | 1 & 2 \\
212 | 3 & 4 \\
213 | 5 & 6 \\
214 | \end{tabular} &
215 | + &
216 | \begin{tabular}{cc}
217 | 1 & 2 \\
218 | \end{tabular} &
219 | = &
220 | \begin{tabular}{cc}
221 | 1 & 2 \\
222 | 3 & 4 \\
223 | 5 & 6 \\
224 | \end{tabular} &
225 | + &
226 | \begin{tabular}{cc}
227 | 1 & 2 \\
228 | \color{blue} 1 & \color{blue} 2 \\
229 | \color{blue} 1 & \color{blue} 2 \\
230 | \end{tabular} &
231 | \hspace{-1.3em}
232 | \tikz[baseline={([yshift=-.5ex]current bounding box.center)}]{
233 | \draw [->, very thick] (0,0) -- (0,-1.2);
234 | } \\[1.5em]
235 | shape: & (3, 2) & & (2,) & & (3, 2) & & ({\color{blue}3}, 2) &
236 | \end{tabular}
237 | \vfill
238 | \begin{itemize}
239 | \item Pad shape with 1s on the left: $(2,) \equiv (1,2)$
240 | \item Two dimensions are compatible when they have the same length or one of them is broadcastable
241 | \item broadcastable dimensions must have a length of 1
242 | \item Adding tensors of shape (8, 1, 6, 1) and (7, 1, 5) gives a tensor of shape (8, 7, 6, 5)
243 | \end{itemize}
244 | \end{frame}
245 | 
246 | \begin{frame}[fragile]{Reductions}
247 | \begin{lstlisting}
248 | from theano import tensor as T
249 | tensor3 = T.TensorType(
250 |     broadcastable=(False, False, False),
251 |     dtype='float32')
252 | x = tensor3()
253 | total = x.sum()
254 | marginals = x.sum(axis=(0, 2))
255 | mx = x.max(axis=1)
256 | \end{lstlisting}
257 | \end{frame}
258 | 
259 | \begin{frame}[fragile]{Dimshuffle}
260 | \begin{lstlisting}
261 | from theano import tensor as T
262 | tensor3 = T.TensorType(
263 |     broadcastable=(False, False, False),
264 |     dtype='float32')
265 | x = tensor3()
266 | y = x.dimshuffle((2, 1, 0))
267 | a = T.matrix()
268 | b = a.T
269 | # Same as b
270 | c = a.dimshuffle((0, 1))
271 | # Adding to larger tensor
272 | d = a.dimshuffle((0, 1, 'x'))
273 | e = a + d
274 | \end{lstlisting}
275 | \end{frame}
276 | 
277 | \begin{frame}{Exercises}
278 | Work through the "Building Expressions" section of the ipython notebook.
279 | \end{frame}
280 | 
281 | \section{Compiling/Running}
282 | \begin{frame}{Compiling and running expression}
283 | \begin{itemize}
284 | \item \code{theano.function}
285 | \item shared variables and updates
286 | \item compilation modes
287 | \item compilation for GPU
288 | \item optimizations
289 | \end{itemize}
290 | \end{frame}
291 | 
292 | \begin{frame}[fragile]{\code{theano.function}}
293 | 
294 | \begin{lstlisting}
295 | >>> from theano import tensor as T
296 | >>> x = T.scalar()
297 | >>> y = T.scalar()
298 | >>> from theano import function
299 | >>> # first arg is list of symbolic inputs
300 | >>> # second arg is symbolic output
301 | >>> f = function([x, y], x + y)
302 | >>> # Call it with numerical values
303 | >>> # Get a numerical output
304 | >>> f(1., 2.)
305 | array(3.0)
306 | \end{lstlisting}
307 | \end{frame}
308 | 
309 | \begin{frame}{Shared variables}
310 | \begin{itemize}
311 | \item It’s hard to do much with purely functional programming
312 | \item \emph{shared variables} add just a little bit of imperative programming
313 | \item A \emph{shared variable} is a buffer that stores a numerical value for a Theano variable
314 | \item Can write to as many shared variables as you want, once each, at the end of the function
315 | \item Modify outside Theano function with \code{get_value()} and \code{set_value()} methods.
316 | \end{itemize}
317 | \end{frame}
318 | 
319 | \begin{frame}[fragile]{Shared variable example}
320 | \begin{lstlisting}
321 | >>> from theano import shared
322 | >>> x = shared(0.)
323 | # Can also use a dict for more complex code
324 | >>> updates = [(x, x + 1)]
325 | >>> f = function([], updates=updates)
326 | >>> f()
327 | >>> x.get_value()
328 | 1.0
329 | >>> x.set_value(100.)
330 | >>> f()
331 | >>> x.get_value()
332 | 101.0
333 | \end{lstlisting}
334 | \end{frame}
335 | 
336 | \begin{frame}{Which dict?}
337 | \begin{itemize}
338 | \item Use theano.compat.python2x.OrderedDict
339 | \item Not collections.OrderedDict
340 | \begin{itemize}
341 | \item This isn’t available in older versions of python, and will limit the portability of your code.
342 | \end{itemize}
343 | \item Not \code{\{\}} aka dict
344 | \begin{itemize}
345 | \item The iteration order of this built-in class is not deterministic so if Theano accepted this, the same script could compile different C programs each time you run it.
346 | \end{itemize}
347 | \end{itemize}
348 | \end{frame}
349 | 
350 | \begin{frame}{Compilation modes}
351 | \begin{itemize}
352 | \item Can compile in different modes to get different kinds of programs
353 | \item Can specify these modes very precisely with arguments to \code{theano.function()}
354 | \item Can use a few quick presets with environment variable flags
355 | \end{itemize}
356 | \end{frame}
357 | 
358 | \begin{frame}{Example preset compilation modes}
359 | \begin{description}[FAST\_RUN]
360 | \item[FAST\_RUN] Default. Spends a lot of time on
361 | compilation to get an executable that runs
362 | fast.
363 | \item[FAST\_COMPILE] Doesn’t spend much time compiling.
364 | Executable usually uses python
365 | instead of compiled C code. Runs slow.
366 | \item[DEBUG\_MODE] Adds lots of checks.
367 | Raises error messages in situations other modes don't check for.
368 | \end{description}
369 | \end{frame}
370 | 
371 | \begin{frame}{Compilation for GPU}
372 | \begin{itemize}
373 | \item Theano's current back-end only supports 32 bit on GPU
374 | \item CUDA supports 64 bit, but is slow on gamer cards
375 | \item \code{T.fscalar}, \code{T.fvector}, \code{T.fmatrix} are all 32 bit
376 | \item \code{T.scalar}, \code{T.vector}, \code{T.matrix} resolve to 32 or 64 bit depending on Theano’s floatX flag
377 | \item floatX is float64 by default, set it to float32
378 | \item Set the device flag to gpu (or a specific gpu, like gpu0)
379 | \item Optional: warn\_float64=\{'ignore', 'warn', 'raise', 'pdb'\}
380 | \end{itemize}
381 | \end{frame}
382 | 
383 | \begin{frame}{Optimizations}
384 | \begin{itemize}
385 | \item Theano changes the symbolic expressions
386 | you write before converting them to C code
387 | \item It makes them faster
388 | \begin{itemize}
389 | \item $(x+y)+(x+y) \to 2\times(x + y)$
390 | \end{itemize}
391 | \item It makes them more stable
392 | \begin{itemize}
393 | \item $\exp(a)/\sum{\exp(a)} \to \operatorname{softmax}(a)$
394 | \end{itemize}
395 | \end{itemize}
396 | \end{frame}
397 | 
398 | \begin{frame}[fragile]{Optimizations (2)}
399 | Sometimes optimizations discard error checking and produce incorrect output rather than an exception.
400 | \begin{lstlisting}
401 | >>> x = T.scalar()
402 | >>> f = function([x], x/x)
403 | >>> f(0.)
404 | array(1.0)
405 | \end{lstlisting}
406 | \end{frame}
407 | 
408 | \begin{frame}{Exercises}
409 | Work through the "Compiling and Running" section of the ipython notebook.
410 | \end{frame}
411 | 
412 | \section{Modifying expressions}
413 | \begin{frame}{Modifying expressions}
414 | \begin{itemize}
415 | \item The \code{grad()} method
416 | \item Variable nodes
417 | \item Types
418 | \item Ops
419 | \item Apply nodes
420 | \end{itemize}
421 | \end{frame}
422 | 
423 | \begin{frame}[fragile]{The \code{grad()} method}
424 | \begin{lstlisting}
425 | >>> x = T.scalar('x')
426 | >>> y = 2. * x
427 | >>> g = T.grad(y, x)
428 | >>> from theano.printing import min_informative_str
429 | # Print the unoptimized graph
430 | >>> print min_informative_str(g)
431 | A. Elemwise{mul}
432 | B. Elemwise{second,no_inplace}
433 | C. Elemwise{mul,no_inplace}
434 | D. TensorConstant{2.0}
435 | E. x
436 | F. TensorConstant{1.0}
437 | 
438 | \end{lstlisting}
439 | \end{frame}
440 | 
441 | \begin{frame}[fragile]{The \code{grad()} method}
442 | \begin{lstlisting}
443 | >>> x = T.scalar('x')
444 | >>> y = 2. * x
445 | >>> g = T.grad(y, x)
446 | >>> from theano.printing import min_informative_str
447 | # Print the optimized graph
448 | >>> f = theano.function([x], g)
449 | >>> theano.printing.debugprint(f)
450 | DeepCopyOp [@A] '' 0
451 |  |TensorConstant{2.0} [@B]
452 | \end{lstlisting}
453 | \end{frame}
454 | 
455 | \begin{frame}{Theano variables}
456 | \begin{itemize}
457 | \item A \emph{variable} is a theano expression.
458 | \item Can come from \code{T.scalar()}, \code{T.matrix()}, etc.
459 | \item Can come from doing operations on other variables.
460 | \item Every variable has a type field, identifying its \emph{type}, such as \code{TensorType((True, False), 'float32')}
461 | \item Variables can be thought of as nodes in a graph
462 | \end{itemize}
463 | \end{frame}
464 | 
465 | \begin{frame}{Ops}
466 | \begin{itemize}
467 | \item An Op is any class that describes a function operating on some variables
468 | \item Can call the op on some variables to get a
469 | new variable or variables
470 | \item An Op class can supply other forms of
471 | information about the function, such as its
472 | derivative
473 | \end{itemize}
474 | \end{frame}
475 | 
476 | \begin{frame}{Apply nodes}
477 | \begin{itemize}
478 | \item The Apply class is a specific instance of an application of an Op.
479 | \item Notable fields:
480 | \begin{description}[\texttt{outputs}]
481 | \item[\texttt{op}] The Op to be applied
482 | \item[\texttt{inputs}] The Variables to be used as input
483 | \item[\texttt{outputs}] The Variables produced
484 | \end{description}
485 | \item The \code{owner} field on variables identifies the Apply that created it.
486 | \item Variable and Apply instances are nodes and owner/
487 | inputs/outputs identify edges in a Theano graph.
488 | \end{itemize}
489 | \end{frame}
490 | 
491 | \begin{frame}{Exercises}
492 | Work through the "Modifying" section in the ipython notebook.
493 | \end{frame}
494 | 
495 | \section{Debugging}
496 | \begin{frame}{Debugging}
497 | \begin{itemize}
498 | \item DEBUG\_MODE
499 | \item Error message
500 | \item \code{theano.printing.debugprint()}
501 | \item \code{min_informative_str()}
502 | \item compute\_test\_value
503 | \item Accessing the FunctionGraph
504 | \end{itemize}
505 | \end{frame}
506 | 
507 | \begin{frame}[fragile]{Error message: code}
508 | \begin{lstlisting}
509 | import numpy as np
510 | import theano
511 | import theano.tensor as T
512 | x = T.vector()
513 | y = T.vector()
514 | z = x + x
515 | z = z + y
516 | f = theano.function([x, y], z)
517 | f(np.ones((2,)), np.ones((3,)))
518 | \end{lstlisting}
519 | \end{frame}
520 | 
521 | \begin{frame}[fragile,allowframebreaks]{Error message}
522 | \vspace{1em}
523 | \begin{lstlisting}[style=output]
524 | Traceback (most recent call last):
525 |   File "test.py", line 9, in <module>
526 |     f(np.ones((2,)), np.ones((3,)))
527 |   File "/Users/anakha/Library/Python/2.7/site-packages/theano/compile/function_module.py", line 606, in __call__
528 |     storage_map=self.fn.storage_map)
529 |   File "/Users/anakha/Library/Python/2.7/site-packages/theano/compile/function_module.py", line 595, in __call__
530 |     outputs = self.fn()
531 | ValueError: Input dimension mis-match. (input[0].shape[0] = 3, input[1].shape[0] = 2)
532 | Apply node that caused the error: Elemwise{add,no_inplace}(<TensorType(float64, vector)>, <TensorType(float64, vector)>, <TensorType(float64, vector)>)
533 | Inputs types: [TensorType(float64, vector), TensorType(float64, vector), TensorType(float64, vector)]
534 | Inputs shapes: [(3,), (2,), (2,)]
535 | Inputs strides: [(8,), (8,), (8,)]
536 | Inputs values: [array([ 1.,  1.,  1.]), array([ 1.,  1.]), array([ 1.,  1.])]
537 | 
538 | HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
539 | HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
540 | \end{lstlisting}
541 | \end{frame}
542 | 
543 | \begin{frame}[fragile]{Error message: exception\_verbosity=high}
544 | \begin{lstlisting}[style=output]
545 | Debugprint of the apply node:
546 | Elemwise{add,no_inplace} [@A] ''
547 |  |<TensorType(float64, vector)> [@B]
548 |  |<TensorType(float64, vector)> [@C]
549 |  |<TensorType(float64, vector)> [@C]
550 | 
551 | Storage map footprint:
552 |  - <TensorType(float64, vector)>, Shape: (3,), ElemSize: 8 Byte(s), TotalSize: 24 Byte(s)
553 |  - <TensorType(float64, vector)>, Shape: (2,), ElemSize: 8 Byte(s), TotalSize: 16 Byte(s)
554 | \end{lstlisting}
555 | \end{frame}
556 | 
557 | \begin{frame}[fragile]{Error message: optimizer=fast\_compile}
558 | \begin{lstlisting}[style=output]
559 | Backtrace when the node is created:
560 |   File "test.py", line 7, in <module>
561 |     z = z + y
562 | \end{lstlisting}
563 | \end{frame}
564 | 
565 | \begin{frame}[fragile]{debugprint}
566 | \begin{lstlisting}
567 | >>> from theano.printing import debugprint
568 | >>> debugprint(a)
569 | Elemwise{mul,no_inplace} [@A] ''
570 |  |TensorConstant{2.0} [@B]
571 |  |Elemwise{add,no_inplace} [@C] 'z'
572 |  | |<TensorType(float64, scalar)> [@D]
573 |  | |<TensorType(float64, scalar)> [@E]
574 | \end{lstlisting}
575 | \end{frame}
576 | 
577 | \begin{frame}[fragile]{min\_informative\_str}
578 | \begin{lstlisting}
579 | >>> x = T.scalar()
580 | >>> y = T.scalar()
581 | >>> z = x + y
582 | >>> z.name = 'z'
583 | >>> a = 2. * z
584 | >>> from theano.printing import min_informative_str
585 | >>> print min_informative_str(a)
586 | A. Elemwise{mul,no_inplace}
587 | B. TensorConstant{2.0}
588 | C. z
589 | \end{lstlisting}
590 | \end{frame}
591 | 
592 | \begin{frame}[fragile]{compute\_test\_value}
593 | \begin{lstlisting}
594 | >>> from theano import config
595 | >>> config.compute_test_value = 'raise'
596 | >>> x = T.vector()
597 | >>> import numpy as np
598 | >>> x.tag.test_value = np.ones((2,))
599 | >>> y = T.vector()
600 | >>> y.tag.test_value = np.ones((3,))
601 | >>> x + y
602 | ...
603 | ValueError: Input dimension mis-match.
604 | (input[0].shape[0] = 2, input[1].shape[0] = 3)
605 | \end{lstlisting}
606 | \end{frame}
607 | 
608 | \begin{frame}[fragile]{Accessing a function’s fgraph}
609 | \begin{lstlisting}
610 | >>> x = T.scalar()
611 | >>> y = x / x
612 | >>> f = function([x], y)
613 | >>> debugprint(f.maker.fgraph.outputs[0])
614 | DeepCopyOp [@A] ''
615 |  |TensorConstant{1.0} [@B]
616 | \end{lstlisting}
617 | \end{frame}
618 | 
619 | \begin{frame}{Exercises}
620 | Work through the "Debugging" section of the ipython notebook.
621 | \end{frame}
622 | 
623 | \section*{}
624 | \begin{frame}{Citing Theano}
625 | Please cite both of the following papers in all work that uses Theano:
626 | \begin{itemize}
627 | \item Bastien, Frédéric, Lamblin, Pascal, Pascanu, Razvan, Bergstra, James, Goodfellow, Ian, Bergeron, Arnaud, Bouchard, Nicolas, and
628 | Bengio, Yoshua. Theano: new features and speed improvements. Deep Learning and Unsupervised Feature Learning NIPS 2012
629 | Workshop, 2012.
630 | \item Bergstra, James, Breuleux, Olivier, Bastien, Frédéric, Lamblin, Pascal, Pascanu, Razvan, Desjardins, Guillaume, Turian, Joseph, Warde-
631 | Farley, David, and Bengio, Yoshua. Theano: a CPU and GPU math expression compiler. In Proceedings of the Python for Scientific
632 | Computing Conference (SciPy), June 2010. Oral Presentation.
633 | \end{itemize}
634 | \end{frame}
635 | 
636 | \begin{frame}{Example acknowledgments}
637 | We would like to thank the developers of Theano \textbackslash citep\{bergstra+al:2010-scipy,Bastien-Theano-2012\}.
638 | We would also like to thank NSERC, Compute Canada, and Calcul Québec for providing computational resources.
639 | \end{frame}
640 | 
641 | 
642 | \begin{frame}
643 | \begin{center}
644 | \bibliography{strings,strings-short,ml,aigaion-shorter}
645 | \Huge
646 | Questions?
647 | \end{center}
648 | \end{frame}
649 | 
650 | 
651 | \end{document}
652 | 
--------------------------------------------------------------------------------
/python.py:
--------------------------------------------------------------------------------
1 | from theano import Op
2 | 
3 | class MyOp(Op):
4 |     __props__ = ()
5 | 
6 |     def __init__(self, *args):
7 |         # set up parameters
8 |         raise NotImplementedError
9 | 
10 |     def make_node(self, *inputs):
11 |         # create and return the Apply node
12 |         raise NotImplementedError
13 | 
14 |     def perform(self, node, inputs, outputs_storage):
15 |         # do the computation
16 |         raise NotImplementedError
17 | 
18 |     def infer_shape(self, node, input_shapes):
19 |         # return output shapes
20 |         raise NotImplementedError
21 | 
22 |     def L_op(self, inputs, outputs, output_grads):
23 |         # return gradient graph for each input
24 |         raise NotImplementedError
25 | 
26 |     def R_op(self, inputs, eval_points):
27 |         # return R_op graph for each input
28 |         raise NotImplementedError
--------------------------------------------------------------------------------
/scalmulop.py:
--------------------------------------------------------------------------------
1 | from theano import Op, Apply
2 | from theano.tensor import as_tensor_variable
3 | from theano.scalar import as_scalar
4 | 
5 | class ScalMulV1(Op):
6 |     __props__ = ('scal',)
7 | 
8 |     def __init__(self, scal):
9 |         if not isinstance(scal, int):
10 |             raise TypeError('expected an int')
11 |         self.scal = scal
12 | 
13 |     def make_node(self, x):
14 |         x = as_tensor_variable(x)
15 |         return Apply(self, [x], [x.type()])
16 | 
17 |     def perform(self, node, inputs, output_storage):
18 |         x = inputs[0]
19 |         z = output_storage[0]
20 |         z[0] = x * self.scal
21 | 
22 |     def infer_shape(self, node, input_shapes):
23 |         return input_shapes
24 | 
25 |     def grad(self, inputs, output_grads):
26 |         return [output_grads[0] * self.scal]
27 | 
28 |     def R_op(self, inputs, eval_points):
29 |         if eval_points[0] is None:
30 |             return eval_points
31 |         return self.grad(inputs, eval_points)
32 | 
33 | 
34 | class ScalMulV2(Op):
35 |     __props__ = ()
36 | 
37 |     def make_node(self, x, scal):
38 |         x = as_tensor_variable(x)
39 |         scal = as_scalar(scal)
40 |         return Apply(self, [x, scal], [x.type()])
41 | 
42 |     def perform(self, node, inputs, output_storage):
43 |         x = inputs[0]
44 |         scal = inputs[1]
45 |         z = output_storage[0]
46 |         z[0] = x * scal
47 | 
48 |     def infer_shape(self, node, input_shapes):
49 |         return [input_shapes[0]]
50 | 
51 |     def grad(self, inputs, output_grads):
52 |         return [output_grads[0] * inputs[1], (inputs[0] * output_grads[0]).sum()]
53 | 
54 |     # def R_op(self, inputs, eval_points):
55 |     #     if eval_points[0] is None:
56 |     #         return eval_points
57 |     #     return self.grad(inputs, eval_points)
--------------------------------------------------------------------------------
/test_doubleop.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | 
3 | from theano import function, config
4 | from theano.tensor import matrix
5 | from theano.tests import unittest_tools as utt
6 | from theano.tests.test_rop import RopLop_checker
7 | 
8 | from doubleop import DoubleOp
9 | 
10 | 
11 | def test_doubleop():
12 |     utt.seed_rng()
13 |     x = matrix()
14 |     f = function([x], DoubleOp()(x))
15 |     inp = numpy.asarray(numpy.random.rand(5, 4),
16 |                         dtype=config.floatX)
17 |     out = f(inp)
18 |     utt.assert_allclose(inp * 2, out)
19 | 
20 | 
21 | class test_Double(utt.InferShapeTester):
22 |     def test_infer_shape(self):
23 |         utt.seed_rng()
24 |         x = matrix()
25 |         self._compile_and_check(
26 |             # function inputs (symbolic)
27 |             [x],
28 |             # function outputs (symbolic)
29 |             [DoubleOp()(x)],
30 |             # numeric input
31 |             [numpy.asarray(numpy.random.rand(5, 4),
32 |                            dtype=config.floatX)],
33 |             # Op class that should disappear
34 |             DoubleOp)
35 | 
36 | 
37 | def test_doubleop_grad():
38 |     utt.seed_rng()
39 |     utt.verify_grad(
40 |         # Op instance
41 |         DoubleOp(),
42 |         # Numeric inputs
43 |         [numpy.random.rand(5, 7, 2)]
44 |     )
45 | 
--------------------------------------------------------------------------------
/test_opt.py:
--------------------------------------------------------------------------------
1 | import theano
2 | 
3 | from scalmulop import ScalMulV1
4 | from doubleop import DoubleOp
5 | import opt
6 | 
7 | def test_scalmul_double():
8 |     x = theano.tensor.matrix()
9 |     y = ScalMulV1(2)(x)
10 |     f = theano.function([x], y)
11 | 
12 |     assert not any(isinstance(n.op, ScalMulV1)
13 |                    for n in f.maker.fgraph.toposort())
14 |     assert any(isinstance(n.op, DoubleOp)
15 |                for n in f.maker.fgraph.toposort())
16 | 
--------------------------------------------------------------------------------
/thunk.py:
--------------------------------------------------------------------------------
1 | from theano import Op
2 | 
3 | class MyOp(Op):
4 |     __props__ = ()
5 | 
6 |     def __init__(self, *args):
7 |         # set up parameters
8 |         raise NotImplementedError
9 | 
10 |     def make_node(self, *inputs):
11 |         # create and return the Apply node
12 |         raise NotImplementedError
13 | 
14 |     def make_thunk(self, node, storage_map,
15 |                    compute_map, no_recycling):
16 |         # return a thunk
17 |         raise NotImplementedError
18 | 
19 |     def infer_shape(self, node, input_shapes):
20 |         # return output shapes
21 |         raise NotImplementedError
22 | 
23 |     def grad(self, inputs, output_grads):
24 |         # return gradient graph for each input
25 |         raise NotImplementedError
--------------------------------------------------------------------------------
/tripleop.py:
--------------------------------------------------------------------------------
1 | from theano import Op, Apply
2 | from theano.tensor import as_tensor_variable
3 | 
4 | class `TripleOp`(Op):
5 |     __props__ = ()
6 | 
7 |     def make_node(self, x):
8 |         x = as_tensor_variable(x)
9 |         return Apply(self, [x], [x.type()])
10 | 
11 |     def perform(self, node, inputs, output_storage):
12 |         x = inputs[0]
13 |         z = output_storage[0]
14 |         z[0] = x * `3`
15 | 
16 |     def infer_shape(self, node, i0_shapes):
17 |         return i0_shapes
18 | 
19 |     def grad(self, inputs, output_grads):
20 |         return [output_grads[0] * `3`]
--------------------------------------------------------------------------------
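
Note: unlike doubleop.py, tripleop.py ships without a companion test. The sketch below is a hypothetical test_tripleop.py, modelled directly on test_doubleop.py above; it is not part of the repository, and it assumes the backtick markers in tripleop.py (which exist only to highlight lines in the slide listings) have been stripped so the module can be imported.

import numpy

from theano import function, config
from theano.tensor import matrix
from theano.tests import unittest_tools as utt

from tripleop import TripleOp


def test_tripleop():
    utt.seed_rng()
    x = matrix()
    # compile a function that applies the Op to a symbolic matrix
    f = function([x], TripleOp()(x))
    inp = numpy.asarray(numpy.random.rand(5, 4),
                        dtype=config.floatX)
    out = f(inp)
    # TripleOp multiplies its input elementwise by 3
    utt.assert_allclose(inp * 3, out)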