├── .gitignore
├── 01_building_expressions
    ├── 01_scalar.py
    ├── 01_scalar_soln.py
    ├── 02_vector_mat.py
    ├── 02_vector_mat_soln.py
    ├── 03_tensor.py
    └── 03_tensor_soln.py
├── 02_compiling_and_running
    ├── 01_function.py
    ├── 01_function_soln.py
    ├── 02_shared.py
    ├── 02_shared_soln.py
    ├── 03_bug.py
    └── 03_bug_soln.txt
├── 03_modifying
    ├── 01_grad.py
    ├── 01_grad_soln.py
    ├── 02_traverse.py
    └── 02_traverse_soln.py
├── 04_debugging
    ├── 02_compute_test_value.py
    └── 02_compute_test_value_soln.py
├── 05_tripleop
    ├── 01_tripleop.py
    └── 01_tripleop_soln.py
├── 06_scalmulop
    ├── 01_scalmulop.py
    └── 01_scalmulop_soln.py
├── 07_scalmulgrad
    ├── 01_scalmulop.py
    └── 01_scalmulop_soln.py
├── 08_scalmulc
    ├── 01_scalmulc.py
    └── 01_scalmulc_soln.py
├── 09_opt
    ├── 01_opt.py
    └── 01_opt_soln.py
├── LICENSE
├── Makefile
├── README.md
├── advanced.pdf
├── advanced.tex
├── apply_node.png
├── c.py
├── cop.py
├── doublec.py
├── doublecgpu.c
├── doublecgpu.py
├── doublecop.c
├── doublecop.py
├── doublegpu.py
├── doubleop.py
├── gpu.py
├── ipnb
    ├── 01_scalar_soln.py
    ├── 02_vector_mat_soln.py
    ├── 03_tensor_soln.py
    ├── 11_function_soln.py
    ├── 12_shared_soln.py
    ├── 13_bug_soln.py
    ├── 21_grad_soln.py
    ├── 22_traverse_soln.py
    ├── 31_debug_soln.py
    └── Theano-basic.ipynb
├── opt.py
├── params.py
├── presentation.pdf
├── presentation.tex
├── python.py
├── scalmulop.py
├── test_doubleop.py
├── test_opt.py
├── thunk.py
└── tripleop.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.py[cod]
 2 | 
 3 | # C extensions
 4 | *.so
 5 | 
 6 | # Packages
 7 | *.egg
 8 | *.egg-info
 9 | dist
10 | build
11 | eggs
12 | parts
13 | bin
14 | var
15 | sdist
16 | develop-eggs
17 | .installed.cfg
18 | lib
19 | lib64
20 | __pycache__
21 | 
22 | # Installer logs
23 | pip-log.txt
24 | 
25 | # Unit test / coverage reports
26 | .coverage
27 | .tox
28 | nosetests.xml
29 | 
30 | # Translations
31 | *.mo
32 | 
33 | # Mr Developer
34 | .mr.developer.cfg
35 | .project
36 | .pydevproject
37 | 
38 | # Latex stuff
39 | *.aux
40 | *.log
41 | *.nav
42 | *.out
43 | *.snm
44 | *.synctex.gz
45 | *.toc
46 | *.vrb
47 | 


--------------------------------------------------------------------------------
/01_building_expressions/01_scalar.py:
--------------------------------------------------------------------------------
 1 | # Fill in the TODOs in this exercise, then run
 2 | # python 01_scalar.py to see if your solution works!
 3 | #
 4 | # This exercice ask you to create Theano variable and do some
 5 | # computation on them.
 6 | import numpy as np
 7 | from theano import function
 8 | raise NotImplementedError("TODO: add any other imports you need")
 9 | 
10 | 
11 | def make_scalar():
12 |     """
13 |     Returns a new Theano scalar.
14 |     """
15 | 
16 |     raise NotImplementedError("TODO: implement this function.")
17 | 
18 | 
19 | def log(x):
20 |     """
21 |     Returns the logarithm of a Theano scalar x.
22 |     """
23 | 
24 |     raise NotImplementedError("TODO: implement this function.")
25 | 
26 | 
27 | def add(x, y):
28 |     """
29 |     Adds two theano scalars together and returns the result.
30 |     """
31 | 
32 |     raise NotImplementedError("TODO: implement this function.")
33 | 
34 | if __name__ == "__main__":
35 |     a = make_scalar()
36 |     b = make_scalar()
37 |     c = log(b)
38 |     d = add(a, c)
39 |     f = function([a, b], d)
40 |     a = np.cast[a.dtype](1.)
41 |     b = np.cast[b.dtype](2.)
42 |     actual = f(a, b)
43 |     expected = 1. + np.log(2.)
44 |     assert np.allclose(actual, expected)
45 |     print "SUCCESS!"
46 | 


--------------------------------------------------------------------------------
/01_building_expressions/01_scalar_soln.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from theano import function
 3 | import theano.tensor as T
 4 | 
 5 | 
 6 | def make_scalar():
 7 |     """
 8 |     Returns a new Theano scalar.
 9 |     """
10 | 
11 |     return T.scalar()
12 | 
13 | 
14 | def log(x):
15 |     """
16 |     Returns the logarithm of a Theano scalar x.
17 |     """
18 | 
19 |     return T.log(x)
20 | 
21 | 
22 | def add(x, y):
23 |     """
24 |     Adds two theano scalars together and returns the result.
25 |     """
26 | 
27 |     return x + y
28 | 
29 | if __name__ == "__main__":
30 |     a = make_scalar()
31 |     b = make_scalar()
32 |     c = log(b)
33 |     d = add(a, c)
34 |     f = function([a, b], d)
35 |     a = np.cast[a.dtype](1.)
36 |     b = np.cast[b.dtype](2.)
37 |     actual = f(a, b)
38 |     expected = 1. + np.log(2.)
39 |     assert np.allclose(actual, expected)
40 |     print "SUCCESS!"
41 | 


--------------------------------------------------------------------------------
/01_building_expressions/02_vector_mat.py:
--------------------------------------------------------------------------------
 1 | # Fill in the TODOs in this exercise, then run
 2 | # python 02_vector_mat.py to see if your solution works!
 3 | #
 4 | # This exercices ask you to make Theano variable, elemwise
 5 | # multiplication and matrix/vector dot product.
 6 | import numpy as np
 7 | from theano import function
 8 | raise NotImplementedError("TODO: add any other imports you need")
 9 | 
10 | 
11 | def make_vector():
12 |     """
13 |     Returns a new Theano vector.
14 |     """
15 | 
16 |     raise NotImplementedError("TODO: implement this function.")
17 | 
18 | 
19 | def make_matrix():
20 |     """
21 |     Returns a new Theano matrix.
22 |     """
23 | 
24 |     raise NotImplementedError("TODO: implement this function.")
25 | 
26 | 
27 | def elemwise_mul(a, b):
28 |     """
29 |     a: A theano matrix
30 |     b: A theano matrix
31 |     Returns the elementwise product of a and b
32 |     """
33 | 
34 |     raise NotImplementedError("TODO: implement this function.")
35 | 
36 | 
37 | def matrix_vector_mul(a, b):
38 |     """
39 |     a: A theano matrix
40 |     b: A theano vector
41 |     Returns the matrix-vector product of a and b
42 |     """
43 | 
44 |     raise NotImplementedError("TODO: implement this function.")
45 | 
46 | if __name__ == "__main__":
47 |     a = make_vector()
48 |     b = make_vector()
49 |     c = elemwise_mul(a, b)
50 |     d = make_matrix()
51 |     e = matrix_vector_mul(d, c)
52 | 
53 |     f = function([a, b, d], e)
54 | 
55 |     rng = np.random.RandomState([1, 2, 3])
56 |     a_value = rng.randn(5).astype(a.dtype)
57 |     b_value = rng.rand(5).astype(b.dtype)
58 |     c_value = a_value * b_value
59 |     d_value = rng.randn(5, 5).astype(d.dtype)
60 |     expected = np.dot(d_value, c_value)
61 | 
62 |     actual = f(a_value, b_value, d_value)
63 | 
64 |     assert np.allclose(actual, expected)
65 |     print "SUCCESS!"
66 | 


--------------------------------------------------------------------------------
/01_building_expressions/02_vector_mat_soln.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from theano import function
 3 | import theano.tensor as T
 4 | 
 5 | 
 6 | def make_vector():
 7 |     """
 8 |     Returns a new Theano vector.
 9 |     """
10 | 
11 |     return T.vector()
12 | 
13 | 
14 | def make_matrix():
15 |     """
16 |     Returns a new Theano matrix.
17 |     """
18 | 
19 |     return T.matrix()
20 | 
21 | 
22 | def elemwise_mul(a, b):
23 |     """
24 |     a: A theano matrix
25 |     b: A theano matrix
26 |     Returns the elementwise product of a and b
27 |     """
28 | 
29 |     return a * b
30 | 
31 | 
32 | def matrix_vector_mul(a, b):
33 |     """
34 |     a: A theano matrix
35 |     b: A theano vector
36 |     Returns the matrix-vector product of a and b
37 |     """
38 | 
39 |     return T.dot(a, b)
40 | 
41 | if __name__ == "__main__":
42 |     a = make_vector()
43 |     b = make_vector()
44 |     c = elemwise_mul(a, b)
45 |     d = make_matrix()
46 |     e = matrix_vector_mul(d, c)
47 | 
48 |     f = function([a, b, d], e)
49 | 
50 |     rng = np.random.RandomState([1, 2, 3])
51 |     a_value = rng.randn(5).astype(a.dtype)
52 |     b_value = rng.rand(5).astype(b.dtype)
53 |     c_value = a_value * b_value
54 |     d_value = rng.randn(5, 5).astype(d.dtype)
55 |     expected = np.dot(d_value, c_value)
56 | 
57 |     actual = f(a_value, b_value, d_value)
58 | 
59 |     assert np.allclose(actual, expected)
60 |     print "SUCCESS!"
61 | 


--------------------------------------------------------------------------------
/01_building_expressions/03_tensor.py:
--------------------------------------------------------------------------------
 1 | # Fill in the TODOs in this exercise, then run
 2 | # python 03_tensor.py to see if your solution works!
 3 | #
 4 | # This exercices ask you to create Theano tensor variable, do
 5 | # broadcastable addition and to compute the max over part of a tensor.
 6 | import numpy as np
 7 | from theano import function
 8 | raise NotImplementedError("TODO: add any other imports you need")
 9 | 
10 | 
11 | def make_tensor(dim):
12 |     """
13 |     Returns a new Theano tensor with no broadcastable dimensions.
14 |     dim: the total number of dimensions of the tensor.
15 |     (You can use any dtype you like)
16 |     """
17 | 
18 |     raise NotImplementedError("TODO: implement this function.")
19 | 
20 | 
21 | def broadcasted_add(a, b):
22 |     """
23 |     a: a 3D theano tensor
24 |     b: a 4D theano tensor
25 |     Returns c, a 4D theano tensor, where
26 | 
27 |     c[i, j, k, l] = a[l, k, i] + b[i, j, k, l]
28 | 
29 |     for all i, j, k, l
30 |     """
31 | 
32 |     raise NotImplementedError("TODO: implement this function.")
33 | 
34 | 
35 | def partial_max(a):
36 |     """
37 |     a: a 4D theano tensor
38 | 
39 |     Returns b, a theano matrix, where
40 | 
41 |     b[i, j] = max_{k,l} a[i, k, l, j]
42 | 
43 |     for all i, j
44 |     """
45 | 
46 |     raise NotImplementedError("TODO: implement this function.")
47 | 
48 | if __name__ == "__main__":
49 |     a = make_tensor(3)
50 |     b = make_tensor(4)
51 |     c = broadcasted_add(a, b)
52 |     d = partial_max(c)
53 | 
54 |     f = function([a, b], d)
55 | 
56 |     rng = np.random.RandomState([1, 2, 3])
57 |     a_value = rng.randn(2, 2, 2).astype(a.dtype)
58 |     b_value = rng.rand(2, 2, 2, 2).astype(b.dtype)
59 |     c_value = np.transpose(a_value, (2, 1, 0))[:, None, :, :] + b_value
60 |     expected = c_value.max(axis=1).max(axis=1)
61 | 
62 |     actual = f(a_value, b_value)
63 | 
64 |     assert np.allclose(actual, expected), (actual, expected)
65 |     print "SUCCESS!"
66 | 


--------------------------------------------------------------------------------
/01_building_expressions/03_tensor_soln.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from theano import function
 3 | import theano.tensor as T
 4 | 
 5 | 
 6 | def make_tensor(dim):
 7 |     """
 8 |     Returns a new Theano tensor with no broadcastable dimensions.
 9 |     dim: the total number of dimensions of the tensor.
10 |     """
11 | 
12 |     return T.TensorType(broadcastable=tuple([False] * dim), dtype='float32')()
13 | 
14 | 
15 | def broadcasted_add(a, b):
16 |     """
17 |     a: a 3D theano tensor
18 |     b: a 4D theano tensor
19 |     Returns c, a 4D theano tensor, where
20 | 
21 |     c[i, j, k, l] = a[l, k, i] + b[i, j, k, l]
22 | 
23 |     for all i, j, k, l
24 |     """
25 | 
26 |     return a.dimshuffle(2, 'x', 1, 0) + b
27 | 
28 | 
29 | def partial_max(a):
30 |     """
31 |     a: a 4D theano tensor
32 | 
33 |     Returns b, a theano matrix, where
34 | 
35 |     b[i, j] = max_{k,l} a[i, k, l, j]
36 | 
37 |     for all i, j
38 |     """
39 | 
40 |     return a.max(axis=(1, 2))
41 | 
42 | if __name__ == "__main__":
43 |     a = make_tensor(3)
44 |     b = make_tensor(4)
45 |     c = broadcasted_add(a, b)
46 |     d = partial_max(c)
47 | 
48 |     f = function([a, b], d)
49 | 
50 |     rng = np.random.RandomState([1, 2, 3])
51 |     a_value = rng.randn(2, 2, 2).astype(a.dtype)
52 |     b_value = rng.rand(2, 2, 2, 2).astype(b.dtype)
53 |     c_value = np.transpose(a_value, (2, 1, 0))[:, None, :, :] + b_value
54 |     expected = c_value.max(axis=1).max(axis=1)
55 | 
56 |     actual = f(a_value, b_value)
57 | 
58 |     assert np.allclose(actual, expected), (actual, expected)
59 |     print "SUCCESS!"
60 | 


--------------------------------------------------------------------------------
/02_compiling_and_running/01_function.py:
--------------------------------------------------------------------------------
 1 | # Fill in the TODOs in this exercise, then run
 2 | # python 01_function.py to see if your solution works!
 3 | #
 4 | # This exercice ask you to compile a Theano functiont and call it to
 5 | # execute "x + y".
 6 | from theano import tensor as T
 7 | raise NotImplementedError("TODO: add any other imports you need")
 8 | 
 9 | 
10 | def evaluate(x, y, expr, x_value, y_value):
11 |     """
12 |     x: A theano variable
13 |     y: A theano variable
14 |     expr: A theano expression involving x and y
15 |     x_value: A numpy value
16 |     y_value: A numpy value
17 | 
18 |     Returns the value of expr when x_value is substituted for x
19 |     and y_value is substituted for y
20 |     """
21 | 
22 |     raise NotImplementedError("TODO: implement this function.")
23 | 
24 | 
25 | if __name__ == "__main__":
26 |     x = T.iscalar()
27 |     y = T.iscalar()
28 |     z = x + y
29 |     assert evaluate(x, y, z, 1, 2) == 3
30 |     print "SUCCESS!"
31 | 


--------------------------------------------------------------------------------
/02_compiling_and_running/01_function_soln.py:
--------------------------------------------------------------------------------
 1 | from theano import tensor as T
 2 | from theano import function
 3 | 
 4 | 
 5 | def evaluate(x, y, expr, x_value, y_value):
 6 |     """
 7 |     x: A theano variable
 8 |     y: A theano variable
 9 |     expr: A theano expression involving x and y
10 |     x_value: A numpy value
11 |     y_value: A numpy value
12 | 
13 |     Returns the value of expr when x_value is substituted for x
14 |     and y_value is substituted for y
15 |     """
16 | 
17 |     return function([x, y], expr)(x_value, y_value)
18 | 
19 | 
20 | if __name__ == "__main__":
21 |     x = T.iscalar()
22 |     y = T.iscalar()
23 |     z = x + y
24 |     assert evaluate(x, y, z, 1, 2) == 3
25 |     print "SUCCESS!"
26 | 


--------------------------------------------------------------------------------
/02_compiling_and_running/02_shared.py:
--------------------------------------------------------------------------------
 1 | # Fill in the TODOs in this exercise, then run
 2 | # python 01_function.py to see if your solution works!
 3 | #
 4 | # This exercice make you use shared variable. You must create them and
 5 | # update them by swapping 2 shared variables values.
 6 | import numpy as np
 7 | raise NotImplementedError("TODO: add any other imports you need")
 8 | 
 9 | 
10 | def make_shared(shape):
11 |     """
12 |     Returns a theano shared variable containing a tensor of the specified
13 |     shape.
14 |     You can use any value you want.
15 |     """
16 |     raise NotImplementedError("TODO: implement the function")
17 | 
18 | 
19 | def exchange_shared(a, b):
20 |     """
21 |     a: a theano shared variable
22 |     b: a theano shared variable
23 |     Uses get_value and set_value to swap the values stored in a and b
24 |     """
25 |     raise NotImplementedError("TODO: implement the function")
26 | 
27 | 
28 | def make_exchange_func(a, b):
29 |     """
30 |     a: a theano shared variable
31 |     b: a theano shared variable
32 |     Returns f
33 |     where f is a theano function, that, when called, swaps the
34 |     values in a and b
35 |     f should not return anything
36 |     """
37 |     raise NotImplementedError("TODO: implement the function")
38 | 
39 | 
40 | if __name__ == "__main__":
41 |     a = make_shared((5, 4, 3))
42 |     assert a.get_value().shape == (5, 4, 3)
43 |     b = make_shared((5, 4, 3))
44 |     assert a.get_value().shape == (5, 4, 3)
45 |     a.set_value(np.zeros((5, 4, 3), dtype=a.dtype))
46 |     b.set_value(np.ones((5, 4, 3), dtype=b.dtype))
47 |     exchange_shared(a, b)
48 |     assert np.all(a.get_value() == 1.)
49 |     assert np.all(b.get_value() == 0.)
50 |     f = make_exchange_func(a, b)
51 |     rval = f()
52 |     assert isinstance(rval, list)
53 |     assert len(rval) == 0
54 |     assert np.all(a.get_value() == 0.)
55 |     assert np.all(b.get_value() == 1.)
56 | 
57 |     print "SUCCESS!"
58 | 


--------------------------------------------------------------------------------
/02_compiling_and_running/02_shared_soln.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from theano.compat.python2x import OrderedDict
 3 | from theano import function
 4 | from theano import shared
 5 | 
 6 | 
 7 | def make_shared(shape):
 8 |     """
 9 |     Returns a theano shared variable containing a tensor of the specified
10 |     shape.
11 |     You can use any value you want.
12 |     """
13 |     return shared(np.zeros(shape))
14 | 
15 | 
16 | def exchange_shared(a, b):
17 |     """
18 |     a: a theano shared variable
19 |     b: a theano shared variable
20 |     Uses get_value and set_value to swap the values stored in a and b
21 |     """
22 |     temp = a.get_value()
23 |     a.set_value(b.get_value())
24 |     b.set_value(temp)
25 | 
26 | 
27 | def make_exchange_func(a, b):
28 |     """
29 |     a: a theano shared variable
30 |     b: a theano shared variable
31 |     Returns f
32 |     where f is a theano function, that, when called, swaps the
33 |     values in a and b
34 |     f should not return anything
35 |     """
36 | 
37 |     updates = OrderedDict()
38 |     updates[a] = b
39 |     updates[b] = a
40 |     f = function([], updates=updates)
41 |     return f
42 | 
43 | 
44 | if __name__ == "__main__":
45 |     a = make_shared((5, 4, 3))
46 |     assert a.get_value().shape == (5, 4, 3)
47 |     b = make_shared((5, 4, 3))
48 |     assert a.get_value().shape == (5, 4, 3)
49 |     a.set_value(np.zeros((5, 4, 3), dtype=a.dtype))
50 |     b.set_value(np.ones((5, 4, 3), dtype=b.dtype))
51 |     exchange_shared(a, b)
52 |     assert np.all(a.get_value() == 1.)
53 |     assert np.all(b.get_value() == 0.)
54 |     f = make_exchange_func(a, b)
55 |     rval = f()
56 |     assert isinstance(rval, list)
57 |     assert len(rval) == 0
58 |     assert np.all(a.get_value() == 0.)
59 |     assert np.all(b.get_value() == 1.)
60 | 
61 |     print "SUCCESS!"
62 | 


--------------------------------------------------------------------------------
/02_compiling_and_running/03_bug.py:
--------------------------------------------------------------------------------
 1 | # Something weird happens when you run this code.
 2 | # Find something that is not quite right.
 3 | # Figure out which compilation modes make the problem more obvious.
 4 | # Explain why what is happening.
 5 | import numpy as np
 6 | from theano import function
 7 | from theano import tensor as T
 8 | x = T.vector()
 9 | y = T.vector()
10 | z = T.zeros_like(y)
11 | a = x + z
12 | f = function([x, y], a)
13 | output = f(np.zeros((1,), dtype=x.dtype), np.zeros((2,), dtype=y.dtype))
14 | 


--------------------------------------------------------------------------------
/02_compiling_and_running/03_bug_soln.txt:
--------------------------------------------------------------------------------
 1 | The weird thing is that if you think about how the function call is
 2 | implemented, the two arguments have different shapes, and so should
 3 | the resulting values of x and z. The line adding x and z should therefore
 4 | result in a ValueError. However, when run in the default mode it does not.
 5 | The reason is that the optimizations realize that z is always 0, so adding
 6 | z to x has no effect. The optimizations thus remove the addition of z.
 7 | However, this causes the function to fail to raise an error for bad values
 8 | of x and y. To use fewer optimizations and see the bug, you can use
 9 | THEANO_FLAGS="mode=FAST_COMPILE". DEBUG_MODE will also catch the bug.
10 | 


--------------------------------------------------------------------------------
/03_modifying/01_grad.py:
--------------------------------------------------------------------------------
 1 | # Fill in the TODOs in this exercise, then run
 2 | # python 01_grad.py to see if your solution works!
 3 | #
 4 | # This exercice ask you to use Theano automatic gradient system to
 5 | # compute some derivative.
 6 | from theano import tensor as T
 7 | 
 8 | 
 9 | def grad_sum(x, y, z):
10 |     """
11 |     x: A theano variable
12 |     y: A theano variable
13 |     z: A theano expression involving x and y
14 | 
15 |     Returns dz / dx + dz / dy
16 |     """
17 | 
18 |     raise NotImplementedError("TODO: implement this function.")
19 | 
20 | 
21 | if __name__ == "__main__":
22 |     x = T.scalar()
23 |     y = T.scalar()
24 |     z = x + y
25 |     s = grad_sum(x, y, z)
26 |     assert s.eval({x: 0, y: 0}) == 2
27 |     print "SUCCESS!"
28 | 


--------------------------------------------------------------------------------
/03_modifying/01_grad_soln.py:
--------------------------------------------------------------------------------
 1 | # Fill in the TODOs in this exercise, then run
 2 | # python 01_grad.py to see if your solution works!
 3 | #
 4 | from theano import tensor as T
 5 | 
 6 | 
 7 | def grad_sum(x, y, z):
 8 |     """
 9 |     x: A theano variable
10 |     y: A theano variable
11 |     z: A theano expression involving x and y
12 | 
13 |     Returns dz / dx + dz / dy
14 |     """
15 | 
16 |     return sum(T.grad(z, [x, y]))
17 | 
18 | if __name__ == "__main__":
19 |     x = T.scalar()
20 |     y = T.scalar()
21 |     z = x + y
22 |     s = grad_sum(x, y, z)
23 |     assert s.eval({x: 0, y: 0}) == 2
24 |     print "SUCCESS!"
25 | 


--------------------------------------------------------------------------------
/03_modifying/02_traverse.py:
--------------------------------------------------------------------------------
 1 | # Fill in the TODOs and run python 02_traverse.py to see if your solution
 2 | # works!
 3 | #
 4 | # This exercice is here to show you how to navigate a little in the
 5 | # Theano graph. You will need to find the inputs used that produce
 6 | # some computation.
 7 | import numpy as np
 8 | from theano import tensor as T
 9 | raise NotImplementedError("Add any imports you need.")
10 | 
11 | 
12 | def arg_to_softmax(prob):
13 |     """
14 |     Oh no! Someone has passed you the probability output,
15 |     "prob", of a softmax function, and you want the unnormalized
16 |     log probability--the argument to the softmax.
17 | 
18 |     Verify that prob really is the output of a softmax. Raise a
19 |     TypeError if it is not.
20 | 
21 |     If it is, return the argument to the softmax.
22 |     """
23 | 
24 |     raise NotImplementedError("Implement this function.")
25 | 
26 | if __name__ == "__main__":
27 |     x = np.ones((5, 4))
28 |     try:
29 |         arg_to_softmax(x)
30 |         raise Exception("You should have raised an error.")
31 |     except TypeError:
32 |         pass
33 | 
34 |     x = T.matrix()
35 |     try:
36 |         arg_to_softmax(x)
37 |         raise Exception("You should have raised an error.")
38 |     except TypeError:
39 |         pass
40 | 
41 |     y = T.nnet.sigmoid(x)
42 |     try:
43 |         arg_to_softmax(y)
44 |         raise Exception("You should have raised an error.")
45 |     except TypeError:
46 |         pass
47 | 
48 |     y = T.nnet.softmax(x)
49 |     rval = arg_to_softmax(y)
50 |     assert rval is x
51 | 
52 |     print "SUCCESS!"
53 | 


--------------------------------------------------------------------------------
/03_modifying/02_traverse_soln.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from theano.gof import Variable
 3 | from theano import tensor as T
 4 | 
 5 | 
 6 | def arg_to_softmax(prob):
 7 |     """
 8 |     Oh no! Someone has passed you the probability output,
 9 |     "prob", of a softmax function, and you want the unnormalized
10 |     log probability--the argument to the softmax.
11 | 
12 |     Verify that prob really is the output of a softmax. Raise a
13 |     TypeError if it is not.
14 | 
15 |     If it is, return the argument to the softmax.
16 |     """
17 | 
18 |     if not isinstance(prob, Variable):
19 |         raise TypeError()
20 | 
21 |     if prob.owner is None:
22 |         raise TypeError()
23 | 
24 |     owner = prob.owner
25 | 
26 |     if not isinstance(owner.op, T.nnet.Softmax):
27 |         raise TypeError()
28 | 
29 |     rval, = owner.inputs
30 | 
31 |     return rval
32 | 
33 | if __name__ == "__main__":
34 |     x = np.ones((5, 4))
35 |     try:
36 |         arg_to_softmax(x)
37 |         raise Exception("You should have raised an error.")
38 |     except TypeError:
39 |         pass
40 | 
41 |     x = T.matrix()
42 |     try:
43 |         arg_to_softmax(x)
44 |         raise Exception("You should have raised an error.")
45 |     except TypeError:
46 |         pass
47 | 
48 |     y = T.nnet.sigmoid(x)
49 |     try:
50 |         arg_to_softmax(y)
51 |         raise Exception("You should have raised an error.")
52 |     except TypeError:
53 |         pass
54 | 
55 |     y = T.nnet.softmax(x)
56 |     rval = arg_to_softmax(y)
57 |     assert rval is x
58 | 
59 |     print "SUCCESS!"
60 | 


--------------------------------------------------------------------------------
/04_debugging/02_compute_test_value.py:
--------------------------------------------------------------------------------
 1 | # Run
 2 | # python 01_compute_test_value.py
 3 | # It should raise an exception when it tries to execute the call to fn.
 4 | # The exception doesn't make it easy to tell which line of the python script
 5 | # first created an invalid expression though.
 6 | # Modify the script to use compute_test_value to find the first bad line.
 7 | #
 8 | # This show you another way then using Theano flags to find the line
 9 | # in your code that build a bad graph.
10 | import numpy as np
11 | from theano import function
12 | from theano import tensor as T
13 | a = T.vector()
14 | b = T.log(a)
15 | c = T.nnet.sigmoid(b)
16 | d = T.sqrt(c)
17 | e = T.concatenate((d, c), axis=0)
18 | f = b * c * d
19 | g = e + f
20 | h = g / c
21 | fn = function([a], h)
22 | fn(np.ones((3,)).astype(a.dtype))
23 | 


--------------------------------------------------------------------------------
/04_debugging/02_compute_test_value_soln.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from theano import function
 3 | from theano import tensor as T
 4 | from theano import config
 5 | config.compute_test_value = 'raise'
 6 | a = T.vector()
 7 | a.tag.test_value = np.ones((3,)).astype(a.dtype)
 8 | b = T.log(a)
 9 | c = T.nnet.sigmoid(b)
10 | d = T.sqrt(c)
11 | e = T.concatenate((d, c), axis=0)
12 | f = b * c * d
13 | # This is the first bad line
14 | g = e + f
15 | h = g / c
16 | fn = function([a], h)
17 | fn(np.ones((3,)).astype(a.dtype))
18 | 


--------------------------------------------------------------------------------
/05_tripleop/01_tripleop.py:
--------------------------------------------------------------------------------
 1 | # Modify this file to get a new op TripleOp that multiplies the
 2 | # elements of the array by 3 instead of 2.
 3 | from theano import Op, Apply
 4 | from theano.tensor import as_tensor_variable
 5 | 
 6 | class DoubleOp(Op):
 7 |     __props__ = ()
 8 | 
 9 |     def make_node(self, x):
10 |         x = as_tensor_variable(x)
11 |         return Apply(self, [x], [x.type()])
12 | 
13 |     def perform(self, node, inputs, output_storage):
14 |         x = inputs[0]
15 |         z = output_storage[0]
16 |         z[0] = x * 2
17 | 
18 |     def infer_shape(self, node, input_shapes):
19 |         return input_shapes
20 | 
21 |     def grad(self, inputs, output_grads):
22 |         return [output_grads[0] * 2]
23 | 


--------------------------------------------------------------------------------
/05_tripleop/01_tripleop_soln.py:
--------------------------------------------------------------------------------
 1 | from theano import Op, Apply
 2 | from theano.tensor import as_tensor_variable
 3 | 
 4 | class TripleOp(Op):
 5 |     __props__ = ()
 6 | 
 7 |     def make_node(self, x):
 8 |         x = as_tensor_variable(x)
 9 |         return Apply(self, [x], [x.type()])
10 | 
11 |     def perform(self, node, inputs, output_storage):
12 |         x = inputs[0]
13 |         z = output_storage[0]
14 |         z[0] = x * 3
15 | 
16 |     def infer_shape(self, node, i0_shapes):
17 |         return i0_shapes
18 | 
19 |     def grad(self, inputs, output_grads):
20 |         return [output_grads[0] * 3]
21 | 


--------------------------------------------------------------------------------
/06_scalmulop/01_scalmulop.py:
--------------------------------------------------------------------------------
 1 | from theano import Op, Apply
 2 | from theano.tensor import as_tensor_variable
 3 | 
 4 | class DoubleOp(Op):
 5 |     __props__ = ()
 6 | 
 7 |     def make_node(self, x):
 8 |         x = as_tensor_variable(x)
 9 |         return Apply(self, [x], [x.type()])
10 | 
11 |     def perform(self, node, inputs, output_storage):
12 |         x = inputs[0]
13 |         z = output_storage[0]
14 |         z[0] = x * 2
15 | 


--------------------------------------------------------------------------------
/06_scalmulop/01_scalmulop_soln.py:
--------------------------------------------------------------------------------
 1 | from theano import Op, Apply
 2 | from theano.tensor import as_tensor_variable
 3 | from theano.scalar import as_scalar
 4 | 
 5 | class ScalMulV1(Op):
 6 |     __props__ = ('scal',)
 7 | 
 8 |     def __init__(self, scal):
 9 |         if not isinstance(scal, int):
10 |             raise TypeError('expected an int')
11 |         self.scal = scal
12 | 
13 |     def make_node(self, x):
14 |         x = as_tensor_variable(x)
15 |         return Apply(self, [x], [x.type()])
16 | 
17 |     def perform(self, node, inputs, output_storage):
18 |         x = inputs[0]
19 |         z = output_storage[0]
20 |         z[0] = x * self.scal
21 | 
22 | 
23 | class ScalMulV2(Op):
24 |     __props__ = ()
25 | 
26 |     def make_node(self, x, scal):
27 |         x = as_tensor_variable(x)
28 |         scal = as_scalar(scal)
29 |         return Apply(self, [x, scal], [x.type()])
30 | 
31 |     def perform(self, node, inputs, output_storage):
32 |         x = inputs[0]
33 |         scal = inputs[1]
34 |         z = output_storage[0]
35 |         z[0] = x * scal
36 | 


--------------------------------------------------------------------------------
/07_scalmulgrad/01_scalmulop.py:
--------------------------------------------------------------------------------
 1 | from theano import Op, Apply
 2 | from theano.tensor import as_tensor_variable
 3 | 
 4 | class ScalMul(Op):
 5 |     __props__ = ('scal',)
 6 | 
 7 |     def __init__(self, scal):
 8 |         if not isinstance(scal, int):
 9 |             raise TypeError('expected an int')
10 |         self.scal = scal
11 | 
12 |     def make_node(self, x):
13 |         x = as_tensor_variable(x)
14 |         return Apply(self, [x], [x.type()])
15 | 
16 |     def perform(self, node, inputs, output_storage):
17 |         x = inputs[0]
18 |         z = output_storage[0]
19 |         z[0] = x * self.scal
20 | 


--------------------------------------------------------------------------------
/07_scalmulgrad/01_scalmulop_soln.py:
--------------------------------------------------------------------------------
 1 | from theano import Op, Apply
 2 | from theano.tensor import as_tensor_variable
 3 | 
 4 | class ScalMul(Op):
 5 |     __props__ = ('scal',)
 6 | 
 7 |     def __init__(self, scal):
 8 |         if not isinstance(scal, int):
 9 |             raise TypeError('expected an int')
10 |         self.scal = scal
11 | 
12 |     def make_node(self, x):
13 |         x = as_tensor_variable(x)
14 |         return Apply(self, [x], [x.type()])
15 | 
16 |     def perform(self, node, inputs, output_storage):
17 |         x = inputs[0]
18 |         z = output_storage[0]
19 |         z[0] = x * self.scal
20 | 
21 |     def infer_shape(self, node, input_shapes):
22 |         return input_shapes
23 | 
24 |     def grad(self, inputs, output_grads):
25 |         return [output_grads[0] * self.scal]
26 | 


--------------------------------------------------------------------------------
/08_scalmulc/01_scalmulc.py:
--------------------------------------------------------------------------------
 1 | from theano import Op, Apply
 2 | from theano.tensor import as_tensor_variable
 3 | 
 4 | class DoubleC(Op):
 5 |     __props__ = ()
 6 | 
 7 |     def make_node(self, x):
 8 |         x = as_tensor_variable(x)
 9 |         if x.ndim != 1:
10 |             raise TypeError("DoubleC only works on 1D")
11 |         return Apply(self, [x], [x.type()])
12 | 
13 |     def c_code(self, node, name, input_names,
14 |                output_names, sub):
15 |         return """
16 | Py_XDECREF(%(out)s);
17 | %(out)s = (PyArrayObject *)PyArray_NewLikeArray(
18 |     %(inp)s, NPY_ANYORDER, NULL, 0);
19 | if (%(out)s == NULL) {
20 |   %(fail)s
21 | }
22 | for (npy_intp i = 0; i < PyArray_DIM(%(inp)s, 0); i++) {
23 |   *(dtype_%(out)s *)PyArray_GETPTR1(%(out)s, i) =
24 |     (*(dtype_%(inp)s *)PyArray_GETPTR1(%(inp)s, i)) * 2;
25 | }
26 | """ % dict(inp=input_names[0], out=output_names[0],
27 |            fail=sub["fail"])
28 | 
29 |     def infer_shape(self, node, input_shapes):
30 |         return input_shapes
31 | 
32 |     def grad(self, inputs, output_grads):
33 |         return [output_grads[0] * 2]
34 | 


--------------------------------------------------------------------------------
/08_scalmulc/01_scalmulc_soln.py:
--------------------------------------------------------------------------------
 1 | from theano import Op, Apply
 2 | from theano.tensor import as_tensor_variable
 3 | 
 4 | class ScalMulC(Op):
 5 |     __props__ = ('scal',)
 6 | 
 7 |     def __init__(self, scal):
 8 |         if not isinstance(scal, int):
 9 |             raise TypeError('expected an int')
10 |         self.scal = scal
11 | 
12 |     def make_node(self, x):
13 |         x = as_tensor_variable(x)
14 |         if x.ndim != 1:
15 |             raise TypeError("ScalMulC only works on 1D")
16 |         return Apply(self, [x], [x.type()])
17 | 
18 |     def c_code(self, node, name, input_names,
19 |                output_names, sub):
20 |         return """
21 | Py_XDECREF(%(out)s);
22 | %(out)s = (PyArrayObject *)PyArray_NewLikeArray(
23 |     %(inp)s, NPY_ANYORDER, NULL, 0);
24 | if (%(out)s == NULL) {
25 |   %(fail)s
26 | }
27 | for (npy_intp i = 0; i < PyArray_DIM(%(inp)s, 0); i++) {
28 |   *(dtype_%(out)s *)PyArray_GETPTR1(%(out)s, i) =
29 |     (*(dtype_%(inp)s *)PyArray_GETPTR1(%(inp)s, i)) * %(scal)d;
30 | }
31 | """ % % dict(inp=input_names[0], out=output_names[0],
32 |            fail=sub["fail"], scal=self.scal)
33 | 
34 |     def infer_shape(self, node, input_shapes):
35 |         return input_shapes
36 | 
37 |     def grad(self, inputs, output_grads):
38 |         return [output_grads[0] * self.scal]
39 | 


--------------------------------------------------------------------------------
/09_opt/01_opt.py:
--------------------------------------------------------------------------------
 1 | from scalmulop import ScalMulV1
 2 | from doubleop import DoubleOp
 3 | 
 4 | from theano.gof import local_optimizer
 5 | 
 6 | from theano.tensor.opt import register_specialize
 7 | 
 8 | @register_specialize
 9 | @local_optimizer([ScalMulV1])
10 | def local_scalmul_double_v1(node):
11 |     if not (isinstance(node.op, ScalMulV1)
12 |             and node.op.scal == 2):
13 |         return False
14 | 
15 |     return [DoubleOp()(node.inputs[0])]
16 | 
17 | from theano.gof.opt import OpSub
18 | 
19 | local_scalmul_double_v2 = OpSub(ScalMulV1(2), DoubleOp())
20 | 
21 | register_specialize(local_scalmul_double_v2,
22 |                     name='local_scalmul_double_v2')
23 | 


--------------------------------------------------------------------------------
/09_opt/01_opt_soln.py:
--------------------------------------------------------------------------------
 1 | from doubleop import DoubleOp
 2 | from doublec import DoubleC
 3 | 
 4 | from theano.gof import local_optimizer
 5 | 
 6 | from theano.tensor.opt import register_specialize
 7 | 
 8 | @register_specialize
 9 | @local_optimizer([DoubleOp])
10 | def local_scalmul_double_v1(node):
11 |     if not (isinstance(node.op, DoubleOp)
12 |             and node.inputs[0].ndim == 1):
13 |         return False
14 | 
15 |     return [DoubleC()(node.inputs[0])]
16 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2014, Ian Goodfellow
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without modification,
 5 | are permitted provided that the following conditions are met:
 6 | 
 7 | * Redistributions of source code must retain the above copyright notice, this
 8 |   list of conditions and the following disclaimer.
 9 | 
10 | * Redistributions in binary form must reproduce the above copyright notice, this
11 |   list of conditions and the following disclaimer in the documentation and/or
12 |   other materials provided with the distribution.
13 | 
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
18 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | all: presentation.pdf
2 | 
3 | clean:
4 | 	rm -f *~ *.toc *.vrb *.out *.nav *.snm *.aux *.log
5 | 
6 | presentation.pdf: presentation.tex
7 | 	pdflatex presentation
8 | 	pdflatex presentation
9 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ccw_tutorial_theano
 2 | ===================
 3 | 
 4 | This repo contains two theano tutorials.
 5 | The first one covers the basics of running and debugging theano code.
 6 | The second one covers extending theano in python and C.
 7 | 
 8 | Basic tutorial
 9 | --------------
10 | 
11 | This tutorial covers:
12 | 
13 |  * Overview of library (3 min)
14 |  * Building expressions (30 min)
15 |  * Compiling and running expressions (30 min)
16 |  * Modifying expressions (25 min)
17 |  * Debugging (30 min)
18 |  * Citing Theano (2 min)
19 | 
20 | In order to follow this tutorial you will need the ipython-notebook
21 | python package on your computer and a clone of this repo to get the
22 | notebook with exercices.
23 | 
24 | The following commands should perform the correct installation on most
25 | unix-like machines:
26 | 
27 |     pip install ipython-notebook
28 |     git clone https://github.com/abergeron/ccw_tutorial_theano.git
29 |     cd ccw_tutorial_theano/ipnb
30 |     ipython notebook Theano-basic.ipynb
31 | 
32 | This should open your browser to the notebook page.
33 | 
34 | Advanced tutorial
35 | -----------------
36 | 
37 | COMING SOON
38 | 


--------------------------------------------------------------------------------
/advanced.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/abergeron/ccw_tutorial_theano/f92aa8edbb567c9ac09149a382858f841a4a7749/advanced.pdf


--------------------------------------------------------------------------------
/advanced.tex:
--------------------------------------------------------------------------------
  1 | \documentclass[utf8x]{beamer}
  2 | 
  3 | \usepackage[utf8x]{inputenc}
  4 | \usepackage[OT1]{fontenc}
  5 | \usepackage{graphicx}
  6 | \usepackage{listings}
  7 | \usepackage{hyperref}
  8 | \usepackage{xcolor}
  9 | 
 10 | \usetheme{Malmoe}
 11 | \usecolortheme{beaver}
 12 | 
 13 | \lstloadlanguages{Python,C,sh}
 14 | 
 15 | \definecolor{darkgreen}{RGB}{0,93,21}
 16 | \definecolor{greenblue}{RGB}{40,110,126}
 17 | \definecolor{lightgray}{RGB}{246,246,246}
 18 | \definecolor{bordergray}{RGB}{193,193,193}
 19 | \definecolor{lightblue}{RGB}{0,114,168}
 20 | \definecolor{methblue}{RGB}{0,31,108}
 21 | 
 22 | 
 23 | \title{Extending Theano}
 24 | \author{Arnaud Bergeron}
 25 | \date{\today}
 26 | 
 27 | \lstset{
 28 | language=Python,
 29 | basicstyle=\fontfamily{pcr}\selectfont\footnotesize,
 30 | keywordstyle=\color{darkgreen}\bfseries,
 31 | commentstyle=\color{greenblue}\itshape,
 32 | stringstyle=\color{violet},
 33 | showstringspaces=false,
 34 | tabsize=4,
 35 | backgroundcolor=\color{lightgray},
 36 | frame=single,
 37 | emph={[2]__init__,make_node,perform,infer_shape,c_code,make_thunk,grad,R_op},emphstyle={[2]\color{methblue}},
 38 | emph={[3]self},emphstyle={[3]\color{darkgreen}},
 39 | moredelim=**[is][{\color{red}}]{`}{`}
 40 | }
 41 | 
 42 | \newcommand{\code}[1]{\lstinline[emph={[2]}]|#1|}
 43 | 
 44 | \begin{document}
 45 | 
 46 | \frame[plain]{\titlepage}
 47 | 
 48 | \section*{}
 49 | 
 50 | \begin{frame}{Outline}
 51 | \begin{enumerate}
 52 | \item How to Make an Op (Python) (45 min)
 53 | \item How to Make an Op (C) (30 min)
 54 | \item Op Params (10 min)
 55 | \item Optimizations (20 min)
 56 | \end{enumerate}
 57 | \end{frame}
 58 | 
 59 | \section{How to Make an Op (Python)}
 60 | 
 61 | \begin{frame}[plain]{}
 62 | \begin{center}
 63 | \Huge How to Make an Op (Python)
 64 | \end{center}
 65 | \end{frame}
 66 | 
 67 | \begin{frame}[fragile]{Overview}
 68 | \lstinputlisting[lastline=14]{python.py}
 69 | \end{frame}
 70 | 
 71 | \begin{frame}{\code{__init__}}
 72 | \lstinputlisting[firstline=6,lastline=8]{python.py}
 73 | \begin{itemize}
 74 | \item Optional, a lot of Ops don't have one
 75 | \item Serves to set up Op-level parameters
 76 | \item Should also perform validation on those parameters
 77 | \end{itemize}
 78 | \end{frame}
 79 | 
 80 | \begin{frame}{\code{__props__}}
 81 | \lstinputlisting[firstline=4,lastline=5]{python.py}
 82 | \begin{itemize}
 83 | \item Optional (although very useful)
 84 | \item Generates \code{__hash__}, \code{__eq__} and \code{__str__} methods if present
 85 | \item Empty tuple signifies no properties that should take part in comparison
 86 | \item If you have only one property, make sure you add a final comma: \code{('property',)}
 87 | \end{itemize}
 88 | \end{frame}
 89 | 
 90 | \begin{frame}{\code{make_node}}
 91 | \lstinputlisting[firstline=9,lastline=11]{python.py}
 92 | \begin{itemize}
 93 | \item This creates the node object that represents our computation in the graph
 94 | \item The parameters are usually Theano variables, but can be python objects too
 95 | \item The return value must be an \code{Apply} instance
 96 | \end{itemize}
 97 | \end{frame}
 98 | 
 99 | \begin{frame}{What Is an Apply Node?}
100 | \begin{center}
101 | \includegraphics[width=\textwidth]{apply_node}
102 | \end{center}
103 | \end{frame}
104 | 
105 | \begin{frame}{\code{perform}}
106 | \lstinputlisting[firstline=12,lastline=14]{python.py}
107 | \begin{itemize}
108 | \item This performs the computation on a set of values (hence the method name)
109 | \item The parameters are all python objects (not symbolic values)
110 | \item This method must not return its result, but rather store it in the 1-element lists (or cells) provided in \code{outputs_storage}
111 | \item The output storage may contain a pre-existing value from a previous run that may be reused for storage.
112 | \end{itemize}
113 | \end{frame}
114 | 
115 | \begin{frame}{DoubleOp}
116 | \lstinputlisting[lastline=15]{doubleop.py}
117 | \end{frame}
118 | 
119 | \begin{frame}{Op Instances and Nodes}
120 | When you call an op class you get an instance of that Op:
121 | \vskip4mm
122 | \hskip3em\code{double_op = DoubleOp()}
123 | \vskip4mm
124 | But when you want to use that op as a node in a graph you need to call the \textit{instance}:
125 | \vskip4mm
126 | \hskip3em\code{node = double_op(x)}
127 | \vskip4mm
128 | You can do both steps at once with a double call like this:
129 | \vskip4mm
130 | \hskip3em\code{node = DoubleOp()(x)}
131 | \end{frame}
132 | 
133 | \begin{frame}{Basic Tests}
134 | \lstinputlisting[linerange={1-5,8-18}]{test_doubleop.py}
135 | \end{frame}
136 | 
137 | \begin{frame}[fragile]{Run Tests}
138 | The simplest way to run your tests is to use \texttt{nosetests} directly on your test file like this:
139 | 
140 | \begin{lstlisting}[language={},backgroundcolor=\color{white},frame={}]
141 | $ nosetests test_doubleop.py
142 | .
143 | ------------------------------------------------------
144 | Ran 1 test in 0.427s
145 | 
146 | OK
147 | \end{lstlisting}
148 | 
149 | You can also use \texttt{theano-nose} which is a wrapper around \texttt{nosetests} with some extra options.
150 | \end{frame}
151 | 
152 | \begin{frame}{\code{infer_shape}}
153 | \lstinputlisting[firstline=15,lastline=17]{python.py}
154 | \begin{itemize}
155 | \item This functions is optional, although highly recommended
156 | \item It takes as input the symbolic shapes of the input variables
157 | \item \code{input_shapes} is of the form \code{[[i0_shp0, i0_shp1, ...], ...]}
158 | \item It must return a list with the symbolic shape of the output variables
159 | \end{itemize}
160 | \end{frame}
161 | 
162 | \begin{frame}{Example}
163 | \lstinputlisting[firstline=16,lastline=18]{doubleop.py}
164 | \begin{itemize}
165 | \item Here the code is really simple since we don't change the shape in any way in our Op
166 | \item \code{input_shapes} would be an expression equivalent to \code{[x.shape]}
167 | \end{itemize}
168 | \end{frame}
169 | 
170 | \begin{frame}{Tests}
171 | \lstinputlisting[linerange={5-5,20-34}]{test_doubleop.py}
172 | \end{frame}
173 | 
174 | \begin{frame}{Gradient}
175 | \lstinputlisting[firstline=18,lastline=20]{python.py}
176 | \begin{itemize}
177 | \item This function is required for graphs including your op to work with \code{theano.grad()}
178 | \item Each item you return represents the gradient with respect to that input computed based on the gradient with respect to the outputs (which you get in \code{output_grads}).
179 | \item It must return a list of symbolic graphs for each of your inputs
180 | \item Inputs that have no valid gradient should have a special \code{DisconnectedType} value
181 | \end{itemize}
182 | \end{frame}
183 | 
184 | \begin{frame}{Example}
185 | \lstinputlisting[firstline=19,lastline=21]{doubleop.py}
186 | \begin{itemize}
187 | \item Here since the operation is simple the gradient is simple
188 | \item Note that we return a list
189 | \end{itemize}
190 | \end{frame}
191 | 
192 | \begin{frame}{Tests}
193 | To test the gradient we use \code{verify_grad}
194 | \lstinputlisting[linerange={5-5,36-44}]{test_doubleop.py}
195 | It will compute the gradient numerically and symbolically (using our \code{L_op()} method) and compare the two.
196 | \end{frame}
197 | 
198 | \section{How to Make an Op (C)}
199 | 
200 | \begin{frame}[plain]{}
201 | \begin{center}
202 | \Huge How to Make an Op (C)
203 | \end{center}
204 | \end{frame}
205 | 
206 | \begin{frame}{Overview}
207 | \lstinputlisting{c.py}
208 | \end{frame}
209 | 
210 | \begin{frame}{\code{c_code}}
211 | \lstinputlisting[linerange={9-11}]{c.py}
212 | \begin{itemize}
213 | \item This method returns a python string containing C code
214 | \item \code{input_names} contains the variable names where the inputs are
215 | \item \code{output_names} contains the variable names where to place the outputs
216 | \item \code{sub} contains some code snippets to insert into our code (mostly to indicate failure)
217 | \item The variables in \code{output_names} may contain a reference to a pre-existing value from a previous run that may be reused for storage.
218 | \end{itemize}
219 | \end{frame}
220 | 
221 | \begin{frame}{Support Code}
222 | \lstinputlisting[linerange={13-14}]{c.py}
223 | \begin{itemize}
224 | \item This method return a python string containing C code
225 | \item The code may be shared with multiple instances of the op
226 | \item It can contain things like helper functions
227 | \end{itemize}
228 | There are a number of similar methods to insert code at various points
229 | \end{frame}
230 | 
231 | \begin{frame}{Headers, Libraries, Compilers}
232 | Some of the methods available to customize the compilation environment:
233 | \begin{description}
234 | \item[\texttt{c\_libraries}] Return a list of shared libraries the op needs
235 | \item[\texttt{c\_headers}] Return a list of included headers the op needs
236 | \item[\texttt{c\_compiler}] C compiler to use (if not the default)
237 | \end{description}
238 | Again others are available.  Refer to the documentation for a complete list.
239 | \end{frame}
240 | 
241 | \begin{frame}{Python C-API}
242 | \begin{description}
243 | \item[\texttt{void Py\_INCREF(PyObject *o)}] Increase the reference count of a python object.
244 | \item[\texttt{void Py\_DECREF(PyObject *o)}] Decrease the reference count of a python object.
245 | \item[\texttt{void Py\_XINCREF(PyObject *o)}] Increase the reference count of a (potentially NULL) python object.
246 | \item[\texttt{void Py\_XDECREF(PyObject *o)}] Decrease the reference count of a (potentially NULL) python object.
247 | \end{description}
248 | \end{frame}
249 | 
250 | \begin{frame}{Numpy C-API}
251 | \begin{description}
252 | \item[\texttt{int PyArray\_NDIM(PyArrayObject *a)}] Get the number of dimension of an array.
253 | \item[\texttt{npy\_intp *PyArray\_DIMS(PyArrayObject *a)}] Get the shape of an array.
254 | \item[\texttt{npy\_intp *PyArray\_STRIDES(PyArrayObject *a)}] Get the strides of an array.
255 | \item[\texttt{void * PyArray\_DATA(PyArrayObject *a)}] Get the data pointer (pointer to element 0) of an array.
256 | \end{description}
257 | \end{frame}
258 | 
259 | \begin{frame}[allowframebreaks]{Example}
260 | \vskip5mm
261 | This is the C code equivalent to \code{perform}
262 | \vskip4mm
263 | \lstinputlisting[linerange={1-27}]{doublec.py}
264 | \end{frame}
265 | 
266 | \begin{frame}{COp}
267 | \lstinputlisting{cop.py}
268 | \end{frame}
269 | 
270 | \begin{frame}{Constructor Arguments}
271 | \begin{itemize}
272 | \item Basically you just pass arguments to the constructor of COp
273 | \begin{itemize}
274 | \item Either by calling the constructor directly \code{COp.__init__(self, ...)}
275 | \item Or via the superclass \code{super(MyOp, self).__init__(...)}
276 | \end{itemize}
277 | \item The arguments are:
278 | \begin{itemize}
279 | \item a list of file names with code sections (relative to the location of the op class)
280 | \item the name of a function to call to make the computation (optional)
281 | \end{itemize}
282 | \end{itemize}
283 | \end{frame}
284 | 
285 | \begin{frame}{COp: Example}
286 | \only<1>{\lstinputlisting[linerange={1-16}]{doublecop.py}}
287 | \only<2>{\lstinputlisting[language=C]{doublecop.c}}
288 | \end{frame}
289 | 
290 | \begin{frame}{Tests}
291 | \begin{itemize}
292 | \item Testing ops with C code is done the same way as testing for python ops
293 | \item One thing to watch for is tests for ops which don't have python code
294 | \begin{itemize}
295 | \item You should skip the test in those cases
296 | \item Test for \code{theano.config.gxx == ""}
297 | \end{itemize}
298 | \item Using DebugMode will compare the output of the Python version to the output of the C version and raise an error if they don't match
299 | \end{itemize}
300 | \end{frame}
301 | 
302 | \begin{frame}{Gradient and Other Concerns}
303 | \begin{itemize}
304 | \item The code for \code{grad()} and \code{infer_shape()} is done the same way as for a python Op
305 | \item In fact you can have the same Op with a python and a C version sharing the \code{grad()} and \code{infer_shape()} code
306 | \begin{itemize}
307 | \item That's how most Ops are implemented
308 | \end{itemize}
309 | \end{itemize}
310 | \end{frame}
311 | 
312 | \section{Op Params}
313 | 
314 | \begin{frame}[plain]{}
315 | \begin{center}
316 | \Huge Op Params
317 | \end{center}
318 | \end{frame}
319 | 
320 | \begin{frame}{Purpose}
321 | \begin{itemize}
322 | \item Used to pass information to the C code
323 | \item Can reduce the amount of compiled C code
324 | \item Required for things that can change from one script run to the other.
325 | \end{itemize}
326 | \end{frame}
327 | 
328 | \begin{frame}{Usage}
329 | \lstinputlisting{params.py}
330 | \end{frame}
331 | 
332 | \section{GPU Ops}
333 | 
334 | \begin{frame}[plain]{}
335 | \begin{center}
336 | \Huge GPU Ops
337 | \end{center}
338 | \end{frame}
339 | 
340 | \begin{frame}{Overview}
341 | \only<1>{\lstinputlisting[linerange=1-12]{gpu.py}}
342 | \only<2>{\lstinputlisting[linerange=14-20]{gpu.py}
343 | \begin{itemize}
344 | \item \texttt{params\_type} is new.
345 | \item \texttt{get\_params} is new.
346 | \end{itemize}}
347 | \end{frame}
348 | 
349 | \begin{frame}{Context and Context Name}
350 | \begin{itemize}
351 | \item Context is what is used to refer to the chosen GPU.
352 | 
353 | It is a C object that can't be serialized.
354 | \item Context Name is a name internal to Theano to refer to a given context object.  It is a python string.
355 | \item Context Names are used whenever you need a symbolic object.
356 | \end{itemize}
357 | \end{frame}
358 | 
359 | \begin{frame}{Double on GPU}
360 | \only<1>{\lstinputlisting[linerange=5-21]{doublegpu.py}}
361 | \only<2>{\lstinputlisting[linerange=22-37]{doublegpu.py}}
362 | \only<3>{\lstinputlisting[linerange=39-55]{doublegpu.py}}
363 | \end{frame}
364 | 
365 | \begin{frame}{GpuKernelBase}
366 | \only<1>{\lstinputlisting[linerange=6-20]{doublecgpu.py}}
367 | \only<2>{\lstinputlisting[linerange=1-10]{doublecgpu.c}}
368 | \only<3>{\lstinputlisting[linerange=12-28]{doublecgpu.c}}
369 | \end{frame}
370 | 
371 | \section{Optimizations}
372 | 
373 | \begin{frame}[plain]{}
374 | \begin{center}
375 | \Huge Optimizations
376 | \end{center}
377 | \end{frame}
378 | 
379 | \begin{frame}{Purpose}
380 | \begin{itemize}
381 | \item End goal is to make code run faster
382 | \item Sometimes they look after stability or memory usage
383 | \item Most of the time you will make one to insert a new Op you wrote
384 | \end{itemize}
385 | \end{frame}
386 | 
387 | \begin{frame}{Replace an Op}
388 | Here is code to use \code{DoubleOp()} instead of \code{ScalMul(2)}.
389 | \lstinputlisting[linerange={1-2,7-8,11-20}]{opt.py}
390 | \end{frame}
391 | 
392 | \begin{frame}{Replace an Op for GPU}
393 | Here is code to move the Double op to GPU.
394 | \lstinputlisting[linerange={1-5,9-10,22-30}]{opt.py}
395 | \end{frame}
396 | 
397 | \begin{frame}{Tests}
398 | \lstinputlisting{test_opt.py}
399 | \end{frame}
400 | 
401 | \begin{frame}{Exercice}
402 | \begin{itemize}
403 | \item Implement a ScalMulOp that multiplies its input by an arbitrary scalar value.  Start with a python implementation
404 | \item Add C code to your implementation
405 | \item Create a GPU version of your op.
406 | \item Create an optimization that replace the CPU version with a GPU version when appropriate.
407 | \end{itemize}
408 | Clone the repo at \url{https://github.com/abergeron/ccw_tutorial_theano.git}.
409 | \end{frame}
410 | 
411 | \end{document}
412 | 


--------------------------------------------------------------------------------
/apply_node.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/abergeron/ccw_tutorial_theano/f92aa8edbb567c9ac09149a382858f841a4a7749/apply_node.png


--------------------------------------------------------------------------------
/c.py:
--------------------------------------------------------------------------------
 1 | from theano import Op
 2 | 
 3 | class MyOp(Op):
 4 |     __props__ = ()
 5 | 
 6 |     def make_node(self, ...):
 7 |         # return apply node
 8 | 
 9 |     def c_code(self, node, name, input_names,
10 |                output_names, sub):
11 |         # return C code string
12 | 
13 |     def c_support_code(self):
14 |         # return C code string
15 | 
16 |     def c_code_cache_version(self):
17 |         # return hashable object
18 | 


--------------------------------------------------------------------------------
/cop.py:
--------------------------------------------------------------------------------
 1 | from theano.gof import COp
 2 | 
 3 | class MyOp(COp):
 4 |     __props__ = ()
 5 | 
 6 |     def __init__(self, ...):
 7 |         COp.__init__(self, c_files, func_name)
 8 |         # Other init code if needed
 9 | 
10 |     def make_node(self, ...):
11 |         # make the Apply node
12 | 


--------------------------------------------------------------------------------
/doublec.py:
--------------------------------------------------------------------------------
 1 | from theano import Op, Apply
 2 | from theano.tensor import as_tensor_variable
 3 | 
 4 | class DoubleC(Op):
 5 |     __props__ = ()
 6 | 
 7 |     def make_node(self, x):
 8 |         x = as_tensor_variable(x)
 9 |         if x.ndim != 1:
10 |             raise TypeError("DoubleC only works on 1D")
11 |         return Apply(self, [x], [x.type()])
12 | 
13 |     def c_code(self, node, name, input_names,
14 |                output_names, sub):
15 |         return """
16 | Py_XDECREF(%(out)s);
17 | %(out)s = (PyArrayObject *)PyArray_NewLikeArray(
18 |     %(inp)s, NPY_ANYORDER, NULL, 0);
19 | if (%(out)s == NULL) {
20 |   %(fail)s
21 | }
22 | for (npy_intp i = 0; i < PyArray_DIM(%(inp)s, 0); i++) {
23 |   *(dtype_%(out)s *)PyArray_GETPTR1(%(out)s, i) =
24 |     (*(dtype_%(inp)s *)PyArray_GETPTR1(%(inp)s, i)) * 2;
25 | }
26 | """ % dict(inp=input_names[0], out=output_names[0],
27 |            fail=sub["fail"])
28 | 
29 |     def infer_shape(self, node, input_shapes):
30 |         return input_shapes
31 | 
32 |     def grad(self, inputs, output_grads):
33 |         return [output_grads[0] * 2]
34 | 


--------------------------------------------------------------------------------
/doublecgpu.c:
--------------------------------------------------------------------------------
 1 | #section kernels
 2 | #kernel doublek : *, *, size :
 3 | 
 4 | KERNEL void doublek(GLOBAL_MEM DTYPE_o0 *out,
 5 |                     GLOBAL_MEM DTYPE_i0 *a,
 6 |                     ga_size n) {
 7 |   for (ga_size i = LID_0; i < n; i += LDIM_0) {
 8 |     out[i] = 2 * a[i];
 9 |   }
10 | }
11 | 
12 | #section support_code_struct
13 | int double_fn(PyGpuArrayObject *inp,
14 |               PyGpuArrayObject **out,
15 |               PyGpuContextObject *ctx) {
16 |   size_t n = 1;
17 |   Py_XDECREF(*out);
18 |   *out = pygpu_empty(PyGpuArray_NDIM(inp),
19 |                      PyGpuArray_DIMS(inp),
20 |                      GA_C_ORDER, ctx, Py_None);
21 |   if (*out == NULL) return -1;
22 |   for (unsigned int i = 0; i < inp->ga.nd; i++)
23 |     n *= PyGpuArray_DIM(inp, i);
24 |   if (doublek_scall(1, &n, 0, *out, inp, n)) {
25 |     PyErr_SetString(PyExc_RuntimeError,
26 |                     "Error calling kernel");
27 |     return -1;
28 |   }
29 | }
30 | 


--------------------------------------------------------------------------------
/doublecgpu.py:
--------------------------------------------------------------------------------
 1 | from theano import Apply
 2 | from theano.gpuarray.basic_ops import (as_gpuarray_variable,
 3 |                                        infer_context_name, CGpuKernelBase)
 4 | 
 5 | 
 6 | class DoubleCGpu(CGpuKernelBase):
 7 |     __props__ = ()
 8 | 
 9 |     def __init__(self):
10 |         CGpuKernelBase.__init__(self, ["doublecgpu.c"],
11 |                                 "double_fn")
12 | 
13 |     def make_node(self, x):
14 |         ctx_name = infer_context_name(x)
15 |         x = as_gpuarray_variable(x, ctx_name)
16 |         return Apply(self, [x], [x.type()])
17 | 
18 |     def get_params(self, node):
19 |         return node.outputs[0].type.context
20 | 
21 |     def infer_shape(self, node, input_shapes):
22 |         return input_shapes
23 | 
24 |     def grad(self, inputs, output_grads):
25 |         return [output_grads[0] * 2]
26 | 


--------------------------------------------------------------------------------
/doublecop.c:
--------------------------------------------------------------------------------
 1 | #section support_code
 2 | 
 3 | int APPLY_SPECIFIC(doublecop)(PyArrayObject *x,
 4 |                               PyArrayObject **out) {
 5 |   Py_XDECREF(*out);
 6 |   *out = (PyArrayObject *)PyArray_NewLikeArray(
 7 |                            inp, NPY_ANYORDER, NULL, 0);
 8 |   if (*out == NULL)
 9 |     return -1;
10 | 
11 |   for (npy_intp i = 0; i < PyArray_DIM(x, 0); i++) {
12 |     *(DTYPE_OUTPUT_0 *)PyArray_GETPTR1(*out, i) =
13 |       (*(DTYPE_INPUT_0 *)PyArray_GETPTR1(x, i)) * 2;
14 |   }
15 |   return 0;
16 | }
17 | 


--------------------------------------------------------------------------------
/doublecop.py:
--------------------------------------------------------------------------------
 1 | from theano import Apply
 2 | from theano.gof import COp
 3 | from theano.tensor import as_tensor_variable
 4 | 
 5 | class DoubleCOp(COp):
 6 |     __props__ = ()
 7 | 
 8 |     def __init__(self):
 9 |         COp.__init__(self, ["doublecop.c"],
10 |                      "APPLY_SPECIFIC(doublecop)")
11 | 
12 |     def make_node(self, x):
13 |         x = as_tensor_variable(x)
14 |         if x.ndim != 1:
15 |             raise TypeError("DoubleCOp only works with 1D")
16 |         return Apply(self, [x], [x.type()])
17 | 
18 |     def infer_shape(self, input_shapes):
19 |         return input_shapes
20 | 
21 |     def grad(self, inputs, g):
22 |         return [g[0] * 2]
23 | 


--------------------------------------------------------------------------------
/doublegpu.py:
--------------------------------------------------------------------------------
 1 | from theano import Op, Apply
 2 | from theano.gpuarray.basic_ops import (as_gpuarray_variable, Kernel,
 3 |                                        infer_context_name, GpuKernelBase)
 4 | 
 5 | try:
 6 |     from pygpu import gpuarray
 7 | except ImportError:
 8 |     pass
 9 | 
10 | 
11 | class DoubleGpu(Op, GpuKernelBase):
12 |     __props__ = ()
13 | 
14 |     def make_node(self, x):
15 |         ctx_name = infer_context_name(x)
16 |         x = as_gpuarray_variable(x, ctx_name)
17 |         return Apply(self, [x], [x.type()])
18 | 
19 |     def get_params(self, node):
20 |         return node.outputs[0].type.context
21 | 
22 |     def gpu_kernels(self, node, name):
23 |         dt = node.inputs[0].type
24 |         code = """
25 | KERNEL void doublek(GLOBAL_MEM %(ctype) *out,
26 |                    GLOBAL_MEM const %(ctype)s *a,
27 |                    ga_size n) {
28 |   for (ga_size i = LID_0; i < n; i += LDIM_0) {
29 |     out[i] = 2 * a[i];
30 |   }
31 | }
32 | """ % dict(ctype=gpuarray.dtype_to_ctype(dt))
33 |         return [Kernel(code=code, name="doublek",
34 |                        params=[gpuarray.GpuArray,
35 |                                gpuarray.GpuArray,
36 |                                gpuarray.SIZE],
37 |                        flags=Kernel.get_flags(dt))]
38 | 
39 |     def c_code(self, node, name, inn, outn, sub):
40 |         return """
41 | size_t n = 1;
42 | Py_XDECREF(%(out)s);
43 | %(out)s = pygpu_empty(PyGpuArray_NDIM(%(inp)s),
44 |                       PyGpuArray_DIMS(%(inp)s),
45 |                       GA_C_ORDER, %(ctx)s, Py_None);
46 | if (%(out)s == NULL) %(fail)s
47 | for (unsigned int i = 0; i < %(inp)s->ga.nd; i++)
48 |   n *= PyGpuArray_DIM(%(inp)s, i);
49 | if (doublek_scall(1, &n, 0, %(out)s, %(inp)s, n)) {
50 |   PyErr_SetString(PyExc_RuntimeError,
51 |                   "Error calling kernel");
52 |   %(fail)s;
53 | }
54 | """ % dict(inp=inn[0], out=outn[0], fail=sub["fail"])
55 | 
56 |     def infer_shape(self, node, input_shapes):
57 |         return input_shapes
58 | 
59 |     def grad(self, inputs, output_grads):
60 |         return [output_grads[0] * 2]
61 | 


--------------------------------------------------------------------------------
/doubleop.py:
--------------------------------------------------------------------------------
 1 | from theano import Op, Apply
 2 | from theano.tensor import as_tensor_variable
 3 | 
 4 | class DoubleOp(Op):
 5 |     __props__ = ()
 6 | 
 7 |     def make_node(self, x):
 8 |         x = as_tensor_variable(x)
 9 |         return Apply(self, [x], [x.type()])
10 | 
11 |     def perform(self, node, inputs, output_storage):
12 |         x = inputs[0]
13 |         z = output_storage[0]
14 |         z[0] = x * 2
15 | 
16 |     def infer_shape(self, node, input_shapes):
17 |         return input_shapes
18 | 
19 |     def L_op(self, inputs, outputs, output_grads):
20 |         return [output_grads[0] * 2]
21 | 
22 |     def R_op(self, inputs, eval_points):
23 |         if eval_points[0] is None:
24 |             return eval_points
25 |         return self.grad(inputs, eval_points)
26 | 


--------------------------------------------------------------------------------
/gpu.py:
--------------------------------------------------------------------------------
 1 | from theano import Op
 2 | from theano.gpuarray.type import gpu_context_type
 3 | 
 4 | class GpuOp(Op):
 5 |     __props__ = ()
 6 |     params_type = gpu_context_type
 7 | 
 8 |     def make_node(self, ...):
 9 |         # return apply node
10 | 
11 |     def get_params(self, node):
12 |         return node.outputs[0].type.context
13 | 
14 |     def perform(self, node, inputs, output_storage):
15 |         # python code
16 | 
17 |     def c_code(self, node, name, input_names,
18 |                output_names, sub):
19 |         # return C code string
20 | 
21 | 


--------------------------------------------------------------------------------
/ipnb/01_scalar_soln.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from theano import function
 3 | import theano.tensor as T
 4 | 
 5 | 
 6 | def make_scalar():
 7 |     """
 8 |     Returns a new Theano scalar.
 9 |     """
10 | 
11 |     return T.scalar()
12 | 
13 | 
14 | def log(x):
15 |     """
16 |     Returns the logarithm of a Theano scalar x.
17 |     """
18 | 
19 |     return T.log(x)
20 | 
21 | 
22 | def add(x, y):
23 |     """
24 |     Adds two theano scalars together and returns the result.
25 |     """
26 | 
27 |     return x + y
28 | 
29 | a = make_scalar()
30 | b = make_scalar()
31 | c = log(b)
32 | d = add(a, c)
33 | f = function([a, b], d)
34 | a = np.cast[a.dtype](1.)
35 | b = np.cast[b.dtype](2.)
36 | actual = f(a, b)
37 | expected = 1. + np.log(2.)
38 | assert np.allclose(actual, expected)
39 | print "SUCCESS!"
40 | 


--------------------------------------------------------------------------------
/ipnb/02_vector_mat_soln.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from theano import function
 3 | import theano.tensor as T
 4 | 
 5 | 
 6 | def make_vector():
 7 |     """
 8 |     Returns a new Theano vector.
 9 |     """
10 | 
11 |     return T.vector()
12 | 
13 | 
14 | def make_matrix():
15 |     """
16 |     Returns a new Theano matrix.
17 |     """
18 | 
19 |     return T.matrix()
20 | 
21 | 
22 | def elemwise_mul(a, b):
23 |     """
24 |     a: A theano matrix
25 |     b: A theano matrix
26 |     Returns the elementwise product of a and b
27 |     """
28 | 
29 |     return a * b
30 | 
31 | 
32 | def matrix_vector_mul(a, b):
33 |     """
34 |     a: A theano matrix
35 |     b: A theano vector
36 |     Returns the matrix-vector product of a and b
37 |     """
38 | 
39 |     return T.dot(a, b)
40 | 
41 | a = make_vector()
42 | b = make_vector()
43 | c = elemwise_mul(a, b)
44 | d = make_matrix()
45 | e = matrix_vector_mul(d, c)
46 | 
47 | f = function([a, b, d], e)
48 | 
49 | rng = np.random.RandomState([1, 2, 3])
50 | a_value = rng.randn(5).astype(a.dtype)
51 | b_value = rng.rand(5).astype(b.dtype)
52 | c_value = a_value * b_value
53 | d_value = rng.randn(5, 5).astype(d.dtype)
54 | expected = np.dot(d_value, c_value)
55 | 
56 | actual = f(a_value, b_value, d_value)
57 | 
58 | assert np.allclose(actual, expected)
59 | print "SUCCESS!"
60 | 


--------------------------------------------------------------------------------
/ipnb/03_tensor_soln.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from theano import function
 3 | import theano.tensor as T
 4 | 
 5 | 
 6 | def make_tensor(dim):
 7 |     """
 8 |     Returns a new Theano tensor with no broadcastable dimensions.
 9 |     dim: the total number of dimensions of the tensor.
10 |     """
11 | 
12 |     return T.TensorType(broadcastable=tuple([False] * dim), dtype='float32')()
13 | 
14 | 
15 | def broadcasted_add(a, b):
16 |     """
17 |     a: a 3D theano tensor
18 |     b: a 4D theano tensor
19 |     Returns c, a 4D theano tensor, where
20 | 
21 |     c[i, j, k, l] = a[l, k, i] + b[i, j, k, l]
22 | 
23 |     for all i, j, k, l
24 |     """
25 | 
26 |     return a.dimshuffle(2, 'x', 1, 0) + b
27 | 
28 | 
29 | def partial_max(a):
30 |     """
31 |     a: a 4D theano tensor
32 | 
33 |     Returns b, a theano matrix, where
34 | 
35 |     b[i, j] = max_{k,l} a[i, k, l, j]
36 | 
37 |     for all i, j
38 |     """
39 | 
40 |     return a.max(axis=(1, 2))
41 | 
42 | a = make_tensor(3)
43 | b = make_tensor(4)
44 | c = broadcasted_add(a, b)
45 | d = partial_max(c)
46 | 
47 | f = function([a, b], d)
48 | 
49 | rng = np.random.RandomState([1, 2, 3])
50 | a_value = rng.randn(2, 2, 2).astype(a.dtype)
51 | b_value = rng.rand(2, 2, 2, 2).astype(b.dtype)
52 | c_value = np.transpose(a_value, (2, 1, 0))[:, None, :, :] + b_value
53 | expected = c_value.max(axis=1).max(axis=1)
54 | 
55 | actual = f(a_value, b_value)
56 | 
57 | assert np.allclose(actual, expected), (actual, expected)
58 | print "SUCCESS!"
59 | 


--------------------------------------------------------------------------------
/ipnb/11_function_soln.py:
--------------------------------------------------------------------------------
 1 | from theano import tensor as T
 2 | from theano import function
 3 | 
 4 | 
 5 | def evaluate(x, y, expr, x_value, y_value):
 6 |     """
 7 |     x: A theano variable
 8 |     y: A theano variable
 9 |     expr: A theano expression involving x and y
10 |     x_value: A numpy value
11 |     y_value: A numpy value
12 | 
13 |     Returns the value of expr when x_value is substituted for x
14 |     and y_value is substituted for y
15 |     """
16 | 
17 |     return function([x, y], expr)(x_value, y_value)
18 | 
19 | 
20 | x = T.iscalar()
21 | y = T.iscalar()
22 | z = x + y
23 | assert evaluate(x, y, z, 1, 2) == 3
24 | print "SUCCESS!"
25 | 


--------------------------------------------------------------------------------
/ipnb/12_shared_soln.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from theano.compat.python2x import OrderedDict
 3 | from theano import function
 4 | from theano import shared
 5 | 
 6 | 
 7 | def make_shared(shape):
 8 |     """
 9 |     Returns a theano shared variable containing a tensor of the specified
10 |     shape.
11 |     You can use any value you want.
12 |     """
13 |     return shared(np.zeros(shape))
14 | 
15 | 
16 | def exchange_shared(a, b):
17 |     """
18 |     a: a theano shared variable
19 |     b: a theano shared variable
20 |     Uses get_value and set_value to swap the values stored in a and b
21 |     """
22 |     temp = a.get_value()
23 |     a.set_value(b.get_value())
24 |     b.set_value(temp)
25 | 
26 | 
27 | def make_exchange_func(a, b):
28 |     """
29 |     a: a theano shared variable
30 |     b: a theano shared variable
31 |     Returns f
32 |     where f is a theano function, that, when called, swaps the
33 |     values in a and b
34 |     f should not return anything
35 |     """
36 | 
37 |     updates = OrderedDict()
38 |     updates[a] = b
39 |     updates[b] = a
40 |     f = function([], updates=updates)
41 |     return f
42 | 
43 | 
44 | a = make_shared((5, 4, 3))
45 | assert a.get_value().shape == (5, 4, 3)
46 | b = make_shared((5, 4, 3))
47 | assert a.get_value().shape == (5, 4, 3)
48 | a.set_value(np.zeros((5, 4, 3), dtype=a.dtype))
49 | b.set_value(np.ones((5, 4, 3), dtype=b.dtype))
50 | exchange_shared(a, b)
51 | assert np.all(a.get_value() == 1.)
52 | assert np.all(b.get_value() == 0.)
53 | f = make_exchange_func(a, b)
54 | rval = f()
55 | assert isinstance(rval, list)
56 | assert len(rval) == 0
57 | assert np.all(a.get_value() == 0.)
58 | assert np.all(b.get_value() == 1.)
59 | 
60 | print "SUCCESS!"
61 | 


--------------------------------------------------------------------------------
/ipnb/13_bug_soln.py:
--------------------------------------------------------------------------------
 1 | # The weird thing is that the function succeeds.
 2 | #
 3 | # This is weird because the two values passed in for x and y do not
 4 | # have the same shape, yet x is added with something that has the same
 5 | # shape as y (z).
 6 | #
 7 | # This happens because optimizations realize that z is always zero and
 8 | # therefore remove the addition, which removes the error.
 9 | #
10 | # The problem is more evident if FAST_COMPILE or DEBUG_MODE is used.
11 | 


--------------------------------------------------------------------------------
/ipnb/21_grad_soln.py:
--------------------------------------------------------------------------------
 1 | # Fill in the TODOs in this exercise, then run
 2 | # python 01_grad.py to see if your solution works!
 3 | #
 4 | from theano import tensor as T
 5 | 
 6 | 
 7 | def grad_sum(x, y, z):
 8 |     """
 9 |     x: A theano variable
10 |     y: A theano variable
11 |     z: A theano expression involving x and y
12 | 
13 |     Returns dz / dx + dz / dy
14 |     """
15 | 
16 |     return sum(T.grad(z, [x, y]))
17 | 
18 | x = T.scalar()
19 | y = T.scalar()
20 | z = x + y
21 | s = grad_sum(x, y, z)
22 | assert s.eval({x: 0, y: 0}) == 2
23 | print "SUCCESS!"
24 | 


--------------------------------------------------------------------------------
/ipnb/22_traverse_soln.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from theano.gof import Variable
 3 | from theano import tensor as T
 4 | 
 5 | 
 6 | def arg_to_softmax(prob):
 7 |     """
 8 |     Oh no! Someone has passed you the probability output,
 9 |     "prob", of a softmax function, and you want the unnormalized
10 |     log probability--the argument to the softmax.
11 | 
12 |     Verify that prob really is the output of a softmax. Raise a
13 |     TypeError if it is not.
14 | 
15 |     If it is, return the argument to the softmax.
16 |     """
17 | 
18 |     if not isinstance(prob, Variable):
19 |         raise TypeError()
20 | 
21 |     if prob.owner is None:
22 |         raise TypeError()
23 | 
24 |     owner = prob.owner
25 | 
26 |     if not isinstance(owner.op, T.nnet.Softmax):
27 |         raise TypeError()
28 | 
29 |     rval, = owner.inputs
30 | 
31 |     return rval
32 | 
33 | if __name__ == "__main__":
34 |     x = np.ones((5, 4))
35 |     try:
36 |         arg_to_softmax(x)
37 |         raise Exception("You should have raised an error.")
38 |     except TypeError:
39 |         pass
40 | 
41 |     x = T.matrix()
42 |     try:
43 |         arg_to_softmax(x)
44 |         raise Exception("You should have raised an error.")
45 |     except TypeError:
46 |         pass
47 | 
48 |     y = T.nnet.sigmoid(x)
49 |     try:
50 |         arg_to_softmax(y)
51 |         raise Exception("You should have raised an error.")
52 |     except TypeError:
53 |         pass
54 | 
55 |     y = T.nnet.softmax(x)
56 |     rval = arg_to_softmax(y)
57 |     assert rval is x
58 | 
59 |     print "SUCCESS!"
60 | 


--------------------------------------------------------------------------------
/ipnb/31_debug_soln.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from theano import function
 3 | from theano import tensor as T
 4 | from theano import config
 5 | config.compute_test_value = 'raise'
 6 | a = T.vector()
 7 | a.tag.test_value = np.ones((3,)).astype(a.dtype)
 8 | b = T.log(a)
 9 | c = T.nnet.sigmoid(b)
10 | d = T.sqrt(c)
11 | e = T.concatenate((d, c), axis=0)
12 | f = b * c * d
13 | # This is the first bad line
14 | g = e + f
15 | h = g / c
16 | fn = function([a], h)
17 | fn(np.ones((3,)).astype(a.dtype))
18 | 


--------------------------------------------------------------------------------
/ipnb/Theano-basic.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:33e931e54d686a2ab2c44bfcdc99a4383aecf754e0c80fef2e0ada6858e9b48c"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "markdown",
 13 |      "metadata": {},
 14 |      "source": [
 15 |       "All the exercices on this sheet work this way:\n",
 16 |       "\n",
 17 |       " 1. You have a cell with TODOs that raise errors with a description of what is needed. Do that.\n",
 18 |       " 2. Then run the cell(ctrl-enter) to execute it.\n",
 19 |       " 3. It should print \"Success\" at the end (there is validation code in the cell). If not, try again.\n",
 20 |       " 4. If you want to see the solution, execute the cell that start with \"%load\" after the exercice."
 21 |      ]
 22 |     },
 23 |     {
 24 |      "cell_type": "markdown",
 25 |      "metadata": {},
 26 |      "source": [
 27 |       "## 1. Builing expressions\n",
 28 |       "\n",
 29 |       "#### Excercice 1.1\n",
 30 |       "\n",
 31 |       "This exercice walks you through creating Theano variables and doing some computation with them."
 32 |      ]
 33 |     },
 34 |     {
 35 |      "cell_type": "code",
 36 |      "collapsed": false,
 37 |      "input": [
 38 |       "import numpy as np\n",
 39 |       "from theano import function\n",
 40 |       "raise NotImplementedError(\"TODO: add any other imports you need\")\n",
 41 |       "\n",
 42 |       "\n",
 43 |       "def make_scalar():\n",
 44 |       "    \"\"\"\n",
 45 |       "    Returns a new Theano scalar.\n",
 46 |       "    \"\"\"\n",
 47 |       "\n",
 48 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
 49 |       "\n",
 50 |       "\n",
 51 |       "def log(x):\n",
 52 |       "    \"\"\"\n",
 53 |       "    Returns the logarithm of a Theano scalar x.\n",
 54 |       "    \"\"\"\n",
 55 |       "\n",
 56 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
 57 |       "\n",
 58 |       "\n",
 59 |       "def add(x, y):\n",
 60 |       "    \"\"\"\n",
 61 |       "    Adds two theano scalars together and returns the result.\n",
 62 |       "    \"\"\"\n",
 63 |       "\n",
 64 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
 65 |       "    \n",
 66 |       "# The following code uses your code and tests it.\n",
 67 |       "a = make_scalar()\n",
 68 |       "b = make_scalar()\n",
 69 |       "c = log(b)\n",
 70 |       "d = add(a, c)\n",
 71 |       "f = function([a, b], d)\n",
 72 |       "a = np.cast[a.dtype](1.)\n",
 73 |       "b = np.cast[b.dtype](2.)\n",
 74 |       "actual = f(a, b)\n",
 75 |       "expected = 1. + np.log(2.)\n",
 76 |       "assert np.allclose(actual, expected)\n",
 77 |       "print \"SUCCESS!\"\n"
 78 |      ],
 79 |      "language": "python",
 80 |      "metadata": {},
 81 |      "outputs": []
 82 |     },
 83 |     {
 84 |      "cell_type": "code",
 85 |      "collapsed": false,
 86 |      "input": [
 87 |       "%load 01_scalar_soln.py"
 88 |      ],
 89 |      "language": "python",
 90 |      "metadata": {},
 91 |      "outputs": []
 92 |     },
 93 |     {
 94 |      "cell_type": "markdown",
 95 |      "metadata": {},
 96 |      "source": [
 97 |       "#### Exercice 1.2\n",
 98 |       "\n",
 99 |       "This exercice asks you to make Theano variables, elementwise multiplication and matrix/vector dot product.\n"
100 |      ]
101 |     },
102 |     {
103 |      "cell_type": "code",
104 |      "collapsed": false,
105 |      "input": [
106 |       "import numpy as np\n",
107 |       "from theano import function\n",
108 |       "raise NotImplementedError(\"TODO: add any other imports you need\")\n",
109 |       "\n",
110 |       "\n",
111 |       "def make_vector():\n",
112 |       "    \"\"\"\n",
113 |       "    Returns a new Theano vector.\n",
114 |       "    \"\"\"\n",
115 |       "\n",
116 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
117 |       "\n",
118 |       "\n",
119 |       "def make_matrix():\n",
120 |       "    \"\"\"\n",
121 |       "    Returns a new Theano matrix.\n",
122 |       "    \"\"\"\n",
123 |       "\n",
124 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
125 |       "\n",
126 |       "def elemwise_mul(a, b):\n",
127 |       "    \"\"\"\n",
128 |       "    a: A theano matrix\n",
129 |       "    b: A theano matrix\n",
130 |       "    Returns the elementwise product of a and b\n",
131 |       "    \"\"\"\n",
132 |       "\n",
133 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
134 |       "\n",
135 |       "\n",
136 |       "def matrix_vector_mul(a, b):\n",
137 |       "    \"\"\"\n",
138 |       "    a: A theano matrix\n",
139 |       "    b: A theano vector\n",
140 |       "    Returns the matrix-vector product of a and b\n",
141 |       "    \"\"\"\n",
142 |       "\n",
143 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
144 |       "\n",
145 |       "# The following code uses your code and tests it.\n",
146 |       "a = make_vector()\n",
147 |       "b = make_vector()\n",
148 |       "c = elemwise_mul(a, b)\n",
149 |       "d = make_matrix()\n",
150 |       "e = matrix_vector_mul(d, c)\n",
151 |       "\n",
152 |       "f = function([a, b, d], e)\n",
153 |       "\n",
154 |       "rng = np.random.RandomState([1, 2, 3])\n",
155 |       "a_value = rng.randn(5).astype(a.dtype)\n",
156 |       "b_value = rng.rand(5).astype(b.dtype)\n",
157 |       "c_value = a_value * b_value\n",
158 |       "d_value = rng.randn(5, 5).astype(d.dtype)\n",
159 |       "expected = np.dot(d_value, c_value)\n",
160 |       "\n",
161 |       "actual = f(a_value, b_value, d_value)\n",
162 |       "assert np.allclose(actual, expected)\n",
163 |       "print \"SUCCESS!\""
164 |      ],
165 |      "language": "python",
166 |      "metadata": {},
167 |      "outputs": []
168 |     },
169 |     {
170 |      "cell_type": "code",
171 |      "collapsed": false,
172 |      "input": [
173 |       "%load 02_vector_mat_soln.py"
174 |      ],
175 |      "language": "python",
176 |      "metadata": {},
177 |      "outputs": []
178 |     },
179 |     {
180 |      "cell_type": "markdown",
181 |      "metadata": {},
182 |      "source": [
183 |       "#### Exercice 1.3\n",
184 |       "\n",
185 |       "This exercices asks you to create a tensor variable, do broadcastable additions and compute the max over part of a tensor."
186 |      ]
187 |     },
188 |     {
189 |      "cell_type": "code",
190 |      "collapsed": false,
191 |      "input": [
192 |       "import numpy as np\n",
193 |       "from theano import function\n",
194 |       "raise NotImplementedError(\"TODO: add any other imports you need\")\n",
195 |       "\n",
196 |       "\n",
197 |       "def make_tensor(dim):\n",
198 |       "    \"\"\"\n",
199 |       "    Returns a new Theano tensor with no broadcastable dimensions.\n",
200 |       "    dim: the total number of dimensions of the tensor.\n",
201 |       "    (You can use any dtype you like)\n",
202 |       "    \"\"\"\n",
203 |       "\n",
204 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
205 |       "\n",
206 |       "\n",
207 |       "def broadcasted_add(a, b):\n",
208 |       "    \"\"\"\n",
209 |       "    a: a 3D theano tensor\n",
210 |       "    b: a 4D theano tensor\n",
211 |       "    Returns c, a 4D theano tensor, where\n",
212 |       "\n",
213 |       "    c[i, j, k, l] = a[l, k, i] + b[i, j, k, l]\n",
214 |       "\n",
215 |       "    for all i, j, k, l\n",
216 |       "    \"\"\"\n",
217 |       "\n",
218 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
219 |       "\n",
220 |       "def partial_max(a):\n",
221 |       "    \"\"\"\n",
222 |       "    a: a 4D theano tensor\n",
223 |       "\n",
224 |       "    Returns b, a theano matrix, where\n",
225 |       "\n",
226 |       "    b[i, j] = max_{k,l} a[i, k, l, j]\n",
227 |       "\n",
228 |       "    for all i, j\n",
229 |       "    \"\"\"\n",
230 |       "\n",
231 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
232 |       "\n",
233 |       "# The following code uses your code and tests it.\n",
234 |       "a = make_tensor(3)\n",
235 |       "b = make_tensor(4)\n",
236 |       "c = broadcasted_add(a, b)\n",
237 |       "d = partial_max(c)\n",
238 |       "\n",
239 |       "f = function([a, b], d)\n",
240 |       "\n",
241 |       "rng = np.random.RandomState([1, 2, 3])\n",
242 |       "a_value = rng.randn(2, 2, 2).astype(a.dtype)\n",
243 |       "b_value = rng.rand(2, 2, 2, 2).astype(b.dtype)\n",
244 |       "c_value = np.transpose(a_value, (2, 1, 0))[:, None, :, :] + b_value\n",
245 |       "expected = c_value.max(axis=1).max(axis=1)\n",
246 |       "\n",
247 |       "actual = f(a_value, b_value)\n",
248 |       "\n",
249 |       "assert np.allclose(actual, expected), (actual, expected)\n",
250 |       "print \"SUCCESS!\""
251 |      ],
252 |      "language": "python",
253 |      "metadata": {},
254 |      "outputs": []
255 |     },
256 |     {
257 |      "cell_type": "code",
258 |      "collapsed": false,
259 |      "input": [
260 |       "%load 03_tensor_soln.py"
261 |      ],
262 |      "language": "python",
263 |      "metadata": {},
264 |      "outputs": []
265 |     },
266 |     {
267 |      "cell_type": "markdown",
268 |      "metadata": {},
269 |      "source": [
270 |       "## 2. Compiling and Running\n",
271 |       "\n",
272 |       "#### Exercice 2.1\n",
273 |       "\n",
274 |       "This exercice asks you to compile a Theano function and call it. "
275 |      ]
276 |     },
277 |     {
278 |      "cell_type": "code",
279 |      "collapsed": false,
280 |      "input": [
281 |       "from theano import tensor as T\n",
282 |       "raise NotImplementedError(\"TODO: add any other imports you need\")\n",
283 |       "\n",
284 |       "\n",
285 |       "def evaluate(x, y, expr, x_value, y_value):\n",
286 |       "    \"\"\"\n",
287 |       "    x: A theano variable\n",
288 |       "    y: A theano variable\n",
289 |       "    expr: A theano expression involving x and y\n",
290 |       "    x_value: A numpy value\n",
291 |       "    y_value: A numpy value\n",
292 |       "\n",
293 |       "    Returns the value of expr when x_value is substituted for x\n",
294 |       "    and y_value is substituted for y\n",
295 |       "    \"\"\"\n",
296 |       "\n",
297 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
298 |       "\n",
299 |       "\n",
300 |       "# The following code use your code and test it.\n",
301 |       "x = T.iscalar()\n",
302 |       "y = T.iscalar()\n",
303 |       "z = x + y\n",
304 |       "assert evaluate(x, y, z, 1, 2) == 3\n",
305 |       "print \"SUCCESS!\""
306 |      ],
307 |      "language": "python",
308 |      "metadata": {},
309 |      "outputs": []
310 |     },
311 |     {
312 |      "cell_type": "code",
313 |      "collapsed": false,
314 |      "input": [
315 |       "%load 11_function_soln.py"
316 |      ],
317 |      "language": "python",
318 |      "metadata": {},
319 |      "outputs": []
320 |     },
321 |     {
322 |      "cell_type": "markdown",
323 |      "metadata": {},
324 |      "source": [
325 |       "#### Exercice 2.2\n",
326 |       "\n",
327 |       "This exercice makes you use shared variables. You must create some and update them by swapping 2 shared variables value."
328 |      ]
329 |     },
330 |     {
331 |      "cell_type": "code",
332 |      "collapsed": false,
333 |      "input": [
334 |       "import numpy as np\n",
335 |       "raise NotImplementedError(\"TODO: add any other imports you need\")\n",
336 |       "\n",
337 |       "\n",
338 |       "def make_shared(shape):\n",
339 |       "    \"\"\"\n",
340 |       "    Returns a theano shared variable containing a tensor of the specified\n",
341 |       "    shape.\n",
342 |       "    You can use any value you want.\n",
343 |       "    \"\"\"\n",
344 |       "    raise NotImplementedError(\"TODO: implement the function\")\n",
345 |       "\n",
346 |       "\n",
347 |       "def exchange_shared(a, b):\n",
348 |       "    \"\"\"\n",
349 |       "    a: a theano shared variable\n",
350 |       "    b: a theano shared variable\n",
351 |       "    Uses get_value and set_value to swap the values stored in a and b\n",
352 |       "    \"\"\"\n",
353 |       "    raise NotImplementedError(\"TODO: implement the function\")\n",
354 |       "\n",
355 |       "\n",
356 |       "def make_exchange_func(a, b):\n",
357 |       "    \"\"\"\n",
358 |       "    a: a theano shared variable\n",
359 |       "    b: a theano shared variable\n",
360 |       "    Returns f\n",
361 |       "    where f is a theano function, that, when called, swaps the\n",
362 |       "    values in a and b\n",
363 |       "    f should not return anything\n",
364 |       "    \"\"\"\n",
365 |       "    raise NotImplementedError(\"TODO: implement the function\")\n",
366 |       "\n",
367 |       "\n",
368 |       "# The following code use your code and test it.\n",
369 |       "a = make_shared((5, 4, 3))\n",
370 |       "assert a.get_value().shape == (5, 4, 3)\n",
371 |       "b = make_shared((5, 4, 3))\n",
372 |       "assert a.get_value().shape == (5, 4, 3)\n",
373 |       "a.set_value(np.zeros((5, 4, 3), dtype=a.dtype))\n",
374 |       "b.set_value(np.ones((5, 4, 3), dtype=b.dtype))\n",
375 |       "exchange_shared(a, b)\n",
376 |       "assert np.all(a.get_value() == 1.)\n",
377 |       "assert np.all(b.get_value() == 0.)\n",
378 |       "f = make_exchange_func(a, b)\n",
379 |       "rval = f()\n",
380 |       "assert isinstance(rval, list)\n",
381 |       "assert len(rval) == 0\n",
382 |       "assert np.all(a.get_value() == 0.)\n",
383 |       "assert np.all(b.get_value() == 1.)\n",
384 |       "\n",
385 |       "print \"SUCCESS!\""
386 |      ],
387 |      "language": "python",
388 |      "metadata": {},
389 |      "outputs": []
390 |     },
391 |     {
392 |      "cell_type": "code",
393 |      "collapsed": false,
394 |      "input": [
395 |       "%load 12_shared_soln.py"
396 |      ],
397 |      "language": "python",
398 |      "metadata": {},
399 |      "outputs": []
400 |     },
401 |     {
402 |      "cell_type": "markdown",
403 |      "metadata": {},
404 |      "source": [
405 |       "#### Exercice 2.3\n",
406 |       "\n",
407 |       "Something weird happens when you run this code, find the problem. Explain what is happening.\n",
408 |       "\n",
409 |       "Hint: some compilation modes make the problem more obvious than others."
410 |      ]
411 |     },
412 |     {
413 |      "cell_type": "code",
414 |      "collapsed": false,
415 |      "input": [
416 |       "import numpy as np\n",
417 |       "from theano import function\n",
418 |       "from theano import tensor as T\n",
419 |       "x = T.vector()\n",
420 |       "y = T.vector()\n",
421 |       "z = T.zeros_like(y)\n",
422 |       "a = x + z\n",
423 |       "f = function([x, y], a)\n",
424 |       "output = f(np.zeros((1,), dtype=x.dtype), np.zeros((2,), dtype=y.dtype))"
425 |      ],
426 |      "language": "python",
427 |      "metadata": {},
428 |      "outputs": []
429 |     },
430 |     {
431 |      "cell_type": "code",
432 |      "collapsed": false,
433 |      "input": [
434 |       "%load 13_bug_soln.py"
435 |      ],
436 |      "language": "python",
437 |      "metadata": {},
438 |      "outputs": []
439 |     },
440 |     {
441 |      "cell_type": "markdown",
442 |      "metadata": {},
443 |      "source": [
444 |       "## 3. Modifying Graphs\n",
445 |       "\n",
446 |       "#### Exercice 3.1\n",
447 |       "\n",
448 |       "This exercice makes you use the Theano symbolic grad."
449 |      ]
450 |     },
451 |     {
452 |      "cell_type": "code",
453 |      "collapsed": false,
454 |      "input": [
455 |       "from theano import tensor as T\n",
456 |       "\n",
457 |       "\n",
458 |       "def grad_sum(x, y, z):\n",
459 |       "    \"\"\"\n",
460 |       "    x: A theano variable\n",
461 |       "    y: A theano variable\n",
462 |       "    z: A theano expression involving x and y\n",
463 |       "\n",
464 |       "    Returns dz / dx + dz / dy\n",
465 |       "    \"\"\"\n",
466 |       "    raise NotImplementedError(\"TODO: implement this function.\")\n",
467 |       "\n",
468 |       "\n",
469 |       "# The following code use your code and test it.\n",
470 |       "x = T.scalar()\n",
471 |       "y = T.scalar()\n",
472 |       "z = x + y\n",
473 |       "s = grad_sum(x, y, z)\n",
474 |       "assert s.eval({x: 0, y: 0}) == 2\n",
475 |       "print \"SUCCESS!\""
476 |      ],
477 |      "language": "python",
478 |      "metadata": {},
479 |      "outputs": []
480 |     },
481 |     {
482 |      "cell_type": "code",
483 |      "collapsed": false,
484 |      "input": [
485 |       "%load 21_grad_soln.py"
486 |      ],
487 |      "language": "python",
488 |      "metadata": {},
489 |      "outputs": []
490 |     },
491 |     {
492 |      "cell_type": "markdown",
493 |      "metadata": {},
494 |      "source": [
495 |       "#### Exercice 3.2\n",
496 |       "\n",
497 |       "This exercice is here to show you how to navigate in a Theano graph. You will need to find the inputs used to produce\n",
498 |       "some computation."
499 |      ]
500 |     },
501 |     {
502 |      "cell_type": "code",
503 |      "collapsed": false,
504 |      "input": [
505 |       "import numpy as np\n",
506 |       "from theano import tensor as T\n",
507 |       "raise NotImplementedError(\"Add any imports you need.\")\n",
508 |       "\n",
509 |       "\n",
510 |       "def arg_to_softmax(prob):\n",
511 |       "    \"\"\"\n",
512 |       "    Oh no! Someone has passed you the probability output,\n",
513 |       "    \"prob\", of a softmax function, and you want the unnormalized\n",
514 |       "    log probability--the argument to the softmax.\n",
515 |       "\n",
516 |       "    Verify that prob really is the output of a softmax. Raise a\n",
517 |       "    TypeError if it is not.\n",
518 |       "\n",
519 |       "    If it is, return the argument to the softmax.\n",
520 |       "    \"\"\"\n",
521 |       "\n",
522 |       "    raise NotImplementedError(\"Implement this function.\")\n",
523 |       "\n",
524 |       "\n",
525 |       "x = np.ones((5, 4))\n",
526 |       "try:\n",
527 |       "    arg_to_softmax(x)\n",
528 |       "    raise Exception(\"You should have raised an error.\")\n",
529 |       "except TypeError:\n",
530 |       "    pass\n",
531 |       "\n",
532 |       "x = T.matrix()\n",
533 |       "try:\n",
534 |       "    arg_to_softmax(x)\n",
535 |       "    raise Exception(\"You should have raised an error.\")\n",
536 |       "except TypeError:\n",
537 |       "    pass\n",
538 |       "\n",
539 |       "y = T.nnet.sigmoid(x)\n",
540 |       "try:\n",
541 |       "    arg_to_softmax(y)\n",
542 |       "    raise Exception(\"You should have raised an error.\")\n",
543 |       "except TypeError:\n",
544 |       "    pass\n",
545 |       "\n",
546 |       "y = T.nnet.softmax(x)\n",
547 |       "rval = arg_to_softmax(y)\n",
548 |       "assert rval is x\n",
549 |       "\n",
550 |       "print \"SUCCESS!\""
551 |      ],
552 |      "language": "python",
553 |      "metadata": {},
554 |      "outputs": []
555 |     },
556 |     {
557 |      "cell_type": "code",
558 |      "collapsed": false,
559 |      "input": [
560 |       "%load 22_traverse_soln.py"
561 |      ],
562 |      "language": "python",
563 |      "metadata": {},
564 |      "outputs": []
565 |     },
566 |     {
567 |      "cell_type": "markdown",
568 |      "metadata": {},
569 |      "source": [
570 |       "## 4. Debugging\n",
571 |       "\n",
572 |       "#### Exercice 4.1\n",
573 |       "\n",
574 |       "The code in the next cell has a bug.  Run the cell to see it.\n",
575 |       "\n",
576 |       "Use Theano flags or extra parameters to function() to find the cause.\n",
577 |       "\n",
578 |       "Don't try to find the bug by inspection of prints, the point of the exercice is to get you to work with the theano debugging tools that will be required for more complex code.\n",
579 |       "\n",
580 |       "To modify the environement for a cell use the `%env` magic command like this:\n",
581 |       "\n",
582 |       "    %env THEANO_FLAGS=floatX=float32\n",
583 |       "\n",
584 |       "You will have to restart the ipython kernel from the Kernel menu above to get the enviroment changes to work."
585 |      ]
586 |     },
587 |     {
588 |      "cell_type": "code",
589 |      "collapsed": false,
590 |      "input": [
591 |       "import numpy as np\n",
592 |       "from theano import function\n",
593 |       "from theano import tensor as T\n",
594 |       "a = T.vector()\n",
595 |       "b = T.log(a)\n",
596 |       "c = T.nnet.sigmoid(b)\n",
597 |       "d = T.sqrt(c)\n",
598 |       "e = T.concatenate((d, c), axis=0)\n",
599 |       "f = b * c * d\n",
600 |       "g = e + f\n",
601 |       "h = g / c\n",
602 |       "fn = function([a], h)\n",
603 |       "fn(np.ones((3,)).astype(a.dtype))"
604 |      ],
605 |      "language": "python",
606 |      "metadata": {},
607 |      "outputs": []
608 |     },
609 |     {
610 |      "cell_type": "code",
611 |      "collapsed": false,
612 |      "input": [
613 |       "%load 31_debug_soln.py"
614 |      ],
615 |      "language": "python",
616 |      "metadata": {},
617 |      "outputs": []
618 |     }
619 |    ],
620 |    "metadata": {}
621 |   }
622 |  ]
623 | }


--------------------------------------------------------------------------------
/opt.py:
--------------------------------------------------------------------------------
 1 | from scalmulop import ScalMulV1
 2 | from doubleop import DoubleOp
 3 | from doublecop import DoubleCOp
 4 | from doublec import DoubleC
 5 | from doublecgpu import DoubleCGpu
 6 | 
 7 | from theano.gof import local_optimizer
 8 | from theano.tensor.opt import register_specialize
 9 | from theano.gpuarray.opt import (register_opt, op_lifter,
10 |                                  register_opt2)
11 | 
12 | 
13 | @register_specialize
14 | @local_optimizer([ScalMulV1])
15 | def local_scalmul_double(node):
16 |     if not (isinstance(node.op, ScalMulV1) and
17 |                 node.op.scal == 2):
18 |         return False
19 | 
20 |     return [DoubleOp()(node.inputs[0])]
21 | 
22 | 
23 | @register_opt('fast_compile')
24 | @op_lifter([DoubleOp, DoubleC, DoubleCOp])
25 | @register_opt2([DoubleOp, DoubleC, DoubleCOp],
26 |                'fast_compile')
27 | def local_scalmul_double_gpu(op, context_name, inputs,
28 |                              outputs):
29 |     return DoubleCGpu
30 | 


--------------------------------------------------------------------------------
/params.py:
--------------------------------------------------------------------------------
 1 | from theano import Op
 2 | 
 3 | class MyOp(Op):
 4 |     params_type = # a params type here
 5 | 
 6 |     def __init__(self, ...):
 7 |         # Get some params
 8 | 
 9 |     # signature change
10 |     def perform(self, node, inputs, out_storage, params):
11 |         # do something
12 | 
13 |     def get_params(self, node):
14 |         # Return a params object
15 | 


--------------------------------------------------------------------------------
/presentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/abergeron/ccw_tutorial_theano/f92aa8edbb567c9ac09149a382858f841a4a7749/presentation.pdf


--------------------------------------------------------------------------------
/presentation.tex:
--------------------------------------------------------------------------------
  1 | \documentclass[utf8x,hyperref={pdfpagelabels=false}]{beamer}
  2 | 
  3 | \usepackage[utf8x]{inputenc}
  4 | \usepackage[OT1]{fontenc}
  5 | \usepackage{graphicx}
  6 | \usepackage{amsmath}
  7 | \usepackage{listings}
  8 | \usepackage{hyperref}
  9 | \usepackage{xcolor}
 10 | \usepackage{tikz}
 11 | \usetikzlibrary{shapes.arrows}
 12 | %\logo{\includegraphics[width=.8in]{UdeM_NoirBleu_logo_Marie_crop}}
 13 | 
 14 | 
 15 | \usetheme{Malmoe}  % Now it's a beamer presentation with the lisa theme!
 16 | \usecolortheme{beaver}
 17 | \setbeamertemplate{footline}[page number]
 18 | \setbeamertemplate{navigation symbols}{}
 19 | 
 20 | \lstloadlanguages{Python}
 21 | 
 22 | \definecolor{darkgreen}{RGB}{0,93,21}
 23 | \definecolor{greenblue}{RGB}{40,110,126}
 24 | \definecolor{lightgray}{RGB}{246,246,246}
 25 | \definecolor{bordergray}{RGB}{193,193,193}
 26 | \definecolor{lightblue}{RGB}{0,114,168}
 27 | \definecolor{methblue}{RGB}{0,31,108}
 28 | 
 29 | \newcommand{\superscript}[1]{\ensuremath{^{\textrm{#1}}}}
 30 | 
 31 | \mode<presentation>
 32 | 
 33 | \title{Introduction to Theano}
 34 | \author{%
 35 | \footnotesize
 36 | Arnaud Bergeron \newline
 37 | (slides adapted by Frédéric Bastien from slides by Ian G.) \newline
 38 | (further adapted by Arnaud Bergeron)
 39 | }
 40 | \date{February 26, 2015}
 41 | 
 42 | \lstdefinestyle{theano}{
 43 | language=Python,
 44 | basicstyle=\fontfamily{pcr}\selectfont\footnotesize,
 45 | keywordstyle=\color{darkgreen}\bfseries,
 46 | commentstyle=\color{greenblue}\itshape,
 47 | %commentstyle=\color{blue}\itshape,
 48 | stringstyle=\color{violet},
 49 | showstringspaces=false,
 50 | tabsize=4,
 51 | backgroundcolor=\color{lightgray},
 52 | frame=single,
 53 | emph={[2]__init__,make_node,perform,infer_shape,c_code,make_thunk,grad,R_op},emphstyle={[2]\color{methblue}},
 54 | emph={[3]self},emphstyle={[3]\color{darkgreen}},
 55 | moredelim=**[is][{\color{red}}]{`}{`}
 56 | }
 57 | 
 58 | % We don't have code till the end of the file.
 59 | \lstdefinestyle{output}{
 60 | language={},
 61 | basicstyle=\ttfamily\footnotesize,
 62 | backgroundcolor=\color{white},
 63 | frame={},
 64 | breaklines=true,
 65 | emph={[2]},
 66 | emph={[3]},
 67 | }
 68 | 
 69 | \lstset{style=theano}
 70 | 
 71 | \newcommand{\code}[1]{\lstinline[emph={[2]}]|#1|}
 72 | 
 73 | \begin{document}
 74 | 
 75 | \begin{frame}[plain]
 76 |  \titlepage
 77 | % \vspace{-5em}
 78 | % \includegraphics[width=1in]{../hpcs2011_tutorial/pics/lisabook_logo_text_3.png}
 79 | % \hfill
 80 | % \includegraphics[width=.8in]{../hpcs2011_tutorial/pics/UdeM_NoirBleu_logo_Marie_crop}
 81 | \end{frame}
 82 | 
 83 | \section{Outline}
 84 | \begin{frame}{High level}\setcounter{page}{1}
 85 |   \begin{itemize}
 86 |   \item Overview of library (3 min)
 87 |   \item Building expressions (30 min)
 88 |   \item Compiling and running expressions (30 min)
 89 |   \item Modifying expressions (25 min)
 90 |   \item Debugging (30 min)
 91 |   \item Citing Theano (2 min)
 92 |   \end{itemize}
 93 | \end{frame}
 94 | 
 95 | 
 96 | \begin{frame}{Overview of Library}
 97 |   Theano is many things
 98 |   \begin{itemize}
 99 |   \item Language
100 |   \item Compiler
101 |   \item Python library
102 |   \end{itemize}
103 | \end{frame}
104 | 
105 | \begin{frame}{Overview}
106 |   Theano language:
107 |   \begin{itemize}
108 |   \item Operations on scalar, vector, matrix, tensor, and sparse variables
109 |   \item Linear algebra
110 |   \item Element-wise nonlinearities
111 |   \item Convolution
112 |   \item Extensible
113 |   \end{itemize}
114 | \end{frame}
115 | 
116 | \begin{frame}[fragile]{Overview}
117 |   Using Theano:
118 |   \begin{itemize}
119 |   \item define expression $f(x,y) = x + y$
120 | \begin{lstlisting}
121 | >>> z = x + y
122 | \end{lstlisting}
123 |   \item compile expression
124 | \begin{lstlisting}
125 | >>> f = theano.function([x, y], z)
126 | \end{lstlisting}
127 |   \item execute expression
128 | \begin{lstlisting}
129 | >>> f(1, 2)
130 | 3
131 | \end{lstlisting}
132 |   \end{itemize}
133 | \end{frame}
134 | 
135 | \section{Building}
136 | 
137 | \begin{frame}{Building expressions}
138 |   \begin{itemize}
139 |   \item Scalars
140 |   \item Vectors
141 |   \item Matrices
142 |   \item Tensors
143 |   \item Broadcasting
144 |   \item Reduction
145 |   \item Dimshuffle
146 |   \end{itemize}
147 | \end{frame}
148 | 
149 | \begin{frame}[fragile]{Scalar math}
150 | \begin{lstlisting}
151 | from theano import tensor as T
152 | x = T.scalar()
153 | y = T.scalar()
154 | z = x+y
155 | w = z*x
156 | a = T.sqrt(w)
157 | b = T.exp(a)
158 | c = a ** b
159 | d = T.log(c)
160 | \end{lstlisting}
161 | \end{frame}
162 | 
163 | \begin{frame}[fragile]{Vector math}
164 | \begin{lstlisting}
165 | from theano import tensor as T
166 | x = T.vector()
167 | y = T.vector()
168 | # Scalar math applied elementwise
169 | a = x * y
170 | # Vector dot product
171 | b = T.dot(x, y)
172 | # Broadcasting
173 | c = a + b
174 | \end{lstlisting}
175 | \end{frame}
176 | 
177 | \begin{frame}[fragile]{Matrix math}
178 | \begin{lstlisting}
179 | from theano import tensor as T
180 | x = T.matrix()
181 | y = T.matrix()
182 | a = T.vector()
183 | # Matrix-matrix product
184 | b = T.dot(x, y)
185 | # Matrix-vector product
186 | c = T.dot(x, a)
187 | \end{lstlisting}
188 | \end{frame}
189 | 
190 | \begin{frame}[fragile]{Tensors}
191 |    \begin{itemize}
192 |     \item Dimensionality defined by length of ``broadcastable'' argument
193 |     \item Can add (or do other elemwise op) on two
194 |       tensors with same dimensionality
195 |     \item Duplicate tensors along broadcastable axes to
196 |       make size match
197 |   \end{itemize}
198 | \begin{lstlisting}
199 | from theano import tensor as T
200 | tensor3 = T.TensorType(
201 |     broadcastable=(False, False, False),
202 |     dtype='float32')
203 | x = tensor3()
204 | \end{lstlisting}
205 | \end{frame}
206 | 
207 | \begin{frame}{Broadcasting}
208 | \begin{tabular}{lcccccccl}
209 |     &
210 |     \begin{tabular}{cc}
211 |         1 & 2 \\
212 |         3 & 4 \\
213 |         5 & 6 \\
214 |     \end{tabular} &
215 |     + &
216 |     \begin{tabular}{cc}
217 |         1 & 2 \\
218 |     \end{tabular} &
219 |     = &
220 |     \begin{tabular}{cc}
221 |         1 & 2 \\
222 |         3 & 4 \\
223 |         5 & 6 \\
224 |     \end{tabular} &
225 |     + &
226 |     \begin{tabular}{cc}
227 |         1 & 2 \\
228 |         \color{blue} 1 & \color{blue} 2 \\
229 |         \color{blue} 1 & \color{blue} 2 \\
230 |     \end{tabular} &
231 |     \hspace{-1.3em}
232 |     \tikz[baseline={([yshift=-.5ex]current bounding box.center)}]{
233 |         \draw [->, very thick] (0,0) -- (0,-1.2);
234 |     } \\[1.5em]
235 |     shape: & (3, 2) & & (2,) & & (3, 2) & & ({\color{blue}3}, 2) &
236 | \end{tabular}
237 | \vfill
238 | \begin{itemize}
239 |     \item Pad shape with 1s on the left : $(2,) \equiv (1,2)$
240 |     \item Two dimensions are compatible when they have the same length or one of them is broadcastable
241 |     \item broadcastable dimensions must have a length of 1
242 |     \item Adding tensors of shape (8, 1, 6, 1) and (7, 1, 5) gives a tensor of shape (8, 7, 6, 5)
243 | \end{itemize}
244 | \end{frame}
245 | 
246 | \begin{frame}[fragile]{Reductions}
247 | \begin{lstlisting}
248 | from theano import tensor as T
249 | tensor3 = T.TensorType(
250 |     broadcastable=(False, False, False),
251 |     dtype='float32')
252 | x = tensor3()
253 | total = x.sum()
254 | marginals = x.sum(axis=(0, 2))
255 | mx = x.max(axis=1)
256 | \end{lstlisting}
257 | \end{frame}
258 | 
259 | \begin{frame}[fragile]{Dimshuffle}
260 | \begin{lstlisting}
261 | from theano import tensor as T
262 | tensor3 = T.TensorType(
263 |     broadcastable=(False, False, False),
264 |     dtype='float32')
265 | x = tensor3()
266 | y = x.dimshuffle((2, 1, 0))
267 | a = T.matrix()
268 | b = a.T
269 | # Same as b
270 | c = a.dimshuffle((0, 1))
271 | # Adding to larger tensor
272 | d = a.dimshuffle((0, 1, 'x'))
273 | e = a + d
274 | \end{lstlisting}
275 | \end{frame}
276 | 
277 | \begin{frame}{Exercices}
278 | Work through the "Building Expressions" section of the ipython notebook.
279 | \end{frame}
280 | 
281 | \section{Compiling/Running}
282 | \begin{frame}{Compiling and running expression}
283 |   \begin{itemize}
284 |   \item \code{theano.function}
285 |   \item shared variables and updates
286 |   \item compilation modes
287 |   \item compilation for GPU
288 |   \item optimizations
289 |   \end{itemize}
290 | \end{frame}
291 | 
292 | \begin{frame}[fragile]{\code{theano.function}}
293 | 
294 | \begin{lstlisting}
295 | >>> from theano import tensor as T
296 | >>> x = T.scalar()
297 | >>> y = T.scalar()
298 | >>> from theano import function
299 | >>> # first arg is list of symbolic inputs
300 | >>> # second arg is symbolic output
301 | >>> f = function([x, y], x + y)
302 | >>> # Call it with numerical values
303 | >>> # Get a numerical output
304 | >>> f(1., 2.)
305 | array(3.0)
306 | \end{lstlisting}
307 | \end{frame}
308 | 
309 | \begin{frame}{Shared variables}
310 |   \begin{itemize}
311 |   \item It’s hard to do much with purely functional programming
312 |   \item \emph{shared variables} add just a little bit of imperative programming
313 |   \item A \emph{shared variable} is a buffer that stores a numerical value for a Theano variable
314 |   \item Can write to as many shared variables as you want, once each, at the end of the function
315 |   \item  Modify outside Theano function with \code{get_value()} and \code{set_value()} methods.
316 |   \end{itemize}
317 | \end{frame}
318 | 
319 | \begin{frame}[fragile]{Shared variable example}
320 | \begin{lstlisting}
321 | >>> from theano import shared
322 | >>> x = shared(0.)
323 | # Can also use a dict for more complex code
324 | >>> updates = [(x,  x + 1)]
325 | >>> f = function([], updates=updates)
326 | >>> f()
327 | >>> x.get_value()
328 | 1.0
329 | >>> x.set_value(100.)
330 | >>> f()
331 | >>> x.get_value()
332 | 101.0
333 | \end{lstlisting}
334 | \end{frame}
335 | 
336 | \begin{frame}{Which dict?}
337 |   \begin{itemize}
338 |   \item Use theano.compat.python2x.OrderedDict
339 |   \item Not collections.OrderedDict
340 |   \begin{itemize}
341 |   \item This isn’t available in older versions of python, and will limit the portability of your code.
342 |   \end{itemize}
343 |   \item Not \code{\{\}} aka dict
344 |   \begin{itemize}
345 |   \item The iteration order of this built-in class is not deterministic so if Theano accepted this, the same script could compile different C programs each time you run it.
346 |   \end{itemize}
347 |   \end{itemize}
348 | \end{frame}
349 | 
350 | \begin{frame}{Compilation modes}
351 |   \begin{itemize}
352 |   \item Can compile in different modes to get different kinds of programs
353 |   \item Can specify these modes very precisely with arguments to \code{theano.function()}
354 |   \item Can use a few quick presets with environment variable flags
355 |   \end{itemize}
356 | \end{frame}
357 | 
358 | \begin{frame}{Example preset compilation modes}
359 |   \begin{description}[FAST\_RUN]
360 |   \item[FAST\_RUN] Default. Spends a lot of time on
361 | compilation to get an executable that runs
362 | fast.
363 |   \item[FAST\_COMPILE] Doesn’t spend much time compiling.
364 | Executable usually uses python
365 | instead of compiled C code. Runs slow.
366 |   \item[DEBUG\_MODE] Adds lots of checks.
367 | Raises error messages in situations other modes don't check for.
368 |   \end{description}
369 | \end{frame}
370 | 
371 | \begin{frame}{Compilation for GPU}
372 |   \begin{itemize}
373 |   \item Theano's current back-end only supports 32 bit on GPU
374 |   \item CUDA supports 64 bit, but is slow in gamer card
375 |   \item \code{T.fscalar}, \code{T.fvector}, \code{T.fmatrix} are all 32 bit
376 |   \item \code{T.scalar}, \code{T.vector}, \code{T.matrix} resolve to 32 or 64 bit depending on theano’s floatX flag
377 |   \item floatX is float64 by default, set it to float32
378 |   \item Set the device flag to gpu (or a specific gpu, like gpu0)
379 |   \item Optional: warn\_float64=\{'ignore', 'warn', 'raise', 'pdb'\}
380 |   \end{itemize}
381 | \end{frame}
382 | 
383 | \begin{frame}{Optimizations}
384 |   \begin{itemize}
385 |   \item Theano changes the symbolic expressions
386 |     you write before converting them to C code
387 |   \item It makes them faster
388 |   \begin{itemize}
389 |   \item $(x+y)+(x+y) \to 2\times(x + y)$
390 |   \end{itemize}
391 |   \item It makes them more stable
392 |   \begin{itemize}
393 |   \item $\exp(a)/\sum{\exp(a)} \to \operatorname{softmax}(a)$
394 |   \end{itemize}
395 |   \end{itemize}
396 | \end{frame}
397 | 
398 | \begin{frame}[fragile]{Optimizations (2)}
399 | Sometimes optimizations discard error checking and produce incorrect output rather than an exception.
400 | \begin{lstlisting}
401 | >>> x = T.scalar()
402 | >>> f = function([x], x/x)
403 | >>> f(0.)
404 | array(1.0)
405 | \end{lstlisting}
406 | \end{frame}
407 | 
408 | \begin{frame}{Exercises}
409 | Work through the "Compiling and Running" section of the ipython notebook.
410 | \end{frame}
411 | 
412 | \section{Modifying expressions}
413 | \begin{frame}{Modifying expressions}
414 |   \begin{itemize}
415 |   \item The \code{grad()} method
416 |   \item Variable nodes
417 |   \item Types
418 |   \item Ops
419 |   \item Apply nodes
420 |   \end{itemize}
421 | \end{frame}
422 | 
423 | \begin{frame}[fragile]{The \code{grad()} method}
424 | \begin{lstlisting}
425 | >>> x = T.scalar('x')
426 | >>> y = 2. * x
427 | >>> g = T.grad(y, x)
428 | >>> from theano.printing import min_informative_str
429 | # Print the unoptimized graph
430 | >>> print min_informative_str(g)
431 | A. Elemwise{mul}
432 |  B. Elemwise{second,no_inplace}
433 |   C. Elemwise{mul,no_inplace}
434 |    D. TensorConstant{2.0}
435 |    E. x
436 |   F. TensorConstant{1.0}
437 |  <D>
438 | \end{lstlisting}
439 | \end{frame}
440 | 
441 | \begin{frame}[fragile]{The \code{grad()} method}
442 | \begin{lstlisting}
443 | >>> x = T.scalar('x')
444 | >>> y = 2. * x
445 | >>> g = T.grad(y, x)
446 | >>> from theano.printing import min_informative_str
447 | # Print the optimized graph
448 | >>> f = theano.function([x], g)
449 | >>> theano.printing.debugprint(f)
450 | DeepCopyOp [@A] ''   0
451 |  |TensorConstant{2.0} [@B]
452 | \end{lstlisting}
453 | \end{frame}
454 | 
455 | \begin{frame}{Theano variables}
456 |   \begin{itemize}
457 |   \item A \emph{variable} is a theano expression.
458 |   \item Can come from \code{T.scalar()}, \code{T.matrix()}, etc.
459 |   \item Can come from doing operations on other variables.
460 |   \item Every variable has a type field, identifying its \emph{type}, such as \code{TensorType((True, False), 'float32')}
461 |   \item Variables can be thought of as nodes in a graph
462 |   \end{itemize}
463 | \end{frame}
464 | 
465 | \begin{frame}{Ops}
466 |   \begin{itemize}
467 |   \item  An Op is any class that describes a function operating on some variables
468 |   \item Can call the op on some variables to get a
469 | new variable or variables
470 |   \item An Op class can supply other forms of
471 | information about the function, such as its
472 | derivative
473 |   \end{itemize}
474 | \end{frame}
475 | 
476 | \begin{frame}{Apply nodes}
477 |   \begin{itemize}
478 |   \item The Apply class is a specific instance of an application of an Op.
479 |   \item Notable fields:
480 |     \begin{description}[\texttt{outputs}]
481 |     \item[\texttt{op}] The Op to be applied
482 |     \item[\texttt{inputs}] The Variables to be used as input
483 |     \item[\texttt{outputs}] The Variables produced
484 |     \end{description}
485 |   \item The \code{owner} field on variables identifies the Apply that created it.
486 |   \item Variable and Apply instances are nodes and owner/
487 |     inputs/outputs identify edges in a Theano graph.
488 |   \end{itemize}
489 | \end{frame}
490 | 
491 | \begin{frame}{Exercises}
492 | Work through the "Modifying" section in the ipython notebook.
493 | \end{frame}
494 | 
495 | \section{Debugging}
496 | \begin{frame}{Debugging}
497 |   \begin{itemize}
498 |   \item DEBUG\_MODE
499 |   \item Error message
500 |   \item \code{theano.printing.debugprint()}
501 |   \item \code{min_informative_str()}
502 |   \item compute\_test\_value
503 |   \item Accessing the FunctionGraph
504 |   \end{itemize}
505 | \end{frame}
506 | 
507 | \begin{frame}[fragile]{Error message: code}
508 | \begin{lstlisting}
509 | import numpy as np
510 | import theano
511 | import theano.tensor as T
512 | x = T.vector()
513 | y = T.vector()
514 | z = x + x
515 | z = z + y
516 | f = theano.function([x, y], z)
517 | f(np.ones((2,)), np.ones((3,)))
518 | \end{lstlisting}
519 | \end{frame}
520 | 
521 | \begin{frame}[fragile,allowframebreaks]{Error message}
522 | \vspace{1em}
523 | \begin{lstlisting}[style=output]
524 | Traceback (most recent call last):
525 |   File "test.py", line 9, in <module>
526 |     f(np.ones((2,)), np.ones((3,)))
527 |   File "/Users/anakha/Library/Python/2.7/site-packages/theano/compile/function_module.py", line 606, in __call__
528 |     storage_map=self.fn.storage_map)
529 |   File "/Users/anakha/Library/Python/2.7/site-packages/theano/compile/function_module.py", line 595, in __call__
530 |     outputs = self.fn()
531 | ValueError: Input dimension mis-match. (input[0].shape[0] = 3, input[1].shape[0] = 2)
532 | Apply node that caused the error: Elemwise{add,no_inplace}(<TensorType(float64, vector)>, <TensorType(float64, vector)>, <TensorType(float64, vector)>)
533 | Inputs types: [TensorType(float64, vector), TensorType(float64, vector), TensorType(float64, vector)]
534 | Inputs shapes: [(3,), (2,), (2,)]
535 | Inputs strides: [(8,), (8,), (8,)]
536 | Inputs values: [array([ 1.,  1.,  1.]), array([ 1.,  1.]), array([ 1.,  1.])]
537 | 
538 | HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag  'optimizer=fast_compile'. If that does not work, Theano  optimizations can be disabled with 'optimizer=None'.
539 | HINT: Use the Theano flag 'exception_verbosity=high'  for a debugprint and storage map footprint of this apply node.
540 | \end{lstlisting}
541 | \end{frame}
542 | 
543 | \begin{frame}[fragile]{Error message: exception\_verbosity=high}
544 | \begin{lstlisting}[style=output]
545 | Debugprint of the apply node: 
546 | Elemwise{add,no_inplace} [@A] <TensorType(float64, vector)> ''   
547 |  |<TensorType(float64, vector)> [@B] <TensorType(float64, vector)>
548 |  |<TensorType(float64, vector)> [@C] <TensorType(float64, vector)>
549 |  |<TensorType(float64, vector)> [@C] <TensorType(float64, vector)>
550 | 
551 | Storage map footprint:
552 |  - <TensorType(float64, vector)>, Shape: (3,), ElemSize: 8 Byte(s), TotalSize: 24 Byte(s)
553 |  - <TensorType(float64, vector)>, Shape: (2,), ElemSize: 8 Byte(s), TotalSize: 16 Byte(s)
554 | \end{lstlisting}
555 | \end{frame}
556 | 
557 | \begin{frame}[fragile]{Error message: optimizer=fast\_compile}
558 | \begin{lstlisting}[style=output]
559 | Backtrace when the node is created:
560 |   File "test.py", line 7, in <module>
561 |     z = z + y
562 | \end{lstlisting}
563 | \end{frame}
564 | 
565 | \begin{frame}[fragile]{debugprint}
566 | \begin{lstlisting}
567 | >>> from theano.printing import debugprint
568 | >>> debugprint(a)
569 | Elemwise{mul,no_inplace} [@A] ''
570 |  |TensorConstant{2.0} [@B]
571 |  |Elemwise{add,no_inplace} [@C] 'z'
572 |    |<TensorType(float64, scalar)> [@D]
573 |    |<TensorType(float64, scalar)> [@E]
574 | \end{lstlisting}
575 | \end{frame}
576 | 
577 | \begin{frame}[fragile]{min\_informative\_str}
578 | \begin{lstlisting}
579 | >>> x = T.scalar()
580 | >>> y = T.scalar()
581 | >>> z = x + y
582 | >>> z.name = 'z'
583 | >>> a = 2. * z
584 | >>> from theano.printing import min_informative_str
585 | >>> print min_informative_str(a)
586 | A. Elemwise{mul,no_inplace}
587 |  B. TensorConstant{2.0}
588 |  C. z
589 | \end{lstlisting}
590 | \end{frame}
591 | 
592 | \begin{frame}[fragile]{compute\_test\_value}
593 | \begin{lstlisting}
594 | >>> from theano import config
595 | >>> config.compute_test_value = 'raise'
596 | >>> x = T.vector()
597 | >>> import numpy as np
598 | >>> x.tag.test_value = np.ones((2,))
599 | >>> y = T.vector()
600 | >>> y.tag.test_value = np.ones((3,))
601 | >>> x + y
602 | ...
603 | ValueError: Input dimension mis-match.
604 | (input[0].shape[0] = 2, input[1].shape[0] = 3)
605 | \end{lstlisting}
606 | \end{frame}
607 | 
608 | \begin{frame}[fragile]{Accessing a function’s fgraph}
609 | \begin{lstlisting}
610 | >>> x = T.scalar()
611 | >>> y = x / x
612 | >>> f = function([x], y)
613 | >>> debugprint(f.maker.fgraph.outputs[0])
614 | DeepCopyOp [@A] ''
615 |  |TensorConstant{1.0} [@B]
616 | \end{lstlisting}
617 | \end{frame}
618 | 
619 | \begin{frame}{Exercises}
620 | Work through the "Debugging" section of the ipython notebook.
621 | \end{frame}
622 | 
623 | \section*{}
624 | \begin{frame}{Citing Theano}
625 | Please cite both of the following papers in all work that uses Theano:
626 |   \begin{itemize}
627 |   \item Bastien, Frédéric, Lamblin, Pascal, Pascanu, Razvan, Bergstra, James, Goodfellow, Ian, Bergeron, Arnaud, Bouchard, Nicolas, and
628 |      Bengio,Yoshua. Theano: new features and speed improvements. Deep Learning and Unsupervised Feature Learning NIPS 2012
629 |     Workshop, 2012.
630 |   \item Bergstra, James, Breuleux, Olivier, Bastien, Frédéric, Lamblin, Pascal, Pascanu, Razvan, Desjardins, Guillaume, Turian, Joseph, Warde-
631 |      Farley, David, and Bengio,Yoshua. Theano: a CPU and GPU math expression compiler. In Proceedings of the Python for Scientific
632 |       Computing Conference (SciPy), June 2010. Oral Presentation.
633 |   \end{itemize}
634 | \end{frame}
635 | 
636 | \begin{frame}{Example acknowledgments}
637 | We would like to thank the developers of Theano \textbackslash citep\{bergstra+al:2010-scipy,Bastien-Theano-2012\}.
638 | We would also like to thank NSERC, Compute Canada, and Calcul Québec for providing computational resources.
639 | \end{frame}
640 | 
641 | 
642 | \begin{frame}
643 | \begin{center}
644 | \bibliography{strings,strings-short,ml,aigaion-shorter}
645 | \Huge
646 | Questions?
647 | \end{center}
648 | \end{frame}
649 | 
650 | 
651 | \end{document}
652 | 


--------------------------------------------------------------------------------
/python.py:
--------------------------------------------------------------------------------
 1 | from theano import Op
 2 | 
 3 | class MyOp(Op):
 4 |     __props__ = ()
 5 | 
 6 |     def __init__(self, ...):
 7 |         # set up parameters
 8 | 
 9 |     def make_node(self, ...):
10 |         # create apply node
11 | 
12 |     def perform(self, node, inputs, outputs_storage):
13 |         # do the computation
14 | 
15 |     def infer_shape(self, node, input_shapes):
16 |         # return output shapes
17 | 
18 |     def L_op(self, inputs, outputs, output_grads):
19 |         # return gradient graph for each input
20 | 
21 |     def R_op(self, inputs, eval_points):
22 |         # return R_op graph for each input
23 | 


--------------------------------------------------------------------------------
/scalmulop.py:
--------------------------------------------------------------------------------
 1 | from theano import Op, Apply
 2 | from theano.tensor import as_tensor_variable
 3 | from theano.scalar import as_scalar
 4 | 
 5 | class ScalMulV1(Op):
 6 |     __props__ = ('scal',)
 7 | 
 8 |     def __init__(self, scal):
 9 |         if not isinstance(scal, int):
10 |             raise TypeError('expected an int')
11 |         self.scal = scal
12 | 
13 |     def make_node(self, x):
14 |         x = as_tensor_variable(x)
15 |         return Apply(self, [x], [x.type()])
16 | 
17 |     def perform(self, node, inputs, output_storage):
18 |         x = inputs[0]
19 |         z = output_storage[0]
20 |         z[0] = x * self.scal
21 | 
22 |     def infer_shape(self, node, input_shapes):
23 |         return input_shapes
24 | 
25 |     def grad(self, inputs, output_grads):
26 |         return [output_grads[0] * self.scal]
27 | 
28 |     def R_op(self, inputs, eval_points):
29 |         if eval_points[0] is None:
30 |             return eval_points
31 |         return self.grad(inputs, eval_points)
32 | 
33 | 
34 | class ScalMulV2(Op):
35 |     __props__ = ()
36 | 
37 |     def make_node(self, x, scal):
38 |         x = as_tensor_variable(x)
39 |         scal = as_scalar(scal)
40 |         return Apply(self, [x, scal], [x.type()])
41 | 
42 |     def perform(self, node, inputs, output_storage):
43 |         x = inputs[0]
44 |         scal = inputs[1]
45 |         z = output_storage[0]
46 |         z[0] = x * scal
47 | 
48 |     def infer_shape(self, node, input_shapes):
49 |         return [input_shapes[0]]
50 | 
51 |     def grad(self, inputs, output_grads):
52 |         return [output_grads[0] * inputs[1], (inputs[0] * outputs_grads[1]).sum()]
53 | 
54 | #    def R_op(self, inputs, eval_points):
55 | #        if eval_points[0] is None:
56 | #            return eval_points
57 | #        return self.grad(inputs, eval_points)
58 | 


--------------------------------------------------------------------------------
/test_doubleop.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | 
 3 | from theano import function, config
 4 | from theano.tensor import matrix
 5 | from theano.tests import unittest_tools as utt
 6 | from theano.tests.test_rop import RopLop_checker
 7 | 
 8 | from doubleop import DoubleOp
 9 | 
10 | 
11 | def test_doubleop():
12 |     utt.seed_rng()
13 |     x = matrix()
14 |     f = function([x], DoubleOp()(x))
15 |     inp = numpy.asarray(numpy.random.rand(5, 4),
16 |                         dtype=config.floatX)
17 |     out = f(inp)
18 |     utt.assert_allclose(inp * 2, out)
19 | 
20 | 
21 | class test_Double(utt.InferShapeTester):
22 |     def test_infer_shape(self):
23 |         utt.seed_rng()
24 |         x = matrix()
25 |         self._compile_and_check(
26 |             # function inputs (symbolic)
27 |             [x],
28 |             # Op instance
29 |             [DoubleOp()(x)],
30 |             # numeric input
31 |             [numpy.asarray(numpy.random.rand(5, 4),
32 |                            dtype=config.floatX)],
33 |             # Op class that should disappear
34 |             DoubleOp)
35 | 
36 | 
37 | def test_doubleop_grad():
38 |     utt.seed_rng()
39 |     utt.verify_grad(
40 |         # Op instance
41 |         DoubleOp(),
42 |         # Numeric inputs
43 |         [numpy.random.rand(5, 7, 2)]
44 |         )
45 | 


--------------------------------------------------------------------------------
/test_opt.py:
--------------------------------------------------------------------------------
 1 | import theano
 2 | 
 3 | from scalmulop import ScalMulV1
 4 | from doubleop import DoubleOp
 5 | import opt
 6 | 
 7 | def test_scalmul_double():
 8 |     x = theano.tensor.matrix()
 9 |     y = ScalMulV1(2)(x)
10 |     f = theano.function([x], y)
11 | 
12 |     assert not any(isinstance(n.op, ScalMulV1)
13 |                    for n in f.maker.fgraph.toposort())
14 |     assert any(isinstance(n.op, DoubleOp)
15 |                for n in f.maker.fgraph.toposort())
16 | 
17 | 


--------------------------------------------------------------------------------
/thunk.py:
--------------------------------------------------------------------------------
 1 | from theano import Op
 2 | 
 3 | class MyOp(Op):
 4 |     __props__ = ()
 5 | 
 6 |     def __init__(self, ...):
 7 |         # set up parameters
 8 | 
 9 |     def make_node(self, ...):
10 |         # create apply node
11 | 
12 |     def make_thunk(self, node, storage_map,
13 |                    compute_map, no_recycling):
14 |         # return a thunk
15 | 
16 |     def infer_shape(self, input_shapes):
17 |         # return output shapes
18 | 
19 |     def grad(self, inputs, output_grads):
20 |         # return gradient graph for each input
21 | 


--------------------------------------------------------------------------------
/tripleop.py:
--------------------------------------------------------------------------------
 1 | from theano import Op, Apply
 2 | from theano.tensor import as_tensor_variable
 3 | 
 4 | class `TripleOp`(Op):
 5 |     __props__ = ()
 6 | 
 7 |     def make_node(self, x):
 8 |         x = as_tensor_variable(x)
 9 |         return Apply(self, [x], [x.type()])
10 | 
11 |     def perform(self, node, inputs, output_storage):
12 |         x = inputs[0]
13 |         z = output_storage[0]
14 |         z[0] = x * `3`
15 | 
16 |     def infer_shape(self, node, i0_shapes):
17 |         return i0_shapes
18 | 
19 |     def grad(self, inputs, output_grads):
20 |         return [output_grads[0] * `3`]
21 | 


--------------------------------------------------------------------------------