├── projects
    ├── project_4
    │   ├── __init__.py
    │   ├── poisson.py
    │   ├── test_poisson.py
    │   └── test_poisson_blind.py
    ├── project_5
    │   ├── __init__.py
    │   ├── test
    │   │   └── __init__.py
    │   └── README.md
    ├── README.md
    ├── project-4-poisson-solver.ipynb
    ├── project-2-shipra-hina-henil.ipynb
    ├── project-2-wiggle-group.ipynb
    └── project-2-felio92.ipynb
├── README.md
├── material
    ├── seminar-2018-10-16.pdf
    ├── seminar-2018-10-25.pdf
    ├── solution-1-nabla-operator.pdf
    └── resources.md
├── notebooks
    ├── regression.py
    ├── langevin.py
    ├── test_regression.py
    ├── project-1-rope-mmc.ipynb
    ├── sampling-2-euler-and-velocity-verlet.ipynb
    ├── python-intro-5-modules-and-testing.ipynb
    ├── sampling-1-direct-and-importance-sampling.ipynb
    ├── python-intro-6-pytorch-basics.ipynb
    ├── python-intro-2-flow-control-and-functions.ipynb
    ├── python-intro-4-oop-iterators-generators.ipynb
    ├── python-intro-3-numpy-and-matplotlib.ipynb
    └── python-intro-1-variables-and-data-structures.ipynb
├── .gitignore
└── exercises
    └── exercise-2018-10-18-sampling-pi.ipynb
/projects/project_4/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/projects/project_5/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/projects/project_5/test/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/projects/README.md:
--------------------------------------------------------------------------------
1 | Please place your submissions for project challenges here.
2 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # compsci-2018
2 | Repository for the computational sciences software project of 2018/2019
--------------------------------------------------------------------------------
/material/seminar-2018-10-16.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/compsci-2018/master/material/seminar-2018-10-16.pdf
--------------------------------------------------------------------------------
/material/seminar-2018-10-25.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/compsci-2018/master/material/seminar-2018-10-25.pdf
--------------------------------------------------------------------------------
/material/solution-1-nabla-operator.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/compsci-2018/master/material/solution-1-nabla-operator.pdf
--------------------------------------------------------------------------------
/projects/project_5/README.md:
--------------------------------------------------------------------------------
1 | # Project 5
2 | Implement a sparse solver for Poisson's equation.
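One possible starting point for the iteration schemes listed under Content below is sketched here. It is an illustrative, dense Jacobi iteration, not the sparse solver the project asks for, and the name `jacobi` as well as the `tol`/`max_iter` parameters are placeholders:

```python
import numpy as np


def jacobi(a, b, x0=None, tol=1e-10, max_iter=10000):
    """Solve a @ x = b with Jacobi sweeps x <- D^-1 (b - (A - D) x)."""
    d = np.diag(a)                           # diagonal part D of A (must be nonzero)
    r = a - np.diagflat(d)                   # off-diagonal remainder A - D
    x = np.zeros_like(b, dtype=float) if x0 is None else np.array(x0, dtype=float)
    for _ in range(max_iter):
        x_new = (b - r @ x) / d              # one Jacobi update
        if np.linalg.norm(x_new - x) < tol:  # stop once successive iterates agree
            return x_new
        x = x_new
    return x
```

For a quick consistency check one could, for example, use `-create_laplacian_1d(nx, lx, pbc=False)` from `project_4` as `a` and a test charge density as `b`; a sparse version would then store only the few nonzero diagonals instead of the full matrix.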
3 | 4 | ## Content 5 | 6 | - Jacobi iteration (group I) 7 | - Gauß-Seidel iteration (group II) 8 | - SOR (group III) 9 | -------------------------------------------------------------------------------- /material/resources.md: -------------------------------------------------------------------------------- 1 | # Additional resources 2 | 3 | - [Transforming Code into Beautiful, Idiomatic Python](https://youtu.be/OSGv2VnC0go): a talk by Raymond Hettinger (Python core developer) 4 | - [Testing 101](https://alysivji.github.io/testing-101-introduction-to-testing.html): a blog post by Aly Sivji 5 | - [Best Practices for Foundations in Molecular Simulations](https://www.livecomsjournal.org/article/5957-best-practices-for-foundations-in-molecular-simulations-article-v1-0): an article by Braun *et al* 6 | - [Best Practices for Quantification of Uncertainty and Sampling Quality in Molecular Simulations](https://www.livecomsjournal.org/article/5067-best-practices-for-quantification-of-uncertainty-and-sampling-quality-in-molecular-simulations-article-v1-0): an article by Grossfield *et al* 7 | -------------------------------------------------------------------------------- /projects/project_4/poisson.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def create_laplacian_1d(nx, lx, pbc=True): 4 | """Ceates a discretized Laplacian in 1D 5 | 6 | Arguments: 7 | nx (int): number of grid points; needs more than one 8 | lx (float): box lenght along x; must be positive 9 | pbc (boolean): use periodic boundary conditions 10 | """ 11 | if nx < 2: 12 | raise ValueError('We need at least two grid points') 13 | if lx <= 0.0: 14 | raise ValueError('We need a positive length') 15 | if pbc not in (True, False): 16 | raise TypeError('We need a boolean as pbc') 17 | laplacian = np.zeros((nx, nx)) 18 | mx = (nx / lx)**2 19 | for x in range(nx): 20 | laplacian[x, x] -= 2.0 * mx 21 | laplacian[x, (x + 1) % nx] += mx 22 | laplacian[(x + 1) % nx, x] += mx 23 | if not pbc: 24 | laplacian[0, -1] = 0 25 | laplacian[-1, 0] = 0 26 | return laplacian 27 | -------------------------------------------------------------------------------- /projects/project_4/test_poisson.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from .poisson import create_laplacian_1d 4 | 5 | 6 | def test_laplacian_1d(): 7 | laplacian = create_laplacian_1d(3, 1, pbc=True) 8 | np.testing.assert_almost_equal( 9 | laplacian, 10 | np.array([[-2.0, 1.0, 1.0], 11 | [1.0, -2.0, 1.0], 12 | [1.0, 1.0, -2.0]]) * (3 / 1)**2) 13 | laplacian = create_laplacian_1d(3, 1, pbc=False) 14 | np.testing.assert_almost_equal( 15 | laplacian, 16 | np.array([[-2.0, 1.0, 0.0], 17 | [1.0, -2.0, 1.0], 18 | [0.0, 1.0, -2.0]]) * (3 / 1)**2) 19 | 20 | 21 | @pytest.mark.parametrize('nx,lx,pbc,exception', [ 22 | (1, 1, True, ValueError), 23 | (3, -1, True, ValueError), 24 | ('hello', 1, True, TypeError), 25 | (3, None, True, TypeError), 26 | (3, 1, 'hello', TypeError)]) 27 | def test_laplacian_1d_exceptions(nx, lx, pbc, exception): 28 | with pytest.raises(exception): 29 | create_laplacian_1d(nx, lx, pbc=pbc) 30 | -------------------------------------------------------------------------------- /notebooks/regression.py: -------------------------------------------------------------------------------- 1 | """An example module which provides a linear regression function""" 2 | 3 | 4 | def mean(a): 5 | """Compute the arithmetic mean over an iterable a""" 
6 | try: 7 | return sum(a) / len(a) 8 | except ZeroDivisionError: 9 | return 0 10 | 11 | 12 | def scalar_product(a, b): 13 | """Compute the scalar product for two vectors a and b""" 14 | if len(a) != len(b): 15 | raise ValueError( 16 | f'Cannot compute a scalar product for vectors with' 17 | f' lengths {len(a)} and {len(b)}') 18 | return sum(a_ * b_ for a_, b_ in zip(a, b)) 19 | 20 | 21 | def linear_regression(x, y): 22 | """Perform a linear regression 23 | 24 | Estimate a model y_ = slope * x + const such 25 | that y_ approximates y. 26 | 27 | Arguments: 28 | x (iterable of float): x values 29 | y (iterable of float): y values 30 | 31 | Returns: 32 | slope (float): the slope parameter of the regression model 33 | const (float): the constant parameter of the regression model 34 | """ 35 | x_mean = mean(x) 36 | y_mean = mean(y) 37 | x_meanfree = [x_ - x_mean for x_ in x] 38 | y_meanfree = [y_ - y_mean for y_ in y] 39 | xy = scalar_product(x_meanfree, y_meanfree) 40 | xx = scalar_product(x_meanfree, x_meanfree) 41 | try: 42 | slope = xy / xx 43 | except ZeroDivisionError: 44 | slope = 0 45 | const = y_mean - slope * x_mean 46 | return slope, const 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /notebooks/langevin.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def langevin( 5 | force, n_steps, x_init, v_init, mass, 6 | time_step=0.001, damping=0.1, beta=1.0): 7 | """Langevin integrator for initial value problems 8 | 9 | This function implements the BAOAB algorithm of Benedict Leimkuhler 10 | and Charles Matthews. See J. Chem. Phys. 
138, 174102 (2013) for 11 | further details. 12 | 13 | Arguments: 14 | force (function): computes the forces of a single configuration 15 | n_steps (int): number of integration steps 16 | x_init (numpy.ndarray(n, d)): initial configuration 17 | v_init (numpy.ndarray(n, d)): initial velocities 18 | mass (numpy.ndarray(n)): particle masses 19 | time_step (float): time step for the integration 20 | damping (float): damping term, use zero if not coupled 21 | beta (float): inverse temperature 22 | 23 | Returns: 24 | x (numpy.ndarray(n_steps + 1, n, d)): configuraiton trajectory 25 | v (numpy.ndarray(n_steps + 1, n, d)): velocity trajectory 26 | """ 27 | shape = list(x_init.shape) 28 | th = 0.5 * time_step 29 | thm = 0.5 * time_step / mass[:, None] 30 | edt = np.exp(-damping * time_step) 31 | sqf = np.sqrt((1.0 - edt ** 2) / (beta * mass))[:, None] 32 | x = np.zeros([n_steps + 1] + shape) 33 | v = np.zeros_like(x) 34 | x[0, :, :] = x_init 35 | v[0, :, :] = v_init 36 | f = force(x[0]) 37 | for i in range(n_steps): 38 | v[i + 1, :, :] = v[i] + thm * f 39 | x[i + 1, :, :] = x[i] + th * v[i + 1] 40 | v[i + 1, :, :] = edt * v[i + 1] + sqf * np.random.randn(*shape) 41 | x[i + 1, :, :] = x[i + 1] + th * v[i + 1] 42 | f[:, :] = force(x[i + 1]) 43 | v[i + 1, :, :] = v[i + 1] + thm * f 44 | return x, v 45 | -------------------------------------------------------------------------------- /notebooks/test_regression.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from regression import mean, scalar_product, linear_regression 3 | 4 | 5 | @pytest.mark.parametrize('sequence,mu', [ 6 | ([0, 0], 0), 7 | ([1, 1], 1), 8 | ([float(i + 1) for i in range(100)], 50.5), 9 | ([], 0)]) 10 | def test_mean(sequence, mu): 11 | assert mean(sequence) == mu 12 | 13 | 14 | @pytest.mark.parametrize('argument,exception', [ 15 | (None, TypeError), 16 | (1, TypeError), 17 | ('hello, world', TypeError)]) 18 | def test_mean_exceptions(argument, exception): 19 | with pytest.raises(exception): 20 | mean(argument) 21 | 22 | 23 | @pytest.mark.parametrize('a,b', [ 24 | ([1, 1], [0, 0]), 25 | ([0, 1], [1, 0]), 26 | ([1, 1], [1, -1])]) 27 | def test_scalar_product_orthogonal(a, b): 28 | assert scalar_product(a, b) == 0 29 | 30 | 31 | @pytest.mark.parametrize('x,expected', [ 32 | ([], 0), 33 | ([1, 1], 2), 34 | ([3, 4], 25)]) 35 | def test_scalar_product_squared_norm(x, expected): 36 | assert scalar_product(x, x) == expected 37 | 38 | 39 | @pytest.mark.parametrize('arguments,exception', [ 40 | (None, TypeError), 41 | ([1, 1], TypeError), 42 | ([[1], 1], TypeError), 43 | ([1, [1]], TypeError), 44 | ([[1, 1], [1]], ValueError), 45 | ([[1], [1, 1]], ValueError), 46 | (['hello', 'world'], TypeError)]) 47 | def test_scalar_product_exceptions(arguments, exception): 48 | with pytest.raises(exception): 49 | scalar_product(*arguments) 50 | 51 | 52 | @pytest.mark.parametrize('x,y,slope,const', [ 53 | ([], [], 0, 0), 54 | ([0, 1], [0, 0], 0, 0), 55 | ([0, 1], [1, 1], 0, 1), 56 | ([0, 1], [0, 1], 1, 0), 57 | ([0, 1], [1, 0], -1, 1)]) 58 | def test_linear_regression(x, y, slope, const): 59 | slope_, const_ = linear_regression(x, y) 60 | assert slope == slope_ 61 | assert const == const_ 62 | 63 | 64 | @pytest.mark.parametrize('arguments,exception', [ 65 | (None, TypeError), 66 | ([1, 1], TypeError), 67 | ([[1], 1], TypeError), 68 | ([1, [1]], TypeError), 69 | ([[1, 1], [1]], ValueError), 70 | ([[1], [1, 1]], ValueError), 71 | (['hello', 'world'], TypeError)]) 72 | def 
test_linear_regression_exceptions(arguments, exception): 73 | with pytest.raises(exception): 74 | linear_regression(*arguments) 75 | -------------------------------------------------------------------------------- /projects/project_4/test_poisson_blind.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from importlib import import_module 4 | from itertools import product 5 | try: 6 | from .poisson_1 import create_laplacian_2d as create_laplacian_2d_1 7 | except ImportError: 8 | print('Cannot import poisson_1.create_laplacian_2d') 9 | try: 10 | from .poisson_2 import create_laplacian_2d as create_laplacian_2d_2 11 | except ImportError: 12 | print('Cannot import poisson_2.create_laplacian_2d') 13 | try: 14 | from .poisson_3 import create_laplacian_2d as create_laplacian_2d_3 15 | except ImportError: 16 | print('Cannot import poisson_3.create_laplacian_2d') 17 | 18 | 19 | @pytest.mark.parametrize( 20 | 'module,function', [ 21 | ('.poisson','create_laplacian_1d'), 22 | ('.poisson_1','create_laplacian_2d'), 23 | ('.poisson_2','create_laplacian_2d'), 24 | ('.poisson_3','create_laplacian_2d')]) 25 | def test_module_and_interface(module, function): 26 | """Test that the module can be imported and that it 27 | provides the desired function. 28 | """ 29 | imported = import_module(module, package='project_4') 30 | assert function in dir(imported) 31 | 32 | 33 | @pytest.mark.parametrize( 34 | 'create_laplacian_2d,nx,ny,lx,ly', [ 35 | (func, nx, ny, lx, ly) 36 | for func, nx, ny, lx, ly in product( 37 | [ 38 | 'create_laplacian_2d_1', 39 | 'create_laplacian_2d_2', 40 | 'create_laplacian_2d_3'], 41 | [5, 10, 20], 42 | [5, 10, 20], 43 | [1.0, 3.0], 44 | [1.0, 3.0])]) 45 | def test_consistency(create_laplacian_2d, nx, ny, lx, ly): 46 | laplacian = eval( 47 | f'{create_laplacian_2d}({nx}, {ny}, {lx}, {ly}, pbc=True)') 48 | assert laplacian.ndim == 2, \ 49 | f'laplacian has wrong dimension: {laplacian.ndim}' 50 | assert laplacian.shape[0] == nx * ny, \ 51 | f'laplacian has wrong first shape: {laplacian.shape[0]}' 52 | assert laplacian.shape[1] == nx * ny, \ 53 | f'laplacian has wrong second shape: {laplacian.shape[1]}' 54 | rho = np.random.normal(size=(nx, ny)) 55 | rho -= np.mean(rho) 56 | phi = np.linalg.solve(laplacian, -rho.reshape(-1)) 57 | np.testing.assert_allclose( 58 | -np.dot(laplacian, phi).reshape(nx, ny), 59 | rho, 60 | rtol=1e-5, atol=1e-5) 61 | -------------------------------------------------------------------------------- /projects/project-4-poisson-solver.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%matplotlib inline\n", 10 | "import matplotlib.pyplot as plt\n", 11 | "import numpy as np\n", 12 | "from project_4.poisson import create_laplacian_1d" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "create_laplacian_1d(3, 1, pbc=False)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "rho = np.sin(np.linspace(-np.pi, np.pi, 100))\n", 31 | "\n", 32 | "plt.plot(rho)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "laplacian = create_laplacian_1d(rho.size, 2 * 
np.pi)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "laplacian.shape" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "phi = np.linalg.solve(laplacian, -rho)\n", 60 | "\n", 61 | "plt.plot(phi)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "plt.plot(rho)\n", 71 | "plt.plot(-np.dot(laplacian, phi), 'x')" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "rho = np.zeros(20)\n", 81 | "\n", 82 | "rho[0] = 1\n", 83 | "rho[-1] = 1\n", 84 | "rho[10] = -2\n", 85 | "\n", 86 | "plt.plot(rho, '-o')" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "laplacian = create_laplacian_1d(rho.size, 1)\n", 96 | "\n", 97 | "phi = np.linalg.solve(laplacian, -rho)\n", 98 | "\n", 99 | "plt.plot(phi, '-o')" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "plt.plot(rho)\n", 109 | "plt.plot(-np.dot(laplacian, phi), 'x')" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [] 118 | } 119 | ], 120 | "metadata": { 121 | "kernelspec": { 122 | "display_name": "Python 3", 123 | "language": "python", 124 | "name": "python3" 125 | }, 126 | "language_info": { 127 | "codemirror_mode": { 128 | "name": "ipython", 129 | "version": 3 130 | }, 131 | "file_extension": ".py", 132 | "mimetype": "text/x-python", 133 | "name": "python", 134 | "nbconvert_exporter": "python", 135 | "pygments_lexer": "ipython3", 136 | "version": "3.7.0" 137 | } 138 | }, 139 | "nbformat": 4, 140 | "nbformat_minor": 2 141 | } 142 | -------------------------------------------------------------------------------- /projects/project-2-shipra-hina-henil.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import matplotlib.pyplot as plt\n", 10 | "import numpy as np\n", 11 | "%matplotlib inline\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "def potential_gradient(x, y, g, k):\n", 21 | "\n", 22 | " \"\"\" Velocity Verlet implementation on a rope\n", 23 | " This function computes the gradient for each particle r(x,y)\n", 24 | "\n", 25 | " Parameters:\n", 26 | " x, y = coordinates of particles\n", 27 | " g = 9.8 is gravity constant\n", 28 | " k = 50 is spring constant\n", 29 | " size = length of markov chain\n", 30 | " mass = mass of particle\n", 31 | " \"\"\"\n", 32 | " particles = len(x)\n", 33 | " gradient_x, gradient_y = np.zeros(particles), np.zeros(particles)\n", 34 | " gravity_g = np.zeros(particles)\n", 35 | " gradient_x[1:-1] = 2*k*(2*x[1:-1] - x[:-2] - x[2:])\n", 36 | " gradient_y[1:-1] = 2*k*(2*y[1:-1] - y[:-2] - y[2:])\n", 37 | " gravity_g[1:-1] = g\n", 38 | " return gradient_x, gradient_y + gravity_g\n" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 
47 | "def vv(potential_gradient, rx, ry, tow=0.01, g=9.8, k=20,\n", 48 | " size=10000, mass=1.0):\n", 49 | "\n", 50 | " n = len(rx)\n", 51 | " verletX, verletY = np.zeros((size, n)), np.zeros((size, n))\n", 52 | " velocity_x, velocity_y = np.zeros((size, n)), np.zeros((size, n))\n", 53 | " verletX[0], verletY[0] = rx, ry\n", 54 | " for i in range(1, size):\n", 55 | " grad_x, grad_y = potential_gradient(verletX[i-1], verletY[i-1], g, k)\n", 56 | " rx_new = verletX[i-1] + tow*velocity_x[i-1] - ((tow**2)/(mass*2))*grad_x\n", 57 | " ry_new = verletY[i-1] + tow*velocity_y[i-1] - ((tow**2)/(mass*2))*grad_y\n", 58 | " verletX[i], verletY[i] = rx_new, ry_new\n", 59 | " new_grad_x, new_grad_y = potential_gradient(rx_new, ry_new, g, k)\n", 60 | " vx_new = velocity_x[i-1] - (tow/(2*mass)) * (grad_x + new_grad_x)\n", 61 | " vy_new = velocity_y[i-1] - (tow/(2*mass)) * (grad_y + new_grad_y)\n", 62 | " velocity_x[i], velocity_y[i] = vx_new, vy_new\n", 63 | " return verletX, verletY\n" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "x_init, y_init, n = 0, 20, 32\n", 73 | "rx, ry = np.linspace(x_init, n - 1, n), np.linspace(y_init, y_init, n)\n", 74 | "verletX, verletY = vv(potential_gradient, rx, ry)\n", 75 | "\n", 76 | "plt.plot(verletX[-1].T, verletY[-1].T, '-o')\n", 77 | "plt.xlabel(\"x-axis\")\n", 78 | "plt.ylabel(\"y-axis\")\n", 79 | "plt.show()\n" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [] 88 | } 89 | ], 90 | "metadata": { 91 | "kernelspec": { 92 | "display_name": "Python 3", 93 | "language": "python", 94 | "name": "python3" 95 | }, 96 | "language_info": { 97 | "codemirror_mode": { 98 | "name": "ipython", 99 | "version": 3 100 | }, 101 | "file_extension": ".py", 102 | "mimetype": "text/x-python", 103 | "name": "python", 104 | "nbconvert_exporter": "python", 105 | "pygments_lexer": "ipython3", 106 | "version": "3.7.0" 107 | } 108 | }, 109 | "nbformat": 4, 110 | "nbformat_minor": 2 111 | } 112 | -------------------------------------------------------------------------------- /notebooks/project-1-rope-mmc.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%matplotlib inline\n", 10 | "import matplotlib.pyplot as plt\n", 11 | "import numpy as np\n", 12 | "from random import uniform, randint" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "def mmc(potential, size, x_init, beta=1.0, step=0.5):\n", 22 | " \"\"\"A simple Metropolis Monte Carlo implementation\n", 23 | " \n", 24 | " This function updates one particle at a time with a\n", 25 | " uniformly distributed step in each dimension.\n", 26 | "\n", 27 | " Arguments:\n", 28 | " potential (reference): potential function\n", 29 | " size (int): length of the sampled Markov chain\n", 30 | " x_init (numpy.ndarray): initial configuration\n", 31 | " beta (float): inverse temperature factor\n", 32 | " step (float): maximal step size\n", 33 | " \"\"\"\n", 34 | " x = np.zeros((size, x_init.shape[0], x_init.shape[1]))\n", 35 | " u = np.zeros(size)\n", 36 | " x[0, :, :] = x_init\n", 37 | " u[0] = potential(x_init)\n", 38 | " x_ = np.zeros_like(x_init)\n", 39 | " for i in range(1, size):\n", 40 | " x_[:, :] = x[i - 
1]\n", 41 | " j = randint(1, len(x_) - 2)\n", 42 | " x_[j] += np.random.uniform(-step, step, x_[j].shape)\n", 43 | " u_ = potential(x_)\n", 44 | " if u_ <= u[i - 1] or uniform(0, 1) < np.exp(beta * (u[i - 1] - u_)):\n", 45 | " x[i, :, :], u[i] = x_, u_\n", 46 | " else:\n", 47 | " x[i, :, :], u[i] = x[i - 1], u[i - 1]\n", 48 | " return x, u" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "def gravity(x, g=9.81):\n", 58 | " \"\"\"A model for gravity which penalizes x[:, 1]\"\"\"\n", 59 | " if np.any(x[:, 1] < 0.0):\n", 60 | " return np.inf\n", 61 | " return g * x[:, 1].sum()\n", 62 | "\n", 63 | "\n", 64 | "def springs(x, k=50.0):\n", 65 | " \"\"\"A pairwise spring potential\"\"\"\n", 66 | " return np.power(x[1:] - x[:-1], 2).sum() * k\n", 67 | "\n", 68 | "\n", 69 | "def both(x, g=9.81, k=50.0):\n", 70 | " \"\"\"This encapsulates gravity and spring\"\"\"\n", 71 | " return gravity(x, g=g) + springs(x, k=k)\n", 72 | "\n", 73 | "\n", 74 | "x_init = 20 * np.ones((30, 2))\n", 75 | "x_init[:, 0] = np.linspace(0, len(x_init) - 1, len(x_init))\n", 76 | "\n", 77 | "x, u = mmc(both, 100000, x_init, beta=5, step=0.1)\n", 78 | "\n", 79 | "fig, ax = plt.subplots(figsize=(10, 4))\n", 80 | "ax.plot(u)\n", 81 | "ax.set_xlabel('time / steps')\n", 82 | "ax.set_ylabel('energy / a.u.')\n", 83 | "fig.tight_layout()" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "fig, axes = plt.subplots(\n", 93 | " 1, 2, figsize=(12, 5), sharex=True, sharey=True)\n", 94 | "axes[0].plot(*x[0].T, '-s', label='initial')\n", 95 | "axes[0].plot(*x[-1].T, '-o', label='final')\n", 96 | "axes[0].legend()\n", 97 | "for i in range(1, len(x_init) - 1):\n", 98 | " axes[1].plot(*x[::10, i, :].T)\n", 99 | "axes[1].plot(*x[-1].T, '--', color='grey')\n", 100 | "for ax in axes.flat:\n", 101 | " ax.set_xlabel(r'$x$')\n", 102 | "axes[0].set_ylabel(r'$y$')\n", 103 | "fig.tight_layout()" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [] 112 | } 113 | ], 114 | "metadata": { 115 | "kernelspec": { 116 | "display_name": "Python 3", 117 | "language": "python", 118 | "name": "python3" 119 | }, 120 | "language_info": { 121 | "codemirror_mode": { 122 | "name": "ipython", 123 | "version": 3 124 | }, 125 | "file_extension": ".py", 126 | "mimetype": "text/x-python", 127 | "name": "python", 128 | "nbconvert_exporter": "python", 129 | "pygments_lexer": "ipython3", 130 | "version": "3.7.0" 131 | } 132 | }, 133 | "nbformat": 4, 134 | "nbformat_minor": 2 135 | } 136 | -------------------------------------------------------------------------------- /notebooks/sampling-2-euler-and-velocity-verlet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Sampling II: Euler and velocity verlet\n", 8 | "\n", 9 | "\n", 10 | "## Content\n", 11 | "- Euler and velocity verlet integrators\n", 12 | "- Reweighting\n", 13 | "\n", 14 | "## Remember jupyter notebooks\n", 15 | "- To run the currently highlighted cell, hold ⇧ Shift and press ⏎ Enter.\n", 16 | "- To get help for a specific function, place the cursor within the function's brackets, hold ⇧ Shift, and press ⇥ Tab." 
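For reference, these are the update rules that the `euler()` and `vv()` functions below implement (this summary is an addition; $\Delta t$ corresponds to `time_step`, $m$ to the mass, and $U$ to the potential whose gradient is passed in):

$$\begin{eqnarray*}
\textrm{Euler:} && x_{i+1} = x_i + \Delta t \, v_i \,, \qquad v_{i+1} = v_i - \frac{\Delta t}{m} \nabla U(x_i) \\[0.5em]
\textrm{velocity Verlet:} && x_{i+1} = x_i + \Delta t \, v_i - \frac{\Delta t^2}{2m} \nabla U(x_i) \,, \qquad v_{i+1} = v_i - \frac{\Delta t}{2m} \left[ \nabla U(x_i) + \nabla U(x_{i+1}) \right]
\end{eqnarray*}$$

Averaging the force over both endpoints of the step is what gives velocity Verlet its much better long-time energy behaviour, which the phase-space plots below make visible.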
17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "%matplotlib inline\n", 26 | "import matplotlib.pyplot as plt\n", 27 | "import numpy as np" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "def euler(potential_gradient, size, x_init, v_init, mass=1.0, time_step=0.005):\n", 37 | " x, v = np.zeros(size), np.zeros(size)\n", 38 | " x[0], v[0] = x_init, v_init\n", 39 | " for i in range(1, size):\n", 40 | " x[i] = x[i - 1] + time_step * v[i - 1]\n", 41 | " v[i] = v[i - 1] - time_step * potential_gradient(x[i - 1]) / mass\n", 42 | " return x, v\n", 43 | "\n", 44 | "\n", 45 | "def vv(potential_gradient, size, x_init, v_init, mass=1.0, time_step=0.005):\n", 46 | " x, v = np.zeros(size), np.zeros(size)\n", 47 | " x[0], v[0] = x_init, v_init\n", 48 | " nabla_u1, nabla_u0 = potential_gradient(x[0]), None\n", 49 | " for i in range(1, size):\n", 50 | " x[i] = x[i - 1] + time_step * v[i - 1] - time_step**2 * nabla_u1 / 2 / mass\n", 51 | " nabla_u1, nabla_u0 = potential_gradient(x[i]), nabla_u1\n", 52 | " v[i] = v[i - 1] - time_step * (nabla_u1 + nabla_u0) / 2 / mass\n", 53 | " return x, v\n", 54 | "\n", 55 | "\n", 56 | "def harmonic_potential(x):\n", 57 | " return np.power(x, 2) / 2\n", 58 | "\n", 59 | "\n", 60 | "def harmonic_potential_gradient(x):\n", 61 | " return x\n", 62 | "\n", 63 | "\n", 64 | "fig, axes = plt.subplots(1, 2, figsize=(10, 5))\n", 65 | "for ax, integrator in zip(axes.flat, (euler, vv)):\n", 66 | " x, v = integrator(\n", 67 | " harmonic_potential_gradient, 100000, 1, 0, time_step=0.001)\n", 68 | " ax.plot(x, v)\n", 69 | " ax.set_aspect('equal')\n", 70 | " ax.set_xlabel(r'$x$')\n", 71 | " ax.set_ylabel(r'$v$')\n", 72 | "fig.tight_layout()" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "def pairs(iterable):\n", 82 | " for x, y in zip(iterable[:-1], iterable[1:]):\n", 83 | " yield x, y\n", 84 | "\n", 85 | "\n", 86 | "def chi(x, xmin, xmax):\n", 87 | " return np.logical_and(xmin <= x, x < xmax)\n", 88 | "\n", 89 | "\n", 90 | "positions, _ = vv(harmonic_potential_gradient, 100000, -5, 0)\n", 91 | "edges = np.linspace(-5, 5, 31)\n", 92 | "centers = (edges[:-1] + edges[1:]) / 2\n", 93 | "\n", 94 | "histogram = [np.sum(chi(positions, x, y)) / positions.size\n", 95 | " for x, y in pairs(edges)]\n", 96 | "\n", 97 | "fig, ax = plt.subplots()\n", 98 | "ax.bar(centers, [h for h in histogram], (edges[1] - edges[0]) * 0.9)\n", 99 | "ax.set_xlabel(r'$x$')\n", 100 | "ax.set_ylabel(r'$\\frac{1}{N}\\sum_{n=1}^N \\chi_i(x_n)$')\n", 101 | "fig.tight_layout()" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "empirical_weights = np.zeros(len(positions))\n", 111 | "for h, (x, y) in zip(histogram, pairs(edges)):\n", 112 | " empirical_weights[chi(positions, x, y)] = h\n", 113 | "empirical_weights /= empirical_weights.sum()\n", 114 | " \n", 115 | "weights = np.exp(-harmonic_potential(positions) * 1.0)\n", 116 | "weights /= weights.sum()\n", 117 | "\n", 118 | "histogram2 = [np.sum(weights[chi(positions, x, y)])\n", 119 | " for x, y in pairs(edges)]\n", 120 | " \n", 121 | "corrected_weights = weights / empirical_weights\n", 122 | "corrected_weights /= corrected_weights.sum()\n", 123 | "\n", 124 | "histogram3 = 
[np.sum(corrected_weights[chi(positions, x, y)])\n", 125 | " for x, y in pairs(edges)]\n", 126 | "\n", 127 | "fig, ax = plt.subplots()\n", 128 | "ax.bar(\n", 129 | " centers,\n", 130 | " [h for h in histogram],\n", 131 | " (edges[1] - edges[0]) * 0.9,\n", 132 | " label=r'$\\frac{1}{N}\\sum_{n=1}^N \\chi_i(x_n)$')\n", 133 | "ax.bar(\n", 134 | " centers,\n", 135 | " [h for h in histogram2],\n", 136 | " (edges[1] - edges[0]) * 0.8,\n", 137 | " label=r'$\\sum_{n=1}^N \\chi_i(x_n)p(x_n)$')\n", 138 | "ax.bar(\n", 139 | " centers,\n", 140 | " [h for h in histogram3],\n", 141 | " (edges[1] - edges[0]) * 0.7,\n", 142 | " label=r'$\\sum_{n=1}^N \\chi_i(x_n)p(x_n) w^{-1}(x_n)$')\n", 143 | "ax.set_xlabel(r'$x$')\n", 144 | "ax.legend()\n", 145 | "fig.tight_layout()" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [] 154 | } 155 | ], 156 | "metadata": { 157 | "kernelspec": { 158 | "display_name": "Python 3", 159 | "language": "python", 160 | "name": "python3" 161 | }, 162 | "language_info": { 163 | "codemirror_mode": { 164 | "name": "ipython", 165 | "version": 3 166 | }, 167 | "file_extension": ".py", 168 | "mimetype": "text/x-python", 169 | "name": "python", 170 | "nbconvert_exporter": "python", 171 | "pygments_lexer": "ipython3", 172 | "version": "3.7.0" 173 | } 174 | }, 175 | "nbformat": 4, 176 | "nbformat_minor": 2 177 | } 178 | -------------------------------------------------------------------------------- /projects/project-2-wiggle-group.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%matplotlib inline\n", 10 | "import numpy as np\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "\n", 13 | "# init the constants and initial p, q\n", 14 | "N = 30\n", 15 | "k = 50.\n", 16 | "g = 9.81\n", 17 | "y0 = 40.\n", 18 | "#m = np.ones(N)\n", 19 | "xs = np.arange(N)\n", 20 | "ys = np.array([y0] * N)\n", 21 | "q0 = np.vstack((xs, ys)).T\n", 22 | "p0 = np.zeros((N, 2))" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "# for energies and gradient calc\n", 32 | "# forget about the ground\n", 33 | "'''def gravity(y):\n", 34 | " if(y >= 0):\n", 35 | " return g*y\n", 36 | " else:\n", 37 | " return 1e20'''\n", 38 | "def gravity(y):\n", 39 | " \"\"\"Calculate the gravity potential for unit mass at height y.\"\"\"\n", 40 | " return g*y\n", 41 | "\n", 42 | "def lengths(xs, ys):\n", 43 | " \"\"\"Return the rope lengths as an nparray of length (n-1).\"\"\"\n", 44 | " xdiff = xs[1:] - xs[:-1]\n", 45 | " ydiff = ys[1:] - ys[:-1]\n", 46 | " return np.sqrt(np.square(xdiff) + np.square(ydiff))\n", 47 | "\n", 48 | "def potential(q, m = 1):\n", 49 | " \"\"\"Return the potential of the system.\"\"\"\n", 50 | " xs, ys = q[:, 0], q[:, 1]\n", 51 | " ls = lengths(xs, ys)\n", 52 | " return (np.vectorize(gravity)(ys) * m).sum() + k * np.square(ls - 1).sum()\n", 53 | "\n", 54 | "def kinetic(p, m = 1):\n", 55 | " \"\"\"Return the kinetic energy of the system.\"\"\"\n", 56 | " return (np.square(p)/m).sum() / 2\n", 57 | "\n", 58 | "# gradients\n", 59 | "def potential_grad(q, m = 1):\n", 60 | " \"\"\"\n", 61 | " Return the gradient of the potential of system,\n", 62 | " with fixed end balls (by setting F on them to 0).\n", 63 | " \"\"\"\n", 64 | " if(np.shape(m) == ()):\n", 65 
| " m = m * np.ones(N)\n", 66 | " xs, ys = q[:, 0], q[:, 1]\n", 67 | " grad = np.zeros((N, 2))\n", 68 | " ls = lengths(xs, ys)\n", 69 | " delta_ls = ls - 1\n", 70 | " xdiff = xs[1:] - xs[:-1]\n", 71 | " ydiff = ys[1:] - ys[:-1]\n", 72 | " grad[1:-1, 0] = 2 * k * delta_ls[:-1] * xdiff[:-1] / ls[:-1] - 2 * k * delta_ls[1:] * xdiff[1:] / ls[1:]\n", 73 | " grad[1:-1, 1] = 2 * k * delta_ls[:-1] * ydiff[:-1] / ls[:-1] - 2 * k * delta_ls[1:] * ydiff[1:] / ls[1:] + m[1:-1] * g\n", 74 | " #for i in range(1, 29):\n", 75 | " # grad[i, 0] = 2*k*(ls[i-1] - 1)*(xs[i]-xs[i-1])/ls[i-1] + 2*k*(ls[i] - 1)*(xs[i]-xs[i+1])/ls[i]\n", 76 | " # grad[i, 1] = m[i] * g + 2*k*(ls[i-1] - 1)*(ys[i]-ys[i-1])/ls[i-1] + 2*k*(ls[i] - 1)*(ys[i]-ys[i+1])/ls[i]\n", 77 | " return grad" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "def vv(potenital_grad, steps, q0, p0, m = 1, timestep = 0.01):\n", 87 | " \"\"\"Velocity Verlet for autonomous system (pot_grad only a function of q).\"\"\"\n", 88 | " halfstep = timestep / 2\n", 89 | " vec_shape = (steps + 1, q0.shape[0], q0.shape[1])\n", 90 | " qs = np.zeros(vec_shape)\n", 91 | " ps = np.zeros(vec_shape)\n", 92 | " qs[0], ps[0] = q0, p0\n", 93 | " potg = potenital_grad(qs[0])\n", 94 | " for i in range(1, steps + 1):\n", 95 | " p_temp = ps[i - 1] - potg * halfstep\n", 96 | " qs[i] = qs[i - 1] + p_temp / m * timestep\n", 97 | " potg = potenital_grad(qs[i])\n", 98 | " ps[i] = p_temp - potg * halfstep\n", 99 | " return qs, ps" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "size = 2000\n", 109 | "qx, px = vv(potential_grad, size, q0, p0, timestep = 0.005)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "%matplotlib notebook\n", 119 | "fig = plt.figure()\n", 120 | "ax = fig.add_subplot(111)\n", 121 | "plt.ion()\n", 122 | "fig.show()\n", 123 | "fig.canvas.draw()\n", 124 | "ax.set_ylim([0., y0 + 2])\n", 125 | "ax.set_xlabel('x / m')\n", 126 | "ax.set_ylabel('y / m')\n", 127 | "scat = ax.scatter(qx[0, :, 0], qx[0, :, 1])\n", 128 | "for i in range(1, 500):\n", 129 | " scat.remove() \n", 130 | " scat = ax.scatter(qx[4 * i, :, 0], qx[4 * i, :, 1], color = 'b')\n", 131 | " fig.canvas.draw()" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "# Plot the energies in every step\n", 141 | "E_p = np.zeros(size + 1)\n", 142 | "E_k = np.zeros(size + 1)\n", 143 | "for i in range(size + 1):\n", 144 | " E_p[i], E_k[i] = potential(qx[i]), kinetic(px[i])\n", 145 | "E = E_p + E_k\n", 146 | "times = 0.01 * np.arange(size + 1)\n", 147 | "%matplotlib inline\n", 148 | "plt.plot(times, E_p, color = 'm', label = 'potential')\n", 149 | "plt.plot(times, E_k, color = 'g', label = 'kinetic')\n", 150 | "plt.plot(times, E, color = 'b', label = 'total')\n", 151 | "plt.ylabel('Energy / a.u.')\n", 152 | "plt.xlabel('time / s')\n", 153 | "plt.legend()" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [] 162 | } 163 | ], 164 | "metadata": { 165 | "kernelspec": { 166 | "display_name": "Python 3", 167 | "language": "python", 168 | "name": "python3" 169 | }, 170 | "language_info": { 171 | "codemirror_mode": { 172 | "name": "ipython", 173 | 
"version": 3 174 | }, 175 | "file_extension": ".py", 176 | "mimetype": "text/x-python", 177 | "name": "python", 178 | "nbconvert_exporter": "python", 179 | "pygments_lexer": "ipython3", 180 | "version": "3.7.0" 181 | } 182 | }, 183 | "nbformat": 4, 184 | "nbformat_minor": 2 185 | } 186 | -------------------------------------------------------------------------------- /exercises/exercise-2018-10-18-sampling-pi.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Samping $\\pi$\n", 8 | "\n", 9 | "Let us conduct a stochastic experiment: we throw $N$ pebbles randomly into a square with radius $1$ (we assume that each possible position in the square is equally probable) and we further note how many of the $N$ pebbles are actually landing within distance $1$ of the lower left corner of the square. If $N_\\mathrm{hit}$ is that number, then\n", 10 | "\n", 11 | "$$\\frac{4 N_\\mathrm{hit}}{N} \\approx \\pi$$\n", 12 | "\n", 13 | "if $N$ is sufficiently large.\n", 14 | "\n", 15 | "Why? The number $N_\\mathrm{hit}$ describes how many of the $N$ pebbles have a distances of one from the lower left corner, i.e., the have fallen inside a (quarter-) circle of radius $r=1$ which is centered at the lower left corner. The area of full circle of radius $r$ is $\\pi r^2$; thus, the area of a quarter circle with $r=1$ is $\\frac{\\pi}{4}$. The area of the square, in which all $N$ pebbles must lie, is $r^2=1$. Since we assume a uniform distribution of pebble positions in the square, the ratio $\\frac{N_\\mathrm{hit}}{N}$ must be close to the ratio of areas between the quarter circle and the full square: $\\frac{\\pi}{4}$.\n", 16 | "\n", 17 | "Your task is to implement the above approximation for $\\pi$ as a Python function. To sample random positions in the unit square, you can use the `random` package, in particular, the function `uniform(a, b)` which returns a uniformly distributed random number $[a,b)$. Before you can use `uniform(a, b)`, you need to import this function as shown below:" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "from random import uniform" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "Here is an example how to use this function. 
We draw $10000$ random numbers and print the smallest as well as the largest number in the sample:" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "a, b = 0, 1\n", 43 | "samples = [uniform(a, b) for _ in range(10000)]\n", 44 | "print(min(samples), max(samples))" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "## Solution" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "def sample_pi(n):\n", 61 | " n_hits = 0\n", 62 | " for _ in range(n):\n", 63 | " x = uniform(0, 1)\n", 64 | " y = uniform(0, 1)\n", 65 | " if x * x + y * y < 1.0:\n", 66 | " n_hits += 1\n", 67 | " return 4.0 * n_hits / n\n", 68 | "\n", 69 | "\n", 70 | "sample_pi(10000)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "We can use Jupyter's `%timeit` **cell magic** to measure the computational cost of `sample_pi()`:" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "%timeit sample_pi(100)\n", 87 | "%timeit sample_pi(1000)\n", 88 | "%timeit sample_pi(10000)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "Here's an example how to compute some statistics for `sample_pi()`:" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "def stats(data):\n", 105 | " from math import sqrt\n", 106 | " mean = sum(data) / len(data)\n", 107 | " variance = sum((d - mean)**2 for d in data) / (len(data) - 1)\n", 108 | " return mean, sqrt(variance)\n", 109 | "\n", 110 | "\n", 111 | "mean, std = stats([sample_pi(1000) for _ in range(1000)])\n", 112 | "print('{:.3f} ± {:.3f}'.format(mean, std))" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "With the `matplotlib` package, we can visualize such statistical properties. 
Here, we show how the standard deviation of the sampled results decreases with an increasing `n`:" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "%matplotlib inline\n", 129 | "import matplotlib.pyplot as plt\n", 130 | "\n", 131 | "std = []\n", 132 | "n_values = [10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000]\n", 133 | "for n in n_values:\n", 134 | " std.append(stats([sample_pi(n) for _ in range(1000)])[1])\n", 135 | " \n", 136 | "plt.plot(n_values, std)\n", 137 | "plt.semilogx()" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "If you study Monte Carlo methods, you will learn that the error $\\epsilon_n$ scales as\n", 145 | "\n", 146 | "$$\\epsilon_n \\propto \\frac{1}{\\sqrt{n}}.$$\n", 147 | "\n", 148 | "We can test this for our example by comparing the measured standard deviations with the above expression:" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "from math import sqrt\n", 158 | "\n", 159 | "f = std[0] * sqrt(n_values[0])\n", 160 | "\n", 161 | "plt.plot(n_values, std)\n", 162 | "plt.plot(n_values, [f / sqrt(n) for n in n_values], 'o')\n", 163 | "plt.semilogx()\n", 164 | "\n", 165 | "print(f)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [] 174 | } 175 | ], 176 | "metadata": { 177 | "kernelspec": { 178 | "display_name": "Python 3", 179 | "language": "python", 180 | "name": "python3" 181 | }, 182 | "language_info": { 183 | "codemirror_mode": { 184 | "name": "ipython", 185 | "version": 3 186 | }, 187 | "file_extension": ".py", 188 | "mimetype": "text/x-python", 189 | "name": "python", 190 | "nbconvert_exporter": "python", 191 | "pygments_lexer": "ipython3", 192 | "version": "3.7.0" 193 | } 194 | }, 195 | "nbformat": 4, 196 | "nbformat_minor": 2 197 | } 198 | -------------------------------------------------------------------------------- /projects/project-2-felio92.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%matplotlib notebook\n", 10 | "import matplotlib.pyplot as plt\n", 11 | "import numpy as np\n", 12 | "from matplotlib import animation, rc\n", 13 | "from IPython.display import HTML" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "def generate_config(x0=0, y0=20, n=30, dx=1.):\n", 23 | " \"\"\"Returns the initial configuration of the n chained particles, which is a horizontal line.\n", 24 | " The function returns an array, which has n elements, where each element is a tuple consisting\n", 25 | " of the x- and y-component of the respective particle. The x-coordinates are evenly spaced and\n", 26 | " the y-coordinates are set to some constant y0\n", 27 | " \n", 28 | " Parameters\n", 29 | " ----------\n", 30 | " x0 - x-coordinate of first particle. Standard value is 0.\n", 31 | " y0 - y-coordinate of all particles. Standard value is 20.\n", 32 | " n - Number of particles, standard value is set to 30.\n", 33 | " dx - Horizontal spacing between neighbouring particles. 
Standard value is 1\n", 34 | " \"\"\"\n", 35 | " \n", 36 | " r = np.zeros((n, 2))\n", 37 | " v = np.zeros((n, 2))\n", 38 | " r[:, 0] = np.linspace(x0, x0 + n*dx, n)\n", 39 | " r[:, 1] = np.linspace(y0, y0, n)\n", 40 | " \n", 41 | " return r, v" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "def potential_gradient(r, t, g=9.81 , k=20., f=0, amp=1, n_cycles=5):\n", 51 | " \n", 52 | " \"\"\"Calculates the potential gradient for a given spatial configuration r of n particles in a chain.\n", 53 | " Returns a vector, where the i-th entry is a tuple consisting of the x- and y-coordinate of the gradient for particle i.\n", 54 | " Because the first and last particle are fixed, this function only computes the gradient for the second to the second-to-last particle,\n", 55 | " while for the first and last particle the gradient is always 0. Includes a sinusoidal force that acts upon the middle particle of the\n", 56 | " chain for a multiple of the driving oscillation period.\n", 57 | " \n", 58 | " Parameters\n", 59 | " ----------\n", 60 | " r - Array that has n-elements and each element has two scalar entries.\n", 61 | " g - Gravitational constant. Default value is 9.81.\n", 62 | " k - Spring constant. Default value is 20.\n", 63 | " f - Frequency of the driving force. Default is set to 0.\n", 64 | " amp - Amplitude of the driving force. Default is set to 1.\n", 65 | " \"\"\"\n", 66 | " n = len(r)\n", 67 | " harmonic_gradient = np.zeros((n, 2))\n", 68 | " gravitational_gradient = np.zeros((n, 2))\n", 69 | " driving_force = amp * np.sin(f * t)\n", 70 | " \n", 71 | " harmonic_gradient[1:-1] = k * (r[1:-1] - r[:-2]) - k * (r[2:] - r[1:-1])\n", 72 | " gravitational_gradient[1:-1][:,1] = g\n", 73 | " total_gradient = harmonic_gradient + gravitational_gradient\n", 74 | " if f!=0 and t⇧ Shift and press ⏎ Enter.\n", 15 | "- To get help for a specific function, place the cursor within the function's brackets, hold ⇧ Shift, and press ⇥ Tab." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "%matplotlib inline\n", 25 | "import matplotlib.pyplot as plt\n", 26 | "import numpy as np\n", 27 | "import pytest\n", 28 | "from regression import mean\n", 29 | "from regression import scalar_product\n", 30 | "from regression import linear_regression\n", 31 | "from langevin import langevin" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## Example I: linear regression\n", 39 | "\n", 40 | "We consider a linear regression problem: an experiment yielded data points $(x_n, y_n)$, $n=1,\\dots,N$. In this example, `y_true` refers to the actual truth, but due to experimental inaccuracies, the observed values `y_observed` have some error." 
41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "x = np.random.uniform(low=0, high=10, size=500)\n", 50 | "y_true = 0.5 * x + 1\n", 51 | "y_observed = y_true + np.random.normal(size=x.shape)\n", 52 | "\n", 53 | "plt.scatter(x, y_observed, s=1, label='observation')\n", 54 | "plt.plot(x, y_true, color='C1', label='ground truth')\n", 55 | "plt.xlabel(r'$x$')\n", 56 | "plt.ylabel(r'$y$')\n", 57 | "plt.legend();" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "We now use `linear_regression()` as provided by the `regression` module to estimate the best linear model to describe the $(x_n,y_n)$ relation:" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "plt.scatter(x, y_observed, s=1, label='observation')\n", 74 | "plt.plot(x, y_true, color='C1', label='ground truth')\n", 75 | "plt.xlabel(r'$x$')\n", 76 | "plt.ylabel(r'$y$')\n", 77 | "\n", 78 | "slope, const = linear_regression(x, y_observed)\n", 79 | "\n", 80 | "x_model = np.linspace(x.min(), x.max(), 20)\n", 81 | "y_model = slope * x_model + const\n", 82 | "\n", 83 | "plt.plot(x_model, y_model, '--o', color='C2', label='model')\n", 84 | "plt.legend()\n", 85 | "\n", 86 | "print(f'model: y = {slope:.3f} * x + {const:.3f}')" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "### Black box testing\n", 94 | "\n", 95 | "In this part, we try to write tests blindly, i.e., without looking at the actual implementation of `mean()`, `scalar_product()`, and `linear_regression()`. These tests should tell us whether the functions behave as we expect. 
\n", 96 | "\n", 97 | "We begin with `mean()`and our only source of information is the function's signature\n", 98 | "\n", 99 | "```Python\n", 100 | "mean(a: iterable) -> float\n", 101 | "```\n", 102 | "\n", 103 | "and a mathemetical expression\n", 104 | "\n", 105 | "$$\\bar{a} = \\frac{1}{N}\\sum_{n=0}^{N-1} a_n$$" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "def test_mean():\n", 115 | " assert mean([0]) == 0\n", 116 | " assert mean([0, 0]) == 0\n", 117 | " assert mean([1]) == 1\n", 118 | " assert mean([1, 1]) == 1\n", 119 | " assert mean([float(i + 1) for i in range(100)]) == 50.5\n", 120 | "\n", 121 | "\n", 122 | "def test_mean_border_cases():\n", 123 | " with pytest.raises(TypeError):\n", 124 | " mean()\n", 125 | " with pytest.raises(TypeError):\n", 126 | " mean(1)\n", 127 | " with pytest.raises(TypeError):\n", 128 | " mean('hello, world')\n", 129 | " assert mean([]) == 0\n", 130 | "\n", 131 | "\n", 132 | "test_mean()\n", 133 | "test_mean_border_cases()" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "We have located a possible first issue: for an empty iterable, `mean()` raises a `ZeroDivisionError`.\n", 141 | "\n", 142 | "Next, we write unit tests for\n", 143 | "\n", 144 | "```Python\n", 145 | "scalar_product(a: iterable, b: iterable) -> float\n", 146 | "```\n", 147 | "\n", 148 | "$$\\left\\langle \\mathbf{a},\\mathbf{b} \\right\\rangle = \\sum\\limits_{n=0}^{N-1} a_n b_n$$" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "def test_scalar_product_orthogonal():\n", 158 | " assert scalar_product([1, 1], [0, 0]) == 0.0\n", 159 | " assert scalar_product([0, 1], [1, 0]) == 0.0\n", 160 | " assert scalar_product([1, 1], [1, -1]) == 0.0\n", 161 | "\n", 162 | "\n", 163 | "def test_scalar_product_squared_norm():\n", 164 | " assert scalar_product([1, 1], [1, 1]) == 2.0\n", 165 | " assert scalar_product([3, 4], [3, 4]) == 5**2\n", 166 | "\n", 167 | "\n", 168 | "def test_scalar_product_border_cases():\n", 169 | " with pytest.raises(TypeError):\n", 170 | " scalar_product()\n", 171 | " with pytest.raises(TypeError):\n", 172 | " scalar_product(1, 1)\n", 173 | " with pytest.raises(TypeError):\n", 174 | " scalar_product([1], 1)\n", 175 | " with pytest.raises(TypeError):\n", 176 | " scalar_product(1, [1])\n", 177 | " with pytest.raises(ValueError):\n", 178 | " scalar_product([1, 1], [1])\n", 179 | " with pytest.raises(ValueError):\n", 180 | " scalar_product([1], [1, 1])\n", 181 | " with pytest.raises(TypeError):\n", 182 | " scalar_product('hello', 'world')\n", 183 | " assert scalar_product([], []) == 0\n", 184 | "\n", 185 | "\n", 186 | "test_scalar_product_orthogonal()\n", 187 | "test_scalar_product_squared_norm()\n", 188 | "test_scalar_product_border_cases()" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "This function seems to be quite predictable.\n", 196 | "\n", 197 | "Finally, we write integration tests (as we reuse `mean()` and `scalar_product()`) for\n", 198 | "\n", 199 | "```Python\n", 200 | "linear_regression(x: iterable, y: iterable) -> (float, float)\n", 201 | "```\n", 202 | "\n", 203 | "$$\\begin{eqnarray*}\n", 204 | "\\textrm{slope} & = & \\frac{\\sum_{n=0}^{N-1} \\left( x_n - \\bar{x} \\middle) \\middle( y_n - \\bar{y} \\right)}{\\sum_{n=0}^{N-1} \\left( x_n - \\bar{x} 
\\right)^2} \\\\[0.5em]\n", 205 | "\\textrm{const} & = & \\bar{y} - \\textrm{slope } \\bar{x}\n", 206 | "\\end{eqnarray*}$$" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "def test_linear_regression():\n", 216 | " slope, const = linear_regression([0, 1], [0, 0])\n", 217 | " assert slope == 0\n", 218 | " assert const == 0\n", 219 | " slope, const = linear_regression([0, 1], [1, 1])\n", 220 | " assert slope == 0\n", 221 | " assert const == 1\n", 222 | " slope, const = linear_regression([0, 1], [1, 0])\n", 223 | " assert slope == -1\n", 224 | " assert const == 1\n", 225 | "\n", 226 | "\n", 227 | "def test_linear_regression_border_cases():\n", 228 | " with pytest.raises(TypeError):\n", 229 | " linear_regression()\n", 230 | " with pytest.raises(TypeError):\n", 231 | " linear_regression(1)\n", 232 | " with pytest.raises(TypeError):\n", 233 | " linear_regression(1, 1)\n", 234 | " with pytest.raises(TypeError):\n", 235 | " linear_regression(1, [1])\n", 236 | " with pytest.raises(TypeError):\n", 237 | " linear_regression([1], 1)\n", 238 | " with pytest.raises(ValueError):\n", 239 | " linear_regression([1, 1], [1])\n", 240 | " with pytest.raises(ValueError):\n", 241 | " linear_regression([1], [1, 1])\n", 242 | " slope, const = linear_regression([], [])\n", 243 | " assert slope == 0\n", 244 | " assert const == 0\n", 245 | "\n", 246 | "\n", 247 | "test_linear_regression()\n", 248 | "test_linear_regression_border_cases()" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "And, again, we find the `ZeroDivisionError` raised by `mean()` in the integration test for our `linear_regression()`.\n", 256 | "\n", 257 | "We now must decide how to deal with this situation. Do we keep the current behaviour and deal with the raised exception? Or do we catch this issue within `mean()` and use a sensible fix, e.g., set `mean([])` to zero?\n", 258 | "\n", 259 | "## Example II: a Langevin integrator\n", 260 | "\n", 261 | "The `langevin` module has an equally named function `langevin()`which provides the signature and docstring\n", 262 | "\n", 263 | "```Python\n", 264 | "def langevin(\n", 265 | " force, n_steps, x_init, v_init, mass,\n", 266 | " time_step=0.001, damping=0.1, beta=1.0):\n", 267 | " \"\"\"Langevin integrator for initial value problems\n", 268 | "\n", 269 | " This function implements the BAOAB algorithm of Benedict Leimkuhler\n", 270 | " and Charles Matthews. See J. Chem. Phys. 
138, 174102 (2013) for\n", 271 | " further details.\n", 272 | "\n", 273 | " Arguments:\n", 274 | " force (function): computes the forces of a single configuration\n", 275 | " n_steps (int): number of integration steps\n", 276 | " x_init (numpy.ndarray(n, d)): initial configuration\n", 277 | " v_init (numpy.ndarray(n, d)): initial velocities\n", 278 | " mass (numpy.ndarray(n)): particle masses\n", 279 | " time_step (float): time step for the integration\n", 280 | " damping (float): damping term, use zero if not coupled\n", 281 | " beta (float): inverse temperature\n", 282 | "\n", 283 | " Returns:\n", 284 | " x (numpy.ndarray(n_steps + 1, n, d)): configuraiton trajectory\n", 285 | " v (numpy.ndarray(n_steps + 1, n, d)): velocity trajectory\n", 286 | " \"\"\"\n", 287 | "```" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [ 296 | "def harmonic_potential(x):\n", 297 | " return 0.5 * np.sum(x**2, axis=(-2, -1))\n", 298 | "\n", 299 | "\n", 300 | "def harmonic_force(x):\n", 301 | " return -x\n", 302 | "\n", 303 | "\n", 304 | "def kinetic_energy(v, mass):\n", 305 | " return 0.5 * np.sum(v**2 * mass[None, :, None], axis=(-2, -1))\n", 306 | "\n", 307 | "\n", 308 | "x_init = np.array([[1.0]])\n", 309 | "v_init = np.array([[0.0]])\n", 310 | "mass = np.array([1.0])\n", 311 | "\n", 312 | "fig, axes = plt.subplots(1, 3, figsize=(10, 4), sharex=True, sharey=True)\n", 313 | "for ax, damping in zip(axes.flat, (0, 0.001, 0.005)):\n", 314 | " ax.set_title(f'damping={damping}')\n", 315 | " x, v = langevin(harmonic_force, 3000, x_init, v_init, mass, time_step=0.01, damping=damping)\n", 316 | " ax.scatter(x.reshape(-1), v.reshape(-1), c=np.arange(x.size), s=1)\n", 317 | " ax.set_aspect('equal')\n", 318 | " ax.set_xlabel(r'$x$')\n", 319 | "axes[0].set_ylabel(r'$v$')\n", 320 | "fig.tight_layout()" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "**Your task**: design a test suite for the `langevin` module!" 328 | ] 329 | } 330 | ], 331 | "metadata": { 332 | "kernelspec": { 333 | "display_name": "Python 3", 334 | "language": "python", 335 | "name": "python3" 336 | }, 337 | "language_info": { 338 | "codemirror_mode": { 339 | "name": "ipython", 340 | "version": 3 341 | }, 342 | "file_extension": ".py", 343 | "mimetype": "text/x-python", 344 | "name": "python", 345 | "nbconvert_exporter": "python", 346 | "pygments_lexer": "ipython3", 347 | "version": "3.7.0" 348 | } 349 | }, 350 | "nbformat": 4, 351 | "nbformat_minor": 2 352 | } 353 | -------------------------------------------------------------------------------- /notebooks/sampling-1-direct-and-importance-sampling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Sampling I: direct and importance sampling\n", 8 | "\n", 9 | "\n", 10 | "## Content\n", 11 | "- Monte Carlo integration methods\n", 12 | "\n", 13 | "## Remember jupyter notebooks\n", 14 | "- To run the currently highlighted cell, hold ⇧ Shift and press ⏎ Enter.\n", 15 | "- To get help for a specific function, place the cursor within the function's brackets, hold ⇧ Shift, and press ⇥ Tab." 
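One possible starting point for the `langevin` test-suite task above, as a minimal sketch: it assumes that `notebooks/langevin.py` is importable as `langevin` and that the returned trajectories really have the documented `(n_steps + 1, n, d)` shape; tolerances are kept deliberately loose.

```Python
import numpy as np

from langevin import langevin  # assumption: notebooks/langevin.py is on the path


def harmonic_force(x):
    """Force of the harmonic potential phi(x) = 0.5 * x**2."""
    return -x


def test_langevin_shapes():
    x, v = langevin(harmonic_force, 100,
                    np.array([[1.0]]), np.array([[0.0]]), np.array([1.0]))
    # the docstring promises (n_steps + 1, n, d)-shaped trajectories
    assert x.shape == (101, 1, 1)
    assert v.shape == (101, 1, 1)


def test_langevin_undamped_energy():
    # with damping=0 the O-step is the identity, so BAOAB reduces to
    # velocity Verlet and the total energy should stay roughly constant
    mass = np.array([1.0])
    x, v = langevin(harmonic_force, 1000,
                    np.array([[1.0]]), np.array([[0.0]]), mass,
                    time_step=0.01, damping=0.0)
    energy = (0.5 * np.sum(x**2, axis=(-2, -1))
              + 0.5 * np.sum(v**2 * mass[None, :, None], axis=(-2, -1)))
    np.testing.assert_allclose(energy, energy[0], atol=1e-2)
```

Further candidates for such a suite: check that the first frames equal the initial conditions, and that a long, damped trajectory reproduces the Boltzmann statistics of the harmonic oscillator within a generous tolerance.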
16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "%matplotlib inline\n", 25 | "import matplotlib.pyplot as plt\n", 26 | "import numpy as np" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "## \"Area under a curve\" via direct Monte Carlo sampling\n", 34 | "\n", 35 | "Let us define `f1(x, mu, sigma)` which evaluates\n", 36 | "\n", 37 | "$$f1(x, \\mu, \\sigma) = \\frac{1}{\\sqrt{2\\pi}\\sigma} \\exp\\left( -\\frac{(x - \\mu)^2}{2\\sigma^2} \\right)$$\n", 38 | "\n", 39 | "as well as `f2(x)` and `f3(x)` which evaluate superpositions of `f1` calls for different centers and scales:" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "def f1(x, mu=0.0, sigma=1.0):\n", 49 | " gauss_curve = np.exp(-(x - mu)**2 / (2.0 * sigma**2))\n", 50 | " area = np.sqrt(2.0 * np.pi) * sigma\n", 51 | " return gauss_curve / area\n", 52 | "\n", 53 | "\n", 54 | "x = np.linspace(-5, 5, 1000)\n", 55 | "\n", 56 | "fig, ax = plt.subplots()\n", 57 | "ax.plot(x, f1(x))\n", 58 | "ax.fill_between(x, 0, f1(x), alpha=0.3)\n", 59 | "ax.set_xlabel(r'$x$')\n", 60 | "ax.set_ylabel(r'$f1(x)$')\n", 61 | "fig.tight_layout()" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "def integrate_mc(func, xmin, xmax, size):\n", 71 | " x = np.random.uniform(low=xmin, high=xmax, size=size)\n", 72 | " f = func(x)\n", 73 | " ymax = f.max()\n", 74 | " x_width = xmax - xmin\n", 75 | " y = np.random.uniform(low=0, high=ymax, size=size)\n", 76 | " n = np.sum(y < f)\n", 77 | " area = x_width * ymax\n", 78 | " ratio = n / size\n", 79 | " return area * ratio, area, ratio\n", 80 | "\n", 81 | "\n", 82 | "integrate_mc(f1, -5, 5, 100000)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "def f2(x):\n", 92 | " return 0.5 * (f1(x, mu=-2, sigma=0.5) + f1(x, mu=2, sigma=0.5))\n", 93 | "\n", 94 | "\n", 95 | "x = np.linspace(-5, 5, 1000)\n", 96 | "\n", 97 | "fig, ax = plt.subplots()\n", 98 | "ax.plot(x, f2(x))\n", 99 | "ax.fill_between(x, 0, f2(x), alpha=0.3)\n", 100 | "ax.set_xlabel(r'$x$')\n", 101 | "ax.set_ylabel(r'$f2(x)$')\n", 102 | "fig.tight_layout()\n", 103 | "\n", 104 | "integrate_mc(f2, -5, 5, 100000)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "def f3(x):\n", 114 | " return 0.5 * (f1(x, mu=-4.5, sigma=0.05) + f1(x, mu=4.5, sigma=0.05))\n", 115 | "\n", 116 | "\n", 117 | "x = np.linspace(-5, 5, 1000)\n", 118 | "\n", 119 | "fig, ax = plt.subplots()\n", 120 | "ax.plot(x, f3(x))\n", 121 | "ax.fill_between(x, 0, f3(x), alpha=0.3)\n", 122 | "ax.set_xlabel(r'$x$')\n", 123 | "ax.set_ylabel(r'$f3(x)$')\n", 124 | "fig.tight_layout()\n", 125 | "\n", 126 | "integrate_mc(f3, -5, 5, 100000)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "### Exercise:\n", 134 | "\n", 135 | "How do the standard deviations of the integrals of the three functions scale with the number of evaluation points?" 
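A hint for the exercise above: for this kind of hit-or-miss Monte Carlo estimate the standard deviation is expected to decay roughly like $N^{-1/2}$ for all three functions, so what differs is mainly the prefactor. A quick check of the exponent is a straight-line fit on a log-log scale, sketched here with `integrate_mc` and `f1` from above:

```Python
ns = np.array([100, 1000, 10000, 100000])
stds = np.array([
    np.std([integrate_mc(f1, -5, 5, n)[0] for _ in range(100)])
    for n in ns])

# slope of log(std) versus log(N); it should come out close to -0.5
slope, intercept = np.polyfit(np.log(ns), np.log(stds), 1)
print(slope)
```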
136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "n_values = [100, 200, 500, 1000, 2000, 5000, 10000, 20000, 50000, 100000]\n", 145 | "\n", 146 | "fig, ax = plt.subplots(figsize=(10, 6))\n", 147 | "\n", 148 | "for i, f in enumerate((f1, f2, f3)):\n", 149 | " data = [np.std([integrate_mc(f, -5, 5, n)[0] for _ in range(100)])\n", 150 | " for n in n_values]\n", 151 | " ax.plot(n_values, data, '-o', linewidth=2, color=f'C{i}', label=f'f{i + 1}')\n", 152 | " ax.fill_between(n_values, 0, data, facecolor=f'C{i}', alpha=0.3)\n", 153 | "\n", 154 | "ax.semilogx()\n", 155 | "ax.legend()\n", 156 | "ax.set_xlabel(r'$N$')\n", 157 | "ax.set_ylabel(r'$\\sigma$')\n", 158 | "fig.tight_layout()" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "Why does `f3` scale so much worde than the two other functions? We can understand this problem by looking at the ratios of the areas under the curves versus the total integration/sampling areas:" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "def integrate_mc_show(func, xmin, xmax, size, ax=None):\n", 175 | " x = np.random.uniform(low=xmin, high=xmax, size=size)\n", 176 | " f = func(x)\n", 177 | " ymax = f.max()\n", 178 | " x_width = xmax - xmin\n", 179 | " y = np.random.uniform(low=0, high=ymax, size=size)\n", 180 | " n = np.sum(y < f)\n", 181 | " area = x_width * ymax\n", 182 | " ratio = n / size\n", 183 | " if ax is None:\n", 184 | " _, ax = plt.subplots()\n", 185 | " ax.plot([xmin] * 2, [0, ymax], color='C3')\n", 186 | " ax.plot([xmax] * 2, [0, ymax], color='C3')\n", 187 | " ax.plot([xmin, xmax], [0] * 2, color='C3')\n", 188 | " ax.plot([xmin, xmax], [ymax] * 2, color='C3')\n", 189 | " ax.scatter(x, y, c=y < f, s=0.1)\n", 190 | " ax.set_xlabel(r'$x$')\n", 191 | " ax.set_ylabel(r'$y$')\n", 192 | " return area * ratio, area, ratio, ax\n", 193 | "\n", 194 | "\n", 195 | "fig, axes = plt.subplots(1, 3, figsize=(10, 3))\n", 196 | "for i, (ax, f) in enumerate(zip(axes.flat, (f1, f2, f3))):\n", 197 | " _, _, ratio, _ = integrate_mc_show(f, -5, 5, 10000, ax=ax)\n", 198 | " ax.set_title(f'f{i + 1}, {ratio}')\n", 199 | "fig.tight_layout()" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "## Computing expectation values" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "def harmonic_potential(x):\n", 216 | " return np.power(x, 2)\n", 217 | "\n", 218 | "\n", 219 | "x = np.linspace(-5, 5, 1000)\n", 220 | "\n", 221 | "fig, ax = plt.subplots()\n", 222 | "ax.plot(x, harmonic_potential(x))\n", 223 | "ax.set_xlabel(r'$x$')\n", 224 | "ax.set_ylabel(r'$x^2$')\n", 225 | "fig.tight_layout()" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "We define a potential $\\phi(x) = x^2$ as shown above.\n", 233 | "According to statistical mechanics, the probablility to observe a\n", 234 | "position/configuration $x$ is proportional to the Boltzmann weight\n", 235 | "\n", 236 | "$$\n", 237 | "p(x) \\propto e^{-\\beta\\phi(x)} = e^{-\\frac{x^2}{\\mathrm{k}_\\mathrm{B}T}}\\,,\n", 238 | "\\quad \\mathrm{with} \\; \\beta^{-1}=\\mathrm{k}_\\mathrm{B}T\n", 239 | "$$\n", 240 | "\n", 241 | "$\\beta$ is called an inverse temperature and has the 
dimension of an inverse energy.\n", 242 | "\n", 243 | "We further define a uniform grid\n", 244 | "\n", 245 | "$$x_i = x_0 + ih \\,,\\; i=0,\\dots,n$$\n", 246 | "\n", 247 | "and a sequence of indicator functions\n", 248 | "\n", 249 | "$$\\chi_i(x) = \\begin{cases} 1, & x_i \\leq x < x_{i+1} \\\\ 0, & \\mathrm{else} \\end{cases}\\,, \\quad i=0,\\dots,n-1.$$\n", 250 | "\n", 251 | "Let us use our previous approach to compute a histogram of positions\n", 252 | "to approximate the stationary distribution $\\pi(x)$:" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "def chi(x, xmin, xmax):\n", 262 | " return np.logical_and(xmin <= x, x < xmax)\n", 263 | "\n", 264 | "\n", 265 | "positions = np.random.uniform(low=-5, high=5, size=100000)\n", 266 | "edges = np.linspace(-5, 5, 31)\n", 267 | "centers = (edges[:-1] + edges[1:]) / 2\n", 268 | "\n", 269 | "histogram = [np.sum(chi(positions, x, y)) / positions.size\n", 270 | " for x, y in zip(edges[:-1], edges[1:])]\n", 271 | "\n", 272 | "fig, ax = plt.subplots()\n", 273 | "ax.bar(centers, [h for h in histogram], (edges[1] - edges[0]) * 0.9)\n", 274 | "ax.set_xlabel(r'$x$')\n", 275 | "ax.set_ylabel(r'$\\frac{1}{N}\\sum_{n=1}^N \\chi_i(x_n)$')\n", 276 | "fig.tight_layout()" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "This does not look right!\n", 284 | "The naive averaging yields an approximately constant distribution\n", 285 | "which is not compatible with the expected Boltzmann distribution.\n", 286 | "\n", 287 | "The reason is simply that the naive average does not compute a **weighted** expectation:\n", 288 | "\n", 289 | "$$\\frac{1}{N}\\sum_{n=1}^N \\chi_i(x_n) \\neq \\mathbb{E}_p \\left[\\chi_i \\right]$$\n", 290 | "\n", 291 | "Instead, we need to compute \n", 292 | "\n", 293 | "$$\\sum_{n=1}^N \\chi_i(x_n) p(x_n)\\,, \\quad \\mathrm{with} \\; \\sum_{n=1}^N p(x_n)=1:$$" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": null, 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "def pairs(iterable):\n", 303 | " for x, y in zip(iterable[:-1], iterable[1:]):\n", 304 | " yield x, y\n", 305 | "\n", 306 | "\n", 307 | "weights = np.exp(-harmonic_potential(positions) * 1.0)\n", 308 | "weights /= weights.sum()\n", 309 | "\n", 310 | "histogram2 = [np.sum(weights[chi(positions, x, y)])\n", 311 | " for x, y in pairs(edges)]\n", 312 | "\n", 313 | "fig, ax = plt.subplots()\n", 314 | "ax.bar(\n", 315 | " centers,\n", 316 | " [h for h in histogram],\n", 317 | " (edges[1] - edges[0]) * 0.9,\n", 318 | " label=r'$\\frac{1}{N}\\sum_{n=1}^N \\chi_i(x_n)$')\n", 319 | "ax.bar(\n", 320 | " centers,\n", 321 | " [h for h in histogram2],\n", 322 | " (edges[1] - edges[0]) * 0.8,\n", 323 | " label=r'$\\sum_{n=1}^N \\chi_i(x_n)p(x_n)$')\n", 324 | "ax.set_xlabel(r'$x$')\n", 325 | "ax.legend()\n", 326 | "fig.tight_layout()" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "With this correction, we get the expected distribution, but we are still wasting computational effort.\n", 334 | "\n", 335 | "A better strategy is to exploit that we can directly sample from the Boltzmann distribution in our case of a harmonic potential. 
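The same reweighting works for any observable, not only the indicator functions. As a small sketch that reuses `positions`, `weights`, and `harmonic_potential` from the cells above, the self-normalised estimate of the mean potential energy should come out near $0.5$, because $\langle x^2 \rangle = 1/(2\beta)$ for $p(x) \propto e^{-\beta x^2}$ and we use $\beta = 1$:

```Python
# self-normalised importance-sampling estimate of <phi> under p(x) ~ exp(-x**2)
mean_potential = np.sum(weights * harmonic_potential(positions))
print(mean_potential)  # expected to be close to 0.5
```

Back to the direct-sampling idea.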
We just have to replace the way we draw random numbers:" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 | "metadata": {}, 342 | "outputs": [], 343 | "source": [ 344 | "positions = np.random.normal(size=100000)\n", 345 | "edges = np.linspace(-5, 5, 31)\n", 346 | "centers = (edges[:-1] + edges[1:]) / 2\n", 347 | "\n", 348 | "histogram = [np.sum(chi(positions, x, y)) / positions.size\n", 349 | " for x, y in zip(edges[:-1], edges[1:])]\n", 350 | "\n", 351 | "fig, ax = plt.subplots()\n", 352 | "ax.bar(centers, [h for h in histogram], (edges[1] - edges[0]) * 0.9)\n", 353 | "ax.set_xlabel(r'$x$')\n", 354 | "ax.set_ylabel(r'$\\frac{1}{N}\\sum_{n=1}^N \\chi_i(x_n)$, $x_n\\sim\\mathcal{N}(0, 1)$')\n", 355 | "fig.tight_layout()" 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "If we can sample directly from $p$, we can compute expectations without reweighting.\n", 363 | "\n", 364 | "In most cases, however, we cannot **directly** sample from $p$ and we have to resort to more complicated procedures,\n", 365 | "e.g., sampling a Markov chain of positions/configurations using the Metropolis scheme\n", 366 | "where we accept a transition from $x$ to $y$ with the conditional acceptance probablility\n", 367 | "\n", 368 | "$$\\mathbb{A}(y|x) = \\min\\left\\{1, \\exp\\left(\\beta(\\phi(x) - \\phi(y))\\right)\\right\\}.$$\n", 369 | "\n", 370 | "### Exercise:\n", 371 | "\n", 372 | "Complete the following function to implement the Metropolis sampling algorithm." 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": null, 378 | "metadata": {}, 379 | "outputs": [], 380 | "source": [ 381 | "def mmc(potential, size, x_init=0.0, beta=1.0, step=0.5):\n", 382 | " from random import uniform\n", 383 | " x, u = np.zeros(size), np.zeros(size)\n", 384 | " x[0] = x_init\n", 385 | " u[0] = potential(x_init)\n", 386 | " for i in range(1, size):\n", 387 | " x_ = x[i - 1] + uniform(-step, step)\n", 388 | " u_ = potential(x_)\n", 389 | " if u_ <= u[i - 1] \\\n", 390 | " or uniform(0, 1) < np.exp(beta * (u[i - 1] - u_)):\n", 391 | " x[i], u[i] = x_, u_\n", 392 | " else:\n", 393 | " x[i], u[i] = x[i - 1], u[i - 1]\n", 394 | " return x, u\n", 395 | "\n", 396 | "\n", 397 | "positions, energies = mmc(harmonic_potential, 10000)\n", 398 | "fig, ax = plt.subplots(figsize=(10, 5))\n", 399 | "ax.plot(positions[:400], label=r'positions')\n", 400 | "ax.plot(energies[:400], label=r'energies')\n", 401 | "ax.set_xlabel(r'$t$ / steps')\n", 402 | "ax.legend()\n", 403 | "fig.tight_layout()" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": null, 409 | "metadata": {}, 410 | "outputs": [], 411 | "source": [ 412 | "edges = np.linspace(positions.min(), positions.max(), 31)\n", 413 | "centers = (edges[:-1] + edges[1:]) / 2\n", 414 | "\n", 415 | "histogram = [np.sum(chi(positions, x, y)) / positions.size\n", 416 | " for x, y in zip(edges[:-1], edges[1:])]\n", 417 | "\n", 418 | "fig, ax = plt.subplots()\n", 419 | "ax.bar(centers, [h for h in histogram], (edges[1] - edges[0]) * 0.9)\n", 420 | "ax.set_xlabel(r'$x$')\n", 421 | "ax.set_ylabel(r'$\\frac{1}{N}\\sum_{n=1}^N \\chi_i(x_n)$, $x_n\\sim e^{-\\beta\\phi(x_n)}$')\n", 422 | "fig.tight_layout()" 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": null, 428 | "metadata": {}, 429 | "outputs": [], 430 | "source": [] 431 | } 432 | ], 433 | "metadata": { 434 | "kernelspec": { 435 | "display_name": "Python 3", 436 | "language": "python", 437 | 
"name": "python3" 438 | }, 439 | "language_info": { 440 | "codemirror_mode": { 441 | "name": "ipython", 442 | "version": 3 443 | }, 444 | "file_extension": ".py", 445 | "mimetype": "text/x-python", 446 | "name": "python", 447 | "nbconvert_exporter": "python", 448 | "pygments_lexer": "ipython3", 449 | "version": "3.7.0" 450 | } 451 | }, 452 | "nbformat": 4, 453 | "nbformat_minor": 2 454 | } 455 | -------------------------------------------------------------------------------- /notebooks/python-intro-6-pytorch-basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction to Python VI: pytorch basics\n", 8 | "\n", 9 | "## Content\n", 10 | "- tensors vs arrays\n", 11 | "- automatic differentiation\n", 12 | "\n", 13 | "## Prequisites\n", 14 | "Visit [pytorch.org](http://pytorch.org) and follow the installation instructions.\n", 15 | "\n", 16 | "## Remember jupyter notebooks\n", 17 | "- To run the currently highlighted cell, hold ⇧ Shift and press ⏎ Enter.\n", 18 | "- To get help for a specific function, place the cursor within the function's brackets, hold ⇧ Shift, and press ⇥ Tab." 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "%matplotlib inline\n", 28 | "import matplotlib.pyplot as plt\n", 29 | "import numpy as np\n", 30 | "import torch\n", 31 | "from torch.utils import data" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## `torch.Tensor` vs `numpy.ndarray`\n", 39 | "We shall see in the next few cells how to create `pytorch`'s main data structure: tensors. We will also see that the syntax is really close to that of `numpy`." 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "print(torch.ones(3, 5))" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "print(torch.zeros(4, 2))" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "print(torch.arange(5))" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "This is the standard `Tensor`:" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "a = torch.Tensor()\n", 83 | "print(a)\n", 84 | "print(a.dim())\n", 85 | "print(a.shape)\n", 86 | "print(a.type())" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "It can be initialised with (nested) lists..." 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "a = torch.Tensor([0, 1, 2])\n", 103 | "print(a)\n", 104 | "print(a.dim())\n", 105 | "print(a.shape)\n", 106 | "print(a.type())" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "a = torch.Tensor([[0, 1, 2], [3, 4, 5]])\n", 116 | "print(a)\n", 117 | "print(a.dim())\n", 118 | "print(a.shape)\n", 119 | "print(a.type())" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "... 
or with `numpy.ndarray` objects:" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "a = torch.Tensor(np.asarray([[0, 1, 2], [3, 4, 5]]))\n", 136 | "print(a)\n", 137 | "print(a.dim())\n", 138 | "print(a.shape)\n", 139 | "print(a.type())" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "The standard `Tensor` defaults to single precision (32 bit), independent of the initial data.\n", 147 | "\n", 148 | "The `tensor()` function, however, uses the same type as the supplied data:" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "a = torch.tensor(np.array([[0, 1, 2], [3, 4, 5]]))\n", 158 | "print(a)\n", 159 | "print(a.dim())\n", 160 | "print(a.shape)\n", 161 | "print(a.type())" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "a = torch.tensor(np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]))\n", 171 | "print(a)\n", 172 | "print(a.dim())\n", 173 | "print(a.shape)\n", 174 | "print(a.type())" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "The exists a special function to create a `Tensor` from a `numpy.ndarray`:" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "a = torch.from_numpy(np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]], dtype=np.float32))\n", 191 | "print(a)\n", 192 | "print(a.dim())\n", 193 | "print(a.shape)\n", 194 | "print(a.type())" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "And, like in `numpy`, you can change a `Tensor`'s type:" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "a = torch.from_numpy(np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]])).float()\n", 211 | "print(a)\n", 212 | "print(a.dim())\n", 213 | "print(a.shape)\n", 214 | "print(a.type())" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "A `torch.Tensor` is actually a wrapper around a `numpy.ndarray`:" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "a = np.arange(6).reshape(-1, 3)\n", 231 | "b = torch.from_numpy(a)\n", 232 | "print(a)\n", 233 | "print(b)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "a[:, 1] *= -1\n", 243 | "print(a)\n", 244 | "print(b)" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "A type cast, however, disconnects array and tensor:" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "a = np.arange(6).reshape(-1, 3)\n", 261 | "b = torch.from_numpy(a.astype(np.float))\n", 262 | "c = torch.from_numpy(a).float()\n", 263 | "\n", 264 | "a[:, 1] = -1\n", 265 | "\n", 266 | "print(a)\n", 267 | "print(b)\n", 268 | "print(c)" 269 | ] 270 | }, 271 | { 272 | "cell_type": "markdown", 273 | "metadata": {}, 274 | "source": [ 275 | "You can use 
`torch.Tensor`s (nearly) like `numpy.ndarray`s:" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": null, 281 | "metadata": {}, 282 | "outputs": [], 283 | "source": [ 284 | "a = torch.arange(6).float()\n", 285 | "\n", 286 | "print(a + 1)\n", 287 | "print(a - 1)\n", 288 | "print(a * 2)\n", 289 | "print(a / 2)\n", 290 | "print(a // 2)\n", 291 | "print(a % 2)\n", 292 | "print(a**2)" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "Note that, unlike arrays, tensors do not change their `dtype`:" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [ 308 | "a = torch.LongTensor([[1, 2, 3], [4, 5, 6]])\n", 309 | "print(a / 2.0)" 310 | ] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "metadata": {}, 315 | "source": [ 316 | "`pytorch` is **really** strict about using the right data type:" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "a = torch.Tensor([[1, 2, 3], [4, 5, 6]])\n", 326 | "b = torch.Tensor([[1, 2, 3], [4, 5, 6]]).double()\n", 327 | "\n", 328 | "try:\n", 329 | " print(a + b)\n", 330 | "except Exception as e:\n", 331 | " print(type(e))\n", 332 | " print(e)" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "metadata": {}, 338 | "source": [ 339 | "Operating on `numpy.ndarray`s usually creates new objects:" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": null, 345 | "metadata": {}, 346 | "outputs": [], 347 | "source": [ 348 | "a = np.ones((3, 5))\n", 349 | "b = np.exp(a)\n", 350 | "print(id(a))\n", 351 | "print(id(b))" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "To make operations **inplace**, we have to make some (small) effort:" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "b = np.exp(a, out=a)\n", 368 | "print(id(a))\n", 369 | "print(id(b))" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [ 378 | "print(id(a))\n", 379 | "a[:] = np.exp(a)\n", 380 | "print(id(a))" 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "metadata": {}, 386 | "source": [ 387 | "In `pytorch`, the situation is similar:" 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": null, 393 | "metadata": {}, 394 | "outputs": [], 395 | "source": [ 396 | "a = torch.ones(3, 5)\n", 397 | "b = torch.exp(a)\n", 398 | "print(id(a))\n", 399 | "print(id(b))" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": null, 405 | "metadata": {}, 406 | "outputs": [], 407 | "source": [ 408 | "b = torch.exp(a, out=a)\n", 409 | "print(id(a))\n", 410 | "print(id(b))" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": null, 416 | "metadata": {}, 417 | "outputs": [], 418 | "source": [ 419 | "print(id(a))\n", 420 | "a[:] = torch.exp(a)\n", 421 | "print(id(a))" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "There are, however, (non-)inplace operations available as methods for `torch.Tensor`s:" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": null, 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | 
"print(id(a))\n", 438 | "print(id(a.exp_()))" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": null, 444 | "metadata": {}, 445 | "outputs": [], 446 | "source": [ 447 | "print(id(a))\n", 448 | "print(id(a.exp()))" 449 | ] 450 | }, 451 | { 452 | "cell_type": "markdown", 453 | "metadata": {}, 454 | "source": [ 455 | "If we have a GPU available, using the `cuda()` method moves a `torch.Tensor` onto the GPU and all subsequent calculations are performed there. With the `cpu()` method, we can get our `torch.Tensor` back from the GPU." 456 | ] 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": null, 461 | "metadata": {}, 462 | "outputs": [], 463 | "source": [ 464 | "a = torch.randn(1000, 1000)\n", 465 | "\n", 466 | "if torch.cuda.is_available():\n", 467 | " print('We have CUDA!')\n", 468 | " a = a.cuda()\n", 469 | "else:\n", 470 | " print('No CUDA :(')\n", 471 | "\n", 472 | "a.exp_().cpu()" 473 | ] 474 | }, 475 | { 476 | "cell_type": "markdown", 477 | "metadata": {}, 478 | "source": [ 479 | "What happens, though, if you call `.cuda()` on a `torch.Tensor` without having a CUDA-compatible GPU at your disposal?" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": null, 485 | "metadata": {}, 486 | "outputs": [], 487 | "source": [ 488 | "try:\n", 489 | " a.cuda()\n", 490 | "except Exception as e:\n", 491 | " print(type(e))\n", 492 | " print(e)" 493 | ] 494 | }, 495 | { 496 | "cell_type": "markdown", 497 | "metadata": {}, 498 | "source": [ 499 | "Remember the earlier programming exercises `mean(a)`, `scalar_product(a, b)`, and `linear_regression(x, y)`?\n", 500 | "\n", 501 | "Here, we refactor them for `torch.Tensor`s. We only use methods of already existing `torch.Tensor`s as well as the operators `=`, `-`, `*`, and `/`. 
For `linear_regression(x, y)` we then use `mean(a)` and `scalar_product(a, b)`:" 502 | ] 503 | }, 504 | { 505 | "cell_type": "code", 506 | "execution_count": null, 507 | "metadata": {}, 508 | "outputs": [], 509 | "source": [ 510 | "def mean(a):\n", 511 | " return a.sum().div(len(a))\n", 512 | "\n", 513 | "def scalar_product(a, b):\n", 514 | " return a.mul(b).sum()\n", 515 | "\n", 516 | "def linear_regression(x, y):\n", 517 | " x_mean = mean(x)\n", 518 | " y_mean = mean(y)\n", 519 | " x = x.sub(x_mean)\n", 520 | " y = y.sub(y_mean)\n", 521 | " slope = scalar_product(x, y) / x.pow(2).sum()\n", 522 | " const = y_mean - slope * x_mean\n", 523 | " return slope, const" 524 | ] 525 | }, 526 | { 527 | "cell_type": "code", 528 | "execution_count": null, 529 | "metadata": {}, 530 | "outputs": [], 531 | "source": [ 532 | "assert -0.1 < mean(torch.randn(1000)) < 0.1\n", 533 | "assert scalar_product(torch.Tensor([0, 1, 2]), torch.Tensor([1, 1, 1])) == 3\n", 534 | "\n", 535 | "x = torch.Tensor([10, 14, 16, 15, 16, 20])\n", 536 | "y = torch.Tensor([ 1, 3, 5, 6, 5, 11])\n", 537 | "slope, const = linear_regression(x, y)\n", 538 | "assert 0.97 < slope < 0.99\n", 539 | "assert -9.72 < const < -9.70" 540 | ] 541 | }, 542 | { 543 | "cell_type": "markdown", 544 | "metadata": {}, 545 | "source": [ 546 | "We can make `pytorch` track which operations are performed on a `torch.Tensor` by using the `requires_grad=True` parameter:" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": null, 552 | "metadata": {}, 553 | "outputs": [], 554 | "source": [ 555 | "a = torch.rand(3)\n", 556 | "b = torch.rand_like(a, requires_grad=True)\n", 557 | "c = torch.sum(a * b**2)\n", 558 | "print(a)\n", 559 | "print(b)\n", 560 | "print(c)" 561 | ] 562 | }, 563 | { 564 | "cell_type": "markdown", 565 | "metadata": {}, 566 | "source": [ 567 | "`pytorch` is now able to differentiate `c` with respect to `b`.\n", 568 | "\n", 569 | "Let's make use of that to solve an actual optimisation problem:" 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": null, 575 | "metadata": {}, 576 | "outputs": [], 577 | "source": [ 578 | "def rbf(x, y):\n", 579 | " \"\"\"Rosenbrock function\"\"\"\n", 580 | " return (1 - x)**2 + 100 * (y - x**2)**2\n", 581 | "\n", 582 | "\n", 583 | "xx, yy = np.meshgrid(np.linspace(-2, 2, 100), np.linspace(-1, 3, 100))\n", 584 | "zz = rbf(xx, yy)\n", 585 | "\n", 586 | "fig, ax = plt.subplots(figsize=(5, 5))\n", 587 | "ax.contour(xx, yy, zz, np.linspace(51, 2000, 20), colors='k', linewidths=0.1)\n", 588 | "ax.contourf(xx, yy, zz, np.linspace(0, 50, 20))\n", 589 | "ax.plot([-2, 2], [1, 1], '--', linewidth=1, color='C1')\n", 590 | "ax.plot([1, 1], [-1, 3], '--', linewidth=1, color='C1')\n", 591 | "ax.set_aspect('equal')\n", 592 | "ax.set_xlabel('$x$')\n", 593 | "ax.set_ylabel('$y$')\n", 594 | "fig.tight_layout()" 595 | ] 596 | }, 597 | { 598 | "cell_type": "markdown", 599 | "metadata": {}, 600 | "source": [ 601 | "We create a starting point and require a gradient for this tensor. Then, we evaluate the Rosenbrock function for this position and obtain the gradient via differentiation of the function at this position. 
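As a quick sanity check of the autograd machinery from the `requires_grad` cell further above, here is a minimal sketch reusing its `a`, `b`, and `c`: for $c = \sum_i a_i b_i^2$ the analytic gradient with respect to `b` is $2 a_i b_i$, and `backward()` should reproduce it.

```Python
c.backward()
print(b.grad)               # gradient filled in by autograd
print(2 * a * b.detach())   # analytic gradient, for comparison
```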
And then, we follow the negative gradient and repeat until we converge to the global minimum.\n", 602 | "\n", 603 | "In short, we locate the minimum of the Rosenbrock function via steepest descent:" 604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": null, 609 | "metadata": {}, 610 | "outputs": [], 611 | "source": [ 612 | "xy = torch.tensor([-0.3, 2.8], requires_grad=True)\n", 613 | "\n", 614 | "path, conv = [], []\n", 615 | "while True:\n", 616 | " f = rbf(*xy)\n", 617 | " path.append(xy.data.numpy().copy())\n", 618 | " conv.append(f.item())\n", 619 | " if conv[-1] < 0.00001:\n", 620 | " break\n", 621 | " f.backward()\n", 622 | " xy.data.sub_(xy.grad.data.mul_(0.0005))\n", 623 | " xy.grad.zero_()\n", 624 | "path = np.asarray(path)\n", 625 | "\n", 626 | "fig, axes = plt.subplots(1, 2, figsize=(10, 5))\n", 627 | "axes[0].plot(conv)\n", 628 | "axes[0].semilogx()\n", 629 | "axes[0].semilogy()\n", 630 | "axes[0].set_xlabel('steps')\n", 631 | "axes[0].set_ylabel('function value')\n", 632 | "axes[1].contour(xx, yy, zz, np.linspace(51, 2000, 20), colors='k', linewidths=0.1)\n", 633 | "axes[1].contourf(xx, yy, zz, np.linspace(0, 50, 20))\n", 634 | "axes[1].plot(*path.T, linewidth=3, color='C3')\n", 635 | "axes[1].plot([-2, 2], [1, 1], '--', linewidth=1, color='C1')\n", 636 | "axes[1].plot([1, 1], [-1, 3], '--', linewidth=1, color='C1')\n", 637 | "axes[1].set_aspect('equal')\n", 638 | "axes[1].set_xlabel('$x$')\n", 639 | "axes[1].set_ylabel('$y$')\n", 640 | "fig.tight_layout()" 641 | ] 642 | }, 643 | { 644 | "cell_type": "code", 645 | "execution_count": null, 646 | "metadata": {}, 647 | "outputs": [], 648 | "source": [ 649 | "fig, axes = plt.subplots(2, 4, figsize=(12, 6))\n", 650 | "for ax, cut in zip(axes.flat, [2, 4, 11, 101, 1001, 2501, 5001, 10001]):\n", 651 | " ax.contour(xx, yy, zz, np.linspace(51, 2000, 20), colors='k', linewidths=0.1)\n", 652 | " ax.contourf(xx, yy, zz, np.linspace(0, 50, 20))\n", 653 | " ax.plot(*path[:cut].T, '-o', markersize=3, linewidth=1, color='C3')\n", 654 | " ax.plot([-2, 2], [1, 1], '--', linewidth=1, color='C1')\n", 655 | " ax.plot([1, 1], [-1, 3], '--', linewidth=1, color='C1')\n", 656 | " ax.set_aspect('equal')\n", 657 | " ax.text(-1.9, -0.8, f'steps: {cut - 1}', fontsize=15)\n", 658 | " ax.set_axis_off()\n", 659 | "fig.tight_layout()" 660 | ] 661 | }, 662 | { 663 | "cell_type": "code", 664 | "execution_count": null, 665 | "metadata": {}, 666 | "outputs": [], 667 | "source": [] 668 | } 669 | ], 670 | "metadata": { 671 | "kernelspec": { 672 | "display_name": "Python 3", 673 | "language": "python", 674 | "name": "python3" 675 | }, 676 | "language_info": { 677 | "codemirror_mode": { 678 | "name": "ipython", 679 | "version": 3 680 | }, 681 | "file_extension": ".py", 682 | "mimetype": "text/x-python", 683 | "name": "python", 684 | "nbconvert_exporter": "python", 685 | "pygments_lexer": "ipython3", 686 | "version": "3.7.0" 687 | } 688 | }, 689 | "nbformat": 4, 690 | "nbformat_minor": 2 691 | } 692 | -------------------------------------------------------------------------------- /notebooks/python-intro-2-flow-control-and-functions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction to Python II: flow control and functions\n", 8 | "\n", 9 | "## Content\n", 10 | "- What are conditional expressions?\n", 11 | "- What are loops?\n", 12 | "- How can I write functions?\n", 13 | "- How to use list 
comprehensions?\n", 14 | "\n", 15 | "## Remember jupyter notebooks\n", 16 | "- To run the currently highlighted cell, hold ⇧ Shift and press ⏎ Enter.\n", 17 | "- To get help for a specific function, place the cursor within the function's brackets, hold ⇧ Shift, and press ⇥ Tab.\n", 18 | "\n", 19 | "## Comparision operations\n", 20 | "We can use `==` to check whether two objects are equal or `!=` whether the are unequal:" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "print(1 == 1)\n", 30 | "print(1 != 1)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "Similar, we can check whether a variable is greater or smaller than another variable or value:" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "a = 1\n", 47 | "print(a > 0)\n", 48 | "print(a < 0)" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "And we can check greater/equal or smaller/equal:" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "print(a >= 0)\n", 65 | "print(a <= 0)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "Multiple comparisions can by chained together or modified with the boolean operators `and`, `or`, and `not`:" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "print(0 < a and a < 2)\n", 82 | "print(0 < a or 0 > a)\n", 83 | "print(not 0 < a)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "Python even allows to write multiple comparisions in this compact manner:" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "print(0 < a < 2)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "Two variables can have equal values but still be different objects. To check whether two variables/references point to the same object, we can use the `is` operator:" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "print(a is 1)\n", 116 | "print(a is not 1)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "a = [1]\n", 126 | "b = a\n", 127 | "print(a == b, a is b)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "a = [1]\n", 137 | "b = [1]\n", 138 | "print(a == b, a is b)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "## Conditional expressions\n", 146 | "With comparision operations available, we can make our code behave differently depending on the current situation. 
With the `if` statement, we can write make the execution of a piece of code conditional:" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "if True:\n", 156 | " print('The first condition is true')\n", 157 | "\n", 158 | "if False:\n", 159 | " print('Thes second is not')" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "We have three new concepts in the above cell:\n", 167 | "1. The `if` statement is followed by some condition.\n", 168 | "1. `True` and `False` are two `constants` indicating a logical true or false.\n", 169 | "1. Python groups code blocks by indention! The indented code after the `if` statement is only run if the condition is true.\n", 170 | "\n", 171 | "Here is another example:" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "a = [0, 1, 2, 3, 4]\n", 181 | "\n", 182 | "if len(a) < 3:\n", 183 | " print('a has less than 3 elements')\n", 184 | "else:\n", 185 | " print('a has three or more elements')" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "Here, we have added a default recation to the `if` statement. If the condition is false, the first code block is not executed. In this case, the second block will run.\n", 193 | "\n", 194 | "Another example:" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "a = [0, 1, 2, 3, 4]\n", 204 | "\n", 205 | "if len(a) < 3:\n", 206 | " print('a has less than 3 elements')\n", 207 | "elif len(a) > 3:\n", 208 | " print('a has more than 3 elements')\n", 209 | "else:\n", 210 | " print('a has 3 elements')" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "We have now added a second condition check to our `if` statement: if the first condition is false, the second condition is checked. Only if all explicitly given conditions are false, the default block is executed.\n", 218 | "\n", 219 | "You can chain an arbitrary number of conditions in an `if` statement:" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "a = 5\n", 229 | "\n", 230 | "if a == 0:\n", 231 | " print(0)\n", 232 | "elif a == 1:\n", 233 | " print(1)\n", 234 | "elif a == 2:\n", 235 | " print(2)\n", 236 | "elif a == 3:\n", 237 | " print(3)\n", 238 | "elif a == 4:\n", 239 | " print(4)\n", 240 | "elif a == 5:\n", 241 | " print(5)\n", 242 | "elif a == 6:\n", 243 | " print(6)\n", 244 | "elif a == 7:\n", 245 | " print(7)\n", 246 | "else:\n", 247 | " print('>7')" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "This works but is terribly inefficient! To save coding effort on this repetitive task, we can use a construct called loop...\n", 255 | "\n", 256 | "## Loops\n", 257 | "We can tell Python to repeat a code block for each element in a given sequence. 
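(An aside on the long `if`/`elif` chain above: once dictionaries are available, a lookup with a default value is the usual idiomatic replacement for this kind of value dispatch; a minimal sketch:)

```Python
a = 5
names = {0: '0', 1: '1', 2: '2', 3: '3', 4: '4', 5: '5', 6: '6', 7: '7'}
print(names.get(a, '>7'))  # dictionary lookup with a default instead of if/elif
```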
This is called a `for` loop:" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "for value in range(5):\n", 267 | " print(value)" 268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "metadata": {}, 273 | "source": [ 274 | "The inefficient `if` statement can now be written as:" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [ 283 | "a = 73\n", 284 | "\n", 285 | "for value in range(10000):\n", 286 | " if a == value:\n", 287 | " print(value)" 288 | ] 289 | }, 290 | { 291 | "cell_type": "markdown", 292 | "metadata": {}, 293 | "source": [ 294 | "This is still a very bad solution but much more easy to write than the first variant.\n", 295 | "\n", 296 | "A `for` loop iterates over a given sequence and performs the defined task for each element. This could be a `range` of `int`s as shown above or values stored in a `list`, `tuple` or `set`:" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": null, 302 | "metadata": {}, 303 | "outputs": [], 304 | "source": [ 305 | "for value in ['one', 'two', 'three']:\n", 306 | " print(value)" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": null, 312 | "metadata": {}, 313 | "outputs": [], 314 | "source": [ 315 | "for value in ({0, 1, 2}, 'This is a string', dict(soime_key='some_value')):\n", 316 | " print(value, type(value))" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "for value in {'one', 2, 'three', 4}:\n", 326 | " print(value)" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "#### Exercise\n", 334 | "What happens if you iterate over a `dict`?" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "metadata": {}, 347 | "source": [ 348 | "There are some useful modifiers for `for` loops. `reversed()` reverses the order of the `list` or `tuple`:" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": null, 354 | "metadata": {}, 355 | "outputs": [], 356 | "source": [ 357 | "for value in reversed(range(5)):\n", 358 | " print(value)" 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": {}, 364 | "source": [ 365 | "`enumerate` gives you the index along with the value:" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": null, 371 | "metadata": {}, 372 | "outputs": [], 373 | "source": [ 374 | "for index, value in enumerate(['one', 'two', 'three', 'four']):\n", 375 | " print(index, value)" 376 | ] 377 | }, 378 | { 379 | "cell_type": "markdown", 380 | "metadata": {}, 381 | "source": [ 382 | "`zip` allows you to iterate over two sequences simulatenously:" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": null, 388 | "metadata": {}, 389 | "outputs": [], 390 | "source": [ 391 | "for value1, value2 in zip(['one', 'two', 'three'], ('ball', 'people', 'hours of fun')):\n", 392 | " print(value1, value2)" 393 | ] 394 | }, 395 | { 396 | "cell_type": "markdown", 397 | "metadata": {}, 398 | "source": [ 399 | "#### Exercise\n", 400 | "You can even chain these modifiers. 
Here is an example:\n", 401 | "\n", 402 | "```Python\n", 403 | "for index, (value1, value2) in enumerate(zip(['one', 'two'], ('ball', 'people'))):\n", 404 | " print(index, value1, value2)\n", 405 | "```\n", 406 | "\n", 407 | "Now try it out for yourself:" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": null, 413 | "metadata": {}, 414 | "outputs": [], 415 | "source": [] 416 | }, 417 | { 418 | "cell_type": "markdown", 419 | "metadata": {}, 420 | "source": [ 421 | "If we do not know the sequence to iterate over but have a condition to decide whether to run or stop, we can use a `while` loop:" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": null, 427 | "metadata": {}, 428 | "outputs": [], 429 | "source": [ 430 | "a = 0\n", 431 | "while a < 5:\n", 432 | " print(a)\n", 433 | " a += 1" 434 | ] 435 | }, 436 | { 437 | "cell_type": "markdown", 438 | "metadata": {}, 439 | "source": [ 440 | "Let's use that to compute a mathematical sequence: the Fibonacci numbers\n", 441 | "\n", 442 | "$$f_i = f_{i-1} + f_{i-2},\\quad i\\geq2,\\, f_0=f_1=1$$\n", 443 | "\n", 444 | "We list all Fibonacci numbers greater than $1$ and smaller than $100$:" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": null, 450 | "metadata": {}, 451 | "outputs": [], 452 | "source": [ 453 | "a, b = 1, 1\n", 454 | "while True:\n", 455 | " a, b = a + b, a\n", 456 | " if a < 100:\n", 457 | " print(a)\n", 458 | " else:\n", 459 | " break" 460 | ] 461 | }, 462 | { 463 | "cell_type": "markdown", 464 | "metadata": {}, 465 | "source": [ 466 | "Here, we have used a condition for the `while` loop which is always true and use an `if` statement within the loop's body to `break` the flow.\n", 467 | "\n", 468 | "The `break` command also works with a `for` loop and terminates the whole iteration immediately once encountered. Another useful command is `continue`: once encountered the rest of the loop's body is skipped and the loop enters the next step in the iteration." 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": null, 474 | "metadata": {}, 475 | "outputs": [], 476 | "source": [ 477 | "for value in range(100):\n", 478 | " if value < 70:\n", 479 | " continue\n", 480 | " print(value)\n", 481 | " if value > 75:\n", 482 | " break" 483 | ] 484 | }, 485 | { 486 | "cell_type": "markdown", 487 | "metadata": {}, 488 | "source": [ 489 | "## Functions\n", 490 | "We already have used a number builtin functions: `print()`, `type()`, `id()`, `list()`, `set()`, `tuple()`, `range()`, `sorted()`, `dict()`, `len()`, `reversed()`, `enumerate()`, and `zip()`. 
A function is a piece of code which is executed wherever the name of the function is used.\n", 491 | "\n", 492 | "To write your own function, we need the `def` command:" 493 | ] 494 | }, 495 | { 496 | "cell_type": "code", 497 | "execution_count": null, 498 | "metadata": {}, 499 | "outputs": [], 500 | "source": [ 501 | "def function1():\n", 502 | " print('function1 was called')" 503 | ] 504 | }, 505 | { 506 | "cell_type": "code", 507 | "execution_count": null, 508 | "metadata": {}, 509 | "outputs": [], 510 | "source": [ 511 | "function1()" 512 | ] 513 | }, 514 | { 515 | "cell_type": "markdown", 516 | "metadata": {}, 517 | "source": [ 518 | "A function may depend on one or more parameters (variables); this needs to be specified in the function's definition:" 519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": null, 524 | "metadata": {}, 525 | "outputs": [], 526 | "source": [ 527 | "def function2(value):\n", 528 | " print('function2 was called with parameter ' + str(value))\n", 529 | "\n", 530 | "\n", 531 | "function2(1)\n", 532 | "function2([0, 1, 2])" 533 | ] 534 | }, 535 | { 536 | "cell_type": "markdown", 537 | "metadata": {}, 538 | "source": [ 539 | "A remark on namespaces: function parameters or variables defined within a function's body are local to the function:" 540 | ] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "execution_count": null, 545 | "metadata": {}, 546 | "outputs": [], 547 | "source": [ 548 | "a, b = 1, 2\n", 549 | "\n", 550 | "\n", 551 | "def function3(a):\n", 552 | " b = 8\n", 553 | " print(a, b)\n", 554 | "\n", 555 | "\n", 556 | "function3(7)\n", 557 | "print(a, b)" 558 | ] 559 | }, 560 | { 561 | "cell_type": "markdown", 562 | "metadata": {}, 563 | "source": [ 564 | "If we **reference** a variable which is not defined within the function's body, it is assumed to be a global variable:" 565 | ] 566 | }, 567 | { 568 | "cell_type": "code", 569 | "execution_count": null, 570 | "metadata": {}, 571 | "outputs": [], 572 | "source": [ 573 | "global_variable = 'This is global'\n", 574 | "\n", 575 | "\n", 576 | "def function4(parameter):\n", 577 | " print(parameter)\n", 578 | " print(global_variable)\n", 579 | "\n", 580 | "\n", 581 | "function4('This is local')" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": null, 587 | "metadata": {}, 588 | "outputs": [], 589 | "source": [ 590 | "def polynomial(x):\n", 591 | " return 2.0 * x**3 - 3.0 * x**2 + 1" 592 | ] 593 | }, 594 | { 595 | "cell_type": "markdown", 596 | "metadata": {}, 597 | "source": [ 598 | "Here is an example of a function which takes a sequence and returns a list where every element is squared:" 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": null, 604 | "metadata": {}, 605 | "outputs": [], 606 | "source": [ 607 | "def square(a):\n", 608 | " b = []\n", 609 | " for value in a:\n", 610 | " b.append(value**2)\n", 611 | " return b\n", 612 | "\n", 613 | "\n", 614 | "print(square(range(1, 11)))" 615 | ] 616 | }, 617 | { 618 | "cell_type": "markdown", 619 | "metadata": {}, 620 | "source": [ 621 | "#### Exercise\n", 622 | "Implement the polynomial\n", 623 | "\n", 624 | "$$p(x) = 2x^3 - 3x^2 + 1$$\n", 625 | "\n", 626 | "by completing the following stub:" 627 | ] 628 | }, 629 | { 630 | "cell_type": "code", 631 | "execution_count": null, 632 | "metadata": {}, 633 | "outputs": [], 634 | "source": [ 635 | "def polynomial(x):\n", 636 | " result = None\n", 637 | " return result\n", 638 | "\n", 639 | "\n", 640 | "print(polynomial(0), polynomial(0.5), 
polynomial(1))" 641 | ] 642 | }, 643 | { 644 | "cell_type": "markdown", 645 | "metadata": {}, 646 | "source": [ 647 | "## List comprehensions\n", 648 | "List comprehensions represent a concise and efficient way of creating lists, where each element can be defined in terms of a simpler sequence and/or conditions. For example, a list with the square numbers from $1$ to $100$ can be constructed as:" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": null, 654 | "metadata": {}, 655 | "outputs": [], 656 | "source": [ 657 | "print([value**2 for value in range(1, 11)])" 658 | ] 659 | }, 660 | { 661 | "cell_type": "markdown", 662 | "metadata": {}, 663 | "source": [ 664 | "The pattern is\n", 665 | "```Python\n", 666 | "[transformation(x) for x in sequence_of_x]\n", 667 | "```\n", 668 | "\n", 669 | "This can further be combined with one or more conditions such that only `x` values for which the condition is true go into the `list`. As an example, let's use only even squares between $1$ and $100$:" 670 | ] 671 | }, 672 | { 673 | "cell_type": "code", 674 | "execution_count": null, 675 | "metadata": {}, 676 | "outputs": [], 677 | "source": [ 678 | "print([value**2 for value in range(1, 11) if value % 2 == 0])" 679 | ] 680 | }, 681 | { 682 | "cell_type": "markdown", 683 | "metadata": {}, 684 | "source": [ 685 | "#### Exercise\n", 686 | "Can you do a `tuple` or `set` comprehension?" 687 | ] 688 | }, 689 | { 690 | "cell_type": "code", 691 | "execution_count": null, 692 | "metadata": {}, 693 | "outputs": [], 694 | "source": [] 695 | }, 696 | { 697 | "cell_type": "markdown", 698 | "metadata": {}, 699 | "source": [ 700 | "Example: `dict` comprehension" 701 | ] 702 | }, 703 | { 704 | "cell_type": "code", 705 | "execution_count": null, 706 | "metadata": {}, 707 | "outputs": [], 708 | "source": [ 709 | "print({x: x**2 for x in range(1, 10)})" 710 | ] 711 | }, 712 | { 713 | "cell_type": "markdown", 714 | "metadata": {}, 715 | "source": [ 716 | "#### Exercise\n", 717 | "Use a `dict` comprehension to create a dictionary of pairs of `x: x**2` pairs where the keys `x` are all numbers between $5$ and $500$ which are divisible by $7$ but not by $5$." 718 | ] 719 | }, 720 | { 721 | "cell_type": "code", 722 | "execution_count": null, 723 | "metadata": {}, 724 | "outputs": [], 725 | "source": [] 726 | }, 727 | { 728 | "cell_type": "markdown", 729 | "metadata": {}, 730 | "source": [ 731 | "#### Exercise\n", 732 | "Create a `set` with all prime numbers between $5$ and $500$ (included). Show the the cut set (intersection) between this set and the set of keys from the previous exercise contains only one number. Which number is that?" 
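One possible way to solve the two set exercises above, as a minimal sketch (trial division is sufficient at this scale; the variable names are made up):

```Python
divisible_keys = {x for x in range(5, 501) if x % 7 == 0 and x % 5 != 0}
primes = {n for n in range(5, 501)
          if all(n % d != 0 for d in range(2, int(n**0.5) + 1))}

print(primes & divisible_keys)  # the only prime divisible by 7 is 7 itself
```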
733 | ] 734 | }, 735 | { 736 | "cell_type": "code", 737 | "execution_count": null, 738 | "metadata": {}, 739 | "outputs": [], 740 | "source": [] 741 | } 742 | ], 743 | "metadata": { 744 | "kernelspec": { 745 | "display_name": "Python 3", 746 | "language": "python", 747 | "name": "python3" 748 | }, 749 | "language_info": { 750 | "codemirror_mode": { 751 | "name": "ipython", 752 | "version": 3 753 | }, 754 | "file_extension": ".py", 755 | "mimetype": "text/x-python", 756 | "name": "python", 757 | "nbconvert_exporter": "python", 758 | "pygments_lexer": "ipython3", 759 | "version": "3.7.0" 760 | }, 761 | "toc": { 762 | "base_numbering": 1, 763 | "nav_menu": {}, 764 | "number_sections": false, 765 | "sideBar": true, 766 | "skip_h1_title": false, 767 | "title_cell": "Table of Contents", 768 | "title_sidebar": "Contents", 769 | "toc_cell": false, 770 | "toc_position": {}, 771 | "toc_section_display": true, 772 | "toc_window_display": false 773 | } 774 | }, 775 | "nbformat": 4, 776 | "nbformat_minor": 2 777 | } 778 | -------------------------------------------------------------------------------- /notebooks/python-intro-4-oop-iterators-generators.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction to Python IV: OOP, iterators, and generators\n", 8 | "\n", 9 | "## Content\n", 10 | "- What are classes and how to use/build them?\n", 11 | "- What are iterators and generators and what's the difference?\n", 12 | "- Some thoughts about testing...\n", 13 | "\n", 14 | "## Remember jupyter notebooks\n", 15 | "- To run the currently highlighted cell, hold ⇧ Shift and press ⏎ Enter.\n", 16 | "- To get help for a specific function, place the cursor within the function's brackets, hold ⇧ Shift, and press ⇥ Tab." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "%matplotlib inline\n", 26 | "import matplotlib.pyplot as plt\n", 27 | "import numpy as np" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "## Object-oriented programming (OOP)\n", 35 | "A `class` is a blueprint for a container like data structure containing variables (**attributes**) and functions (**methods**). An `object` is an actual instance of a `class`.\n", 36 | "\n", 37 | "**Example**: let's build a rotation matrix, i.e.,\n", 38 | "\n", 39 | "$$\\mathbf{R}(\\phi) = \\begin{pmatrix} \\cos(\\phi) & -\\sin(\\phi) \\\\ \\sin(\\phi) & \\cos(\\phi) \\end{pmatrix},$$\n", 40 | "\n", 41 | "which rotates a vector $\\mathbf{r}\\in\\mathbb{R}^2$ by the angle $\\phi$.\n", 42 | "\n", 43 | "As a function, it could look like this:" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "def rotate(vector, angle):\n", 53 | " \"\"\"Rotate the given vector by the given angle\"\"\"\n", 54 | " rad = np.pi * angle / 180\n", 55 | " rotation = np.array([\n", 56 | " [np.cos(rad), -np.sin(rad)],\n", 57 | " [np.sin(rad), np.cos(rad)]])\n", 58 | " return np.dot(rotation, np.asarray(vector))\n", 59 | "\n", 60 | "\n", 61 | "print(rotate([1, 0], 90))" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "Now, let's build a class to encapsulate this behaviour!" 
69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "class RotationMatrix(object):\n", 78 | " \"\"\"This will rotate a 2D vector by some angle phi\"\"\"\n", 79 | " \n", 80 | " def __init__(self, angle):\n", 81 | " \"\"\"\n", 82 | " Arguments:\n", 83 | " angle (float): angle in °\n", 84 | " \"\"\"\n", 85 | " self.angle = angle % 360\n", 86 | "\n", 87 | "\n", 88 | "rotation = RotationMatrix(90)\n", 89 | "print(rotation.angle)\n", 90 | "print(rotation)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "Printing `rotation` tells us that it is an **object** as well as its address — but nothing more. To learn about its actual angle, we need to look up the `angle` **attribute**." 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "class RotationMatrix(object):\n", 107 | " \"\"\"This will rotate a 2D vector by some angle phi\"\"\"\n", 108 | " \n", 109 | " def __init__(self, angle):\n", 110 | " \"\"\"\n", 111 | " Arguments:\n", 112 | " angle (float): angle in °\n", 113 | " \"\"\"\n", 114 | " self.angle = angle % 360\n", 115 | "\n", 116 | " def __repr__(self):\n", 117 | " \"\"\"Represent the object by some informative string\"\"\"\n", 118 | " return f'Rotation by {self.angle:.2f}°'\n", 119 | "\n", 120 | " \n", 121 | "rotation = RotationMatrix(90)\n", 122 | "print(rotation)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "The `__repr__` method is one of many **magic methods** in Python. These methods have two leading and trailing underscores and are called in very specific circumstances, e.g., when we **initialise** the object (`__init__`) or if we attempt to **print** the object (`__repr__`)." 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "rotation2 = RotationMatrix(-90)\n", 139 | "print(rotation2)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "Much better! But remember that our class should represent a **matrix**, and matrices can be multiplied..." 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "rotation * rotation2" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "We want to make matrix multiplication available. 
For that, we need another **magic method**, `__mul__`:" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "class RotationMatrix(object):\n", 172 | " \"\"\"This will rotate a 2D vector by some angle phi\"\"\"\n", 173 | " \n", 174 | " def __init__(self, angle):\n", 175 | " \"\"\"\n", 176 | " Arguments:\n", 177 | " angle (float): angle in °\n", 178 | " \"\"\"\n", 179 | " self.angle = angle % 360\n", 180 | "\n", 181 | " def __repr__(self):\n", 182 | " \"\"\"Represent the object by some informative string\"\"\"\n", 183 | " return f'Rotation by {self.angle:.2f}°'\n", 184 | " \n", 185 | " def __mul__(self, other):\n", 186 | " \"\"\"Multiply two rotations by adding their angles\"\"\"\n", 187 | " return RotationMatrix(self.angle + other.angle)\n", 188 | "\n", 189 | "\n", 190 | "rotation = RotationMatrix(90)\n", 191 | "print(rotation)\n", 192 | "\n", 193 | "rotation2 = RotationMatrix(-90)\n", 194 | "print(rotation2)\n", 195 | "\n", 196 | "print(rotation * rotation2)\n", 197 | "print(rotation.__mul__(rotation2))" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "Here, we have used that a product of two rotation matrices is another rotation matrix where the angle is the sum of the two original angles.\n", 205 | "\n", 206 | "Note that we now have (rotation) matrix multiplication available without even having implemented the actual rotation of a vector..." 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "class RotationMatrix(object):\n", 216 | " \"\"\"This will rotate a 2D vector by some angle phi\"\"\"\n", 217 | " \n", 218 | " def __init__(self, angle):\n", 219 | " \"\"\"\n", 220 | " Arguments:\n", 221 | " angle (float): angle in °\n", 222 | " \"\"\"\n", 223 | " self.angle = angle % 360\n", 224 | " self.rad = np.pi * self.angle / 180\n", 225 | " self.rotation_matrix = np.array([\n", 226 | " [np.cos(self.rad), -np.sin(self.rad)],\n", 227 | " [np.sin(self.rad), np.cos(self.rad)]])\n", 228 | " \n", 229 | " def __repr__(self):\n", 230 | " \"\"\"Represent the object by some informative string\"\"\"\n", 231 | " return f'Rotation by {self.angle:.2f}°'\n", 232 | " \n", 233 | " def __mul__(self, other):\n", 234 | " \"\"\"Multiply two rotations by adding their angles\"\"\"\n", 235 | " return RotationMatrix(self.angle + other.angle)\n", 236 | "\n", 237 | " def rotate(self, vector):\n", 238 | " \"\"\"Rotate the vector\"\"\"\n", 239 | " return np.dot(self.rotation_matrix, np.asarray(vector))\n", 240 | "\n", 241 | "\n", 242 | "rotation = RotationMatrix(90)\n", 243 | "print(rotation.rotate([1, 0]))" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "And now" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "class RotationMatrix2(RotationMatrix):\n", 260 | " \"\"\"This is an example on inheritance\"\"\"\n", 261 | "\n", 262 | " def __mul__(self, other):\n", 263 | " \"\"\"Multiply two rotations by adding their angles\"\"\"\n", 264 | " return RotationMatrix2(self.angle + other.angle)\n", 265 | "\n", 266 | " def __call__(self, vector):\n", 267 | " \"\"\"Rotate the vector\"\"\"\n", 268 | " return self.rotate(vector)\n", 269 | "\n", 270 | "\n", 271 | "r1 = RotationMatrix2(180)\n", 272 | "r2 = RotationMatrix(-90)\n", 273 | 
"\n", 274 | "print(isinstance(r1, RotationMatrix), isinstance(r1, RotationMatrix2))\n", 275 | "print(isinstance(r2, RotationMatrix), isinstance(r2, RotationMatrix2))" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": null, 281 | "metadata": {}, 282 | "outputs": [], 283 | "source": [ 284 | "print(isinstance(r1 * r2, RotationMatrix), isinstance(r1 * r2, RotationMatrix2))\n", 285 | "print(isinstance(r2 * r1, RotationMatrix), isinstance(r2 * r1, RotationMatrix2))" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "print(r1 * r2)\n", 295 | "print((r1 * r2)([1, 0]))" 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "**Example**: Some of Python's **magic methods** and their corresponding binary/unary operators:" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "class Tester(object):\n", 312 | " def __add__(self, value):\n", 313 | " print(f'__add__({value})')\n", 314 | " def __sub__(self, value):\n", 315 | " print(f'__sub__({value})')\n", 316 | " def __mul__(self, value):\n", 317 | " print(f'__mul__({value})')\n", 318 | " def __truediv__(self, value):\n", 319 | " print(f'__truediv__({value})')\n", 320 | " def __floordiv__(self, value):\n", 321 | " print(f'__floordiv__({value})')\n", 322 | " def __mod__(self, value):\n", 323 | " print(f'__mod__({value})')\n", 324 | " def __pow__(self, value):\n", 325 | " print(f'__pow__({value})')\n", 326 | " def __and__(self, value):\n", 327 | " print(f'__and__({value})')\n", 328 | " def __or__(self, value):\n", 329 | " print(f'__or__({value})')\n", 330 | " def __xor__(self, value):\n", 331 | " print(f'__xor__({value})')\n", 332 | " def __eq__(self, value):\n", 333 | " print(f'__eq__({value})')\n", 334 | " def __neq__(self, value):\n", 335 | " print(f'__neq__({value})')\n", 336 | " def __gt__(self, value):\n", 337 | " print(f'__gt__({value})')\n", 338 | " def __ge__(self, value):\n", 339 | " print(f'__ge__({value})')\n", 340 | " def __lt__(self, value):\n", 341 | " print(f'__lt__({value})')\n", 342 | " def __le__(self, value):\n", 343 | " print(f'__le__({value})')\n", 344 | " def __radd__(self, value):\n", 345 | " print(f'__radd__({value})')\n", 346 | " def __iadd__(self, value):\n", 347 | " print(f'__iadd__({value})')\n", 348 | "\n", 349 | "\n", 350 | "t = Tester()\n", 351 | "t + 1\n", 352 | "t - 1\n", 353 | "t * 1\n", 354 | "t / 1\n", 355 | "t // 1\n", 356 | "t % 1\n", 357 | "t ** 1\n", 358 | "t & 1\n", 359 | "t | 1\n", 360 | "t ^ 1\n", 361 | "t == 1\n", 362 | "t != 1\n", 363 | "t > 1\n", 364 | "t >= 1\n", 365 | "t < 1\n", 366 | "t <= 1\n", 367 | "1 + t\n", 368 | "t += 1" 369 | ] 370 | }, 371 | { 372 | "cell_type": "markdown", 373 | "metadata": {}, 374 | "source": [ 375 | "## Iterators and generators\n", 376 | "We revisit the Fibonacci problem:\n", 377 | "\n", 378 | "$$f_i = f_{i-1} + f_{i-2},\\quad i\\geq2,\\, f_0=f_1=1.$$\n", 379 | "\n", 380 | "\n", 381 | "In the second introduction notebook, we *solved* this problem with the code snippet\n", 382 | "\n", 383 | "```Python\n", 384 | "a, b = 1, 1\n", 385 | "while True:\n", 386 | " a, b = a + b, a\n", 387 | " if a < 100:\n", 388 | " print(a)\n", 389 | " else:\n", 390 | " break\n", 391 | "```\n", 392 | "\n", 393 | "Now, we create an `iterator` which we could use in a `for` loop:" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | 
"execution_count": null, 399 | "metadata": {}, 400 | "outputs": [], 401 | "source": [ 402 | "class Fibonacci(object):\n", 403 | " \"\"\"An iterator over Fibonacci numbers\"\"\"\n", 404 | " \n", 405 | " def __init__(self, stop):\n", 406 | " \"\"\"Arguments:\n", 407 | " stop (int): endpoint for the iteration\n", 408 | " \"\"\"\n", 409 | " self.stop = stop\n", 410 | " self.a, self.b = 1, 1\n", 411 | " \n", 412 | " def __iter__(self):\n", 413 | " \"\"\"This is what make this class an iterator\"\"\"\n", 414 | " return self\n", 415 | " \n", 416 | " def __next__(self):\n", 417 | " \"\"\"This is what actually computes the next number\"\"\"\n", 418 | " self.a, self.b = self.a + self.b, self.a\n", 419 | " if self.a < self.stop:\n", 420 | " return self.a\n", 421 | " raise StopIteration\n", 422 | "\n", 423 | "\n", 424 | "fib = Fibonacci(100)\n", 425 | "print(fib)" 426 | ] 427 | }, 428 | { 429 | "cell_type": "code", 430 | "execution_count": null, 431 | "metadata": {}, 432 | "outputs": [], 433 | "source": [ 434 | "for value in fib:\n", 435 | " print(value)" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": null, 441 | "metadata": {}, 442 | "outputs": [], 443 | "source": [ 444 | "print(list(Fibonacci(100)))" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": null, 450 | "metadata": {}, 451 | "outputs": [], 452 | "source": [ 453 | "fib = Fibonacci(100)\n", 454 | "for i in range(70):\n", 455 | " print(i, next(fib))" 456 | ] 457 | }, 458 | { 459 | "cell_type": "markdown", 460 | "metadata": {}, 461 | "source": [ 462 | "Hence, an `iterator` is a `class` which has the methods `__iter__()` and `__next__()`.\n", 463 | "\n", 464 | "A `generator` behaves very similar but does not require to write a `class`. The key component here is the `yield` command:" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": null, 470 | "metadata": {}, 471 | "outputs": [], 472 | "source": [ 473 | "def fibonacci(stop):\n", 474 | " \"\"\"Generate Fibonacci numbers\n", 475 | " \n", 476 | " Arguments:\n", 477 | " stop (int): endpoint for the generation\n", 478 | " \"\"\"\n", 479 | " a, b = 1, 1\n", 480 | " while True:\n", 481 | " a, b = a + b, a\n", 482 | " if a < stop:\n", 483 | " yield a\n", 484 | " else:\n", 485 | " break\n", 486 | "\n", 487 | "\n", 488 | "print(fibonacci(100))" 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": null, 494 | "metadata": {}, 495 | "outputs": [], 496 | "source": [ 497 | "for value in fibonacci(100):\n", 498 | " print(value)" 499 | ] 500 | }, 501 | { 502 | "cell_type": "code", 503 | "execution_count": null, 504 | "metadata": {}, 505 | "outputs": [], 506 | "source": [ 507 | "print(list(fibonacci(100)))" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": null, 513 | "metadata": {}, 514 | "outputs": [], 515 | "source": [ 516 | "fib = fibonacci(100)\n", 517 | "for i in range(10):\n", 518 | " print(i, next(fib))" 519 | ] 520 | }, 521 | { 522 | "cell_type": "markdown", 523 | "metadata": {}, 524 | "source": [ 525 | "## Some thoughts about testing...\n", 526 | "\n", 527 | "When using the **test-driven development** pattern, we want to make sure that testing each piece of code plays a central role in the entire development process. The cycle reads\n", 528 | "\n", 529 | "1. define a new interface (function, class, ...) changed functionality\n", 530 | "2. design conditions under which you can predict the behaviour\n", 531 | "3. write tests to check these predictions\n", 532 | "4. 
implement a quick prototype which passes the tests\n", 533 | "5. refactor the prototype for readability, speed, ...\n", 534 | "6. start over at 1.\n", 535 | "\n", 536 | "In practice, we can use `assert` to test predicted behaviour:" 537 | ] 538 | }, 539 | { 540 | "cell_type": "code", 541 | "execution_count": null, 542 | "metadata": {}, 543 | "outputs": [], 544 | "source": [ 545 | "assert True, 'This is True and, thus, not interesting!'" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": null, 551 | "metadata": {}, 552 | "outputs": [], 553 | "source": [ 554 | "assert False, 'This is False and very interesting!'" 555 | ] 556 | }, 557 | { 558 | "cell_type": "markdown", 559 | "metadata": {}, 560 | "source": [ 561 | "**Example**: we want to write a generator which yields all pairs of neighbouring integers up to its (excluded) stopping point, i.e.,\n", 562 | "\n", 563 | "$$[0, 1, 2, 3, 4, ..., n - 1] \\quad \\to \\quad [(0, 1), (1, 2), (2, 3), ..., (n - 2, n - 1)]$$\n", 564 | "\n", 565 | "for the use case `pairs(n)`.\n", 566 | "\n", 567 | "We start the development process by writing some test function and the interface and convince ourselves that the test fails:" 568 | ] 569 | }, 570 | { 571 | "cell_type": "code", 572 | "execution_count": null, 573 | "metadata": {}, 574 | "outputs": [], 575 | "source": [ 576 | "def test_pairs(generator):\n", 577 | "    for x, y in generator:\n", 578 | "        assert y == x + 1, f'Failure for ({x}, {y})'\n", 579 | "\n", 580 | "\n", 581 | "def pairs(stop):\n", 582 | "    pass\n", 583 | "\n", 584 | "\n", 585 | "test_pairs(pairs(100))" 586 | ] 587 | }, 588 | { 589 | "cell_type": "markdown", 590 | "metadata": {}, 591 | "source": [ 592 | "Now we start prototyping and check the test after every change:" 593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": null, 598 | "metadata": {}, 599 | "outputs": [], 600 | "source": [ 601 | "def pairs(stop):\n", 602 | "    for x in range(stop):\n", 603 | "        yield x\n", 604 | "\n", 605 | "\n", 606 | "test_pairs(pairs(100))" 607 | ] 608 | }, 609 | { 610 | "cell_type": "markdown", 611 | "metadata": {}, 612 | "source": [ 613 | "Like for the empty interface, the `assert` is not even evaluated. This, too, is an important test result." 614 | ] 615 | }, 616 | { 617 | "cell_type": "code", 618 | "execution_count": null, 619 | "metadata": {}, 620 | "outputs": [], 621 | "source": [ 622 | "def pairs(stop):\n", 623 | "    for x in range(stop):\n", 624 | "        yield x, x\n", 625 | "\n", 626 | "\n", 627 | "test_pairs(pairs(100))" 628 | ] 629 | }, 630 | { 631 | "cell_type": "markdown", 632 | "metadata": {}, 633 | "source": [ 634 | "Now we can evaluate the pairs, but they are incorrect." 635 | ] 636 | }, 637 | { 638 | "cell_type": "code", 639 | "execution_count": null, 640 | "metadata": {}, 641 | "outputs": [], 642 | "source": [ 643 | "def pairs(stop):\n", 644 | "    for x in range(stop):\n", 645 | "        yield x, x + 1\n", 646 | "\n", 647 | "\n", 648 | "test_pairs(pairs(100))" 649 | ] 650 | }, 651 | { 652 | "cell_type": "markdown", 653 | "metadata": {}, 654 | "source": [ 655 | "No traceback, no exception, great! Are we done?\n", 656 | "\n", 657 | "No, because our test was not really exhaustive and has neglected some part of the interface specifications. We, thus, need better tests!"
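One way to make the test stricter is to compare the complete output for a small case instead of only checking a property of each yielded pair. The following is a sketch (the function name `test_pairs_exact` and the chosen cases are illustrative, not part of the original notebook); it fails for the current prototype because one pair too many is produced:

```Python
def test_pairs_exact():
    """Stricter test: compare against the full expected output for small cases"""
    assert list(pairs(5)) == [(0, 1), (1, 2), (2, 3), (3, 4)], 'Wrong pairs for stop=5'
    assert list(pairs(1)) == [], 'pairs(1) should yield nothing'


test_pairs_exact()
```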
658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": null, 663 | "metadata": {}, 664 | "outputs": [], 665 | "source": [ 666 | "list(pairs(5))" 667 | ] 668 | }, 669 | { 670 | "cell_type": "markdown", 671 | "metadata": {}, 672 | "source": [ 673 | "In a test suite, you often find test functions which bundle a number of tests for a specific function, testing various aspects of the function's behaviour:" 674 | ] 675 | }, 676 | { 677 | "cell_type": "code", 678 | "execution_count": null, 679 | "metadata": {}, 680 | "outputs": [], 681 | "source": [ 682 | "def test_pair_generator():\n", 683 | "    test_pairs(pairs(100))\n", 684 | "    assert len(list(pairs(100))) == 99, 'Generator too long'\n", 685 | "\n", 686 | "\n", 687 | "test_pair_generator()" 688 | ] 689 | }, 690 | { 691 | "cell_type": "markdown", 692 | "metadata": {}, 693 | "source": [ 694 | "And, finally," 695 | ] 696 | }, 697 | { 698 | "cell_type": "code", 699 | "execution_count": null, 700 | "metadata": {}, 701 | "outputs": [], 702 | "source": [ 703 | "def pairs(stop):\n", 704 | "    for x in range(stop - 1):\n", 705 | "        yield x, x + 1\n", 706 | "\n", 707 | "\n", 708 | "test_pair_generator()" 709 | ] 710 | }, 711 | { 712 | "cell_type": "markdown", 713 | "metadata": {}, 714 | "source": [ 715 | "we have something which **might** actually work as we intend it to work.\n", 716 | "\n", 717 | "If you want to write good tests, don't give in to **confirmation bias** and just check that your function gives you the expected result for only a few easy cases. You should try to actually **break** your function, feed it improper parameters, and check that it breaks down where it should." 718 | ] 719 | } 720 | ], 721 | "metadata": { 722 | "kernelspec": { 723 | "display_name": "Python 3", 724 | "language": "python", 725 | "name": "python3" 726 | }, 727 | "language_info": { 728 | "codemirror_mode": { 729 | "name": "ipython", 730 | "version": 3 731 | }, 732 | "file_extension": ".py", 733 | "mimetype": "text/x-python", 734 | "name": "python", 735 | "nbconvert_exporter": "python", 736 | "pygments_lexer": "ipython3", 737 | "version": "3.7.0" 738 | } 739 | }, 740 | "nbformat": 4, 741 | "nbformat_minor": 2 742 | } 743 | -------------------------------------------------------------------------------- /notebooks/python-intro-3-numpy-and-matplotlib.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction to Python III: numpy and matplotlib\n", 8 | "\n", 9 | "\n", 10 | "## Content\n", 11 | "- numpy.ndarray creation and usage\n", 12 | "- basic plotting with matplotlib\n", 13 | "\n", 14 | "## Remember jupyter notebooks\n", 15 | "- To run the currently highlighted cell, hold ⇧ Shift and press ⏎ Enter.\n", 16 | "- To get help for a specific function, place the cursor within the function's brackets, hold ⇧ Shift, and press ⇥ Tab.\n", 17 | "\n", 18 | "## A notebook \"preamble\"\n", 19 | "The first code block prepares our notebook by specifying how to render plots and importing two required packages."
20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "%matplotlib inline\n", 29 | "import matplotlib.pyplot as plt\n", 30 | "import numpy as np" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "## ndarray: numpy's central data structure" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "a = list(range(5))\n", 47 | "print(a, type(a))" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "b = np.asarray(a)\n", 57 | "print(b, type(b))" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "a = [[0, 1, 2], [3, 4, 5]]\n", 67 | "print(a, type(a))" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "b = np.asarray(a)\n", 77 | "print(b, type(b))" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "print(b.size)\n", 87 | "print(b.ndim)\n", 88 | "print(b.shape)\n", 89 | "print(b.dtype)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "How does `numpy` select the appropriate `dtype`?" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "a = np.array([0, 1])\n", 106 | "print(a, a.dtype)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "a = np.array([0, 1, 2.0])\n", 116 | "print(a, a.dtype)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "a = np.array([0, 1, 2.0, 3+0j])\n", 126 | "print(a, a.dtype)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "a = np.array([0, 1, 2.0, 3+0j, 'four'])\n", 136 | "print(a, a.dtype)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "a = np.array([0, 1, 2.0, 3+0j, 'four', None])\n", 146 | "print(a, a.dtype)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "Creating arrays with \"default\" values." 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "a = np.zeros((2, 3, 4), dtype=np.float64)\n", 163 | "print(a)\n", 164 | "print(a.size, a.ndim, a.shape)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "print(np.ones((4, 3, 2), dtype=np.int))" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "a = np.arange(16)\n", 183 | "print(a)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "The `shape` can be changed..." 
191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "a = a.reshape(-1, 4)\n", 200 | "print(a)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [ 209 | "a.reshape(-1)" 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": {}, 215 | "source": [ 216 | "... and the `dtype`, too:" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "a = a.astype(np.float64)\n", 226 | "print(a)" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "You can index like a nested `list`/`tuple`..." 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "print(a[-1][0])" 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": {}, 248 | "source": [ 249 | "... or via the `numpy` way:" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": {}, 256 | "outputs": [], 257 | "source": [ 258 | "print(a[-1, 0])" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "Slicing works, too:" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [ 274 | "print(a[:, 0])\n", 275 | "print(a[0, :])" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "Even for assignments!" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": null, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [ 291 | "a[:, -1] *= -1\n", 292 | "print(a)" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "You can (implicitly) iterate over the first index:" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [ 308 | "for b in a:\n", 309 | " print(b)" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "for b in a.T:\n", 319 | " print(b)" 320 | ] 321 | }, 322 | { 323 | "cell_type": "markdown", 324 | "metadata": {}, 325 | "source": [ 326 | "Can we add/subtract/... somthing to/from an array?" 
327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": null, 332 | "metadata": {}, 333 | "outputs": [], 334 | "source": [ 335 | "a = np.arange(5)\n", 336 | "print(a)" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": null, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [ 345 | "print(a + 1)" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": null, 351 | "metadata": {}, 352 | "outputs": [], 353 | "source": [ 354 | "print(a - 1.0)" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": null, 360 | "metadata": {}, 361 | "outputs": [], 362 | "source": [ 363 | "print(a * 1+0j)" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": null, 369 | "metadata": {}, 370 | "outputs": [], 371 | "source": [ 372 | "print(a / 2)" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": null, 378 | "metadata": {}, 379 | "outputs": [], 380 | "source": [ 381 | "print(a // 2)" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": null, 387 | "metadata": {}, 388 | "outputs": [], 389 | "source": [ 390 | "print(a**2)" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": null, 396 | "metadata": {}, 397 | "outputs": [], 398 | "source": [ 399 | "print(a % 2)" 400 | ] 401 | }, 402 | { 403 | "cell_type": "markdown", 404 | "metadata": {}, 405 | "source": [ 406 | "What about adding/... two arrays?" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": null, 412 | "metadata": {}, 413 | "outputs": [], 414 | "source": [ 415 | "b = np.ones(a.size) * 2\n", 416 | "print(b)" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": null, 422 | "metadata": {}, 423 | "outputs": [], 424 | "source": [ 425 | "print(a + b)" 426 | ] 427 | }, 428 | { 429 | "cell_type": "code", 430 | "execution_count": null, 431 | "metadata": {}, 432 | "outputs": [], 433 | "source": [ 434 | "print(a - b)" 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": null, 440 | "metadata": {}, 441 | "outputs": [], 442 | "source": [ 443 | "print(a * b)" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": null, 449 | "metadata": {}, 450 | "outputs": [], 451 | "source": [ 452 | "print(a / b)" 453 | ] 454 | }, 455 | { 456 | "cell_type": "markdown", 457 | "metadata": {}, 458 | "source": [ 459 | "We can evaluate function on the whole array in one step:" 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": null, 465 | "metadata": {}, 466 | "outputs": [], 467 | "source": [ 468 | "print(np.sqrt(a))" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": null, 474 | "metadata": {}, 475 | "outputs": [], 476 | "source": [ 477 | "print(np.exp(a))" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": null, 483 | "metadata": {}, 484 | "outputs": [], 485 | "source": [ 486 | "print(np.log(a + 1))" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": null, 492 | "metadata": {}, 493 | "outputs": [], 494 | "source": [ 495 | "print(np.sin(a))" 496 | ] 497 | }, 498 | { 499 | "cell_type": "markdown", 500 | "metadata": {}, 501 | "source": [ 502 | "Summations/multiplications over the whole array or selected axes are possible:" 503 | ] 504 | }, 505 | { 506 | "cell_type": "code", 507 | "execution_count": null, 508 | "metadata": {}, 509 | "outputs": [], 510 | "source": [ 511 | "a = np.ones((3, 5))\n", 512 | 
"print(a)\n", 513 | "print(a.sum())\n", 514 | "print(a.sum(axis=0))\n", 515 | "print(a.sum(axis=1))" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "metadata": {}, 522 | "outputs": [], 523 | "source": [ 524 | "a = np.ones((3, 5)) * 2\n", 525 | "print(a.prod())\n", 526 | "print(a.prod(axis=0))\n", 527 | "print(a.prod(axis=1))" 528 | ] 529 | }, 530 | { 531 | "cell_type": "code", 532 | "execution_count": null, 533 | "metadata": {}, 534 | "outputs": [], 535 | "source": [ 536 | "a = np.ones((5, 3))\n", 537 | "print(np.sqrt(np.sum(a**2, axis=-1)))" 538 | ] 539 | }, 540 | { 541 | "cell_type": "code", 542 | "execution_count": null, 543 | "metadata": {}, 544 | "outputs": [], 545 | "source": [ 546 | "print(np.linalg.norm(a, axis=-1))" 547 | ] 548 | }, 549 | { 550 | "cell_type": "markdown", 551 | "metadata": {}, 552 | "source": [ 553 | "## Vectorisation\n", 554 | "Computing distances can be an expensive task as it is $\\mathcal{O}(N^2)$." 555 | ] 556 | }, 557 | { 558 | "cell_type": "code", 559 | "execution_count": null, 560 | "metadata": {}, 561 | "outputs": [], 562 | "source": [ 563 | "def get_distances(coordinates):\n", 564 | " distances = np.zeros((len(coordinates), len(coordinates)))\n", 565 | " for i in range(len(coordinates)):\n", 566 | " for j in range(len(coordinates)):\n", 567 | " distances[i, j] = np.linalg.norm(\n", 568 | " coordinates[i] - coordinates[j],\n", 569 | " axis=-1)\n", 570 | " return distances\n", 571 | "\n", 572 | "\n", 573 | "coordinates = np.random.rand(1000, 3)\n", 574 | "%timeit get_distances(coordinates)" 575 | ] 576 | }, 577 | { 578 | "cell_type": "markdown", 579 | "metadata": {}, 580 | "source": [ 581 | "We can, of course, exploit symmetry:" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": null, 587 | "metadata": {}, 588 | "outputs": [], 589 | "source": [ 590 | "def get_distances2(coordinates):\n", 591 | " distances = np.zeros((len(coordinates), len(coordinates)))\n", 592 | " for i in range(1, len(coordinates)):\n", 593 | " for j in range(i):\n", 594 | " distances[i, j] = np.linalg.norm(\n", 595 | " coordinates[i] - coordinates[j],\n", 596 | " axis=-1)\n", 597 | " distances[j, i] = distances[i, j]\n", 598 | " return distances\n", 599 | "\n", 600 | "\n", 601 | "%timeit get_distances2(coordinates)" 602 | ] 603 | }, 604 | { 605 | "cell_type": "markdown", 606 | "metadata": {}, 607 | "source": [ 608 | "But **vectorisation** is much faster and easier to write:" 609 | ] 610 | }, 611 | { 612 | "cell_type": "code", 613 | "execution_count": null, 614 | "metadata": {}, 615 | "outputs": [], 616 | "source": [ 617 | "def get_distances3(coordinates):\n", 618 | " return np.linalg.norm(\n", 619 | " coordinates[:, None, :] - coordinates[None, :, :],\n", 620 | " axis=-1)\n", 621 | "\n", 622 | "\n", 623 | "%timeit get_distances3(coordinates)" 624 | ] 625 | }, 626 | { 627 | "cell_type": "markdown", 628 | "metadata": {}, 629 | "source": [ 630 | "In the above example, we traded loops against higher memory requirement. 
To see how that works, let's look at what a `None` does for array indexing:" 631 | ] 632 | }, 633 | { 634 | "cell_type": "code", 635 | "execution_count": null, 636 | "metadata": {}, 637 | "outputs": [], 638 | "source": [ 639 | "a = np.arange(5)\n", 640 | "print(a)" 641 | ] 642 | }, 643 | { 644 | "cell_type": "code", 645 | "execution_count": null, 646 | "metadata": {}, 647 | "outputs": [], 648 | "source": [ 649 | "print(a[:, None])" 650 | ] 651 | }, 652 | { 653 | "cell_type": "code", 654 | "execution_count": null, 655 | "metadata": {}, 656 | "outputs": [], 657 | "source": [ 658 | "print(a[None, :])" 659 | ] 660 | }, 661 | { 662 | "cell_type": "code", 663 | "execution_count": null, 664 | "metadata": {}, 665 | "outputs": [], 666 | "source": [ 667 | "print(a[None, :, None])" 668 | ] 669 | }, 670 | { 671 | "cell_type": "code", 672 | "execution_count": null, 673 | "metadata": {}, 674 | "outputs": [], 675 | "source": [ 676 | "a = np.arange(16).reshape(4, -1)\n", 677 | "b = a [1:-1, 1:-1]\n", 678 | "print(a)\n", 679 | "print(b)" 680 | ] 681 | }, 682 | { 683 | "cell_type": "code", 684 | "execution_count": null, 685 | "metadata": {}, 686 | "outputs": [], 687 | "source": [ 688 | "b *= -1\n", 689 | "print(a)" 690 | ] 691 | }, 692 | { 693 | "cell_type": "markdown", 694 | "metadata": {}, 695 | "source": [ 696 | "## Plotting\n", 697 | "Let's try to visualise a function:" 698 | ] 699 | }, 700 | { 701 | "cell_type": "code", 702 | "execution_count": null, 703 | "metadata": {}, 704 | "outputs": [], 705 | "source": [ 706 | "x = np.linspace(-np.pi, np.pi, 100)\n", 707 | "s = np.sin(x)\n", 708 | "\n", 709 | "plt.plot(x, s)\n", 710 | "plt.xlabel('$x$ / rad', fontsize=15)\n", 711 | "plt.ylabel('$\\sin(x)$', fontsize=15)" 712 | ] 713 | }, 714 | { 715 | "cell_type": "code", 716 | "execution_count": null, 717 | "metadata": {}, 718 | "outputs": [], 719 | "source": [ 720 | "c = np.cos(x)\n", 721 | "\n", 722 | "plt.plot(x, s, label='sin')\n", 723 | "plt.plot(x, c, label='cos')\n", 724 | "plt.xlabel('$x$ / rad', fontsize=15)\n", 725 | "plt.legend(fontsize=15)" 726 | ] 727 | }, 728 | { 729 | "cell_type": "markdown", 730 | "metadata": {}, 731 | "source": [ 732 | "Let's revisit the $\\pi$ sampling exercise." 
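For comparison, a version without numpy might look like the following sketch (this is only an illustration; the original exercise solution may have differed in its details):

```Python
import random


def sample_pi(n):
    """Estimate pi by drawing n random points in the unit square, one at a time"""
    n_hits = 0
    for _ in range(n):
        x, y = random.random(), random.random()
        if x * x + y * y < 1.0:  # the point lies inside the quarter circle
            n_hits += 1
    return 4.0 * n_hits / n


print(sample_pi(100000))
```

The vectorised variant below replaces the explicit Python loop with a single call into numpy, which is typically much faster.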
733 | ] 734 | }, 735 | { 736 | "cell_type": "code", 737 | "execution_count": null, 738 | "metadata": {}, 739 | "outputs": [], 740 | "source": [ 741 | "def sample_pi2(n):\n", 742 | " n_hits = np.sum(np.linalg.norm(np.random.rand(n, 2), axis=1) < 1.0)\n", 743 | " return 4.0 * n_hits / n\n", 744 | "\n", 745 | "\n", 746 | "%timeit sample_pi2(1000)\n", 747 | "%timeit sample_pi2(10000)\n", 748 | "%timeit sample_pi2(100000)" 749 | ] 750 | }, 751 | { 752 | "cell_type": "code", 753 | "execution_count": null, 754 | "metadata": {}, 755 | "outputs": [], 756 | "source": [ 757 | "n_values = [\n", 758 | " 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000]\n", 759 | "\n", 760 | "std = [np.std([sample_pi2(n) for _ in range(10000)])\n", 761 | " for n in n_values]" 762 | ] 763 | }, 764 | { 765 | "cell_type": "code", 766 | "execution_count": null, 767 | "metadata": {}, 768 | "outputs": [], 769 | "source": [ 770 | "f = np.sqrt(n_values[0]) * std[0]\n", 771 | "inv_sqrt = f / np.sqrt(n_values)\n", 772 | "\n", 773 | "fig, ax = plt.subplots()\n", 774 | "ax.plot(n_values, std, linewidth=2, label='data')\n", 775 | "ax.plot(n_values, inv_sqrt, 'o', label='model')\n", 776 | "ax.fill_between(n_values, 0, std, alpha=0.3)\n", 777 | "ax.semilogx()\n", 778 | "ax.legend()\n", 779 | "ax.set_xlabel(r'sample size for $\\pi$ estimation')\n", 780 | "ax.set_ylabel(r'standard deviation')\n", 781 | "fig.tight_layout()" 782 | ] 783 | }, 784 | { 785 | "cell_type": "markdown", 786 | "metadata": {}, 787 | "source": [ 788 | "## Exercise: scalar product\n", 789 | "\n", 790 | "Implement a function\n", 791 | "\n", 792 | "```Python\n", 793 | "def scalar_product(a, b):\n", 794 | " pass\n", 795 | "```\n", 796 | "\n", 797 | "which implements the scalar product\n", 798 | "\n", 799 | "$$\\left\\langle \\mathbf{a},\\mathbf{b} \\right\\rangle = \\sum\\limits_{n=0}^{N-1} a_n b_n$$\n", 800 | "\n", 801 | "where $N$ is the number of elements in each $\\mathbf{a}$ and $\\mathbf{b}$. Both variables `a` and `b` can be `list`s or `tuple`s, and their elements should be numerical (`float` or `int`).\n", 802 | "\n", 803 | "**Bonus**: the function should not return a numerical result if both variables have different lengths or contain non-numerical elements." 804 | ] 805 | }, 806 | { 807 | "cell_type": "code", 808 | "execution_count": null, 809 | "metadata": {}, 810 | "outputs": [], 811 | "source": [ 812 | "def scalar_product(a, b):\n", 813 | " pass" 814 | ] 815 | }, 816 | { 817 | "cell_type": "code", 818 | "execution_count": null, 819 | "metadata": {}, 820 | "outputs": [], 821 | "source": [ 822 | "assert scalar_product([0] * 100, [1] * 100) == 0\n", 823 | "assert scalar_product([1] * 100, [1, -1] * 50) == 0\n", 824 | "assert scalar_product([1] * 100, range(100)) == 99 * 50" 825 | ] 826 | }, 827 | { 828 | "cell_type": "markdown", 829 | "metadata": {}, 830 | "source": [ 831 | "## Exercise: arithmetic mean\n", 832 | "\n", 833 | "Implement a function\n", 834 | "```Python\n", 835 | "def mean(a):\n", 836 | " pass\n", 837 | "```\n", 838 | "which computes the arithmetic mean of a sequence:\n", 839 | "\n", 840 | "$$\\bar{a} = \\frac{\\sum_{n=0}^{N-1} a_n}{N}$$\n", 841 | "\n", 842 | "where $N$ is the number of elements $a_0,\\dots,a_{N-1}$. The parameter `a` may be any type of `iterable` with only numerical elements.\n", 843 | "\n", 844 | "**Bonus**: for a sequence of length 0, e.g., an empty list, the function should return 0." 
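If you want to compare your attempt against something, here is one possible sketch (try the empty stub in the next cell first; the handling of the empty sequence covers the bonus requirement):

```Python
def mean(a):
    """Arithmetic mean of an iterable; returns 0 for an empty sequence"""
    values = list(a)  # materialise first, because a may be any kind of iterable
    if not values:
        return 0
    return sum(values) / len(values)


print(mean(range(100)))  # 49.5
```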
845 | ] 846 | }, 847 | { 848 | "cell_type": "code", 849 | "execution_count": null, 850 | "metadata": {}, 851 | "outputs": [], 852 | "source": [ 853 | "def mean(a):\n", 854 | " pass" 855 | ] 856 | }, 857 | { 858 | "cell_type": "code", 859 | "execution_count": null, 860 | "metadata": {}, 861 | "outputs": [], 862 | "source": [ 863 | "assert mean(range(100)) == 99 * 0.5\n", 864 | "assert mean([]) == 0\n", 865 | "assert mean([1] * 1000) == 1" 866 | ] 867 | }, 868 | { 869 | "cell_type": "markdown", 870 | "metadata": {}, 871 | "source": [ 872 | "## Exercise: linear regression\n", 873 | "\n", 874 | "Implement a function\n", 875 | "```Python\n", 876 | "def linear_regression(x, y):\n", 877 | " slope = None\n", 878 | " const = None\n", 879 | " return slope, const\n", 880 | "```\n", 881 | "which performs a simple linear regression\n", 882 | "\n", 883 | "$$\\begin{eqnarray*}\n", 884 | "\\textrm{slope} & = & \\frac{\\sum_{n=0}^{N-1} \\left( x_n - \\bar{x} \\middle) \\middle( y_n - \\bar{y} \\right)}{\\sum_{n=0}^{N-1} \\left( x_n - \\bar{x} \\right)^2} \\\\[0.5em]\n", 885 | "\\textrm{const} & = & \\bar{y} - \\textrm{slope } \\bar{x}\n", 886 | "\\end{eqnarray*}$$\n", 887 | "\n", 888 | "for value pairs $(x_0, y_0),\\dots,(x_{N-1},y_{N-1})$. The parameters `x` and `y` may be any type of `iterable` with only numerical elements; both must have the same length." 889 | ] 890 | }, 891 | { 892 | "cell_type": "code", 893 | "execution_count": null, 894 | "metadata": {}, 895 | "outputs": [], 896 | "source": [ 897 | "def linear_regression(x, y):\n", 898 | " slope = None\n", 899 | " const = None\n", 900 | " return slope, const" 901 | ] 902 | }, 903 | { 904 | "cell_type": "code", 905 | "execution_count": null, 906 | "metadata": {}, 907 | "outputs": [], 908 | "source": [ 909 | "x = [10, 14, 16, 15, 16, 20]\n", 910 | "y = [ 1, 3, 5, 6, 5, 11]\n", 911 | "slope, const = linear_regression(x, y)\n", 912 | "assert 0.97 < slope < 0.99\n", 913 | "assert -9.72 < const < -9.70" 914 | ] 915 | }, 916 | { 917 | "cell_type": "code", 918 | "execution_count": null, 919 | "metadata": {}, 920 | "outputs": [], 921 | "source": [] 922 | } 923 | ], 924 | "metadata": { 925 | "kernelspec": { 926 | "display_name": "Python 3", 927 | "language": "python", 928 | "name": "python3" 929 | }, 930 | "language_info": { 931 | "codemirror_mode": { 932 | "name": "ipython", 933 | "version": 3 934 | }, 935 | "file_extension": ".py", 936 | "mimetype": "text/x-python", 937 | "name": "python", 938 | "nbconvert_exporter": "python", 939 | "pygments_lexer": "ipython3", 940 | "version": "3.7.0" 941 | } 942 | }, 943 | "nbformat": 4, 944 | "nbformat_minor": 2 945 | } 946 | -------------------------------------------------------------------------------- /notebooks/python-intro-1-variables-and-data-structures.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction to Python I: variables and data structures\n", 8 | "\n", 9 | "## Content\n", 10 | "- How do I use a jupyter notebook?\n", 11 | "- What are variables and what can I do with them?\n", 12 | "- Which data structures are available?\n", 13 | "\n", 14 | "## Jupyter notebooks...\n", 15 | "... are a single environment in which you can run code interactively, visualize results, and even add formatted documentation. This text for example lies in a **Markdown**-type cell. 
To run the currently highlighted cell, hold ⇧ Shift and press ⏎ Enter.\n", 16 | "\n", 17 | "## Variables\n", 18 | "Let's have a look at a code cell which will show us how to handle variables:" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "a = 1\n", 28 | "b = 1.5" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "Click with your mouse pointer on the above cell and run it with ⇧ Shift + ⏎ Enter. You have now assigned the value $1$ to the variable `a` and $1.5$ to the variable `b`. By running the next cell, we print out the contents of both variables along with their type:" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "print(a, type(a))\n", 45 | "print(b, type(b))" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "`a=1` represents an **integer** while `b=1.5` is a **floating point number**.\n", 53 | "\n", 54 | "Next, we try to add, subtract, multiply and divide `a` and `b` and print out the result and its type:" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "c = a + b\n", 64 | "print(c, type(c))" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "c = a - b\n", 74 | "print(c, type(c))" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "c = a * b\n", 84 | "print(c, type(c))" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "c = a / b\n", 94 | "print(c, type(c))" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "Python can handle very small floats..." 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "c = 1e-300\n", 111 | "print(c, type(c))" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "...as well as very big numbers:" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "c = int(1e20)\n", 128 | "print(c, type(c))" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "Note that, in the last cell, we have used a type conversion: `1e20` actually is a `float` which we cast as an `int` using the same-named function. 
Let's try this again to convert between floats and integers:" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "c = float(1)\n", 145 | "print(c, type(c))" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "c = int(1.9)\n", 155 | "print(c, type(c))" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "We observe that an `int` can easily be made into a `float`, but in the reverse process, trailing digits are cut off without proper rounding.\n", 163 | "\n", 164 | "Here is another example of how Python handles rounding: we can choose between two division operators with different behavior." 165 | ] 166 | }, 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "c = 9 / 5\n", 173 | "print(c, type(c))" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "c = 9 // 5\n", 183 | "print(c, type(c))" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "The first version performs a usual floating point division, even for integer arguments. The second version performs an integer division and, like before, trailing digits are cut.\n", 191 | "\n", 192 | "The last division-related operation is the modulo division:" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "c = 3 % 2\n", 202 | "print(c, type(c))" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "For exponentiation, Python provides the `**` operator:" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "c = 2**3\n", 219 | "print(c, type(c))" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "If we want to \"update\" the content of a variable, e.g. add a constant, we could write\n", 227 | "\n", 228 | "```Python\n", 229 | "c = c + 3\n", 230 | "```\n", 231 | "\n", 232 | "For such cases, however, Python provides a more compact syntax:" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "c += 3\n", 242 | "print(c)" 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": {}, 248 | "source": [ 249 | "The versions `-=`, `*=`, `/=`, `//=`, `%=`, and `**=` are also available.\n", 250 | "\n", 251 | "Now we shall see how Python stores variables. We create a variable `a` and assign its value to another variable `b`:" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "a = 1\n", 261 | "b = a\n", 262 | "print(a, id(a))\n", 263 | "print(b, id(b))" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "When we now `print` the values and `id`s of both variables, we see that `a` and `b` share the same address: both are referencing the same **object**."
272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "a += 1\n", 281 | "print(a, id(a))\n", 282 | "print(b, id(b))" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "If, however, we modify `a`, we see that `a` changes its avlues as well as its address while `b` remains unchanged. This is because the built-in data types `float` and `int` are **immutable**: you cannot change a `float`, you can only create a new one.\n", 290 | "\n", 291 | "Here is a nice property of Python that allows easy swapping of variables:" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "print(a, b)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "metadata": {}, 306 | "source": [ 307 | "If we want to swap `a` and `b`, we do not need a third (swapping) variable as we can use two or even more variables on the left of the assignment operator `=`:" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": null, 313 | "metadata": {}, 314 | "outputs": [], 315 | "source": [ 316 | "a, b = b, a\n", 317 | "print(a, b)" 318 | ] 319 | }, 320 | { 321 | "cell_type": "markdown", 322 | "metadata": {}, 323 | "source": [ 324 | "Finally: text. A variable containing text has the type `str` (**string**). We use either single `'` or double `\"` quotes." 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "metadata": {}, 331 | "outputs": [], 332 | "source": [ 333 | "a = 'some text...'\n", 334 | "print(a, type(a))" 335 | ] 336 | }, 337 | { 338 | "cell_type": "markdown", 339 | "metadata": {}, 340 | "source": [ 341 | "When we add two strings, they are simply concatenated:" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "metadata": {}, 348 | "outputs": [], 349 | "source": [ 350 | "b = a + \" and some more\"\n", 351 | "print(b)" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "### Playground: variables\n", 359 | "\n", 360 | "Time to get creative! Create some variables, add or subtract them, cast them into other types, and get a feeling for their behavior..." 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": null, 366 | "metadata": {}, 367 | "outputs": [], 368 | "source": [] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": null, 373 | "metadata": {}, 374 | "outputs": [], 375 | "source": [] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": null, 380 | "metadata": {}, 381 | "outputs": [], 382 | "source": [] 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "metadata": {}, 387 | "source": [ 388 | "## Data structures\n", 389 | "\n", 390 | "Apart from the basic data types `int`, `float`, and `str`, Python provides more complex data types to store more than one value. 
There are several types of such structures which may seem very similar but differ significantly in their behavior:" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": null, 396 | "metadata": {}, 397 | "outputs": [], 398 | "source": [ 399 | "a = ['one', 'two', 'three', 'four']\n", 400 | "b = ('one', 'two', 'three', 'four')\n", 401 | "c = {'one', 'two', 'three', 'four'}\n", 402 | "\n", 403 | "print(a, id(a), type(a))\n", 404 | "print(b, id(b), type(b))\n", 405 | "print(c, id(c), type(c))" 406 | ] 407 | }, 408 | { 409 | "cell_type": "markdown", 410 | "metadata": {}, 411 | "source": [ 412 | "Now, we have created a `list`, a `tuple`, and a `set`; each containing four strings.\n", 413 | "\n", 414 | "We create a new variable `d` from the `list` in `a` and modify the first element of `d`:" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": null, 420 | "metadata": {}, 421 | "outputs": [], 422 | "source": [ 423 | "d = a\n", 424 | "print(d[0])\n", 425 | "d[0] = 'ONE'\n", 426 | "print(a, id(a), type(a))" 427 | ] 428 | }, 429 | { 430 | "cell_type": "markdown", 431 | "metadata": {}, 432 | "source": [ 433 | "The `print` statement tells us that the change of `d`did change the content of `a`, but not its address. This means, a `list` is **mutable** and `a` and `d` are both pointing to the same, changeable object." 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": null, 439 | "metadata": {}, 440 | "outputs": [], 441 | "source": [ 442 | "d += ['five']\n", 443 | "print(a, id(a), type(a))" 444 | ] 445 | }, 446 | { 447 | "cell_type": "markdown", 448 | "metadata": {}, 449 | "source": [ 450 | "We can also add another `list` via `+` ..." 451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": null, 456 | "metadata": {}, 457 | "outputs": [], 458 | "source": [ 459 | "d.append('six')\n", 460 | "print(a, id(a), type(a))" 461 | ] 462 | }, 463 | { 464 | "cell_type": "markdown", 465 | "metadata": {}, 466 | "source": [ 467 | "... or another element via the `append()` method." 468 | ] 469 | }, 470 | { 471 | "cell_type": "markdown", 472 | "metadata": {}, 473 | "source": [ 474 | "#### Exercise\n", 475 | "\n", 476 | "What is the difference between adding two lists and using the `append` method?" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": null, 482 | "metadata": {}, 483 | "outputs": [], 484 | "source": [] 485 | }, 486 | { 487 | "cell_type": "markdown", 488 | "metadata": {}, 489 | "source": [ 490 | "#### Exercise\n", 491 | "\n", 492 | "Can you access the first element of a `set` like we did for a `list`?" 493 | ] 494 | }, 495 | { 496 | "cell_type": "code", 497 | "execution_count": null, 498 | "metadata": {}, 499 | "outputs": [], 500 | "source": [] 501 | }, 502 | { 503 | "cell_type": "markdown", 504 | "metadata": {}, 505 | "source": [ 506 | "#### Exercise\n", 507 | "\n", 508 | "Can you modify the first element of a tuple like we did for a `list`?" 
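If you want to check your answer: the assignment fails, because `tuple`s are immutable. A small sketch (wrapped in `try`/`except` only so that the notebook keeps running after the error):

```Python
b = ('one', 'two', 'three', 'four')
try:
    b[0] = 'ONE'
except TypeError as error:
    print('tuples are immutable:', error)
```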
509 | ] 510 | }, 511 | { 512 | "cell_type": "code", 513 | "execution_count": null, 514 | "metadata": {}, 515 | "outputs": [], 516 | "source": [] 517 | }, 518 | { 519 | "cell_type": "markdown", 520 | "metadata": {}, 521 | "source": [ 522 | "A set can be modified by adding new elements with the `add()` method; the `+` operator does not work:" 523 | ] 524 | }, 525 | { 526 | "cell_type": "code", 527 | "execution_count": null, 528 | "metadata": {}, 529 | "outputs": [], 530 | "source": [ 531 | "c.add('five')\n", 532 | "print(c, id(c), type(c))" 533 | ] 534 | }, 535 | { 536 | "cell_type": "markdown", 537 | "metadata": {}, 538 | "source": [ 539 | "We also observe that the address does not change: `set`s are, like `list`s, mutable. Why should we use a `set` instead of a list if we cannot access elements by index? Let's see how `list`s and `set`s behave for non-unique elements:" 540 | ] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "execution_count": null, 545 | "metadata": {}, 546 | "outputs": [], 547 | "source": [ 548 | "d = ['one', 'one', 'one', 'two']\n", 549 | "print(d)" 550 | ] 551 | }, 552 | { 553 | "cell_type": "code", 554 | "execution_count": null, 555 | "metadata": {}, 556 | "outputs": [], 557 | "source": [ 558 | "d = {'one', 'one', 'one', 'two'}\n", 559 | "print(d)" 560 | ] 561 | }, 562 | { 563 | "cell_type": "markdown", 564 | "metadata": {}, 565 | "source": [ 566 | "While a `list` (like a `tuple`) exactly preserves all elements in order, a `set` contains only one for each different element. Thus, a `set` does not care how often an element is given but only if it is given at all.\n", 567 | "\n", 568 | "Finally: a `tuple` is like a `list`, but **immutable**.\n", 569 | "\n", 570 | "What is the matter with **mutable** and **immutable** objects?\n", 571 | "\n", 572 | "- A **mutable** object is cheap to change but lookups of individual elements are expensive.\n", 573 | "- An **immutable** object cannot be changed (only remade which is expensive) but lookups of elements are cheap.\n", 574 | "\n", 575 | "`list`s, `tuple`s, and `set`s can be converted into each other:" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": null, 581 | "metadata": {}, 582 | "outputs": [], 583 | "source": [ 584 | "a_tuple = ('one', 'two', 'three')\n", 585 | "print(a_tuple, type(a_tuple))\n", 586 | "\n", 587 | "a_list = list(a_tuple)\n", 588 | "print(a_list, type(a_list))" 589 | ] 590 | }, 591 | { 592 | "cell_type": "code", 593 | "execution_count": null, 594 | "metadata": {}, 595 | "outputs": [], 596 | "source": [ 597 | "a_tuple = ('one', 'two', 'three')\n", 598 | "print(a_tuple, type(a_tuple))\n", 599 | "\n", 600 | "a_set = set(a_tuple)\n", 601 | "print(a_set, type(a_set))" 602 | ] 603 | }, 604 | { 605 | "cell_type": "code", 606 | "execution_count": null, 607 | "metadata": {}, 608 | "outputs": [], 609 | "source": [ 610 | "a_list = ['one', 'two', 'three']\n", 611 | "print(a_list, type(a_list))\n", 612 | "\n", 613 | "a_set = set(a_list)\n", 614 | "print(a_set, type(a_set))" 615 | ] 616 | }, 617 | { 618 | "cell_type": "code", 619 | "execution_count": null, 620 | "metadata": {}, 621 | "outputs": [], 622 | "source": [ 623 | "a_list = ['one', 'two', 'three']\n", 624 | "print(a_list, type(a_list))\n", 625 | "\n", 626 | "a_tuple = tuple(a_list)\n", 627 | "print(a_tuple, type(a_tuple))" 628 | ] 629 | }, 630 | { 631 | "cell_type": "markdown", 632 | "metadata": {}, 633 | "source": [ 634 | "#### Exercise\n", 635 | "\n", 636 | "Take the given list and remove all multiple occurrences of elements, i.e., no 
element may occur more than once. It is not important to preserve the order of elements." 637 | ] 638 | }, 639 | { 640 | "cell_type": "code", 641 | "execution_count": null, 642 | "metadata": {}, 643 | "outputs": [], 644 | "source": [ 645 | "a_list = [1, 2, 2, 3, 3, 3, 4, 4, 4, 4]\n", 646 | "\n", 647 | "\n" 648 | ] 649 | }, 650 | { 651 | "cell_type": "markdown", 652 | "metadata": {}, 653 | "source": [ 654 | "Let's have a closer look at indexing for `list`s and `tuple`s:" 655 | ] 656 | }, 657 | { 658 | "cell_type": "code", 659 | "execution_count": null, 660 | "metadata": {}, 661 | "outputs": [], 662 | "source": [ 663 | "a = list(range(10))\n", 664 | "b = tuple(range(10))\n", 665 | "\n", 666 | "print(a, type(a))\n", 667 | "print(b, type(b))" 668 | ] 669 | }, 670 | { 671 | "cell_type": "markdown", 672 | "metadata": {}, 673 | "source": [ 674 | "In both cases, we access the first element by appending `[0]` to the variable name..." 675 | ] 676 | }, 677 | { 678 | "cell_type": "code", 679 | "execution_count": null, 680 | "metadata": {}, 681 | "outputs": [], 682 | "source": [ 683 | "print(a[0], b[0])" 684 | ] 685 | }, 686 | { 687 | "cell_type": "markdown", 688 | "metadata": {}, 689 | "source": [ 690 | "... and the second element by appending `[1]`:" 691 | ] 692 | }, 693 | { 694 | "cell_type": "code", 695 | "execution_count": null, 696 | "metadata": {}, 697 | "outputs": [], 698 | "source": [ 699 | "print(a[1], b[1])" 700 | ] 701 | }, 702 | { 703 | "cell_type": "markdown", 704 | "metadata": {}, 705 | "source": [ 706 | "Likewise, we access the last or second to last element using `[-1]` or `[-2]`:" 707 | ] 708 | }, 709 | { 710 | "cell_type": "code", 711 | "execution_count": null, 712 | "metadata": {}, 713 | "outputs": [], 714 | "source": [ 715 | "print(a[-1], b[-1])\n", 716 | "print(a[-2], b[-2])" 717 | ] 718 | }, 719 | { 720 | "cell_type": "markdown", 721 | "metadata": {}, 722 | "source": [ 723 | "Using `[:5]` we get all elements up to the index $5$ (excluded)..." 724 | ] 725 | }, 726 | { 727 | "cell_type": "code", 728 | "execution_count": null, 729 | "metadata": {}, 730 | "outputs": [], 731 | "source": [ 732 | "print(a[:5])" 733 | ] 734 | }, 735 | { 736 | "cell_type": "markdown", 737 | "metadata": {}, 738 | "source": [ 739 | "... or starting from index $5$ until the end:" 740 | ] 741 | }, 742 | { 743 | "cell_type": "code", 744 | "execution_count": null, 745 | "metadata": {}, 746 | "outputs": [], 747 | "source": [ 748 | "print(a[5:])" 749 | ] 750 | }, 751 | { 752 | "cell_type": "markdown", 753 | "metadata": {}, 754 | "source": [ 755 | "We can give both a start and end index to access any range..." 756 | ] 757 | }, 758 | { 759 | "cell_type": "code", 760 | "execution_count": null, 761 | "metadata": {}, 762 | "outputs": [], 763 | "source": [ 764 | "print(a[2:7])" 765 | ] 766 | }, 767 | { 768 | "cell_type": "markdown", 769 | "metadata": {}, 770 | "source": [ 771 | "... 
and if we add another `:` and a number $>1$, this acts as a step size:" 772 | ] 773 | }, 774 | { 775 | "cell_type": "code", 776 | "execution_count": null, 777 | "metadata": {}, 778 | "outputs": [], 779 | "source": [ 780 | "print(a[2:7:2])" 781 | ] 782 | }, 783 | { 784 | "cell_type": "markdown", 785 | "metadata": {}, 786 | "source": [ 787 | "A negative step size (and swapped start and end indices) allows us to select a range that runs backwards:" 788 | ] 789 | }, 790 | { 791 | "cell_type": "code", 792 | "execution_count": null, 793 | "metadata": {}, 794 | "outputs": [], 795 | "source": [ 796 | "print(a[7:2:-2])" 797 | ] 798 | }, 799 | { 800 | "cell_type": "markdown", 801 | "metadata": {}, 802 | "source": [ 803 | "This always follows the same pattern: `start:end:step`.\n", 804 | "\n", 805 | "With this slicing syntax, we can easily reverse an entire `list`:" 806 | ] 807 | }, 808 | { 809 | "cell_type": "code", 810 | "execution_count": null, 811 | "metadata": {}, 812 | "outputs": [], 813 | "source": [ 814 | "print(a[::-1])" 815 | ] 816 | }, 817 | { 818 | "cell_type": "markdown", 819 | "metadata": {}, 820 | "source": [ 821 | "#### Exercise\n", 822 | "\n", 823 | "Convince yourself that the above indexing patterns also work for a `tuple`." 824 | ] 825 | }, 826 | { 827 | "cell_type": "code", 828 | "execution_count": null, 829 | "metadata": {}, 830 | "outputs": [], 831 | "source": [] 832 | }, 833 | { 834 | "cell_type": "markdown", 835 | "metadata": {}, 836 | "source": [ 837 | "A remark on strings: `str`-type objects behave like a `tuple`..." 838 | ] 839 | }, 840 | { 841 | "cell_type": "code", 842 | "execution_count": null, 843 | "metadata": {}, 844 | "outputs": [], 845 | "source": [ 846 | "c = 'this is a sentence'\n", 847 | "print(c, type(c))\n", 848 | "print(c[::-1])" 849 | ] 850 | }, 851 | { 852 | "cell_type": "markdown", 853 | "metadata": {}, 854 | "source": [ 855 | "... they are immutable, but elements can be (read-)accessed by index and slicing.\n", 856 | "\n", 857 | "Let us now revisit `set`s:" 858 | ] 859 | }, 860 | { 861 | "cell_type": "code", 862 | "execution_count": null, 863 | "metadata": {}, 864 | "outputs": [], 865 | "source": [ 866 | "a = set('An informative Python tutorial')\n", 867 | "b = set('Nice spring weather')\n", 868 | "\n", 869 | "print(a)\n", 870 | "print(b)" 871 | ] 872 | }, 873 | { 874 | "cell_type": "markdown", 875 | "metadata": {}, 876 | "source": [ 877 | "Each `set` stores all letters used in the above sentences (but not the number of occurrences). To make things easier to read, we pass each `set` through the `sorted()` function, which sorts the sequence of letters:" 878 | ] 879 | }, 880 | { 881 | "cell_type": "code", 882 | "execution_count": null, 883 | "metadata": {}, 884 | "outputs": [], 885 | "source": [ 886 | "print(sorted(a))\n", 887 | "print(sorted(b))" 888 | ] 889 | }, 890 | { 891 | "cell_type": "markdown", 892 | "metadata": {}, 893 | "source": [ 894 | "A very nice feature of `set`s that `list`s and `tuple`s do not have is that we can use them with the operators `&` (logical and), `|` (logical or), and `^` (logical xor).\n", 895 | "Thus, we can easily get the intersection of two sets..." 896 | ] 897 | }, 898 | { 899 | "cell_type": "code", 900 | "execution_count": null, 901 | "metadata": {}, 902 | "outputs": [], 903 | "source": [ 904 | "print(sorted(a & b))" 905 | ] 906 | }, 907 | { 908 | "cell_type": "markdown", 909 | "metadata": {}, 910 | "source": [ 911 | "... their union..." 
912 | ] 913 | }, 914 | { 915 | "cell_type": "code", 916 | "execution_count": null, 917 | "metadata": {}, 918 | "outputs": [], 919 | "source": [ 920 | "print(sorted(a | b))" 921 | ] 922 | }, 923 | { 924 | "cell_type": "markdown", 925 | "metadata": {}, 926 | "source": [ 927 | "... or all letters which appear in only one of the two sentences:" 928 | ] 929 | }, 930 | { 931 | "cell_type": "code", 932 | "execution_count": null, 933 | "metadata": {}, 934 | "outputs": [], 935 | "source": [ 936 | "print(sorted(a ^ b))" 937 | ] 938 | }, 939 | { 940 | "cell_type": "markdown", 941 | "metadata": {}, 942 | "source": [ 943 | "There is a fourth type of data structure, a `dict` (dictionary), which has some resemblance to `set`s. A `dict` contains pairs of `keys` and `values`, and for the `keys`, a `dict` behaves like a `set`, i.e., no `key` may appear more than once. The `values` can then be accessed by their `keys` instead of indices. You can create a `dict` either with the built-in `dict()` function..." 944 | ] 945 | }, 946 | { 947 | "cell_type": "code", 948 | "execution_count": null, 949 | "metadata": {}, 950 | "outputs": [], 951 | "source": [ 952 | "a = dict(one=1, two=2, three=3)\n", 953 | "print(a, type(a))" 954 | ] 955 | }, 956 | { 957 | "cell_type": "markdown", 958 | "metadata": {}, 959 | "source": [ 960 | "... or with a `set`-like syntax:" 961 | ] 962 | }, 963 | { 964 | "cell_type": "code", 965 | "execution_count": null, 966 | "metadata": {}, 967 | "outputs": [], 968 | "source": [ 969 | "b = {'four': 4, 'five': 5, 'six': 6}\n", 970 | "print(b, type(b))" 971 | ] 972 | }, 973 | { 974 | "cell_type": "markdown", 975 | "metadata": {}, 976 | "source": [ 977 | "We convince ourselves that `dict`s are mutable..." 978 | ] 979 | }, 980 | { 981 | "cell_type": "code", 982 | "execution_count": null, 983 | "metadata": {}, 984 | "outputs": [], 985 | "source": [ 986 | "c = a\n", 987 | "c.update(zero=0)\n", 988 | "print(a, type(a))" 989 | ] 990 | }, 991 | { 992 | "cell_type": "markdown", 993 | "metadata": {}, 994 | "source": [ 995 | "... 
and see how we can access an element by key:" 996 | ] 997 | }, 998 | { 999 | "cell_type": "code", 1000 | "execution_count": null, 1001 | "metadata": {}, 1002 | "outputs": [], 1003 | "source": [ 1004 | "print(a['two'])" 1005 | ] 1006 | }, 1007 | { 1008 | "cell_type": "markdown", 1009 | "metadata": {}, 1010 | "source": [ 1011 | "To get all `keys` of a `dict` we can use the `keys()` method:" 1012 | ] 1013 | }, 1014 | { 1015 | "cell_type": "code", 1016 | "execution_count": null, 1017 | "metadata": {}, 1018 | "outputs": [], 1019 | "source": [ 1020 | "print(a.keys())" 1021 | ] 1022 | }, 1023 | { 1024 | "cell_type": "markdown", 1025 | "metadata": {}, 1026 | "source": [ 1027 | "To repeat what we did three cells earlier: we can add more `key`-`value` pairs with the `update()` method, which accepts the same keyword syntax as the `dict()` function as well as entire `dict` objects:" 1028 | ] 1029 | }, 1030 | { 1031 | "cell_type": "code", 1032 | "execution_count": null, 1033 | "metadata": {}, 1034 | "outputs": [], 1035 | "source": [ 1036 | "a.update(b)\n", 1037 | "print(a)" 1038 | ] 1039 | }, 1040 | { 1041 | "cell_type": "markdown", 1042 | "metadata": {}, 1043 | "source": [ 1044 | "A very useful feature of Python is that `list`s, `tuple`s, `set`s, and `dict`s can be nested:" 1045 | ] 1046 | }, 1047 | { 1048 | "cell_type": "code", 1049 | "execution_count": null, 1050 | "metadata": {}, 1051 | "outputs": [], 1052 | "source": [ 1053 | "a = [0, 1, 2, ('one', 'two'), {5, 5, 5}]\n", 1054 | "print(a)" 1055 | ] 1056 | }, 1057 | { 1058 | "cell_type": "code", 1059 | "execution_count": null, 1060 | "metadata": {}, 1061 | "outputs": [], 1062 | "source": [ 1063 | "b = dict(some_key=list(range(10)))\n", 1064 | "print(b)" 1065 | ] 1066 | }, 1067 | { 1068 | "cell_type": "markdown", 1069 | "metadata": {}, 1070 | "source": [ 1071 | "And, while a `tuple` itself is immutable, any mutable object within it can be changed:" 1072 | ] 1073 | }, 1074 | { 1075 | "cell_type": "code", 1076 | "execution_count": null, 1077 | "metadata": {}, 1078 | "outputs": [], 1079 | "source": [ 1080 | "c = ('one', list(range(10)))\n", 1081 | "print(c)\n", 1082 | "c[1].append(10)\n", 1083 | "print(c)" 1084 | ] 1085 | }, 1086 | { 1087 | "cell_type": "markdown", 1088 | "metadata": {}, 1089 | "source": [ 1090 | "#### Exercise\n", 1091 | "\n", 1092 | "Build your own nested structure of all the data types shown in this notebook. Can you create a set which contains another set, list, tuple, or dictionary?" 
1093 | ] 1094 | }, 1095 | { 1096 | "cell_type": "code", 1097 | "execution_count": null, 1098 | "metadata": {}, 1099 | "outputs": [], 1100 | "source": [] 1101 | } 1102 | ], 1103 | "metadata": { 1104 | "kernelspec": { 1105 | "display_name": "Python 3", 1106 | "language": "python", 1107 | "name": "python3" 1108 | }, 1109 | "language_info": { 1110 | "codemirror_mode": { 1111 | "name": "ipython", 1112 | "version": 3 1113 | }, 1114 | "file_extension": ".py", 1115 | "mimetype": "text/x-python", 1116 | "name": "python", 1117 | "nbconvert_exporter": "python", 1118 | "pygments_lexer": "ipython3", 1119 | "version": "3.7.0" 1120 | }, 1121 | "toc": { 1122 | "base_numbering": 1, 1123 | "nav_menu": {}, 1124 | "number_sections": false, 1125 | "sideBar": true, 1126 | "skip_h1_title": false, 1127 | "title_cell": "Table of Contents", 1128 | "title_sidebar": "Contents", 1129 | "toc_cell": false, 1130 | "toc_position": {}, 1131 | "toc_section_display": true, 1132 | "toc_window_display": false 1133 | } 1134 | }, 1135 | "nbformat": 4, 1136 | "nbformat_minor": 2 1137 | } 1138 | --------------------------------------------------------------------------------