├── Chapter 01 ├── array-arithmetic-and-functions.py ├── basic-mathematical-functions.py ├── complex-type.py ├── decimal-type.py ├── eigenvalus-and-eigenvectors.py ├── fraction-type.py ├── matrices-and-n-dimensional-arrays.py ├── matrix-properties-of-numpy-arrays.py ├── numpy-arrays.py ├── solving-equations.py ├── sparse-matrices.py ├── systems-of-equations.py └── useful-array-creation-routines.py ├── Chapter 02 ├── adding-labels-and-legends-to-plots.py ├── adding-subplots.py ├── basic-plotting-with-matplotlib.py ├── changing-the-plotting-style.py ├── customising-3d-plots.py ├── plotting-with-error-bars.py ├── saving-matplotlib-figures.py ├── surface-and-contour-plots.py ├── trisurf.py └── visualizing-vector-fields-with-quivers.py ├── Chapter 03 ├── automatic-differentiation-and-calculus-using-jax.py ├── discrete-fourier-transforms-for-signals-processing.py ├── numerical-integration.py ├── partial-differential-equations.py ├── polynomials-and-calculus.py ├── solving-differential-equations-with-jax.py ├── solving-equations.py ├── solving-simple-differential-equations-numerically.py ├── solving-systems-of-differential-equations.py └── symbolic-calculus-using-sympy.py ├── Chapter 04 ├── analysing-conversion-rates-with-bayesian-techniques.py ├── changing-the-random-number-generator.py ├── estimating-parameters-with-monte-carlo-simulations.py ├── generating-normally-distributed-random-numbers.py ├── generating-random-data.py ├── selecting-items-at-random.py └── working-with-random-processes.py ├── Chapter 05 ├── coloring-a-network.py ├── creating-directed-and-weighted-networks.py ├── creating-networks-in-python.py ├── finding-minimum-spanning-trees-and-dominating-sets.py ├── finding-shortest-paths.py ├── generating-the-adjacency-matrix-for-a-network.py ├── getting-the-basic-characteristics-of-networks.py ├── quantifying-clustering-in-a-network.py └── visualising-networks.py ├── Chapter 06 ├── creating-interactive-plots-with-Bokeh.py ├── creating-series-and-dataframes.py 
├── getting-descriptive-statistics-from-dataframes.py ├── loading-and-storing-data-from-a-dataframe.py ├── manipulating-data-frames.py ├── performing-operations-on-grouped-data-in-a-dataframe.py ├── plotting-data-from-a-DataFrame.py ├── testing-hypotheses-for-non-parametric-data.py ├── testing-hypotheses-using-ANOVA.py ├── testing-hypotheses-using-t-tests.py └── understanding-a-population-using-sampling.py ├── Chapter 07 ├── classifying-using-logarithmic-regression.py ├── forecasting-from-time-series-data-using-arima.py ├── forecasting-seasonal-data-with-arima.py ├── modelling-time-series-data-with-arma.py ├── tsdata.py ├── using-linear-regression.py ├── using-multilinear-regression.py ├── using-prophet-to-model-time-series.py └── using-signatures-to-summarize-time-series-data.py ├── Chapter 08 ├── computing-convex-hulls.py ├── constructing-bezier-curves.py ├── finding-edges-in-images.py ├── finding-interior-points.py ├── mandelbrot.png ├── swisscheese-grid-10411.csv ├── triangulating-polygonal-regions.py └── visualizing-two-dimensional-geometric-figures.py ├── Chapter 09 ├── analyzing-simple-two-player-games.py ├── computing-nash-equilibria.py ├── minimising-a-non-linear-system.py ├── minimising-simple-linear-systems.py ├── using-gradient-descent-methods.py └── using-least-squares-to-fit-a-curve-to-data.py ├── Chapter 10 ├── accelerating-code-with-cython │ ├── mandelbrot │ │ ├── __init__.py │ │ ├── python_mandel.py │ │ └── setup.py │ └── run.py ├── accouting-for-uncertainty-in-calculations.py ├── distributing-computations-with-dask.py ├── keeping-track-of-units-with-pint.py ├── loading-and-storing-data-from-netcdf.py ├── sample.csv ├── sample.ipynb ├── validating-data.py ├── working-with-geographical-data.py └── writing-reproducible-code-for-data-science.py ├── LICENSE └── README.md /Chapter 01/array-arithmetic-and-functions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | arr_a = np.array([1, 
2, 3, 4]) 5 | arr_b = np.array([1, 0, -3, 1]) 6 | arr_a + arr_b # array([2, 2, 0, 5]) 7 | arr_a - arr_b # array([0, 2, 6, 3]) 8 | arr_a * arr_b # array([ 1, 0, -9, 4]) 9 | arr_b / arr_a # array([ 1. , 0. , -1. , 0.25]) 10 | arr_b**arr_a # array([1, 0, -27, 1]) 11 | 12 | 13 | arr = np.array([1, 2, 3, 4]) 14 | new = 2*arr 15 | print(new) 16 | # [2, 4, 6, 8] -------------------------------------------------------------------------------- /Chapter 01/basic-mathematical-functions.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | math.sqrt(4) # 2.0 4 | 5 | theta = math.pi / 4 6 | math.cos(theta) # 0.7071067811865476 7 | math.sin(theta) # 0.7071067811865475 8 | math.tan(theta) # 0.9999999999999999 9 | 10 | math.asin(-1) # -1.5707963267948966 11 | math.acos(-1) # 3.141592653589793 12 | math.atan(1) # 0.7853981633974483 13 | 14 | math.log(10) # 2.302585092994046 15 | math.log(10, 10) # 1.0 16 | 17 | math.gamma(5) # 24.0 18 | math.erf(2) # 0.9953222650189527 19 | 20 | 21 | math.comb(5, 2) # 10 22 | math.factorial(5) # 120 23 | 24 | 25 | math.gcd(2, 4) # 2 26 | math.gcd(2, 3) # 1 27 | 28 | 29 | nums = [0.1]*10 # list containing 0.1 ten times 30 | sum(nums) # 0.9999999999999999 31 | math.fsum(nums) # 1.0 -------------------------------------------------------------------------------- /Chapter 01/complex-type.py: -------------------------------------------------------------------------------- 1 | z = 1 + 1j 2 | z + 2 # 3 + 1j 3 | z.conjugate() # 1 - 1j -------------------------------------------------------------------------------- /Chapter 01/decimal-type.py: -------------------------------------------------------------------------------- 1 | from decimal import Decimal 2 | 3 | num1 = Decimal('1.1') 4 | num2 = Decimal('1.563') 5 | num1 + num2 # Decimal('2.663') 6 | 7 | from decimal import getcontext 8 | ctx = getcontext() 9 | num = Decimal('1.1') 10 | num**4 # Decimal('1.4641') 11 | ctx.prec = 4 # set new precision 12 
| num**4 # Decimal('1.464') 13 | 14 | 15 | from decimal import localcontext 16 | num = Decimal("1.1") 17 | with localcontext() as ctx: 18 | ctx.prec = 2 19 | num**4 # Decimal('1.5') 20 | num**4 # Decimal('1.4641') 21 | 22 | -------------------------------------------------------------------------------- /Chapter 01/eigenvalus-and-eigenvectors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from numpy import linalg 4 | 5 | A = np.array([[3, -1, 4], [-1, 0, -1], [4, -1, 2]]) 6 | 7 | v, B = linalg.eig(A) 8 | 9 | i = 0 # first eigenvalue/eigenvector pair 10 | lambda0 = v[i] 11 | print(lambda0) 12 | # 6.823156164525971 13 | x0 = B[:, i] # ith column of B 14 | print(x0) 15 | # array([ 0.73271846, -0.20260301, 0.649672352]) 16 | 17 | linalg.norm(x0) # 1.0 - eigenvectors are normalised. 18 | 19 | 20 | lhs = A @ x0 21 | rhs = lambda0*x0 22 | linalg.norm(lhs - rhs) # 2.8445583831733384e-15 - very small. 23 | 24 | -------------------------------------------------------------------------------- /Chapter 01/fraction-type.py: -------------------------------------------------------------------------------- 1 | from fractions import Fraction 2 | num1 = Fraction(1, 3) 3 | num2 = Fraction(1, 7) 4 | num1 * num2 # Fraction(1, 21) 5 | -------------------------------------------------------------------------------- /Chapter 01/matrices-and-n-dimensional-arrays.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | mat = np.array([[1, 2], [3, 4]]) 4 | vec = np.array([1, 2]) 5 | 6 | mat.shape # (2, 2) 7 | vec.shape # (2,) 8 | 9 | 10 | mat.reshape(4,) 11 | # array([1, 2, 3, 4]) 12 | 13 | 14 | mat1 = [[1, 2], [3, 4]] 15 | mat2 = [[5, 6], [7, 8]] 16 | mat3 = [[9, 10], [11, 12]] 17 | 18 | arr_3d = np.array([mat1, mat2, mat3]) 19 | arr_3d.shape # (3, 2, 2) 20 | 21 | mat[0, 0] # 1 - top left element 22 | mat[1, 1] # 4 - bottom right element 23 | 24 | mat[:, 0] # 
array([1, 3]) 25 | 26 | 27 | -------------------------------------------------------------------------------- /Chapter 01/matrix-properties-of-numpy-arrays.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | np.eye(3) 4 | # array([[1., 0., 0.], 5 | # [0., 1., 0.], 6 | # [0., 0., 1.]]) 7 | 8 | 9 | mat = np.array([[1, 2], [3, 4]]) 10 | mat.transpose() 11 | # array([[1, 3], 12 | # [2, 4]]) 13 | mat.T 14 | # array([[1, 3], 15 | # [2, 4]]) 16 | 17 | 18 | A = np.array([[1, 2], [3, 4]]) 19 | A.trace() # 5 20 | 21 | A = np.array([[1, 2], [3, 4]]) 22 | B = np.array([[-1, 1], [0, 1]]) 23 | A @ B 24 | # array([[-1, 3], 25 | # [-3, 7]]) 26 | A * B 27 | # array([[-1, 2], 28 | # [ 0, 4]]) 29 | 30 | 31 | A = np.array([[1, 2], [3, 4]]) 32 | I = np.eye(2) 33 | A @ I 34 | # array([[1, 2], 35 | # [3, 4]]) 36 | 37 | 38 | from numpy import linalg 39 | linalg.det(A) # -2.0000000000000004 40 | linalg.inv(A) 41 | # array([[-2. , 1. ], 42 | # [ 1.5, -0.5]]) 43 | 44 | 45 | Ainv = linalg.inv(A) 46 | Ainv @ A 47 | # Approximately 48 | # array([[1., 0.], 49 | # [0., 1.]]) 50 | 51 | A @ Ainv 52 | # Approximately 53 | # array([[1., 0.], 54 | # [0., 1.]]) 55 | 56 | -------------------------------------------------------------------------------- /Chapter 01/numpy-arrays.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ary = np.array([1, 2, 3, 4]) # array([1, 2, 3, 4]) 4 | 5 | ary[0] # 1 6 | ary[2] # 3 7 | ary[::2] # array([1, 3]) 8 | 9 | 10 | np.array([1, 2, 3, 4], dtype=np.float32) 11 | # array([1., 2., 3., 4.], dtype=float32) 12 | 13 | 14 | arr = np.array([1, 2, 3, 4]) 15 | print(arr.dtype) # dtype('int64') 16 | arr.dtype = np.float32 17 | print(arr) 18 | # [1.e-45 0.e+00 3.e-45 0.e+00 4.e-45 0.e+00 6.e-45 0.e+00] 19 | 20 | 21 | arr = arr.astype(np.float32) 22 | print(arr) 23 | # [1. 2. 3. 4.] 
-------------------------------------------------------------------------------- /Chapter 01/solving-equations.py: -------------------------------------------------------------------------------- 1 | from scipy import optimize 2 | from math import exp 3 | 4 | def f(x): 5 | return x*(x - 2)*exp(3 - x) 6 | 7 | def fp(x): 8 | return -(x**2 - 4*x + 2)*exp(3 - x) 9 | 10 | 11 | optimize.newton(f, 1., x1=1.5) # Using x1 = 1.5 and the secant method 12 | # 1.9999999999999862 13 | optimize.newton(f, 1., fprime=fp) # Using Newton-Raphson method 14 | # 2.0 15 | 16 | 17 | -------------------------------------------------------------------------------- /Chapter 01/sparse-matrices.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import sparse 3 | 4 | T = sparse.diags([-1, 2, -1], (-1, 0, 1), shape=(5, 5), format="csr") 5 | T.toarray() 6 | # array([[ 2, -1, 0, 0, 0], 7 | # [-1, 2, -1, 0, 0], 8 | # [ 0, -1, 2, -1, 0], 9 | # [ 0, 0, -1, 2, -1], 10 | # [ 0, 0, 0, -1, 2]]) 11 | 12 | from scipy.sparse import linalg 13 | linalg.spsolve(T.tocsr(), np.array([1, 2, 3, 4, 5])) 14 | # array([ 5.83333333, 10.66666667, 13.5, 13.33333333, 9.16666667]) 15 | -------------------------------------------------------------------------------- /Chapter 01/systems-of-equations.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy import linalg 3 | 4 | A = np.array([[3, -2, 1], [1, 1, -2], [-3, -2, 1]]) 5 | b = np.array([7, -4, 1]) 6 | 7 | linalg.solve(A, b) # array([ 1., -1., 2.]) 8 | 9 | 10 | -------------------------------------------------------------------------------- /Chapter 01/useful-array-creation-routines.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | np.linspace(0, 1, 5) # array([0., 0.25, 0.5, 0.75, 1.0]) 5 | np.arange(0, 1, 0.3) # array([0.0, 0.3, 0.6, 0.9]) 6 | 7 | 
-------------------------------------------------------------------------------- /Chapter 02/adding-labels-and-legends-to-plots.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | y1 = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) 5 | y2 = np.array([1.2, 1.6, 3.1, 4.2, 4.8]) 6 | y3 = np.array([3.2, 1.1, 2.0, 3.9, 2.5]) 7 | 8 | fig, ax = plt.subplots() 9 | 10 | lines = ax.plot(y1, 'o', y2, 'x', y3, '*') 11 | 12 | ax.set_title("Plot of the data y1, y2, and y3") 13 | ax.set_xlabel("x axis label") 14 | ax.set_ylabel("y axis label") 15 | 16 | 17 | 18 | ax.legend(("data y1", "data y2", "data y3")) 19 | 20 | 21 | plt.show() 22 | 23 | 24 | -------------------------------------------------------------------------------- /Chapter 02/adding-subplots.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from math import fabs 4 | 5 | def generate_newton_iters(x0, number): 6 | yield x0, fabs(x0 - 1.) 7 | for _ in range(number): 8 | x0 = x0 - (x0*x0 - 1.)/(2*x0) 9 | yield x0, fabs(x0 - 1.) 
10 | 11 | 12 | data = np.array(list(generate_newton_iters(2.0, 5))) 13 | iterates, errors = data[:, 0], data[:, 1] 14 | 15 | fig, (ax1, ax2) = plt.subplots(1, 2, tight_layout=True) # 1 row, 2 columns 16 | 17 | ax1.plot(iterates, "kx") 18 | ax1.set_title("Iterates") 19 | ax1.set_xlabel("$i$", usetex=True) 20 | ax1.set_ylabel("$x_i$", usetex=True) 21 | 22 | ax2.semilogy(errors, "kx") # plot y on a logarithmic scale 23 | ax2.set_title("Error") 24 | ax2.set_xlabel("$i$", usetex=True) 25 | ax2.set_ylabel("Error") 26 | 27 | plt.show() 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /Chapter 02/basic-plotting-with-matplotlib.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | # set up 5 | def f(x): 6 | return x*(x-2)*np.exp(3 - x) 7 | 8 | def g(x): 9 | return x**2 10 | 11 | def h(x): 12 | return 1 - x 13 | 14 | 15 | x = np.linspace(-0.5, 3.0) # 50 values between -0.5 and 3.0 16 | 17 | 18 | y1 = f(x) # evaluate f on the x points 19 | y2 = g(x) # evaluate g on the x points 20 | y3 = h(x) # evaluate h on the x points 21 | 22 | 23 | fig, ax = plt.subplots() 24 | 25 | ax.plot(x, y1, "k") # black solid line style 26 | 27 | 28 | ax.plot(x, y2, "k--") # black dashed line style 29 | ax.plot(x, y3, "k.-") # black solid line with point markers 30 | 31 | 32 | ax.set_title("Plot of the functions f, g, and h") 33 | ax.set_xlabel("x") 34 | ax.set_ylabel("y") 35 | 36 | 37 | ax.legend(["f", "g", "h"]) 38 | 39 | ax.text(0.4, 2.0, "Intersection") 40 | 41 | plt.show() -------------------------------------------------------------------------------- /Chapter 02/changing-the-plotting-style.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | y1 = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) 6 | y2 = np.array([1.2, 1.6, 3.1, 4.2, 4.8]) 7 | y3 = 
np.array([3.2, 1.1, 2.0, 3.9, 2.5]) 8 | 9 | fig, ax = plt.subplots() 10 | 11 | lines = ax.plot(y1, 'o', y2, 'x', y3, '*', color="k") 12 | 13 | ax.set_xlabel("x") 14 | ax.set_ylabel("y") 15 | ax.set_title("Plot of several data points with only markers") 16 | 17 | plt.show() 18 | -------------------------------------------------------------------------------- /Chapter 02/customising-3d-plots.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from mpl_toolkits import mplot3d 4 | 5 | t = np.linspace(-5, 5) 6 | x, y = np.meshgrid(t, t) 7 | z = np.exp(-((x-2.)**2 + (y-3.)**2)/4) - np.exp(-((x+3.)**2 + (y+2)**2)/3) 8 | 9 | 10 | fig = plt.figure() 11 | 12 | ax = fig.add_subplot(projection="3d", proj_type="ortho") 13 | 14 | ax.plot_surface(x, y, z, cmap="gray", vmin=-1.2, vmax=1.2) 15 | ax.set_title("Customized 3D surface plot") 16 | ax.set_xlabel("x") 17 | ax.set_ylabel("y") 18 | ax.set_zlabel("z") 19 | 20 | 21 | plt.show() 22 | 23 | -------------------------------------------------------------------------------- /Chapter 02/plotting-with-error-bars.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | measurement_id = np.arange(1, 11) 5 | measurements = np.array([2.3, 1.9, 4.4, 1.5, 3.0, 3.3, 2.9, 2.6, 4.1, 3.6]) 6 | err = np.array([0.1]*10) 7 | 8 | fig, ax = plt.subplots() 9 | 10 | ax.errorbar(measurement_id, measurements, yerr=err, fmt="kx", capsize=2.0) 11 | 12 | ax.set_title("Plot of measurements and their estimated error") 13 | ax.set_xlabel("Measurement ID") 14 | ax.set_ylabel("Measurement (cm)") 15 | 16 | 17 | ax.set_xticks(measurement_id) 18 | 19 | plt.show() 20 | -------------------------------------------------------------------------------- /Chapter 02/saving-matplotlib-figures.py: -------------------------------------------------------------------------------- 1 | 
import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | x = np.arange(1, 5, 0.1) 5 | y = x*x 6 | 7 | fig, ax = plt.subplots() 8 | ax.plot(x, y) 9 | ax.set_title("Graph of $y=x^2$") 10 | ax.set_xlabel("$x$") 11 | ax.set_ylabel("$y$") 12 | fig.savefig("savingfigs.png", dpi=300) 13 | plt.show() 14 | -------------------------------------------------------------------------------- /Chapter 02/surface-and-contour-plots.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | X = np.linspace(-5, 5) 5 | Y = np.linspace(-5, 5) 6 | 7 | x, y = np.meshgrid(X, Y) 8 | 9 | z = np.exp(-((x - 2.)**2 + (y - 3.)**2)/4) - np.exp(-((x + 3.)**2 + (y + 2.)**2)/3) 10 | 11 | from mpl_toolkits import mplot3d 12 | 13 | fig = plt.figure() 14 | ax = fig.add_subplot(projection="3d") 15 | 16 | ax.plot_surface(x, y, z, cmap="gray") 17 | 18 | ax.set_xlabel("x") 19 | ax.set_ylabel("y") 20 | ax.set_zlabel("z") 21 | ax.set_title("Graph of the function f(x, y)") 22 | 23 | plt.show() # paused here 24 | 25 | 26 | fig, ax = plt.subplots() 27 | ax.contour(x, y, z, cmap="gray") 28 | ax.set_title("Contours of f(x, y)") 29 | ax.set_xlabel("x") 30 | ax.set_ylabel("y") 31 | 32 | 33 | plt.show() 34 | -------------------------------------------------------------------------------- /Chapter 02/trisurf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from mpl_toolkits import mplot3d 4 | 5 | 6 | x = np.array([ 0.19, -0.82, 0.8 , 0.95, 0.46, 0.71, 7 | -0.86, -0.55, 0.75,-0.98, 0.55, -0.17, -0.89, 8 | -0.4 , 0.48, -0.09, 1., -0.03, -0.87, -0.43]) 9 | y = np.array([-0.25, -0.71, -0.88, 0.55, -0.88, 0.23, 10 | 0.18,-0.06, 0.95, 0.04, -0.59, -0.21, 0.14, 0.94, 11 | 0.51, 0.47, 0.79, 0.33, -0.85, 0.19]) 12 | z = np.array([-0.04, 0.44, -0.53, 0.4, -0.31, 0.13, 13 | -0.12, 0.03, 0.53, -0.03, -0.25, 0.03, -0.1 , 14 | -0.29, 
0.19, -0.03, 0.58, -0.01, 0.55, -0.06]) 15 | 16 | 17 | fig = plt.figure(tight_layout=True) 18 | ax1 = fig.add_subplot(1, 2, 1, projection="3d") 19 | ax1.plot_trisurf(x, y, z, cmap="gray") 20 | 21 | ax1.set_xlabel("x") 22 | ax1.set_ylabel("y") 23 | ax1.set_zlabel("z") 24 | ax1.set_title("Approximate surface") 25 | 26 | ax2 = fig.add_subplot(1, 2, 2) 27 | ax2.tricontour(x, y, z, cmap="gray") 28 | ax2.set_xlabel("x") 29 | ax2.set_ylabel("y") 30 | ax2.set_title("Approximate contours") 31 | 32 | plt.show() 33 | -------------------------------------------------------------------------------- /Chapter 02/visualizing-vector-fields-with-quivers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def f(x, y): 6 | v = x**2 + y**2 7 | return np.exp(-2*v)*(x + y), np.exp(-2*v)*(x - y) 8 | 9 | 10 | t = np.linspace(-1., 1.) 11 | x, y = np.meshgrid(t, t) 12 | 13 | dx, dy = f(x, y) 14 | 15 | 16 | fig, ax = plt.subplots() 17 | ax.quiver(x, y, dx, dy) 18 | 19 | ax.set_xlabel("x") 20 | ax.set_ylabel("y") 21 | ax.set_title("Quiver plot of a vector field") 22 | 23 | plt.show() 24 | 25 | 26 | -------------------------------------------------------------------------------- /Chapter 03/automatic-differentiation-and-calculus-using-jax.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from mpl_toolkits import mplot3d 3 | 4 | import jax.numpy as jnp 5 | from jax import grad, jit, vmap 6 | 7 | @jit 8 | def f(x, y): 9 | return jnp.exp(-(x**2 + y**2)) 10 | 11 | t = jnp.linspace(-1.0, 1.0) 12 | x, y = jnp.meshgrid(t, t) 13 | fig = plt.figure() 14 | ax = fig.add_subplot(projection="3d") 15 | ax.plot_surface(x, y, f(x, y), cmap="gray") 16 | ax.set_title("Plot of the function f(x, y)") 17 | ax.set_xlabel("x") 18 | ax.set_ylabel("y") 19 | ax.set_zlabel("z") 20 | 21 | 22 | fx = jit(grad(f, 0)) 23 | fy = jit(grad(f, 1)) 24 | 
25 | print(fx(1., -1.), fy(1., -1.)) 26 | 27 | zx = vmap(fx)(x.ravel(), y.ravel()).reshape(x.shape) 28 | figpd = plt.figure() 29 | axpd = figpd.add_subplot(projection="3d") 30 | axpd.plot_surface(x, y, zx, cmap="gray") 31 | axpd.set_title("Partial derivative with respect to x") 32 | axpd.set_xlabel("x") 33 | axpd.set_ylabel("y") 34 | axpd.set_zlabel("z") 35 | 36 | 37 | plt.show() 38 | -------------------------------------------------------------------------------- /Chapter 03/discrete-fourier-transforms-for-signals-processing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | rng = np.random.default_rng(12345) 4 | 5 | 6 | def signal(t, freq_1=4.0, freq_2=7.0): 7 | return np.sin(freq_1*2*np.pi*t) + np.sin(freq_2*2*np.pi*t) 8 | 9 | 10 | sample_size = 2**7 11 | sample_t = np.linspace(0, 4, sample_size, dtype=np.float64) 12 | sample_y = signal(sample_t) + rng.standard_normal(sample_size) 13 | sample_d = 4. 
/ (sample_size - 1) # Spacing for linspace array 14 | true_signal = signal(sample_t) 15 | 16 | from numpy import fft 17 | 18 | fig1, ax1 = plt.subplots() 19 | ax1.plot(sample_t, sample_y, "k.", label="Noisy signal") 20 | ax1.plot(sample_t, true_signal, "k--", label="True signal") 21 | 22 | ax1.set_title("Sample signal with noise") 23 | ax1.set_xlabel("Time") 24 | ax1.set_ylabel("Amplitude") 25 | ax1.legend() 26 | 27 | 28 | spectrum = fft.fft(sample_y) 29 | 30 | freq = fft.fftfreq(sample_size, sample_d) 31 | pos_freq_i = np.arange(1, sample_size//2, dtype=int) 32 | 33 | psd = np.abs(spectrum[pos_freq_i])**2 + np.abs(spectrum[-pos_freq_i])**2 34 | 35 | fig2, ax2 = plt.subplots() 36 | ax2.plot(freq[pos_freq_i], psd, "k") 37 | ax2.set_title("PSD of the noisy signal") 38 | ax2.set_xlabel("Frequency") 39 | ax2.set_ylabel("Density") 40 | 41 | 42 | filtered = pos_freq_i[psd > 2e3] 43 | 44 | new_spec = np.zeros_like(spectrum) 45 | new_spec[filtered] = spectrum[filtered] 46 | new_spec[-filtered] = spectrum[-filtered] 47 | 48 | new_sample = np.real(fft.ifft(new_spec)) 49 | 50 | fig3, ax3 = plt.subplots() 51 | ax3.plot(sample_t, true_signal, color="#8c8c8c", linewidth=1.5, label="True signal") 52 | ax3.plot(sample_t, new_sample, "k--", label="Filtered signal") 53 | ax3.legend() 54 | ax3.set_title("Plot comparing filtered signal and true signal") 55 | ax3.set_xlabel("Time") 56 | ax3.set_ylabel("Amplitude") 57 | 58 | 59 | plt.show() 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /Chapter 03/numerical-integration.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def erf_integrand(t): 4 | return np.exp(-t**2) 5 | 6 | from scipy import integrate 7 | 8 | val_quad, err_quad = integrate.quad(erf_integrand, -1.0, 1.0) 9 | # (1.493648265624854, 1.6582826951881447e-14) 10 | 11 | 12 | val_quadr, err_quadr = integrate.quadrature(erf_integrand, -1.0, 1.0) 13 | # 
(1.4936482656450039, 7.459897144457273e-10) 14 | 15 | 16 | -------------------------------------------------------------------------------- /Chapter 03/partial-differential-equations.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from mpl_toolkits import mplot3d 4 | 5 | alpha = 1 6 | x0 = 0 # Left hand x limit 7 | xL = 2 # Right hand x limit 8 | 9 | N = 10 10 | x = np.linspace(x0, xL, N+1) 11 | h = (xL - x0) / N 12 | 13 | k = 0.01 14 | steps = 100 15 | t = np.array([i*k for i in range(steps+1)]) 16 | 17 | r = alpha*k / h**2 18 | assert r < 0.5, f"Must have r < 0.5, currently r={r}" 19 | 20 | from scipy import sparse 21 | diag = [1, *(1-2*r for _ in range(N-1)), 1] 22 | abv_diag = [0, *(r for _ in range(N-1))] 23 | blw_diag = [*(r for _ in range(N-1)), 0] 24 | 25 | A = sparse.diags([blw_diag, diag, abv_diag], (-1, 0, 1), shape=(N+1, N+1), dtype=np.float64, format="csr") 26 | 27 | u = np.zeros((steps+1, N+1), dtype=np.float64) 28 | 29 | def initial_profile(x): 30 | return 3*np.sin(np.pi*x/2) 31 | 32 | u[0, :] = initial_profile(x) 33 | 34 | for i in range(steps): 35 | u[i+1, :] = A @ u[i, :] 36 | 37 | 38 | X, T = np.meshgrid(x, t) 39 | fig = plt.figure() 40 | ax = fig.add_subplot(projection="3d") 41 | 42 | ax.plot_surface(T, X, u, cmap="gray") 43 | ax.set_title("Solution of the heat equation") 44 | ax.set_xlabel("t") 45 | ax.set_ylabel("x") 46 | ax.set_zlabel("u") 47 | 48 | 49 | plt.show() 50 | 51 | -------------------------------------------------------------------------------- /Chapter 03/polynomials-and-calculus.py: -------------------------------------------------------------------------------- 1 | 2 | class Polynomial: 3 | """Basic polynomial class""" 4 | 5 | def __init__(self, coeffs): 6 | self.coeffs = coeffs 7 | 8 | def __repr__(self): 9 | return f"Polynomial({repr(self.coeffs)})" 10 | 11 | def __call__(self, x): 12 | return sum(coeff*x**i for i, coeff in 
enumerate(self.coeffs)) 13 | 14 | def differentiate(self): 15 | """Differentiate the polynomial and return the derivative""" 16 | coeffs = [i*c for i, c in enumerate(self.coeffs[1:], start=1)] 17 | return Polynomial(coeffs) 18 | 19 | def integrate(self, constant=0): 20 | """Integrate the polynomial and return the integral""" 21 | coeffs = [float(constant)] 22 | coeffs += [c/i for i, c in enumerate(self.coeffs, start=1)] 23 | return Polynomial(coeffs) 24 | 25 | 26 | p = Polynomial([1, -2, 1]) 27 | p.differentiate() 28 | # Polynomial([-2, 2]) 29 | p.integrate(constant=1) 30 | # Polynomial([1.0, 1.0, -1.0, 0.333333333333]) 31 | -------------------------------------------------------------------------------- /Chapter 03/solving-differential-equations-with-jax.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import jax.numpy as jnp 3 | import diffrax 4 | 5 | def f(x, y, args): 6 | u = y[..., 0] 7 | v = y[..., 1] 8 | return jnp.array([v, 3.*x**2*v-(1.-x)*u]) 9 | 10 | term = diffrax.ODETerm(f) 11 | solver = diffrax.Dopri5() 12 | save_at = diffrax.SaveAt(ts=jnp.linspace(0., 1.)) 13 | y0 = jnp.array([0., 1.]) 14 | 15 | solution = diffrax.diffeqsolve(term, solver, t0=0., t1=2., dt0=0.1, y0=y0, saveat=save_at) 16 | 17 | x = solution.ts 18 | y = solution.ys[:, 0] 19 | 20 | fig, ax = plt.subplots() 21 | ax.plot(x, y, "k") 22 | ax.set_title("Plot of the solution to the second order ODE") 23 | ax.set_xlabel("x") 24 | ax.set_ylabel("y") 25 | 26 | 27 | plt.show() 28 | 29 | 30 | -------------------------------------------------------------------------------- /Chapter 03/solving-equations.py: -------------------------------------------------------------------------------- 1 | from scipy import optimize 2 | from math import exp 3 | 4 | def f(x): 5 | return x*(x - 2)*exp(3 - x) 6 | 7 | def fp(x): 8 | return -(x**2 - 4*x + 2)*exp(3 - x) 9 | 10 | 11 | optimize.newton(f, 1., x1=1.5) # Using x1 = 1.5 and the secant 
method 12 | # 1.9999999999999862 13 | optimize.newton(f, 1., fprime=fp) # Using Newton-Raphson method 14 | # 2.0 15 | 16 | 17 | -------------------------------------------------------------------------------- /Chapter 03/solving-simple-differential-equations-numerically.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import integrate 3 | import matplotlib.pyplot as plt 4 | 5 | def f(t, y): 6 | return -0.2*y 7 | 8 | t_range = (0, 5) 9 | 10 | T0 = np.array([50.]) 11 | 12 | def true_solution(t): 13 | return 50.*np.exp(-0.2*t) 14 | 15 | sol = integrate.solve_ivp(f, t_range, T0, max_step=0.1) 16 | 17 | t_vals = sol.t 18 | T_vals = sol.y[0, :] 19 | 20 | fig, (ax1, ax2) = plt.subplots(1, 2, tight_layout=True) 21 | 22 | ax1.plot(t_vals, T_vals, "k") 23 | ax1.set_xlabel("$t$") 24 | ax1.set_ylabel("$T$") 25 | ax1.set_title("Solution of the cooling equation") 26 | 27 | 28 | err = np.abs(T_vals - true_solution(t_vals)) 29 | ax2.semilogy(t_vals, err, "k") 30 | ax2.set_xlabel("$t$") 31 | ax2.set_ylabel("Error") 32 | ax2.set_title("Error in approximation") 33 | 34 | plt.show() 35 | -------------------------------------------------------------------------------- /Chapter 03/solving-systems-of-differential-equations.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | def predator_prey_system(t, y): 5 | return np.array([5*y[0] - 0.1*y[0]*y[1], 0.1*y[1]*y[0] - 6*y[1]]) 6 | 7 | 8 | p = np.linspace(0, 100, 25) 9 | w = np.linspace(0, 100, 25) 10 | P, W = np.meshgrid(p, w) 11 | 12 | dp, dw = predator_prey_system(0, np.array([P, W])) 13 | 14 | fig, ax = plt.subplots() 15 | ax.quiver(P, W, dp, dw) 16 | ax.set_title("Population dynamics for two competing species") 17 | ax.set_xlabel("P") 18 | ax.set_ylabel("W") 19 | 20 | 21 | initial_conditions = np.array([85, 40]) 22 | 23 | from scipy import integrate 24 | sol = 
integrate.solve_ivp(predator_prey_system, (0., 5.), initial_conditions, max_step=0.01) 25 | 26 | ax.plot(initial_conditions[0], initial_conditions[1], "ko") 27 | ax.plot(sol.y[0, :], sol.y[1, :], "k", linewidth=0.5) 28 | 29 | plt.show() 30 | 31 | -------------------------------------------------------------------------------- /Chapter 03/symbolic-calculus-using-sympy.py: -------------------------------------------------------------------------------- 1 | import sympy 2 | 3 | x = sympy.symbols('x') 4 | 5 | f = (x**2 - 2*x)*sympy.exp(3 - x) 6 | 7 | fp = sympy.simplify(sympy.diff(f)) # (x*(2 - x) + 2*x - 2)*exp(3 - x) 8 | print(fp) 9 | 10 | fp2 = -(x**2 - 4*x + 2)*sympy.exp(3 - x) 11 | 12 | print(sympy.simplify(fp2 - fp) == 0) # True 13 | 14 | 15 | F = sympy.integrate(fp, x) 16 | print(F) # (x**2 - 2*x)*exp(3 - x) 17 | -------------------------------------------------------------------------------- /Chapter 04/analysing-conversion-rates-with-bayesian-techniques.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy as sp 3 | 4 | import matplotlib.pyplot as plt 5 | 6 | from scipy.stats import beta as beta_dist 7 | beta_pdf = beta_dist.pdf 8 | 9 | 10 | prior_alpha = 25 11 | prior_beta = 75 12 | 13 | args = (prior_alpha, prior_beta) 14 | prior_over_33, err = sp.integrate.quad(beta_pdf, 0.33, 1, args=args) 15 | print("Prior probability", prior_over_33) 16 | # 0.037830787030165056 17 | 18 | observed_successes = 122 19 | observed_failures = 257 20 | 21 | posterior_alpha = prior_alpha + observed_successes 22 | posterior_beta = prior_beta + observed_failures 23 | 24 | args = (posterior_alpha, posterior_beta) 25 | posterior_over_33, err2 = sp.integrate.quad(beta_pdf, 0.33, 1, args=args) 26 | print("Posterior probability", posterior_over_33) 27 | # 0.13686193416281017 28 | 29 | p = np.linspace(0, 1, 500) 30 | prior_dist = beta_pdf(p, prior_alpha, prior_beta) 31 | posterior_dist = beta_pdf(p, posterior_alpha, 
posterior_beta)

fig, ax = plt.subplots()
ax.plot(p, prior_dist, "k--", label="Prior")
ax.plot(p, posterior_dist, "k", label="Posterior")
ax.legend()
ax.set_xlabel("Success rate")
ax.set_ylabel("Density")
ax.set_title("Prior and posterior distributions for success rate")

plt.show()
--------------------------------------------------------------------------------
/Chapter 04/changing-the-random-number-generator.py:
--------------------------------------------------------------------------------
from numpy import random

# Build the generator step by step: a SeedSequence feeds an MT19937 bit
# generator, which is then wrapped in a Generator.
seed_seq = random.SeedSequence()

print(seed_seq.entropy)
# 9219863422733683567749127389169034574

bit_gen = random.MT19937(seed_seq)

rng = random.Generator(bit_gen)
--------------------------------------------------------------------------------
/Chapter 04/estimating-parameters-with-monte-carlo-simulations.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt

from numpy.random import default_rng
rng = default_rng(12345)

import pymc as pm


def underlying(x, params):
    """Evaluate the quadratic params[0]*x**2 + params[1]*x + params[2]."""
    return params[0]*x**2 + params[1]*x + params[2]


size = 100
true_params = [2, -7, 6]

# Synthetic observations: the quadratic evaluated on a grid, plus normal
# noise with scale 10.0.
x_vals = np.linspace(-5, 5, size)
raw_model = underlying(x_vals, true_params)
noise = rng.normal(loc=0.0, scale=10.0, size=size)
sample = raw_model + noise

fig1, ax1 = plt.subplots()
ax1.scatter(x_vals, sample, label="Sampled data", color="k", alpha=0.6)
ax1.plot(x_vals, raw_model, "k--", label="Underlying model")
ax1.set_title("Sampled data")
ax1.set_xlabel("x")
ax1.set_ylabel("y")

plt.show()

# Model: one length-3 Normal(mu=1, sigma=1) variable holds the three
# coefficients; the observed sample is Normal around the quadratic with
# sigma=2.
with pm.Model() as model:
    params = pm.Normal("params", mu=1, sigma=1, shape=3)
    y = underlying(x_vals, params)
    y_obs = pm.Normal("y_obs", mu=y, sigma=2, observed=sample)
    trace =
pm.sample(cores=4)


fig2, axs2 = plt.subplots(1, 3, tight_layout=True)

pm.plot_posterior(trace, ax=axs2, color="k")

plt.show()

# Point estimate: average the "params" posterior over its first two axes
# (presumably chain and draw -- confirm against the PyMC trace layout).
estimated_params = trace.posterior["params"].mean(axis=(0, 1)).to_numpy()
print("Estimated parameters", estimated_params)

estimated = underlying(x_vals, estimated_params)

fig3, ax3 = plt.subplots()
ax3.plot(x_vals, raw_model, "k", label="True model")
ax3.plot(x_vals, estimated, "k--", label="Estimated model")
ax3.set_title("Plot of true and estimated models")
ax3.set_xlabel("x")
ax3.set_ylabel("y")
ax3.legend()


plt.show()







--------------------------------------------------------------------------------
/Chapter 04/generating-normally-distributed-random-numbers.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt

from numpy.random import default_rng
rng = default_rng(12345)

mu = 5.0  # mean value
sigma = 3.0  # standard deviation
rands = rng.normal(loc=mu, scale=sigma, size=10000)

fig, ax = plt.subplots()
ax.hist(rands, bins=20, color="k", alpha=0.6)
ax.set_title("Histogram of normally distributed data")
ax.set_xlabel("Value")
ax.set_ylabel("Density")


def normal_dist_curve(x):
    """Return the normal density at x, multiplied by 10000.

    Uses the module-level ``mu`` and ``sigma``. The factor 10000 equals
    the ``size`` passed to ``rng.normal`` above.
    """
    return 10000*np.exp(-0.5*((x-mu)/sigma)**2)/(sigma*np.sqrt(2*np.pi))


x_range = np.linspace(-5, 15)
y = normal_dist_curve(x_range)
ax.plot(x_range, y, "k--")


plt.show()
--------------------------------------------------------------------------------
/Chapter 04/generating-random-data.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt

from numpy.random import default_rng
rng = default_rng(12345) # changing seed for
reproducibility 6 | 7 | random_floats = rng.random(size=(5, 5)) 8 | # array([[0.22733602, 0.31675834, 0.79736546, 0.67625467, 0.39110955], 9 | # [0.33281393, 0.59830875, 0.18673419, 0.67275604, 0.94180287], 10 | # [0.24824571, 0.94888115, 0.66723745, 0.09589794, 0.44183967], 11 | # [0.88647992, 0.6974535 , 0.32647286, 0.73392816, 0.22013496], 12 | # [0.08159457, 0.1598956 , 0.34010018, 0.46519315, 0.26642103]]) 13 | 14 | random_ints = rng.integers(1, 20, endpoint=True, size=10) 15 | # array([12, 17, 10, 4, 1, 3, 2, 2, 3, 12]) 16 | 17 | 18 | dist = rng.random(size=1000) 19 | 20 | 21 | fig, ax = plt.subplots() 22 | ax.hist(dist, color="k", alpha=0.6) 23 | ax.set_title("Histogram of random numbers") 24 | ax.set_xlabel("Value") 25 | ax.set_ylabel("Density") 26 | 27 | plt.show() -------------------------------------------------------------------------------- /Chapter 04/selecting-items-at-random.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | rng = np.random.default_rng(12345) 4 | 5 | data = np.arange(15) 6 | probabilities = np.array([0.3, 0.2, 0.1, 0.05, 0.05, 0.05, 0.05, 0.025, 7 | 0.025, 0.025, 0.025, 0.025, 0.025, 0.025, 0.025]) 8 | 9 | assert round(sum(probabilities), 10) == 1.0, "Probabilities must sum to 1" 10 | 11 | selected = rng.choice(data, p=probabilities, replace=True) 12 | # 0 13 | 14 | selected_array = rng.choice(data, p=probabilities, replace=True, size=(5, 5)) 15 | #array([[ 1, 6, 4, 1, 1], 16 | # [ 2, 0, 4, 12, 0], 17 | # [12, 4, 0, 1, 10], 18 | # [ 4, 1, 5, 0, 0], 19 | # [ 0, 1, 1, 0, 7]]) 20 | -------------------------------------------------------------------------------- /Chapter 04/working-with-random-processes.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from numpy.random import default_rng 5 | rng = default_rng(12345) 6 | 7 | rate = 4.0 8 | inter_arrival_times = 
rng.exponential(scale=1./rate, size=50)

# Arrival times are the running sum of the inter-arrival gaps.
arrivals = np.add.accumulate(inter_arrival_times)
count = np.arange(50)


fig1, ax1 = plt.subplots()

ax1.step(arrivals, count, where="post", color="k")
ax1.set_xlabel("Time")
ax1.set_ylabel("Number of arrivals")
ax1.set_title("Arrivals over time")


fig1.savefig("arrivals.png", dpi=300, bbox_inches="tight")

from scipy.special import factorial
N = np.arange(15)


def probability(events, time=1, param=rate):
    """Return the Poisson probability of ``events`` arrivals.

    Computes (param*time)**events / events! * exp(-param*time). ``param``
    defaults to the value the module-level ``rate`` had when this
    function was defined.
    """
    return ((param*time)**events/factorial(events))*np.exp(-param*time)


fig2, ax2 = plt.subplots()
ax2.plot(N, probability(N), "k", label="True distribution")
ax2.set_xlabel("Number of arrivals in 1 time unit")
ax2.set_ylabel("Probability")
ax2.set_title("Probability distribution")

# Estimate the rate as the reciprocal of the mean inter-arrival time.
estimated_scale = np.mean(inter_arrival_times)
estimated_rate = 1.0/estimated_scale

ax2.plot(N, probability(N, param=estimated_rate), "k--", label="Estimated distribution")
ax2.legend()


plt.show()
--------------------------------------------------------------------------------
/Chapter 05/coloring-a-network.py:
--------------------------------------------------------------------------------
import networkx as nx
import matplotlib.pyplot as plt

# Start from the complete graph on nodes 0-2, then add nodes 3-6 and the
# extra edges listed below.
G = nx.complete_graph(3)
G.add_nodes_from(range(3, 7))
G.add_edges_from([
    (2, 3), (2, 4), (2, 6), (0, 3), (0, 6), (1, 6),
    (1, 5), (2, 5), (4, 5)
])


fig, ax = plt.subplots()
nx.draw_circular(G, ax=ax, with_labels=True)
ax.set_title("Scheduling network")

plt.show()


coloring = nx.greedy_color(G)
print("Coloring", coloring)
# Coloring {2: 0, 0: 1, 1: 2, 5: 1, 6: 3, 3: 2, 4: 2}

# The distinct values of the node -> colour mapping.
different_colors = set(coloring.values())
print("Different colors", different_colors)
# Different colors {0, 1, 2, 3}

-------------------------------------------------------------------------------- /Chapter 05/creating-directed-and-weighted-networks.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import networkx as nx 3 | import matplotlib.pyplot as plt 4 | 5 | G = nx.DiGraph() 6 | 7 | G.add_nodes_from(range(5)) 8 | 9 | G.add_edge(0, 1, weight=1.0) 10 | G.add_weighted_edges_from([ 11 | (1, 2, 0.5), (1, 3, 2.0), (2, 3, 0.3), (3, 2, 0.3), 12 | (2, 4, 1.2), (3, 4, 0.8) 13 | ]) 14 | 15 | 16 | fig, ax = plt.subplots() 17 | pos = {0: (-1, 0), 1: (0, 0), 2: (1, 1), 3: (1, -1), 4: (2, 0)} 18 | nx.draw(G, ax=ax, pos=pos, with_labels=True) 19 | ax.set_title("Weighted, directed network") 20 | 21 | plt.show() 22 | 23 | 24 | adj_mat = nx.adjacency_matrix(G).todense() 25 | print(adj_mat) 26 | # [[0. 1. 0. 0. 0. ] 27 | # [0. 0. 0.5 2. 0. ] 28 | # [0. 0. 0. 0.3 1.2] 29 | # [0. 0. 0.3 0. 0.8] 30 | # [0. 0. 0. 0. 0. ]] 31 | -------------------------------------------------------------------------------- /Chapter 05/creating-networks-in-python.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | 3 | G = nx.Graph() 4 | 5 | G.add_node(1) 6 | G.add_node(2) 7 | 8 | G.add_nodes_from([3, 4, 5, 6]) 9 | 10 | G.add_edge(1, 2) 11 | G.add_edges_from([(2, 3), (3, 4), (3, 5), (3, 6), (4, 5), (5, 6)]) 12 | 13 | 14 | print(G.nodes) 15 | print(G.edges) -------------------------------------------------------------------------------- /Chapter 05/finding-minimum-spanning-trees-and-dominating-sets.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import matplotlib.pyplot as plt 3 | 4 | G = nx.gnm_random_graph(15, 22, seed=12345) 5 | 6 | 7 | fig, ax = plt.subplots() 8 | pos = nx.circular_layout(G) 9 | nx.draw(G, pos=pos, ax=ax, with_labels=True, style="--") 10 | ax.set_title("Network with minimum spanning tree overlaid") 11 | 12 | 
min_span_tree = nx.minimum_spanning_tree(G) 13 | print(list(min_span_tree.edges)) 14 | # [(0, 13), (0, 7), (0, 5), (1, 13), (1, 11), 15 | # (2, 5), (2, 9), (2, 8), (2, 3), (2, 12), 16 | # (3, 4), (4, 6), (5, 14), (8, 10)] 17 | 18 | nx.draw_networkx_edges(min_span_tree, pos=pos, ax=ax, width=2.) 19 | 20 | 21 | dominating_set = nx.dominating_set(G) 22 | print("Dominating set", dominating_set) 23 | # Dominating set {0, 1, 2, 4, 10, 14} 24 | 25 | 26 | plt.show() 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /Chapter 05/finding-shortest-paths.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import matplotlib.pyplot as plt 3 | 4 | from numpy.random import default_rng 5 | rng = default_rng(12345) 6 | 7 | G = nx.gnm_random_graph(10, 17, seed=12345) 8 | 9 | fig, ax = plt.subplots() 10 | nx.draw_circular(G, ax=ax, with_labels=True) 11 | ax.set_title("Random network for shortest path finding") 12 | 13 | plt.show() 14 | 15 | for u, v in G.edges: 16 | G.edges[u, v]["weight"] = rng.integers(5, 15) 17 | 18 | 19 | path = nx.shortest_path(G, 7, 9, weight="weight") 20 | print(path) 21 | # [7, 5, 2, 9] 22 | 23 | length = nx.shortest_path_length(G, 7, 9, weight="weight") 24 | print("Length", length) 25 | # Length 32 26 | 27 | -------------------------------------------------------------------------------- /Chapter 05/generating-the-adjacency-matrix-for-a-network.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import networkx as nx 3 | 4 | G = nx.dense_gnm_random_graph(5, 5, seed=12345) 5 | 6 | matrix = nx.adjacency_matrix(G).todense() 7 | print(matrix) 8 | # [[0 0 1 0 0] 9 | # [0 0 1 1 0] 10 | # [1 1 0 0 1] 11 | # [0 1 0 0 1] 12 | # [0 0 1 1 0]] 13 | 14 | paths_len_4 = np.linalg.matrix_power(matrix, 4) 15 | print(paths_len_4) 16 | # [[ 3 5 0 0 5] 17 | # [ 5 9 0 0 9] 18 | # [ 0 0 13 10 0] 19 | # [ 0 0 10 
8 0] 20 | # [ 5 9 0 0 9]] 21 | -------------------------------------------------------------------------------- /Chapter 05/getting-the-basic-characteristics-of-networks.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import matplotlib.pyplot as plt 3 | 4 | G = nx.Graph() 5 | G.add_nodes_from(range(10)) 6 | G.add_edges_from([ 7 | (0, 1), (1, 2), (2, 3), (2, 4), 8 | (2, 5), (3, 4), (4, 5), (6, 7), 9 | (6, 8), (6, 9), (7, 8), (8, 9) 10 | ]) 11 | 12 | fig, ax = plt.subplots() 13 | nx.draw_circular(G, ax=ax, with_labels=True) 14 | ax.set_title("Simple network") 15 | 16 | plt.show() 17 | 18 | print(G) 19 | # Name: 20 | # Type: Graph 21 | # Number of nodes: 10 22 | # Number of edges: 12 23 | # Average degree: 2.4000 24 | 25 | for i in [0, 2, 7]: 26 | degree = G.degree[i] 27 | print(f"Degree of {i}: {degree}") 28 | # Degree of 0: 1 29 | # Degree of 2: 4 30 | # Degree of 7: 2 31 | 32 | components = list(nx.connected_components(G)) 33 | print(components) 34 | 35 | 36 | density = nx.density(G) 37 | print("Density", density) 38 | # Density 0.26666666666666666 39 | 40 | is_planar, _ = nx.check_planarity(G) 41 | print("Is planar", is_planar) 42 | # Is planar True 43 | 44 | -------------------------------------------------------------------------------- /Chapter 05/quantifying-clustering-in-a-network.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import matplotlib.pyplot as plt 3 | 4 | G = nx.Graph() 5 | complete_part = nx.complete_graph(4) 6 | cycle_part = nx.cycle_graph(range(4, 9)) 7 | G.update(complete_part) 8 | G.update(cycle_part) 9 | G.add_edges_from([(0, 8), (3, 4)]) 10 | 11 | fig, ax = plt.subplots() 12 | nx.draw_circular(G, ax=ax, with_labels=True) 13 | ax.set_title("Network with different clustering behavior") 14 | 15 | plt.show() 16 | 17 | cluster_coeffs = nx.clustering(G) 18 | 19 | for i in [0, 2, 6]: 20 | print(f"Node {i}, 
clustering {cluster_coeffs[i]}") 21 | # Node 0, clustering 0.5 22 | # Node 2, clustering 1.0 23 | # Node 6, clustering 0 24 | 25 | av_clustering = nx.average_clustering(G) 26 | print(av_clustering) 27 | # 0.3333333333333333 28 | -------------------------------------------------------------------------------- /Chapter 05/visualising-networks.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import matplotlib.pyplot as plt 3 | 4 | # Graph from "Creating networks" recipe 5 | G = nx.Graph() 6 | 7 | G.add_nodes_from(range(1, 7)) 8 | G.add_edges_from([ 9 | (1, 2), (2, 3), (3, 4), (3, 5), 10 | (3, 6), (4, 5), (5, 6) 11 | ]) 12 | 13 | fig, ax = plt.subplots() 14 | 15 | layout = nx.shell_layout(G) 16 | 17 | nx.draw(G, ax=ax, pos=layout, with_labels=True) 18 | ax.set_title("Simple network drawing") 19 | 20 | 21 | plt.show() 22 | -------------------------------------------------------------------------------- /Chapter 06/creating-interactive-plots-with-Bokeh.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from bokeh import plotting as bk 4 | import matplotlib.pyplot as plt 5 | 6 | from numpy.random import default_rng 7 | rng = default_rng(12345) 8 | 9 | date_range = pd.date_range("2020-01-01", periods=50) 10 | data = rng.normal(0, 3, size=50).cumsum() 11 | series = pd.Series(data, index=date_range) 12 | 13 | 14 | bk.output_file("sample.html") 15 | 16 | fig = bk.figure(title="Time series data", 17 | x_axis_label="date", 18 | x_axis_type="datetime", 19 | y_axis_label="value") 20 | 21 | fig.line(date_range, series) 22 | 23 | bk.show(fig) 24 | -------------------------------------------------------------------------------- /Chapter 06/creating-series-and-dataframes.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from numpy.random import 
default_rng 5 | rng = default_rng(12345) 6 | 7 | diff_data = rng.normal(0, 1, size=100) 8 | cumulative = diff_data.cumsum() 9 | 10 | data_series = pd.Series(diff_data) 11 | print(data_series) 12 | 13 | data_frame = pd.DataFrame({ 14 | "diffs": data_series, 15 | "cumulative": cumulative 16 | }) 17 | 18 | print(data_frame) 19 | -------------------------------------------------------------------------------- /Chapter 06/getting-descriptive-statistics-from-dataframes.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | from numpy.random import default_rng 6 | rng = default_rng(12345) 7 | 8 | uniform = rng.uniform(1, 5, size=100) 9 | normal = rng.normal(1, 2.5, size=100) 10 | bimodal = np.concatenate([rng.normal(0, 1, size=50), rng.normal(6, 1, size=50)]) 11 | 12 | df = pd.DataFrame({ 13 | "uniform": uniform, 14 | "normal": normal, 15 | "bimodal": bimodal 16 | }) 17 | 18 | fig, (ax1, ax2, ax3) = plt.subplots(1, 3, tight_layout=True) 19 | 20 | df["uniform"].plot(kind="hist", title="Uniform", ax=ax1, color="k", alpha=0.6) 21 | df["normal"].plot(kind="hist", title="Normal", ax=ax2, color="k", alpha=0.6) 22 | df["bimodal"].plot(kind="hist", title="Bimodal", ax=ax3, bins=20, color="k", alpha=0.6) 23 | 24 | descriptive = df.describe() 25 | descriptive.loc["kurtosis"] = df.kurtosis() 26 | print(descriptive) 27 | 28 | uniform_mean = descriptive.loc["mean", "uniform"] 29 | normal_mean = descriptive.loc["mean", "normal"] 30 | bimodal_mean = descriptive.loc["mean", "bimodal"] 31 | 32 | ax1.vlines(uniform_mean, 0, 20, "k") 33 | ax2.vlines(normal_mean, 0, 25, "k") 34 | ax3.vlines(bimodal_mean, 0, 12,"k") 35 | 36 | plt.show() 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /Chapter 06/loading-and-storing-data-from-a-dataframe.py: 
--------------------------------------------------------------------------------
import pandas as pd
import numpy as np
from numpy.random import default_rng
rng = default_rng(12345)


diffs = rng.normal(0, 1, size=100)
cumulative = diffs.cumsum()

data_frame = pd.DataFrame({
    "diffs": diffs,
    "cumulative": cumulative
})
print(data_frame)


# Write the frame to "sample.csv" without the index column, then read the
# same file back.
data_frame.to_csv("sample.csv", index=False)


df = pd.read_csv("sample.csv", index_col=False)
print(df)
--------------------------------------------------------------------------------
/Chapter 06/manipulating-data-frames.py:
--------------------------------------------------------------------------------
import pandas as pd
import numpy as np
from numpy.random import default_rng
rng = default_rng(12345)
three = rng.uniform(-0.2, 1.0, size=100)
# Negative draws become NaN; dropna() at the end of the script is applied
# to the frame that contains them.
three[three < 0] = np.nan

data_frame = pd.DataFrame({
    "one": rng.random(size=100),
    "two": rng.normal(0, 1, size=100).cumsum(),
    "three": three
})

data_frame["four"] = data_frame["one"] > 0.5


def transform_function(row):
    """Return 0.5*row["two"] if row["four"] is truthy, else row["one"]*row["two"]."""
    if row["four"]:
        return 0.5*row["two"]
    return row["one"]*row["two"]


# axis=1 applies the function to each row.
data_frame["five"] = data_frame.apply(transform_function, axis=1)

print(data_frame)

df = data_frame.dropna()

print(df)


--------------------------------------------------------------------------------
/Chapter 06/performing-operations-on-grouped-data-in-a-dataframe.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

rng = np.random.default_rng(12345)
from matplotlib.rcsetup import cycler
plt.rc("axes", prop_cycle=cycler(c=["k"]*3, ls=["-", "--", "-."]))

labels1 = rng.choice(["A", "B", "C"], size=50)
labels2 = rng.choice([1, 2], size=50)
11 | data = rng.normal(0.0, 2.0, size=50) 12 | 13 | df = pd.DataFrame({"label1": labels1, "label2": labels2, "data": data}) 14 | 15 | df["first_group"] = df.groupby("label1")["data"].cumsum() 16 | print(df.head()) 17 | 18 | 19 | grouped = df.groupby(["label1", "label2"]) 20 | df["second_group"] = grouped["data"].transform( 21 | lambda d: d.rolling(2, min_periods=1).mean()) 22 | 23 | print(df.head()) 24 | 25 | print(df[df["label1"] == "C"].head()) 26 | 27 | 28 | fig, ax = plt.subplots() 29 | df.groupby("label1")["first_group"].plot(ax=ax) 30 | ax.set(title="Grouped data cumulative sums", xlabel="Index", ylabel="value") 31 | ax.legend() 32 | 33 | 34 | plt.show() 35 | -------------------------------------------------------------------------------- /Chapter 06/plotting-data-from-a-DataFrame.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from numpy.random import default_rng 5 | rng = default_rng(12345) 6 | 7 | diffs = rng.standard_normal(size=100) 8 | walk = diffs.cumsum() 9 | df = pd.DataFrame({ 10 | "diffs": diffs, 11 | "walk": walk 12 | }) 13 | 14 | fig, (ax1, ax2) = plt.subplots(1, 2, tight_layout=True) 15 | 16 | df["walk"].plot(ax=ax1, title="Random walk", color="k") 17 | ax1.set_xlabel("Index") 18 | ax1.set_ylabel("Value") 19 | 20 | df["diffs"].plot(kind="hist", ax=ax2, title="Histogram of diffs", color="k", alpha=0.6) 21 | ax2.set_xlabel("Difference") 22 | 23 | 24 | plt.show() 25 | -------------------------------------------------------------------------------- /Chapter 06/testing-hypotheses-for-non-parametric-data.py: -------------------------------------------------------------------------------- 1 | from scipy import stats 2 | from numpy.random import default_rng 3 | rng = default_rng(12345) 4 | 5 | 6 | sample_A = rng.uniform(2.5, 3.5, size=25) 7 | sample_B = rng.uniform(3.0, 4.4, size=25) 8 | sample_C = rng.uniform(3.1, 4.5, size=25) 9 
| 10 | significance = 0.05 11 | 12 | statistic, p_value = stats.kruskal(sample_A, sample_B, sample_C) 13 | print(f"Statistic: {statistic}, p value: {p_value}") 14 | # Statistic: 40.22214736842102, p value: 1.8444703308682906e-09 15 | 16 | if p_value <= significance: 17 | print("There are differences between population medians") 18 | else: 19 | print("Accept H0: all medians equal") 20 | # There are differences between population medians 21 | 22 | _, p_A_B = stats.ranksums(sample_A, sample_B) 23 | _, p_A_C = stats.ranksums(sample_A, sample_C) 24 | _, p_B_C = stats.ranksums(sample_B, sample_C) 25 | 26 | if p_A_B <= significance: 27 | print("Significant differences between A and B, p value", p_A_B) 28 | # Significant differences between A and B, p value 1.0035366080480683e-07 29 | 30 | if p_A_C <= significance: 31 | print("Significant differences between A and C, p value", p_A_C) 32 | # Significant differences between A and C, p value 2.428534673701913e-08 33 | 34 | if p_B_C <= significance: 35 | print("Significant differences between B and C, p value", p_B_C) 36 | else: 37 | print("No significant differences between B and C, p value", p_B_C) 38 | # No significant differences between B and C, p value 0.3271631660572756 39 | -------------------------------------------------------------------------------- /Chapter 06/testing-hypotheses-using-ANOVA.py: -------------------------------------------------------------------------------- 1 | from scipy import stats 2 | from numpy.random import default_rng 3 | rng = default_rng(12345) 4 | 5 | current = rng.normal(4.0, 2.0, size=40) 6 | process_a = rng.normal(6.2, 2.0, size=25) 7 | process_b = rng.normal(4.5, 2.0, size=64) 8 | 9 | significance = 0.05 10 | 11 | F_stat, p_value = stats.f_oneway(current, process_a, process_b) 12 | 13 | print(f"F stat: {F_stat}, p value: {p_value}") 14 | # F stat: 9.949052026027028, p value: 9.732322721019206e-05 15 | 16 | if p_value <= significance: 17 | print("Reject H0: there is a difference 
between means") 18 | else: 19 | print("Accept H0: all means equal") 20 | # Reject H0: there is a difference between means 21 | -------------------------------------------------------------------------------- /Chapter 06/testing-hypotheses-using-t-tests.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from scipy import stats 3 | 4 | sample = pd.Series([ 5 | 2.4, 2.4, 2.9, 2.6, 1.8, 2.7, 2.6, 2.4, 2.8, 2.4, 2.4, 6 | 2.4, 2.7, 2.7, 2.3, 2.4, 2.4, 3.2, 2.2, 2.5, 2.1, 1.8, 7 | 2.9, 2.5, 2.5, 3.2, 2. , 2.3, 3. , 1.5, 3.1, 2.5, 3.1, 8 | 2.4, 3. , 2.5, 2.7, 2.1, 2.3, 2.2, 2.5, 2.6, 2.5, 2.8, 9 | 2.5, 2.9, 2.1, 2.8, 2.1, 2.3 10 | ]) 11 | 12 | mu0 = 2.0 13 | significance = 0.05 14 | 15 | t_statistic, p_value = stats.ttest_1samp(sample, mu0) 16 | 17 | print(f"t stat: {t_statistic}, p value: {p_value}") 18 | # t stat: 9.752368720068665, p value: 4.596949515944238e-13 19 | 20 | 21 | if p_value <= significance: 22 | print("Reject H0 in favour of H1: mu != 2.0") 23 | else: 24 | print("Accept H0: mu = 2.0") 25 | # Reject H0 in favour of H1: mu != 2.0 26 | -------------------------------------------------------------------------------- /Chapter 06/understanding-a-population-using-sampling.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import math 3 | from scipy import stats 4 | import matplotlib.pyplot as plt 5 | 6 | sample_data = pd.Series( 7 | [172.3, 171.3, 164.7, 162.9, 172.5, 176.3, 174.8, 171.9, 8 | 176.8, 167.8, 164.5, 179.7, 157.8, 170.6, 189.9, 185. , 9 | 172.7, 165.5, 174.5, 171.5] 10 | ) 11 | 12 | sample_mean = sample_data.mean() 13 | sample_std = sample_data.std() 14 | 15 | print(f"Mean {sample_mean}, st. dev {sample_std}") 16 | # Mean 172.15, st. 
dev 7.473778724383846 17 | 18 | N = sample_data.count() 19 | std_err = sample_std/math.sqrt(N) 20 | 21 | cv_95, cv_99 = stats.t.ppf([0.975, 0.995], df=N-1) 22 | 23 | pm_95 = cv_95*std_err 24 | conf_interval_95 = [sample_mean - pm_95, sample_mean + pm_95] 25 | pm_99 = cv_99*std_err 26 | conf_interval_99 = [sample_mean - pm_99, sample_mean + pm_99] 27 | 28 | print("95% confidence", conf_interval_95) 29 | # 95% confidence [168.65216388659374, 175.64783611340627] 30 | print("99% confidence", conf_interval_99) 31 | # 99% confidence [167.36884119608774, 176.93115880391227] 32 | 33 | -------------------------------------------------------------------------------- /Chapter 07/classifying-using-logarithmic-regression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | from numpy.random import default_rng 5 | rng = default_rng(12345) 6 | from sklearn.linear_model import LogisticRegression 7 | from sklearn.metrics import classification_report, roc_curve 8 | 9 | 10 | df = pd.DataFrame({ 11 | "var1": np.concatenate([rng.normal(3.0, 1.5, size=50), rng.normal(-4.0, 2.0, size=50)]), 12 | "var2": rng.uniform(size=100), 13 | "var3": np.concatenate([rng.normal(-2.0, 2.0, size=50), rng.normal(1.5, 0.8, size=50)]) 14 | }) 15 | 16 | 17 | score = 4.0 + df["var1"] - df["var3"] 18 | Y = score >= 0 19 | 20 | fig1, ax1 = plt.subplots() 21 | ax1.plot(df.loc[Y, "var1"], df.loc[Y, "var3"], "ko", label="True data") 22 | ax1.plot(df.loc[~Y, "var1"], df.loc[~Y, "var3"], "kx", label="False data") 23 | ax1.legend() 24 | ax1.set_xlabel("var1") 25 | ax1.set_ylabel("var3") 26 | ax1.set_title("Scatter plot of var3 against var1") 27 | 28 | plt.show() 29 | 30 | model = LogisticRegression() 31 | model.fit(df, Y) 32 | 33 | 34 | test_df = pd.DataFrame({ 35 | "var1": np.concatenate([rng.normal(3.0, 1.5, size=25), rng.normal(-4.0, 2.0, size=25)]), 36 | "var2": rng.uniform(size=50), 37 | 
"var3": np.concatenate([rng.normal(-2.0, 2.0, size=25), rng.normal(1.5, 0.8, size=25)]) 38 | }) 39 | 40 | test_scores = 4.0 + test_df["var1"] - test_df["var3"] 41 | test_Y = test_scores >= 0 42 | 43 | test_predicts = model.predict(test_df) 44 | 45 | plt.show() 46 | 47 | print(classification_report(test_Y, test_predicts)) 48 | -------------------------------------------------------------------------------- /Chapter 07/forecasting-from-time-series-data-using-arima.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | import statsmodels.api as sm 5 | 6 | from tsdata import generate_sample_data 7 | 8 | from matplotlib.rcsetup import cycler 9 | plt.rc("axes", prop_cycle=cycler(c="k")) 10 | 11 | sample_ts, test_ts = generate_sample_data(trend=0.2, undiff=True) 12 | 13 | ts_fig, ts_ax = plt.subplots(tight_layout=True) 14 | sample_ts.plot(ax=ts_ax, label="Observed") 15 | ts_ax.set_title("Training time series data") 16 | ts_ax.set_xlabel("Date") 17 | ts_ax.set_ylabel("Value") 18 | 19 | diffs = sample_ts.diff().dropna() 20 | 21 | ap_fig, (acf_ax, pacf_ax) = plt.subplots(2, 1, tight_layout=True) 22 | sm.graphics.tsa.plot_acf(diffs, ax=acf_ax) 23 | sm.graphics.tsa.plot_pacf(diffs, ax=pacf_ax) 24 | acf_ax.set_ylabel("Value") 25 | acf_ax.set_xlabel("Lag") 26 | pacf_ax.set_xlabel("Lag") 27 | pacf_ax.set_ylabel("Value") 28 | 29 | 30 | model = sm.tsa.ARIMA(sample_ts, order=(1,1,1)) 31 | fitted = model.fit() 32 | print(fitted.summary()) 33 | 34 | forecast = fitted.get_forecast(steps=50).summary_frame() 35 | print(forecast) 36 | 37 | forecast["mean"].plot(ax=ts_ax, label="Forecast", ls="--") 38 | ts_ax.fill_between(forecast.index, forecast["mean_ci_lower"], 39 | forecast["mean_ci_upper"], alpha=0.4) 40 | 41 | 42 | test_ts.plot(ax=ts_ax, label="Actual", ls="-.") 43 | ts_ax.legend() 44 | 45 | plt.show() 46 | 
-------------------------------------------------------------------------------- /Chapter 07/forecasting-seasonal-data-with-arima.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | 5 | import statsmodels.api as sm 6 | 7 | from tsdata import generate_sample_data 8 | 9 | from matplotlib.rcsetup import cycler 10 | plt.rc("axes", prop_cycle=cycler(c="k")) 11 | 12 | sample_ts, test_ts = generate_sample_data(undiff=True, seasonal=True) 13 | 14 | ts_fig, ts_ax = plt.subplots(tight_layout=True) 15 | sample_ts.plot(ax=ts_ax, title="Time series", label="Observed") 16 | ts_ax.set_xlabel("Date") 17 | ts_ax.set_ylabel("Value") 18 | 19 | ap_fig, (acf_ax, pacf_ax) = plt.subplots(2, 1, tight_layout=True) 20 | sm.graphics.tsa.plot_acf(sample_ts, ax=acf_ax) 21 | sm.graphics.tsa.plot_pacf(sample_ts, ax=pacf_ax) 22 | acf_ax.set_xlabel("Lag") 23 | pacf_ax.set_xlabel("Lag") 24 | acf_ax.set_ylabel("Value") 25 | pacf_ax.set_ylabel("Value") 26 | 27 | diffs = sample_ts.diff().dropna() 28 | dap_fig, (dacf_ax, dpacf_ax) = plt.subplots(2, 1, tight_layout=True) 29 | sm.graphics.tsa.plot_acf(diffs, ax=dacf_ax, title="Differenced ACF") 30 | sm.graphics.tsa.plot_pacf(diffs, ax=dpacf_ax, title="Differenced PACF") 31 | dacf_ax.set_xlabel("Lag") 32 | dpacf_ax.set_xlabel("Lag") 33 | dacf_ax.set_ylabel("Value") 34 | dpacf_ax.set_ylabel("Value") 35 | 36 | model = sm.tsa.SARIMAX(sample_ts, order=(1, 1, 1), seasonal_order=(1, 0, 0, 7)) 37 | fitted_seasonal = model.fit() 38 | print(fitted_seasonal.summary()) 39 | 40 | forecast_result = fitted_seasonal.get_forecast(steps=50) 41 | forecast_index = pd.date_range("2021-01-01", periods=50) 42 | forecast = forecast_result.predicted_mean 43 | 44 | forecast.plot(ax=ts_ax, label="Forecasts", ls="--") 45 | conf = forecast_result.conf_int() 46 | ts_ax.fill_between(forecast_index, conf["lower y"], conf["upper y"], alpha=0.4) 47 | 
test_ts.plot(ax=ts_ax, label="Actual future", ls="-.") 48 | ts_ax.legend() 49 | 50 | plt.show() 51 | 52 | -------------------------------------------------------------------------------- /Chapter 07/modelling-time-series-data-with-arma.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import statsmodels.api as sm 3 | 4 | from matplotlib.rcsetup import cycler 5 | plt.rc("axes", prop_cycle=cycler(c="k")) 6 | 7 | from tsdata import generate_sample_data 8 | 9 | sample_ts, _ = generate_sample_data() 10 | 11 | ts_fig, ts_ax = plt.subplots(tight_layout=True) 12 | sample_ts.plot(ax=ts_ax, label="Observed", ls="--", alpha=0.4) 13 | ts_ax.set_title("Time series data") 14 | ts_ax.set_xlabel("Date") 15 | ts_ax.set_ylabel("Value") 16 | 17 | adf_results = sm.tsa.adfuller(sample_ts) 18 | adf_pvalue = adf_results[1] 19 | print("Augmented Dickey-Fuller test:\nP-value:", adf_pvalue) 20 | 21 | ap_fig, (acf_ax, pacf_ax) = plt.subplots(2, 1, tight_layout=True) 22 | sm.graphics.tsa.plot_acf(sample_ts, ax=acf_ax, title="Observed autocorrelation") 23 | sm.graphics.tsa.plot_pacf(sample_ts, ax=pacf_ax, title="Observed partial autocorrelation") 24 | acf_ax.set_xlabel("Lags") 25 | pacf_ax.set_xlabel("Lags") 26 | pacf_ax.set_ylabel("Value") 27 | acf_ax.set_ylabel("Value") 28 | 29 | arma_model = sm.tsa.ARIMA(sample_ts, order=(1, 0, 1)) 30 | 31 | arma_results = arma_model.fit() 32 | print(arma_results.summary()) 33 | 34 | residuals = arma_results.resid 35 | rap_fig, (racf_ax, rpacf_ax) = plt.subplots(2, 1, tight_layout=True) 36 | sm.graphics.tsa.plot_acf(residuals, ax=racf_ax, title="Residual autocorrelation") 37 | sm.graphics.tsa.plot_pacf(residuals, ax=rpacf_ax, title="Residual partial autocorrelation") 38 | racf_ax.set_xlabel("Lags") 39 | rpacf_ax.set_xlabel("Lags") 40 | rpacf_ax.set_ylabel("Value") 41 | racf_ax.set_ylabel("Value") 42 | 43 | 44 | fitted = arma_results.fittedvalues 45 | fitted.plot(ax=ts_ax, 
def _get_n(iterable, n):
    """Return the first ``n`` items of ``iterable`` as a list."""
    return list(itertools.islice(iterable, n))


def _seed_history(order, start):
    """Build the oldest-first AR history: ``order - 1`` zeros, then ``start``."""
    # With order == 0 the deque has maxlen 0 and silently discards ``start``.
    return deque([0.0]*(order - 1) + [start], maxlen=order)


def generate_ma(*coeffs, std=1.0, seed=12345):
    """Yield an endless moving-average process.

    Each value is the current noise term plus a weighted sum of the previous
    ``len(coeffs)`` noise terms; ``coeffs[0]`` weights the most recent one.
    Noise is drawn from a normal distribution with mean 0 and standard
    deviation ``std`` by a generator seeded with ``seed``.
    """
    rng = default_rng(seed=seed)
    order = len(coeffs)
    past_errors = deque([0.0]*order, maxlen=order)

    # The deque is stored oldest-first, so reverse the weights to line up.
    weights = tuple(reversed(coeffs))

    while True:
        err = rng.normal(0, std)
        yield err + sum(w*e for w, e in zip(weights, past_errors))
        past_errors.append(err)


def generate_ar(*coeffs, const=0.0, start=0.0):
    """Yield an endless, noise-free autoregressive sequence.

    The history is seeded with zeros followed by ``start``; each new value is
    ``const`` plus the weighted sum of the previous ``len(coeffs)`` values,
    with ``coeffs[0]`` weighting the most recent one. ``start`` itself is not
    yielded.
    """
    past_terms = _seed_history(len(coeffs), start)
    weights = tuple(reversed(coeffs))

    while True:
        curr = const + sum(w*t for w, t in zip(weights, past_terms))
        yield curr
        past_terms.append(curr)


def generate_arma(ar_coeffs=(0.9,), const=0.0, start=0.0,
                  ma_coeffs=(), noise_std=1.0, seed=None):
    """Yield an endless ARMA process, beginning with ``start``.

    After ``start`` every value is ``const`` plus the next term of
    ``generate_ma(*ma_coeffs, std=noise_std, seed=seed)`` plus the weighted sum
    of the previous ``len(ar_coeffs)`` values (``ar_coeffs[0]`` weights the
    most recent one). ``seed=None`` gives a non-reproducible noise stream.
    """
    past_terms = _seed_history(len(ar_coeffs), start)
    weights = tuple(reversed(ar_coeffs))

    yield start

    for err in generate_ma(*ma_coeffs, std=noise_std, seed=seed):
        curr = const + err + sum(w*t for w, t in zip(weights, past_terms))
        yield curr
        past_terms.append(curr)


def undifference(iterable):
    """Yield running totals of ``iterable`` (the inverse of differencing).

    The first item is the initial level. Each later item is added to the
    total only after the current total has been yielded, so ``k`` input items
    produce ``k - 1`` outputs. Any iterable is accepted; empty input yields
    nothing.
    """
    iterator = iter(iterable)  # allow lists and other non-iterator iterables
    try:
        tot = next(iterator)  # first term
    except StopIteration:
        # A bare StopIteration inside a generator becomes RuntimeError (PEP 479).
        return
    for cur in iterator:
        yield tot
        tot += cur


def add_season_ar(iterable, period=7, coeffs=(0.7,)):
    """Add a seasonal autoregressive component to a stream of values.

    Each output is the incoming item plus a weighted sum of earlier outputs,
    where ``coeffs[k]`` weights the output produced ``period + k`` steps
    earlier. Outputs before the start of the stream count as zero.

    Raises:
        ValueError: if ``period`` is smaller than 1 (raised on first use,
            because this is a generator).
    """
    if period < 1:
        raise ValueError("period must be at least 1")

    n = len(coeffs)
    weights = tuple(reversed(coeffs))
    history_len = n + period - 1
    # Oldest-first window: index 0 is the output from ``history_len`` steps ago.
    past_vals = deque([0.0]*history_len, maxlen=history_len)

    for item in iterable:
        new = item + sum(weights[i]*past_vals[i] for i in range(n))
        yield new
        past_vals.append(new)


def generate_sample_data(train=366, test=50, trend=0.0, undiff=False, seasonal=False):
    """Generate a reproducible sample series split into train and test parts.

    The base process is ARMA with AR coefficient 0.8, MA coefficient -0.5,
    constant ``trend`` and seed 12345. ``seasonal`` passes it through
    ``add_season_ar`` and ``undiff`` then passes it through ``undifference``.

    Returns:
        A ``(train_series, test_series)`` pair of pandas Series with ``train``
        and ``test`` entries respectively, indexed by consecutive daily dates
        starting at 2020-01-01.
    """
    gen = generate_arma(seed=12345, const=trend, ar_coeffs=(0.8,), ma_coeffs=(-0.5,))

    if seasonal:
        gen = add_season_ar(gen)

    if undiff:
        gen = undifference(gen)

    indices = pd.date_range("2020-01-01", periods=train+test)
    data = _get_n(gen, train+test)
    # Split at ``train`` rather than ``-test``: ``data[:-0]`` is empty, so the
    # negative-index form returned swapped results when ``test == 0``.
    return (pd.Series(data[:train], index=indices[:train]),
            pd.Series(data[train:], index=indices[train:]))
pred_x).fit() 29 | print(model2.summary()) 30 | 31 | model_x = sm.add_constant(np.linspace(0, 5)) 32 | 33 | 34 | model_y1 = model1.predict(model_x) 35 | model_y2 = model2.predict(model_x) 36 | 37 | 38 | ax.plot(model_x[:, 1], model_y1, 'k') 39 | ax.plot(model_x[:, 1], model_y2, 'k--') 40 | 41 | 42 | plt.show() 43 | -------------------------------------------------------------------------------- /Chapter 07/using-multilinear-regression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import statsmodels.api as sm 4 | import matplotlib.pyplot as plt 5 | 6 | from numpy.random import default_rng 7 | rng = default_rng(12345) 8 | 9 | 10 | p_vars = pd.DataFrame({ 11 | "const": np.ones((100,)), 12 | "X1": rng.uniform(0, 15, size=100), 13 | "X2": rng.uniform(0, 25, size=100), 14 | "X3": rng.uniform(5, 25, size=100) 15 | }) 16 | 17 | residuals = rng.normal(0.0, 12.0, size=100) 18 | Y = -10.0 + 5.0*p_vars["X1"] - 2.0*p_vars["X2"] + residuals 19 | 20 | fig, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True, tight_layout=True) 21 | ax1.scatter(p_vars["X1"], Y, c="k") 22 | ax2.scatter(p_vars["X2"], Y, c="k") 23 | ax3.scatter(p_vars["X3"], Y, c="k") 24 | 25 | ax1.set_title("Y against X1") 26 | ax1.set_xlabel("X1") 27 | ax1.set_ylabel("Y") 28 | ax2.set_title("Y against X2") 29 | ax2.set_xlabel("X2") 30 | ax3.set_title("Y against X3") 31 | ax3.set_xlabel("X3") 32 | 33 | plt.show() 34 | 35 | model = sm.OLS(Y, p_vars).fit() 36 | print(model.summary()) 37 | 38 | second_model = sm.OLS(Y, p_vars.loc[:, "const":"X2"]).fit() 39 | print(second_model.summary()) 40 | -------------------------------------------------------------------------------- /Chapter 07/using-prophet-to-model-time-series.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | from prophet import Prophet 4 | 5 | from tsdata import 
def make_noisy(signal):
    """Return ``signal`` with independent Gaussian noise added to every entry.

    NOTE: despite its name, ``noise_variance`` is passed as the ``scale``
    argument of ``Generator.normal``, i.e. it acts as a standard deviation.
    """
    noise = rng.normal(0.0, noise_variance, size=signal.shape)
    return signal + noise


def signal_a(count):
    """Sample signal a at ``count`` random, increasing parameter values.

    The parameter values are the cumulative sum of exponential gaps with mean
    ``upper_limit/count``. Returns ``(t, path)`` where ``path`` has the two
    columns ``t/(1+t)**2`` and ``1/(1+t)**2``.
    """
    gaps = rng.exponential(upper_limit/count, size=count)
    t = gaps.cumsum()
    x_component = t/(1.+t)**2
    y_component = 1./(1.+t)**2
    return t, np.column_stack([x_component, y_component])


def signal_b(count):
    """Sample signal b at ``count`` random, increasing parameter values.

    The parameter values are drawn exactly as in ``signal_a``. Returns
    ``(t, path)`` where ``path`` has the two columns ``cos(t)`` and ``sin(t)``.
    """
    gaps = rng.exponential(upper_limit/count, size=count)
    t = gaps.cumsum()
    x_component = np.cos(t)
    y_component = np.sin(t)
    return t, np.column_stack([x_component, y_component])
ax11.plot(params_a, true_signal_a[:, 1], "k--") 33 | ax11.legend(["x", "y"]) 34 | ax12.plot(params_b, true_signal_b[:, 0], "k") 35 | ax12.plot(params_b, true_signal_b[:, 1], "k--") 36 | ax12.legend(["x", "y"]) 37 | ax21.plot(true_signal_a[:, 0], true_signal_a[:, 1], "k") 38 | ax22.plot(true_signal_b[:, 0], true_signal_b[:, 1], "k") 39 | ax11.set_title("Components of signal a") 40 | ax11.set_xlabel("Parameter") 41 | ax11.set_ylabel("Value") 42 | ax12.set_title("Components of signal b") 43 | ax12.set_xlabel("Parameter") 44 | ax12.set_ylabel("Value") 45 | ax21.set_title("Signal a") 46 | ax21.set_xlabel("x") 47 | ax21.set_ylabel("y") 48 | ax22.set_title("Signal b") 49 | ax22.set_xlabel("x") 50 | ax22.set_ylabel("y") 51 | 52 | plt.show() 53 | 54 | signature_a = esig.stream2sig(true_signal_a, 2) 55 | signature_b = esig.stream2sig(true_signal_b, 2) 56 | print(signature_a, signature_b, sep="\n") 57 | 58 | 59 | sigs_a = np.vstack([esig.stream2sig(make_noisy(signal_a(rng.integers(50, 100))[1]), depth) for _ in range(50)]) 60 | sigs_b = np.vstack([esig.stream2sig(make_noisy(signal_b(rng.integers(50, 100))[1]), depth) for _ in range(50)]) 61 | 62 | expected_sig_a = np.mean(sigs_a, axis=0) 63 | expected_sig_b = np.mean(sigs_b, axis=0) 64 | print(expected_sig_a, expected_sig_b, sep="\n") 65 | 66 | diff = np.abs(expected_sig_a - expected_sig_b) 67 | 68 | print("Signal a", np.max(np.abs(expected_sig_a - signature_a))) 69 | print("Signal b", np.max(np.abs(expected_sig_b - signature_b))) 70 | print("Signal a vs signal b", np.max(np.abs(expected_sig_a - expected_sig_b))) 71 | 72 | -------------------------------------------------------------------------------- /Chapter 08/computing-convex-hulls.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib as mpl 3 | import matplotlib.pyplot as plt 4 | 5 | from numpy.random import default_rng 6 | rng = default_rng(12345) 7 | 8 | from shapely.geometry import MultiPoint 
class Bezier:
    """Bezier curve defined by a sequence of control points.

    The attribute names are kept for backward compatibility even though they
    are easy to misread: ``nodes`` holds ``len(points) - 1`` (the polynomial
    degree of the curve) and ``degree`` holds the number of coordinates in
    each control point. ``coeffs`` holds each control point as a column
    vector, pre-multiplied by its binomial coefficient.
    """

    def __init__(self, *points):
        """Store the control points and precompute the weighted columns.

        Args:
            *points: control points, each an array-like of coordinates with
                the same length.

        Raises:
            ValueError: if no control point is given.
        """
        if not points:
            raise ValueError("Bezier requires at least one control point")

        self.points = points
        self.nodes = n = len(points) - 1
        # Accept lists/tuples as well as arrays.
        control = [np.asarray(p) for p in points]
        self.degree = l = control[0].size

        self.coeffs = [binom(n, i)*p.reshape((l, 1)) for i, p in enumerate(control)]

    def __call__(self, t):
        """Evaluate the curve at parameter value(s) ``t``.

        Args:
            t: scalar or array-like of parameter values.

        Returns:
            Array of shape ``(degree, number_of_t_values)``; column ``k`` is
            the curve point for the ``k``-th parameter value.
        """
        n = self.nodes
        # Row vector, so each (l, 1) coefficient column broadcasts against it.
        t = np.asarray(t).reshape((1, -1))
        # ``@`` and ``*`` share precedence; the parentheses spell out the
        # left-to-right grouping: (control column @ t**i) * (1-t)**(n-i).
        vals = [(c @ (t**i))*(1 - t)**(n - i) for i, c in enumerate(self.coeffs)]
        return np.sum(vals, axis=0)
6 | 7 | edges = canny(image, sigma=0.5) 8 | 9 | fig, ax = plt.subplots() 10 | ax.imshow(edges, cmap="gray_r") 11 | ax.set_axis_off() 12 | 13 | 14 | plt.show() -------------------------------------------------------------------------------- /Chapter 08/finding-interior-points.py: -------------------------------------------------------------------------------- 1 | import matplotlib as mpl 2 | import matplotlib.pyplot as plt 3 | 4 | from shapely.geometry import Polygon, Point 5 | 6 | polygon = Polygon( 7 | [(0, 2), (-1, 1), (-0.5, -1), (0.5, -1), (1, 1)], 8 | ) 9 | 10 | fig, ax = plt.subplots() 11 | poly_patch = mpl.patches.Polygon(polygon.exterior.coords, ec=(0,0,0,1), fc=(0.5,0.5,0.5,0.4)) 12 | ax.add_patch(poly_patch) 13 | ax.set(xlim=(-1.05, 1.05), ylim=(-1.05, 2.05)) 14 | ax.set_axis_off() 15 | 16 | p1 = Point(0.0, 0.0) 17 | p2 = Point(-1.0, -0.75) 18 | 19 | ax.plot(0.0, 0.0, "k*") 20 | ax.annotate("p1", (0.0, 0.0), (0.05, 0.0)) 21 | ax.plot(-0.8, -0.75, "k*") 22 | ax.annotate("p2", (-0.8, -0.75), (-0.8 + 0.05, -0.75)) 23 | 24 | plt.show() 25 | 26 | print("p1 inside polygon?", polygon.contains(p1)) 27 | print("p2 inside polygon?", polygon.contains(p2)) 28 | -------------------------------------------------------------------------------- /Chapter 08/mandelbrot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Applying-Math-with-Python-2nd-Edition/76a99ce637d8f97390682ff72e64b8e7146280f2/Chapter 08/mandelbrot.png -------------------------------------------------------------------------------- /Chapter 08/triangulating-polygonal-regions.py: -------------------------------------------------------------------------------- 1 | import matplotlib as mpl 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | from shapely.geometry import Polygon 6 | from shapely.ops import triangulate 7 | 8 | polygon = Polygon( 9 | [(2.0, 1.0), (2.0, 1.5), (-4.0, 1.5), (-4.0, 0.5), (-3.0, 
-1.5), 10 | (0.0, -1.5), (1.0, -2.0), (1.0, -0.5), (0.0, -1.0), (-0.5, -1.0), 11 | (-0.5, 1.0)], 12 | holes=[np.array([[-1.5, -0.5], [-1.5, 0.5], [-2.5, 0.5], [-2.5, -0.5]])] 13 | ) 14 | 15 | fig, ax = plt.subplots() 16 | plt_poly = mpl.patches.Polygon(polygon.exterior.coords, ec=(0,0,0,1), fc=(0.5,0.5,0.5,0.4), zorder=0) 17 | ax.add_patch(plt_poly) 18 | plt_hole = mpl.patches.Polygon(polygon.interiors[0].coords, ec="k", fc="w") 19 | ax.add_patch(plt_hole) 20 | ax.set(xlim=(-4.05, 2.05), ylim=(-2.05, 1.55)) 21 | ax.set_axis_off() 22 | 23 | 24 | triangles = triangulate(polygon) 25 | 26 | filtered = filter(lambda p: polygon.contains(p), triangles) 27 | 28 | patches = map(lambda p: mpl.patches.Polygon(p.exterior.coords), filtered) 29 | col = mpl.collections.PatchCollection(patches, fc="none", ec="k") 30 | 31 | ax.add_collection(col) 32 | 33 | plt.show() 34 | -------------------------------------------------------------------------------- /Chapter 08/visualizing-two-dimensional-geometric-figures.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from matplotlib.patches import Circle 4 | from matplotlib.collections import PatchCollection 5 | 6 | data = np.loadtxt("swisscheese-grid-10411.csv") 7 | 8 | fig, ax = plt.subplots() 9 | 10 | outer = Circle((0.0, 0.0), 1.0, zorder=0, fc="k") 11 | ax.add_patch(outer) 12 | 13 | 14 | col = PatchCollection( 15 | (Circle((x, y), r) for x, y, r in data), 16 | facecolor="white", zorder=1, linewidth=0.2, 17 | ls="-", ec="k" 18 | ) 19 | ax.add_collection(col) 20 | 21 | ax.set_xlim((-1.1, 1.1)) 22 | ax.set_ylim((-1.1, 1.1)) 23 | ax.set_axis_off() 24 | 25 | 26 | plt.show() -------------------------------------------------------------------------------- /Chapter 09/analyzing-simple-two-player-games.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import nashpy as nash 3 | 4 | you = 
def func(x):
    """Objective function: squared distance of ``(x[0], x[1])`` from
    ``(0.5, -0.5)``, multiplied by ``cos(0.5*x[0]*x[1])``.

    ``x`` may hold scalars or equally shaped arrays in its first two entries.
    """
    x0, x1 = x[0], x[1]
    squared_distance = (x0 - 0.5)**2 + (x1 + 0.5)**2
    return squared_distance*np.cos(0.5*x0*x1)
def func(x):
    """Evaluate the linear objective with cost vector ``c`` at ``x``.

    The last axis of the module-level ``c`` is contracted with the first axis
    of ``x``, so ``x`` may be a single point or a stack of coordinate grids.
    """
    objective = np.tensordot(c, x, axes=1)
    return objective
def descend(func, x0, grad, bounds, tol=1e-8, max_iter=100, step=0.2):
    """Run fixed-step gradient descent, yielding every iterate.

    Args:
        func: objective function, called on each new iterate.
        x0: starting point (any array-like).
        grad: function returning the gradient of ``func`` at a point.
        bounds: accepted for interface compatibility; this implementation
            does not use it, so iterates are not constrained.
        tol: stop once two consecutive iterates are closer than this
            (Euclidean norm).
        max_iter: maximum number of iterates to produce.
        step: step length multiplying the negative gradient.

    Yields:
        ``(i, xn, func(xn), grad(xn))`` for each new iterate ``xn``.
    """
    # Convert up front so list/tuple starting points work in the arithmetic.
    xn = np.asarray(x0, dtype=float)
    previous = None
    grad_xn = np.asarray(grad(xn))

    for i in range(max_iter):
        # No previous iterate exists on the first pass, so never stop there.
        if previous is not None and np.linalg.norm(xn - previous) < tol:
            break

        previous = xn
        xn = xn - step*grad_xn
        grad_xn = np.asarray(grad(xn))
        yield i, xn, func(xn), grad_xn


def func(x):
    """Objective function: squared distance of ``(x[0], x[1])`` from
    ``(0.5, -0.5)``, multiplied by ``cos(0.5*x[0]*x[1])``.
    """
    return ((x[0] - 0.5)**2 + (x[1] + 0.5)**2)*np.cos(0.5*x[0]*x[1])


def grad(x):
    """Return the analytic gradient of ``func`` at ``x`` as a length-2 array."""
    # c1 is the expanded form of (x[0] - 0.5)**2 + (x[1] + 0.5)**2.
    c1 = x[0]**2 - x[0] + x[1]**2 + x[1] + 0.5
    cos_t = np.cos(0.5*x[0]*x[1])
    sin_t = np.sin(0.5*x[0]*x[1])
    # Product rule: d(c1)/dx_i * cos_t  +  c1 * d(cos_t)/dx_i.
    return np.array([
        (2*x[0]-1)*cos_t - 0.5*x[1]*c1*sin_t,
        (2*x[1]+1)*cos_t - 0.5*x[0]*c1*sin_t
    ])
def func(x, a, b, c):
    """Quadratic model ``a*x**2 + b*x + c`` evaluated at ``x``.

    ``x`` may be a scalar or an array; ``a``, ``b`` and ``c`` are the
    coefficients being fitted.
    """
    quadratic_term = a*x**2
    linear_term = b*x
    return quadratic_term + linear_term + c
def in_mandel(cx, cy, max_iter):
    """Return the escape iteration for the point ``cx + i*cy``.

    Starting from ``z = c``, iterates ``z -> z**2 + c`` and returns the index
    of the first iteration at which ``|z|**2 >= 4``, or ``max_iter`` if that
    never happens within ``max_iter`` iterations.
    """
    zx, zy = cx, cy
    for step in range(max_iter):
        zx_sq = zx**2
        zy_sq = zy**2
        if zx_sq + zy_sq >= 4:
            return step
        # Tuple assignment: both right-hand sides use the old zx and zy.
        zx, zy = zx_sq - zy_sq + cx, 2.0*zx*zy + cy
    return max_iter


def compute_mandel(N_x, N_y, N_iter):
    """Compute escape iterations on an ``N_x`` by ``N_y`` grid.

    The grid covers the rectangle [-2.5, 0.5] x [-1.2, 1.2]. Returns an int64
    array of shape ``(N_x, N_y)`` whose ``[i, j]`` entry is
    ``in_mandel(x_i, y_j, N_iter)``.
    """
    x_lower, x_upper = -2.5, 0.5
    y_lower, y_upper = -1.2, 1.2
    real_axis = np.linspace(x_lower, x_upper, N_x, dtype=np.float64)
    imag_axis = np.linspace(y_lower, y_upper, N_y, dtype=np.float64)

    counts = np.empty((N_x, N_y), dtype=np.int64)
    for row, cx in enumerate(real_axis):
        for col, cy in enumerate(imag_axis):
            counts[row, col] = in_mandel(cx, cy, N_iter)
    return counts
def timer(func, name):
    """Wrap ``func`` so that every call prints how long it took.

    Args:
        func: the callable to time.
        name: label used in the printed message.

    Returns:
        A wrapper with the same signature and return value as ``func`` that
        prints ``Time taken for {name}: {seconds}`` after each call.
    """
    # perf_counter is monotonic and high resolution; wall-clock time() can
    # jump when the system clock is adjusted. Imported locally so this
    # function does not depend on a change to the file's import block.
    from time import perf_counter

    @wraps(func)
    def wrapper(*args, **kwargs):
        t_start = perf_counter()
        val = func(*args, **kwargs)
        t_end = perf_counter()
        print(f"Time taken for {name}: {t_end - t_start}")
        return val
    return wrapper
@ureg.wraps(ureg.meter, ureg.second)
def calc_depth(dropping_time):
    """Return the distance fallen from rest after ``dropping_time``.

    The ``ureg.wraps`` decorator declares the argument in seconds and the
    result in meters. Uses s = u*t + 0.5*a*t*t with u = 0 and a = 9.81.
    """
    half_gravity = 0.5*9.81
    return half_gravity*dropping_time*dropping_time
2020-12-30 27 | # Data variables: 28 | # steps (date, location) float64 -1.424 1.264 ... -0.4547 -0.4873 29 | # accumulated (date, location) float64 -1.424 1.264 -0.8707 ... 8.935 -3.525 30 | 31 | means = data_array.mean(dim="location") 32 | 33 | fig, ax = plt.subplots(tight_layout=True) 34 | means["accumulated"].to_dataframe().plot(ax=ax, color="k") 35 | ax.set(title="Mean accumulated values", xlabel="date", ylabel="value") 36 | 37 | plt.show() 38 | 39 | data_array.to_netcdf("data.nc") 40 | 41 | new_data = xr.load_dataset("data.nc") 42 | print(new_data) 43 | # 44 | # Dimensions: (date: 365, location: 25) 45 | # Coordinates: 46 | # * location (location) int64 0 1 2 3 4 5 6 7 8 ... 17 18 19 20 21 22 23 24 47 | # * date (date) datetime64[ns] 2020-01-01 2020-01-02 ... 2020-12-30 48 | # Data variables: 49 | # steps (date, location) float64 -1.424 1.264 ... -0.4547 -0.4873 50 | # accumulated (date, location) float64 -1.424 1.264 -0.8707 ... 8.935 -3.525 51 | -------------------------------------------------------------------------------- /Chapter 10/sample.csv: -------------------------------------------------------------------------------- 1 | id,number,lower,upper 2 | row0,0,-0.5453279550656607,-0.36648332058049427 3 | row1,1,0.5947309146654682,0.3525093415019491 4 | row2,2,-0.217780898796182,-0.33437214426723094 5 | row3,3,0.19661750717437965,-0.6265316287925733 6 | row4,4,0.3455120880292426,0.8836057305398743 7 | row5,5,-0.503508570740858,0.8977623036666365 8 | row6,6,0.3344749062007448,-0.8082041288117758 9 | row7,7,-0.11632066766437443,0.7729598386550354 10 | row8,8,0.3949069997640442,-0.3470542718597758 11 | row9,9,0.467856326660133,-0.5597300889090275 12 | row10,10,-0.8368108609155838,-0.680208797849905 13 | row11,11,-1.3197996300905894,-0.06961369259589811 14 | row12,12,-0.46715794341845807,0.6315528068496139 15 | row13,13,-0.613411221421011,-0.7410618476455995 16 | row14,14,-0.8166704969101282,0.19713602732982638 17 | 
row15,15,0.7094838087480027,0.20324248338742623 18 | row16,16,0.863976722271967,0.44956272218404014 19 | row17,17,0.7211026347865848,0.8586756031506326 20 | row18,None,0.09237201816470608,0.8753459175355138 21 | row19,19,-0.010024119842351409,-0.452453635020025 22 | row20,20,-0.09644258505047865,0.3300778467990606 23 | row21,21,-0.3382181390658907,0.8069080136164781 24 | row22,22,-0.4858516494469314,-0.32034332477936034 25 | row23,23,-0.48229320271414533,-0.28910704011142796 26 | row24,24,-0.9899553325657364,0.2572090881993574 27 | row25,25,-0.43523458514976343,-0.8638246210241085 28 | row26,26,0.23365795451276106,-0.6473473594375931 29 | row27,27,-0.3912232255608208,-0.11822637824776394 30 | row28,28,-0.6995953178745984,-0.56414227382913 31 | row29,29,-0.051333769332911006,-0.04726228983761627 32 | row30,30,-0.48953529236099946,-0.40486946370390386 33 | row31,31,-0.4418657603724667,-0.4788415750174049 34 | row32,32,-0.034476814401368516,-1.5760419272969788 35 | row33,33,-0.008738806653918685,-0.5074773483385249 36 | row34,34,0.6769653049338895,-0.6397388198099299 37 | row35,35,0.7243125830184729,-0.6434011103096251 38 | row36,36,0.5010626638744882,0.2222408076611304 39 | row37,37,-0.5816899301427854,0.5197448422479904 40 | row38,38,-0.501478860930175,-0.828856536026884 41 | row39,39,0.2361134446361819,0.07393666206467109 42 | row40,40,0.2690534224305514,-0.6512517826172235 43 | row41,41,-0.5036710202870951,0.36964596927879834 44 | row42,42,-0.838256707498185,0.7501472015122523 45 | row43,43,-0.14261123692001632,0.23678839079475567 46 | row44,44,-0.3737889916297603,-0.6420742894142648 47 | row45,45,-0.9805757444090948,-0.579914083103094 48 | row46,46,0.7400013575433042,0.9456596047951173 49 | row47,47,-0.11641531361779522,-0.2425010103814933 50 | row48,48,-0.4481058374636997,0.9322082184688836 51 | row49,49,-0.883594789468312,-0.1825322022762903 52 | row50,50,-0.6627423109430279,-0.5197118831966243 53 | row51,51,0.5600157126684238,-0.5924648076887753 54 | 
row52,52,0.10410190952324339,-0.26601171589666506 55 | row53,53,0.014563442784713665,-0.33312440660742837 56 | row54,54,-0.4345566517842432,-0.43633939738958594 57 | row55,55,-0.8292374157574245,-0.03637268843862951 58 | row56,56,0.7666857890291086,0.8944555320200425 59 | row57,57,-0.9452325561457624,0.8355044890051131 60 | row58,58,-0.7569509410291808,0.4956955195716175 61 | row59,59,0.7930414840993898,-0.6641404001256874 62 | row60,60,-0.3370735614663771,-0.2436867487683949 63 | row61,61,-0.306302084651618,0.03251140223072535 64 | row62,62,-0.9820119452573393,-0.15464359103523306 65 | row63,63,1.7553154590232336,-0.8251896942950534 66 | row64,64,-0.031830365083926226,-0.03754454531893958 67 | row65,65,0.5651429806397303,0.9291191502767817 68 | row66,66,0.4141928832710864,-0.45252655816900744 69 | row67,67,0.34022660093971635,-0.3049303945035231 70 | row68,68,0.5362556721098188,0.3515428325049723 71 | row69,69,0.9550640560545192,0.7334195791892422 72 | row70,70,-0.9077839707081079,-0.4193525738247359 73 | row71,71,0.7247821953781601,0.20169566808153072 74 | row72,72,-0.3114840830298182,-0.8887948464221092 75 | row73,73,0.5257453399641963,-0.9969624245218904 76 | row74,74,-0.814312921521728,0.9870083196112802 77 | row75,75,0.5520261544573772,0.7365417258524953 78 | row76,76,-0.04568349682654338,-0.6623027191553501 79 | row77,77,0.06795464229033121,-0.1483362463468949 80 | row78,78,-0.6739408067896355,-0.6682448575985709 81 | row79,79,-0.48369966334835657,0.8861831397315372 82 | row80,80,0.9736138836952428,0.40733282972103124 83 | row81,81,0.4463148267419048,0.4649956144843359 84 | row82,82,0.03479103702911046,-0.6451473834892649 85 | row83,83,0.7552924138551387,0.7607807114991783 86 | row84,84,0.419069105579164,0.8668576143141729 87 | row85,85,0.9599963207988473,0.006951564264227272 88 | row86,86,0.5151390139169887,0.23323704687810198 89 | row87,87,-0.7702218635789513,-0.36802303975273887 90 | row88,88,-0.8645322049402049,0.7737423562224535 91 | 
row89,89,-0.9558592022501617,0.07457340680881064 92 | row90,90,-0.6447757870185391,-0.7060101812706867 93 | row91,91,-0.6683110398703005,0.7539079578178016 94 | row92,92,-0.011059748841883543,-0.31819900933172107 95 | row93,93,0.9984196517634787,0.09194979150319749 96 | row94,94,-0.39333774878924466,-0.25869659832069414 97 | row95,95,0.3917337561065648,-0.7595068627225243 98 | row96,96,0.5164423763375303,0.7675352136978346 99 | row97,97,-0.04973771044592468,-0.1253905900675356 100 | row98,98,-0.7137704534622713,-0.15451548318042696 101 | row99,99,0.5288233887583287,-0.1720334171281932 102 | -------------------------------------------------------------------------------- /Chapter 10/sample.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Sample Jupyter notebook\n", 8 | "This is a sample notebook." 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import matplotlib.pyplot as plt\n", 18 | "from numpy.random import default_rng\n", 19 | "rng = default_rng(12345)" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "uniform_data = rng.uniform(-5, 5, size=(2, 100))" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "fig, ax = plt.subplots(tight_layout=True)\n", 38 | "ax.scatter(uniform_data[0, :], uniform_data[1, :], color=\"k\")\n", 39 | "ax.set(title=\"Scatter plot\", xlabel=\"x\", ylabel=\"y\")" 40 | ] 41 | } 42 | ], 43 | "metadata": { 44 | "kernelspec": { 45 | "display_name": "Python 3 (ipykernel)", 46 | "language": "python", 47 | "name": "python3" 48 | }, 49 | "language_info": { 50 | "codemirror_mode": { 51 | "name": "ipython", 52 | "version": 3 53 | }, 54 | 
"file_extension": ".py", 55 | "mimetype": "text/x-python", 56 | "name": "python", 57 | "nbconvert_exporter": "python", 58 | "pygments_lexer": "ipython3", 59 | "version": "3.10.1+" 60 | } 61 | }, 62 | "nbformat": 4, 63 | "nbformat_minor": 2 64 | } 65 | -------------------------------------------------------------------------------- /Chapter 10/validating-data.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import cerberus 3 | 4 | float_schema = {"type": "float", "coerce": float, "min": -1.0, "max": 1.0} 5 | 6 | item_schema = { 7 | "type": "dict", 8 | "schema": { 9 | "id": {"type": "string"}, 10 | "number": {"type": "integer", "coerce": int}, 11 | "lower": float_schema, 12 | "upper": float_schema, 13 | } 14 | } 15 | 16 | schema = { 17 | "rows": { 18 | "type": "list", 19 | "schema": item_schema 20 | } 21 | } 22 | 23 | 24 | validator = cerberus.Validator(schema) 25 | 26 | 27 | with open("sample.csv") as f: 28 | dr = csv.DictReader(f) 29 | document = {"rows": list(dr)} 30 | 31 | 32 | validator.validate(document) 33 | 34 | errors = validator.errors["rows"][0] 35 | 36 | for row_n, errs in errors.items(): 37 | print(f"row {row_n}: {errs}") 38 | 39 | -------------------------------------------------------------------------------- /Chapter 10/working-with-geographical-data.py: -------------------------------------------------------------------------------- 1 | import geopandas 2 | import geoplot 3 | import matplotlib.pyplot as plt 4 | 5 | world = geopandas.read_file( 6 | geopandas.datasets.get_path("naturalearth_lowres") 7 | ) 8 | 9 | cities = geopandas.read_file( 10 | geopandas.datasets.get_path("naturalearth_cities") 11 | ) 12 | 13 | fig, ax = plt.subplots() 14 | geoplot.polyplot(world, ax=ax, alpha=0.7) 15 | 16 | 17 | geoplot.pointplot(cities, ax=ax, fc="k", marker="2") 18 | ax.axis((-180, 180, -90, 90)) 19 | 20 | 21 | plt.show() 22 | -------------------------------------------------------------------------------- 
/Chapter 10/writing-reproducible-code-for-data-science.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | import matplotlib.pyplot as plt 5 | 6 | from sklearn.metrics import ConfusionMatrixDisplay, accuracy_score 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.tree import DecisionTreeClassifier 9 | 10 | rng = np.random.default_rng(12345) 11 | 12 | 13 | def get_data(): 14 | permute = rng.permutation(200) 15 | 16 | data = np.vstack([ 17 | rng.normal((1.0, 2.0, -3.0), 1.0, size=(50, 3)), 18 | rng.normal((-1.0, 1.0, 1.0), 1.0, size=(50, 3)), 19 | rng.normal((0.0, -1.0, -1.0), 1.0, size=(50, 3)), 20 | rng.normal((-1.0, -1.0, -2.0), 1.0, size=(50, 3)) 21 | ]) 22 | labels = np.hstack([ 23 | [1]*50, [2]*50, [3]*50, [4]*50 24 | ]) 25 | 26 | X = pd.DataFrame(np.take(data, permute, axis=0), columns=["A", "B", "C"]) 27 | y = pd.Series(np.take(labels, permute, axis=0)) 28 | return X, y 29 | 30 | 31 | data, labels = get_data() 32 | 33 | data.to_csv("data.csv") 34 | labels.to_csv("labels.csv") 35 | 36 | X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=23456) 37 | 38 | print(X_train.index.size, X_test.index.size) 39 | 40 | X_train.index.to_series().to_csv("train_index.csv", index=False, header=False) 41 | X_test.index.to_series().to_csv("test_index.csv", index=False, header=False) 42 | 43 | classifier = DecisionTreeClassifier(random_state=34567) 44 | classifier.fit(X_train, y_train) 45 | 46 | feature_importance = pd.DataFrame(classifier.feature_importances_, index=classifier.feature_names_in_, columns=["Importance"]) 47 | feature_importance.to_csv("feature_importance.csv") 48 | 49 | train_predictions = classifier.predict(X_train) 50 | test_predictions = classifier.predict(X_test) 51 | 52 | pd.Series(train_predictions, index=X_train.index, name="Predicted labels").to_csv("train_predictions.csv") 53 | 
pd.Series(test_predictions, index=X_test.index, name="Predicted labels").to_csv("test_predictions.csv") 54 | 55 | fig, (ax1, ax2) = plt.subplots(1, 2, tight_layout=True) 56 | ax1.set_title("Confusion matrix for training data") 57 | ax2.set_title("Confusion matrix for test data") 58 | ConfusionMatrixDisplay.from_predictions(y_train, train_predictions, ax=ax1, cmap="Greys", colorbar=False) 59 | ConfusionMatrixDisplay.from_predictions(y_test, test_predictions, ax=ax2, cmap="Greys", colorbar=False) 60 | 61 | print(f"Train accuracy {accuracy_score(y_train, train_predictions)}", 62 | f"Test accuracy {accuracy_score(y_test, test_predictions)}", sep="\n") 63 | 64 | plt.show() -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | ### [Packt Conference : Put Generative AI to work on Oct 11-13 (Virtual)](https://packt.link/JGIEY) 3 | 4 |

[![Packt Conference](https://hub.packtpub.com/wp-content/uploads/2023/08/put-generative-ai-to-work-packt.png)](https://packt.link/JGIEY)

5 | 3 Days, 20+ AI Experts, 25+ Workshops and Power Talks 6 | 7 | Code: USD75OFF 8 | 9 | 10 | 11 | 12 | # Applying Math with Python - Second Edition 13 | Applying Math with Python - Second Edition 14 | 15 | This is the code repository for [Applying Math with Python - Second Edition](https://www.packtpub.com/product/applying-math-with-python-second-edition/9781804618370), published by Packt. 16 | 17 | **Over 70 practical recipes for solving real-world computational math problems** 18 | 19 | ## What is this book about? 20 | The updated edition of Applying Math with Python will help you solve complex problems in a wide variety of mathematical fields in simple and efficient ways. Old recipes have been revised for new libraries and several recipes have been added to demonstrate new tools such as JAX. 21 | You'll start by refreshing your knowledge of several core mathematical fields and learn about packages covered in Python's scientific stack, including NumPy, SciPy, and Matplotlib. As you progress, you'll gradually get to grips with more advanced topics of calculus, probability, and networks (graph theory). Once you’ve developed a solid base in these topics, you’ll have the confidence to set out on math adventures with Python as you explore Python's applications in data science and statistics, forecasting, geometry, and optimization. The final chapters will take you through a collection of miscellaneous problems, including working with specific data formats and accelerating code. 22 | By the end of this book, you'll have an arsenal of practical coding solutions that can be used and modified to solve a wide range of practical problems in computational mathematics and data science. 
23 | 24 | This book covers the following exciting features: 25 | * Become familiar with basic Python packages, tools, and libraries for solving mathematical problems 26 | * Explore real-world applications of mathematics to reduce a problem in optimization 27 | * Understand the core concepts of applied mathematics and their application in computer science 28 | * Find out how to choose the most suitable package, tool, or technique to solve a problem 29 | * Implement basic mathematical plotting, change plot styles, and add labels to plots using Matplotlib 30 | * Get to grips with probability theory using Bayesian inference and Markov Chain Monte Carlo (MCMC) methods 31 | 32 | If you feel this book is for you, get your [copy](https://www.amazon.com/dp/1804618373) today! 33 | 34 | https://www.packtpub.com/ 35 | 36 | ## Instructions and Navigations 37 | All of the code is organized into folders. 38 | 39 | The code will look like the following: 40 | ``` 41 | from decimal import Decimal, getcontext 42 | ctx = getcontext() 43 | num = Decimal('1.1') 44 | num**4 # Decimal('1.4641') 45 | ctx.prec=4 # set the new precision 46 | num**4 # Decimal('1.464') 47 | ``` 48 | 49 | **Following is what you need for this book:** 50 | Whether you are a professional programmer or a student looking to solve mathematical problems computationally using Python, this is the book for you. Advanced mathematics proficiency is not a prerequisite, but basic knowledge of mathematics will help you to get the most out of this Python math book. Familiarity with the concepts of data structures in Python is assumed. 51 | 52 | With the following software and hardware list you can run all code files present in the book (Chapters 1-10). 
53 | 54 | ### Software and Hardware List 55 | 56 | | Chapter | Software required | OS required | 57 | | -------- | -------------------------------------------------------------------------------------| -----------------------------------| 58 | | 1-10 | Python 3.6 or higher | Windows, Mac OS X, and Linux (Any) | 59 | 60 | We also provide a PDF file that has color images of the screenshots/diagrams used in this book. [Click here to download it](http://packt.link/OxkXD). 61 | 62 | 63 | ### Related products 64 | * Hands-On Mathematics for Deep Learning [[Packt]](https://www.packtpub.com/product/hands-on-mathematics-for-deep-learning/9781838647292) [[Amazon]](https://www.amazon.com/dp/1838647295) 65 | 66 | * Essential Mathematics for Quantum Computing [[Packt]](https://www.packtpub.com/product/essential-mathematics-for-quantum-computing/9781801073141?_ga=2.176321189.1855619319.1669697710-1347501151.1654864057) [[Amazon]](https://www.amazon.com/dp/1801073147) 67 | 68 | ## Get to Know the Author 69 | **Sam Morley** is an experienced lecturer in mathematics and a researcher in pure mathematics. He is currently a research software engineer at the University of Oxford working on the DataSig project. He was previously a lecturer in mathematics at the University of East Anglia and Nottingham Trent University. His research interests lie in functional analysis, especially Banach algebras. Sam has a firm commitment to providing high-quality, inclusive, and enjoyable teaching, with the aim of inspiring his students and spreading his enthusiasm for mathematics. 70 | 71 | ### Download a free PDF 72 | 73 | If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost.
Simply click on the link to claim your free PDF.
74 |

https://packt.link/free-ebook/9781804618370

75 | --------------------------------------------------------------------------------