├── Chapter 01
    ├── array-arithmetic-and-functions.py
    ├── basic-mathematical-functions.py
    ├── complex-type.py
    ├── decimal-type.py
    ├── eigenvalus-and-eigenvectors.py
    ├── fraction-type.py
    ├── matrices-and-n-dimensional-arrays.py
    ├── matrix-properties-of-numpy-arrays.py
    ├── numpy-arrays.py
    ├── solving-equations.py
    ├── sparse-matrices.py
    ├── systems-of-equations.py
    └── useful-array-creation-routines.py
├── Chapter 02
    ├── adding-labels-and-legends-to-plots.py
    ├── adding-subplots.py
    ├── basic-plotting-with-matplotlib.py
    ├── changing-the-plotting-style.py
    ├── customising-3d-plots.py
    ├── plotting-with-error-bars.py
    ├── saving-matplotlib-figures.py
    ├── surface-and-contour-plots.py
    ├── trisurf.py
    └── visualizing-vector-fields-with-quivers.py
├── Chapter 03
    ├── automatic-differentiation-and-calculus-using-jax.py
    ├── discrete-fourier-transforms-for-signals-processing.py
    ├── numerical-integration.py
    ├── partial-differential-equations.py
    ├── polynomials-and-calculus.py
    ├── solving-differential-equations-with-jax.py
    ├── solving-equations.py
    ├── solving-simple-differential-equations-numerically.py
    ├── solving-systems-of-differential-equations.py
    └── symbolic-calculus-using-sympy.py
├── Chapter 04
    ├── analysing-conversion-rates-with-bayesian-techniques.py
    ├── changing-the-random-number-generator.py
    ├── estimating-parameters-with-monte-carlo-simulations.py
    ├── generating-normally-distributed-random-numbers.py
    ├── generating-random-data.py
    ├── selecting-items-at-random.py
    └── working-with-random-processes.py
├── Chapter 05
    ├── coloring-a-network.py
    ├── creating-directed-and-weighted-networks.py
    ├── creating-networks-in-python.py
    ├── finding-minimum-spanning-trees-and-dominating-sets.py
    ├── finding-shortest-paths.py
    ├── generating-the-adjacency-matrix-for-a-network.py
    ├── getting-the-basic-characteristics-of-networks.py
    ├── quantifying-clustering-in-a-network.py
    └── visualising-networks.py
├── Chapter 06
    ├── creating-interactive-plots-with-Bokeh.py
    ├── creating-series-and-dataframes.py
    ├── getting-descriptive-statistics-from-dataframes.py
    ├── loading-and-storing-data-from-a-dataframe.py
    ├── manipulating-data-frames.py
    ├── performing-operations-on-grouped-data-in-a-dataframe.py
    ├── plotting-data-from-a-DataFrame.py
    ├── testing-hypotheses-for-non-parametric-data.py
    ├── testing-hypotheses-using-ANOVA.py
    ├── testing-hypotheses-using-t-tests.py
    └── understanding-a-population-using-sampling.py
├── Chapter 07
    ├── classifying-using-logarithmic-regression.py
    ├── forecasting-from-time-series-data-using-arima.py
    ├── forecasting-seasonal-data-with-arima.py
    ├── modelling-time-series-data-with-arma.py
    ├── tsdata.py
    ├── using-linear-regression.py
    ├── using-multilinear-regression.py
    ├── using-prophet-to-model-time-series.py
    └── using-signatures-to-summarize-time-series-data.py
├── Chapter 08
    ├── computing-convex-hulls.py
    ├── constructing-bezier-curves.py
    ├── finding-edges-in-images.py
    ├── finding-interior-points.py
    ├── mandelbrot.png
    ├── swisscheese-grid-10411.csv
    ├── triangulating-polygonal-regions.py
    └── visualizing-two-dimensional-geometric-figures.py
├── Chapter 09
    ├── analyzing-simple-two-player-games.py
    ├── computing-nash-equilibria.py
    ├── minimising-a-non-linear-system.py
    ├── minimising-simple-linear-systems.py
    ├── using-gradient-descent-methods.py
    └── using-least-squares-to-fit-a-curve-to-data.py
├── Chapter 10
    ├── accelerating-code-with-cython
    │   ├── mandelbrot
    │   │   ├── __init__.py
    │   │   ├── python_mandel.py
    │   │   └── setup.py
    │   └── run.py
    ├── accouting-for-uncertainty-in-calculations.py
    ├── distributing-computations-with-dask.py
    ├── keeping-track-of-units-with-pint.py
    ├── loading-and-storing-data-from-netcdf.py
    ├── sample.csv
    ├── sample.ipynb
    ├── validating-data.py
    ├── working-with-geographical-data.py
    └── writing-reproducible-code-for-data-science.py
├── LICENSE
└── README.md


/Chapter 01/array-arithmetic-and-functions.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | arr_a = np.array([1, 2, 3, 4])
 5 | arr_b = np.array([1, 0, -3, 1])
 6 | arr_a + arr_b # array([2, 2, 0, 5])
 7 | arr_a - arr_b # array([0, 2, 6, 3])
 8 | arr_a * arr_b # array([ 1, 0, -9, 4])
 9 | arr_b / arr_a # array([ 1. , 0. , -1. , 0.25])
10 | arr_b**arr_a # array([1, 0, -27, 1])
11 | 
12 | 
13 | arr = np.array([1, 2, 3, 4])
14 | new = 2*arr
15 | print(new)
16 | # [2, 4, 6, 8]


--------------------------------------------------------------------------------
/Chapter 01/basic-mathematical-functions.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | 
 3 | math.sqrt(4)  # 2.0
 4 | 
 5 | theta = math.pi / 4
 6 | math.cos(theta)  # 0.7071067811865476
 7 | math.sin(theta)  # 0.7071067811865475
 8 | math.tan(theta)  # 0.9999999999999999
 9 | 
10 | math.asin(-1)  # -1.5707963267948966
11 | math.acos(-1)  # 3.141592653589793
12 | math.atan(1)  # 0.7853981633974483
13 | 
14 | math.log(10)  # 2.302585092994046
15 | math.log(10, 10)  # 1.0
16 | 
17 | math.gamma(5)  # 24.0
18 | math.erf(2)  # 0.9953222650189527
19 | 
20 | 
21 | math.comb(5, 2) # 10
22 | math.factorial(5) # 120
23 | 
24 | 
25 | math.gcd(2, 4) # 2
26 | math.gcd(2, 3) # 1
27 | 
28 | 
29 | nums = [0.1]*10 # list containing 0.1 ten times
30 | sum(nums) # 0.9999999999999999
31 | math.fsum(nums) # 1.0


--------------------------------------------------------------------------------
/Chapter 01/complex-type.py:
--------------------------------------------------------------------------------
1 | z = 1 + 1j
2 | z + 2 # 3 + 1j
3 | z.conjugate() # 1 - 1j


--------------------------------------------------------------------------------
/Chapter 01/decimal-type.py:
--------------------------------------------------------------------------------
 1 | from decimal import Decimal
 2 | 
 3 | num1 = Decimal('1.1')
 4 | num2 = Decimal('1.563')
 5 | num1 + num2  # Decimal('2.663')
 6 | 
 7 | from decimal import getcontext
 8 | ctx = getcontext()
 9 | num = Decimal('1.1')
10 | num**4 # Decimal('1.4641')
11 | ctx.prec = 4 # set new precision
12 | num**4 # Decimal('1.464')
13 | 
14 | 
15 | from decimal import localcontext
16 | num = Decimal("1.1")
17 | with localcontext() as ctx:
18 |     ctx.prec = 2
19 |     num**4 # Decimal('1.5')
20 | num**4 # Decimal('1.464') - outside the with block the global precision of 4 set above applies again
21 | 
22 | 


--------------------------------------------------------------------------------
/Chapter 01/eigenvalus-and-eigenvectors.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | from numpy import linalg
 4 | 
 5 | A = np.array([[3, -1, 4], [-1, 0, -1], [4, -1, 2]])
 6 | 
 7 | v, B = linalg.eig(A)
 8 | 
 9 | i = 0  # first eigenvalue/eigenvector pair
10 | lambda0 = v[i]
11 | print(lambda0)
12 | # 6.823156164525971
13 | x0 = B[:, i]  # ith column of B
14 | print(x0)
15 | # array([ 0.73271846, -0.20260301, 0.649672352])
16 | 
17 | linalg.norm(x0)  # 1.0  - eigenvectors are normalised.
18 | 
19 | 
20 | lhs = A @ x0
21 | rhs = lambda0*x0
22 | linalg.norm(lhs - rhs)  # 2.8445583831733384e-15 - very small.
23 | 
24 | 


--------------------------------------------------------------------------------
/Chapter 01/fraction-type.py:
--------------------------------------------------------------------------------
1 | from fractions import Fraction
2 | num1 = Fraction(1, 3)
3 | num2 = Fraction(1, 7)
4 | num1 * num2  # Fraction(1, 21)
5 | 


--------------------------------------------------------------------------------
/Chapter 01/matrices-and-n-dimensional-arrays.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | mat = np.array([[1, 2], [3, 4]])
 4 | vec = np.array([1, 2])
 5 | 
 6 | mat.shape  # (2, 2)
 7 | vec.shape  # (2,)
 8 | 
 9 | 
10 | mat.reshape(4,)
11 | # array([1, 2, 3, 4])
12 | 
13 | 
14 | mat1 = [[1, 2], [3, 4]]
15 | mat2 = [[5, 6], [7, 8]]
16 | mat3 = [[9, 10], [11, 12]]
17 | 
18 | arr_3d = np.array([mat1, mat2, mat3])
19 | arr_3d.shape  # (3, 2, 2)
20 | 
21 | mat[0, 0] # 1 - top left element
22 | mat[1, 1] # 4 - bottom right element
23 | 
24 | mat[:, 0] # array([1, 3])
25 | 
26 | 
27 | 


--------------------------------------------------------------------------------
/Chapter 01/matrix-properties-of-numpy-arrays.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | np.eye(3)
 4 | # array([[1., 0., 0.],
 5 | #        [0., 1., 0.],
 6 | #        [0., 0., 1.]])
 7 | 
 8 | 
 9 | mat = np.array([[1, 2], [3, 4]])
10 | mat.transpose()
11 | # array([[1, 3],
12 | #        [2, 4]])
13 | mat.T
14 | # array([[1, 3],
15 | #        [2, 4]])
16 | 
17 | 
18 | A = np.array([[1, 2], [3, 4]])
19 | A.trace()  # 5
20 | 
21 | A = np.array([[1, 2], [3, 4]])
22 | B = np.array([[-1, 1], [0, 1]])
23 | A @ B
24 | # array([[-1, 3],
25 | #        [-3, 7]])
26 | A * B
27 | # array([[-1, 2],
28 | #        [ 0, 4]])
29 | 
30 | 
31 | A = np.array([[1, 2], [3, 4]])
32 | I = np.eye(2)
33 | A @ I
34 | # array([[1, 2],
35 | #        [3, 4]])
36 | 
37 | 
38 | from numpy import linalg
39 | linalg.det(A)  # -2.0000000000000004
40 | linalg.inv(A)
41 | # array([[-2. , 1. ],
42 | #        [ 1.5, -0.5]])
43 | 
44 | 
45 | Ainv = linalg.inv(A)
46 | Ainv @ A
47 | # Approximately
48 | # array([[1., 0.],
49 | #       [0., 1.]])
50 | 
51 | A @ Ainv
52 | # Approximately
53 | # array([[1., 0.],
54 | #        [0., 1.]])
55 | 
56 | 


--------------------------------------------------------------------------------
/Chapter 01/numpy-arrays.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | ary = np.array([1, 2, 3, 4])  # array([1, 2, 3, 4])
 4 | 
 5 | ary[0]  # 1
 6 | ary[2]  # 3
 7 | ary[::2]  # array([1, 3])
 8 | 
 9 | 
10 | np.array([1, 2, 3, 4], dtype=np.float32)
11 | # array([1., 2., 3., 4.], dtype=float32)
12 | 
13 | 
14 | arr = np.array([1, 2, 3, 4])
15 | print(arr.dtype) # dtype('int64')
16 | arr.dtype = np.float32
17 | print(arr)
18 | # [1.e-45 0.e+00 3.e-45 0.e+00 4.e-45 0.e+00 6.e-45 0.e+00]
19 | 
20 | 
21 | arr = arr.astype(np.float32)
22 | print(arr)
23 | # [1. 2. 3. 4.]


--------------------------------------------------------------------------------
/Chapter 01/solving-equations.py:
--------------------------------------------------------------------------------
 1 | from scipy import optimize
 2 | from math import exp
 3 | 
 4 | def f(x):
 5 |     return x*(x - 2)*exp(3 - x)
 6 | 
 7 | def fp(x):
 8 |     return -(x**2 - 4*x + 2)*exp(3 - x)
 9 | 
10 | 
11 | optimize.newton(f, 1., x1=1.5) # Using x1 = 1.5 and the secant method
12 | # 1.9999999999999862
13 | optimize.newton(f, 1., fprime=fp) # Using Newton-Raphson method
14 | # 2.0
15 | 
16 | 
17 | 


--------------------------------------------------------------------------------
/Chapter 01/sparse-matrices.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from scipy import sparse
 3 | 
 4 | T = sparse.diags([-1, 2, -1], (-1, 0, 1), shape=(5, 5), format="csr")
 5 | T.toarray()
 6 | # array([[ 2, -1,  0,  0,  0],
 7 | #        [-1,  2, -1,  0,  0],
 8 | #        [ 0, -1,  2, -1,  0],
 9 | #        [ 0,  0, -1,  2, -1],
10 | #        [ 0,  0,  0, -1,  2]])
11 | 
12 | from scipy.sparse import linalg
13 | linalg.spsolve(T.tocsr(), np.array([1, 2, 3, 4, 5]))
14 | # array([ 5.83333333, 10.66666667, 13.5, 13.33333333, 9.16666667])
15 | 


--------------------------------------------------------------------------------
/Chapter 01/systems-of-equations.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from numpy import linalg
 3 | 
 4 | A = np.array([[3, -2, 1], [1, 1, -2], [-3, -2, 1]])
 5 | b = np.array([7, -4, 1])
 6 | 
 7 | linalg.solve(A, b)  # array([ 1., -1., 2.])
 8 | 
 9 | 
10 | 


--------------------------------------------------------------------------------
/Chapter 01/useful-array-creation-routines.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | np.linspace(0, 1, 5) # array([0., 0.25, 0.5, 0.75, 1.0])
5 | np.arange(0, 1, 0.3) # array([0.0, 0.3, 0.6, 0.9])
6 | 
7 | 


--------------------------------------------------------------------------------
/Chapter 02/adding-labels-and-legends-to-plots.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | y1 = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
 5 | y2 = np.array([1.2, 1.6, 3.1, 4.2, 4.8])
 6 | y3 = np.array([3.2, 1.1, 2.0, 3.9, 2.5])
 7 | 
 8 | fig, ax = plt.subplots()
 9 | 
10 | lines = ax.plot(y1, 'o', y2, 'x', y3, '*')
11 | 
12 | ax.set_title("Plot of the data y1, y2, and y3")
13 | ax.set_xlabel("x axis label")
14 | ax.set_ylabel("y axis label")
15 | 
16 | 
17 | 
18 | ax.legend(("data y1", "data y2", "data y3"))
19 | 
20 | 
21 | plt.show()
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/Chapter 02/adding-subplots.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | from math import fabs
 4 | 
 5 | def generate_newton_iters(x0, number):
 6 |     yield x0, fabs(x0 - 1.)
 7 |     for _ in range(number):
 8 |         x0 = x0 - (x0*x0 - 1.)/(2*x0)
 9 |         yield x0, fabs(x0 - 1.)
10 | 
11 | 
12 | data = np.array(list(generate_newton_iters(2.0, 5)))
13 | iterates, errors = data[:, 0], data[:, 1]
14 | 
15 | fig, (ax1, ax2) = plt.subplots(1, 2, tight_layout=True) # 1 row, 2 columns
16 | 
17 | ax1.plot(iterates, "kx")
18 | ax1.set_title("Iterates")
19 | ax1.set_xlabel("$i$", usetex=True)
20 | ax1.set_ylabel("$x_i$", usetex=True)
21 | 
22 | ax2.semilogy(errors, "kx") # plot y on a logarithmic scale
23 | ax2.set_title("Error")
24 | ax2.set_xlabel("$i$", usetex=True)
25 | ax2.set_ylabel("Error")
26 | 
27 | plt.show()
28 | 
29 | 
30 | 
31 | 
32 | 
33 | 


--------------------------------------------------------------------------------
/Chapter 02/basic-plotting-with-matplotlib.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | # set up
 5 | def f(x):
 6 |     return x*(x-2)*np.exp(3 - x)
 7 | 
 8 | def g(x):
 9 |     return x**2
10 | 
def h(x):
    """Return 1 - x."""
    difference = 1 - x
    return difference
13 | 
14 | 
# Sample points on which the three functions are evaluated.
x = np.linspace(-0.5, 3.0)  # 50 values between -0.5 and 3.0


y1 = f(x)  # evaluate f on the x points
y2 = g(x)  # evaluate g on the x points
y3 = h(x)  # evaluate h on the x points


fig, ax = plt.subplots()

# One line style per function so the curves stay distinguishable in black
# and white.
ax.plot(x, y1, "k")  # black solid line style
ax.plot(x, y2, "k--")  # black dashed line style
# "-." is the dash-dot line style; the previous "k.-" meant point markers
# joined by a solid line, which did not match the intended dot-dashed line.
ax.plot(x, y3, "k-.")  # black dot-dashed line style


ax.set_title("Plot of the functions f, g, and h")
ax.set_xlabel("x")
ax.set_ylabel("y")


# Legend entries are matched to the lines in the order they were plotted.
ax.legend(["f", "g", "h"])

ax.text(0.4, 2.0, "Intersection")

plt.show()


--------------------------------------------------------------------------------
/Chapter 02/changing-the-plotting-style.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | 
 5 | y1 = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
 6 | y2 = np.array([1.2, 1.6, 3.1, 4.2, 4.8])
 7 | y3 = np.array([3.2, 1.1, 2.0, 3.9, 2.5])
 8 | 
 9 | fig, ax = plt.subplots()
10 | 
11 | lines = ax.plot(y1, 'o', y2, 'x', y3, '*', color="k")
12 | 
13 | ax.set_xlabel("x")
14 | ax.set_ylabel("y")
15 | ax.set_title("Plot of several data points with only markers")
16 | 
17 | plt.show()
18 | 


--------------------------------------------------------------------------------
/Chapter 02/customising-3d-plots.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | from mpl_toolkits import mplot3d
 4 | 
 5 | t = np.linspace(-5, 5)
 6 | x, y = np.meshgrid(t, t)
 7 | z = np.exp(-((x-2.)**2 + (y-3.)**2)/4) - np.exp(-((x+3.)**2 + (y+2)**2)/3)
 8 | 
 9 | 
10 | fig = plt.figure()
11 | 
12 | ax = fig.add_subplot(projection="3d", proj_type="ortho")
13 | 
14 | ax.plot_surface(x, y, z, cmap="gray", vmin=-1.2, vmax=1.2)
15 | ax.set_title("Customized 3D surface plot")
16 | ax.set_xlabel("x")
17 | ax.set_ylabel("y")
18 | ax.set_zlabel("z")
19 | 
20 | 
21 | plt.show()
22 | 
23 | 


--------------------------------------------------------------------------------
/Chapter 02/plotting-with-error-bars.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | measurement_id = np.arange(1, 11)
 5 | measurements = np.array([2.3, 1.9, 4.4, 1.5, 3.0, 3.3, 2.9, 2.6, 4.1, 3.6])
 6 | err = np.array([0.1]*10)
 7 | 
 8 | fig, ax = plt.subplots()
 9 | 
10 | ax.errorbar(measurement_id, measurements, yerr=err, fmt="kx", capsize=2.0)
11 | 
12 | ax.set_title("Plot of measurements and their estimated error")
13 | ax.set_xlabel("Measurement ID")
14 | ax.set_ylabel("Measurement (cm)")
15 | 
16 | 
17 | ax.set_xticks(measurement_id)
18 | 
19 | plt.show()
20 | 


--------------------------------------------------------------------------------
/Chapter 02/saving-matplotlib-figures.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | x = np.arange(1, 5, 0.1)
 5 | y = x*x
 6 | 
 7 | fig, ax = plt.subplots()
 8 | ax.plot(x, y)
 9 | ax.set_title("Graph of $y=x^2$")
10 | ax.set_xlabel("$x$")
11 | ax.set_ylabel("$y$")
12 | fig.savefig("savingfigs.png", dpi=300)
13 | plt.show()
14 | 


--------------------------------------------------------------------------------
/Chapter 02/surface-and-contour-plots.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | X = np.linspace(-5, 5)
 5 | Y = np.linspace(-5, 5)
 6 | 
 7 | x, y = np.meshgrid(X, Y)
 8 | 
 9 | z = np.exp(-((x - 2.)**2 + (y - 3.)**2)/4)  - np.exp(-((x + 3.)**2 + (y + 2.)**2)/3)
10 | 
11 | from mpl_toolkits import mplot3d
12 | 
13 | fig = plt.figure()
14 | ax = fig.add_subplot(projection="3d")
15 | 
16 | ax.plot_surface(x, y, z, cmap="gray")
17 | 
18 | ax.set_xlabel("x")
19 | ax.set_ylabel("y")
20 | ax.set_zlabel("z")
21 | ax.set_title("Graph of the function f(x, y)")
22 | 
23 | plt.show()  # paused here
24 | 
25 | 
26 | fig, ax = plt.subplots()
27 | ax.contour(x, y, z, cmap="gray")
28 | ax.set_title("Contours of f(x, y)")
29 | ax.set_xlabel("x")
30 | ax.set_ylabel("y")
31 | 
32 | 
33 | plt.show()
34 | 


--------------------------------------------------------------------------------
/Chapter 02/trisurf.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | from mpl_toolkits import mplot3d
 4 | 
 5 | 
 6 | x = np.array([ 0.19, -0.82, 0.8 , 0.95, 0.46, 0.71,
 7 |      -0.86, -0.55,   0.75,-0.98, 0.55, -0.17, -0.89,
 8 |          -0.4 , 0.48, -0.09, 1., -0.03, -0.87, -0.43])
 9 | y = np.array([-0.25, -0.71, -0.88, 0.55, -0.88, 0.23,
10 |       0.18,-0.06, 0.95, 0.04, -0.59, -0.21, 0.14, 0.94,
11 |           0.51, 0.47, 0.79, 0.33, -0.85, 0.19])
12 | z = np.array([-0.04, 0.44, -0.53, 0.4, -0.31, 0.13,
13 |       -0.12, 0.03, 0.53, -0.03, -0.25, 0.03, -0.1 ,
14 |           -0.29, 0.19, -0.03, 0.58, -0.01, 0.55, -0.06])
15 | 
16 | 
17 | fig = plt.figure(tight_layout=True)
18 | ax1 = fig.add_subplot(1, 2, 1, projection="3d")
19 | ax1.plot_trisurf(x, y, z, cmap="gray")
20 | 
21 | ax1.set_xlabel("x")
22 | ax1.set_ylabel("y")
23 | ax1.set_zlabel("z")
24 | ax1.set_title("Approximate surface")
25 | 
26 | ax2 = fig.add_subplot(1, 2, 2)
27 | ax2.tricontour(x, y, z, cmap="gray")
28 | ax2.set_xlabel("x")
29 | ax2.set_ylabel("y")
30 | ax2.set_title("Approximate contours")
31 | 
32 | plt.show()
33 | 


--------------------------------------------------------------------------------
/Chapter 02/visualizing-vector-fields-with-quivers.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | 
 5 | def f(x, y): 
 6 |     v = x**2 + y**2
 7 |     return np.exp(-2*v)*(x + y), np.exp(-2*v)*(x - y)
 8 | 
 9 | 
10 | t = np.linspace(-1., 1.)
11 | x, y = np.meshgrid(t, t)
12 | 
13 | dx, dy = f(x, y)
14 | 
15 | 
16 | fig, ax = plt.subplots()
17 | ax.quiver(x, y, dx, dy)
18 | 
19 | ax.set_xlabel("x")
20 | ax.set_ylabel("y")
21 | ax.set_title("Quiver plot of a vector field")
22 | 
23 | plt.show()
24 | 
25 | 
26 | 


--------------------------------------------------------------------------------
/Chapter 03/automatic-differentiation-and-calculus-using-jax.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | from mpl_toolkits import mplot3d
 3 | 
 4 | import jax.numpy as jnp
 5 | from jax import grad, jit, vmap
 6 | 
 7 | @jit
 8 | def f(x, y):
 9 |     return jnp.exp(-(x**2 + y**2))
10 | 
11 | t = jnp.linspace(-1.0, 1.0)
12 | x, y = jnp.meshgrid(t, t)
13 | fig = plt.figure()
14 | ax = fig.add_subplot(projection="3d")
15 | ax.plot_surface(x, y, f(x, y), cmap="gray")
16 | ax.set_title("Plot of the function f(x, y)")
17 | ax.set_xlabel("x")
18 | ax.set_ylabel("y")
19 | ax.set_zlabel("z")
20 | 
21 | 
22 | fx = jit(grad(f, 0))
23 | fy = jit(grad(f, 1))
24 | 
25 | print(fx(1., -1.), fy(1., -1.))
26 | 
27 | zx = vmap(fx)(x.ravel(), y.ravel()).reshape(x.shape)
28 | figpd = plt.figure()
29 | axpd = figpd.add_subplot(projection="3d")
30 | axpd.plot_surface(x, y, zx, cmap="gray")
31 | axpd.set_title("Partial derivative with respect to x")
32 | axpd.set_xlabel("x")
33 | axpd.set_ylabel("y")
34 | axpd.set_zlabel("z")
35 | 
36 | 
37 | plt.show()
38 | 


--------------------------------------------------------------------------------
/Chapter 03/discrete-fourier-transforms-for-signals-processing.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | rng = np.random.default_rng(12345)
 4 | 
 5 | 
 6 | def signal(t, freq_1=4.0, freq_2=7.0):
 7 |     return np.sin(freq_1*2*np.pi*t) + np.sin(freq_2*2*np.pi*t)
 8 | 
 9 | 
10 | sample_size = 2**7
11 | sample_t = np.linspace(0, 4, sample_size, dtype=np.float64)
12 | sample_y = signal(sample_t) + rng.standard_normal(sample_size)
13 | sample_d = 4. / (sample_size - 1) # Spacing for linspace array
14 | true_signal = signal(sample_t)
15 | 
16 | from numpy import fft
17 | 
18 | fig1, ax1 = plt.subplots()
19 | ax1.plot(sample_t, sample_y, "k.", label="Noisy signal")
20 | ax1.plot(sample_t, true_signal, "k--", label="True signal")
21 | 
22 | ax1.set_title("Sample signal with noise")
23 | ax1.set_xlabel("Time")
24 | ax1.set_ylabel("Amplitude")
25 | ax1.legend()
26 | 
27 | 
28 | spectrum = fft.fft(sample_y)
29 | 
30 | freq = fft.fftfreq(sample_size, sample_d)
31 | pos_freq_i = np.arange(1, sample_size//2, dtype=int)
32 | 
33 | psd = np.abs(spectrum[pos_freq_i])**2 + np.abs(spectrum[-pos_freq_i])**2
34 | 
35 | fig2, ax2 = plt.subplots()
36 | ax2.plot(freq[pos_freq_i], psd, "k")
37 | ax2.set_title("PSD of the noisy signal")
38 | ax2.set_xlabel("Frequency")
39 | ax2.set_ylabel("Density")
40 | 
41 | 
42 | filtered = pos_freq_i[psd > 2e3]
43 | 
44 | new_spec = np.zeros_like(spectrum)
45 | new_spec[filtered] = spectrum[filtered]
46 | new_spec[-filtered] = spectrum[-filtered]
47 | 
48 | new_sample = np.real(fft.ifft(new_spec))
49 | 
50 | fig3, ax3 = plt.subplots()
51 | ax3.plot(sample_t, true_signal, color="#8c8c8c", linewidth=1.5, label="True signal")
52 | ax3.plot(sample_t, new_sample, "k--", label="Filtered signal")
53 | ax3.legend()
54 | ax3.set_title("Plot comparing filtered signal and true signal")
55 | ax3.set_xlabel("Time")
56 | ax3.set_ylabel("Amplitude")
57 | 
58 | 
59 | plt.show()
60 | 
61 | 
62 | 
63 | 
64 | 


--------------------------------------------------------------------------------
/Chapter 03/numerical-integration.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | def erf_integrand(t):
 4 |     return np.exp(-t**2)
 5 | 
 6 | from scipy import integrate
 7 | 
 8 | val_quad, err_quad = integrate.quad(erf_integrand, -1.0, 1.0)
 9 | # (1.493648265624854, 1.6582826951881447e-14)
10 | 
11 | 
12 | val_quadr, err_quadr = integrate.quadrature(erf_integrand, -1.0, 1.0)
13 | # (1.4936482656450039, 7.459897144457273e-10)
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/Chapter 03/partial-differential-equations.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | from mpl_toolkits import mplot3d
 4 | 
 5 | alpha = 1
 6 | x0 = 0 # Left hand x limit
 7 | xL = 2 # Right hand x limit
 8 | 
 9 | N = 10
10 | x = np.linspace(x0, xL, N+1)
11 | h = (xL - x0) / N
12 | 
13 | k = 0.01
14 | steps = 100
15 | t = np.array([i*k for i in range(steps+1)])
16 | 
17 | r = alpha*k / h**2
18 | assert r < 0.5, f"Must have r < 0.5, currently r={r}"
19 | 
20 | from scipy import sparse
21 | diag = [1, *(1-2*r for _ in range(N-1)), 1]
22 | abv_diag = [0, *(r for _ in range(N-1))]
23 | blw_diag = [*(r for _ in range(N-1)), 0]
24 | 
25 | A = sparse.diags([blw_diag, diag, abv_diag], (-1, 0, 1), shape=(N+1, N+1), dtype=np.float64, format="csr")
26 | 
27 | u = np.zeros((steps+1, N+1), dtype=np.float64)
28 | 
def initial_profile(x):
    """Return 3*sin(pi*x/2), used as the first row of the solution array."""
    phase = np.pi*x/2
    return 3*np.sin(phase)
31 | 
32 | u[0, :] = initial_profile(x)
33 | 
34 | for i in range(steps):
35 |     u[i+1, :] = A @ u[i, :]
36 | 
37 | 
38 | X, T = np.meshgrid(x, t)
39 | fig = plt.figure()
40 | ax = fig.add_subplot(projection="3d")
41 | 
42 | ax.plot_surface(T, X, u, cmap="gray")
43 | ax.set_title("Solution of the heat equation")
44 | ax.set_xlabel("t")
45 | ax.set_ylabel("x")
46 | ax.set_zlabel("u")
47 | 
48 | 
49 | plt.show()
50 | 
51 | 


--------------------------------------------------------------------------------
/Chapter 03/polynomials-and-calculus.py:
--------------------------------------------------------------------------------
 1 | 
 2 | class Polynomial:
 3 |     """Basic polynomial class"""
 4 | 
 5 |     def __init__(self, coeffs):
 6 |         self.coeffs = coeffs
 7 | 
 8 |     def __repr__(self):
 9 |         return f"Polynomial({repr(self.coeffs)})"
10 | 
11 |     def __call__(self, x):
12 |         return sum(coeff*x**i for i, coeff in enumerate(self.coeffs))
13 | 
14 |     def differentiate(self):
15 |         """Differentiate the polynomial and return the derivative"""
16 |         coeffs = [i*c for i, c in enumerate(self.coeffs[1:], start=1)]
17 |         return Polynomial(coeffs)
18 | 
19 |     def integrate(self, constant=0):
20 |         """Integrate the polynomial and return the integral"""
21 |         coeffs = [float(constant)]
22 |         coeffs += [c/i for i, c in enumerate(self.coeffs, start=1)]
23 |         return Polynomial(coeffs)
24 | 
25 | 
26 | p = Polynomial([1, -2, 1])
27 | p.differentiate()
28 | # Polynomial([-2, 2])
29 | p.integrate(constant=1)
30 | # Polynomial([1.0, 1.0, -1.0, 0.333333333333])
31 | 


--------------------------------------------------------------------------------
/Chapter 03/solving-differential-equations-with-jax.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import jax.numpy as jnp
 3 | import diffrax
 4 | 
 5 | def f(x, y, args):
 6 |     u = y[..., 0]
 7 |     v = y[..., 1]
 8 |     return jnp.array([v, 3.*x**2*v-(1.-x)*u])
 9 | 
10 | term = diffrax.ODETerm(f)
11 | solver = diffrax.Dopri5()
12 | save_at = diffrax.SaveAt(ts=jnp.linspace(0., 1.))
13 | y0 = jnp.array([0., 1.])
14 | 
15 | solution = diffrax.diffeqsolve(term, solver, t0=0., t1=2., dt0=0.1, y0=y0, saveat=save_at)
16 | 
17 | x = solution.ts
18 | y = solution.ys[:, 0]
19 | 
20 | fig, ax = plt.subplots()
21 | ax.plot(x, y, "k")
22 | ax.set_title("Plot of the solution to the second order ODE")
23 | ax.set_xlabel("x")
24 | ax.set_ylabel("y")
25 | 
26 | 
27 | plt.show()
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/Chapter 03/solving-equations.py:
--------------------------------------------------------------------------------
 1 | from scipy import optimize
 2 | from math import exp
 3 | 
 4 | def f(x):
 5 |     return x*(x - 2)*exp(3 - x)
 6 | 
 7 | def fp(x):
 8 |     return -(x**2 - 4*x + 2)*exp(3 - x)
 9 | 
10 | 
11 | optimize.newton(f, 1., x1=1.5) # Using x1 = 1.5 and the secant method
12 | # 1.9999999999999862
13 | optimize.newton(f, 1., fprime=fp) # Using Newton-Raphson method
14 | # 2.0
15 | 
16 | 
17 | 


--------------------------------------------------------------------------------
/Chapter 03/solving-simple-differential-equations-numerically.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from scipy import integrate
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | def f(t, y):
 6 |     return -0.2*y
 7 | 
 8 | t_range = (0, 5)
 9 | 
10 | T0 = np.array([50.])
11 | 
def true_solution(t):
    """Return 50*exp(-0.2*t), the expression the numerical result is compared with."""
    decay = np.exp(-0.2*t)
    return 50.*decay
14 | 
15 | sol = integrate.solve_ivp(f, t_range, T0, max_step=0.1)
16 | 
17 | t_vals = sol.t
18 | T_vals = sol.y[0, :]
19 | 
20 | fig, (ax1, ax2) = plt.subplots(1, 2, tight_layout=True)
21 | 
22 | ax1.plot(t_vals, T_vals, "k")
23 | ax1.set_xlabel("$t$")
24 | ax1.set_ylabel("$T$")
25 | ax1.set_title("Solution of the cooling equation")
26 | 
27 | 
28 | err = np.abs(T_vals - true_solution(t_vals))
29 | ax2.semilogy(t_vals, err, "k")
30 | ax2.set_xlabel("$t$")
31 | ax2.set_ylabel("Error")
32 | ax2.set_title("Error in approximation")
33 | 
34 | plt.show()
35 | 


--------------------------------------------------------------------------------
/Chapter 03/solving-systems-of-differential-equations.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | def predator_prey_system(t, y):
 5 |     return np.array([5*y[0] - 0.1*y[0]*y[1], 0.1*y[1]*y[0] - 6*y[1]])
 6 | 
 7 | 
 8 | p = np.linspace(0, 100, 25)
 9 | w = np.linspace(0, 100, 25)
10 | P, W = np.meshgrid(p, w)
11 | 
12 | dp, dw = predator_prey_system(0, np.array([P, W]))
13 | 
14 | fig, ax = plt.subplots()
15 | ax.quiver(P, W, dp, dw)
16 | ax.set_title("Population dynamics for two competing species")
17 | ax.set_xlabel("P")
18 | ax.set_ylabel("W")
19 | 
20 | 
21 | initial_conditions = np.array([85, 40])
22 | 
23 | from scipy import integrate
24 | sol = integrate.solve_ivp(predator_prey_system, (0., 5.), initial_conditions, max_step=0.01)
25 | 
26 | ax.plot(initial_conditions[0], initial_conditions[1], "ko")
27 | ax.plot(sol.y[0, :], sol.y[1, :], "k", linewidth=0.5)
28 | 
29 | plt.show()
30 | 
31 | 


--------------------------------------------------------------------------------
/Chapter 03/symbolic-calculus-using-sympy.py:
--------------------------------------------------------------------------------
 1 | import sympy
 2 | 
 3 | x = sympy.symbols('x')
 4 | # Expression to differentiate
 5 | f = (x**2 - 2*x)*sympy.exp(3 - x)
 6 | # Differentiate (x is the only free symbol) and simplify the result
 7 | fp = sympy.simplify(sympy.diff(f)) # (x*(2 - x) + 2*x - 2)*exp(3 - x)
 8 | print(fp)
 9 | # Alternative closed form of the derivative, used as a cross-check
10 | fp2 = -(x**2 - 4*x + 2)*sympy.exp(3 - x)
11 | # The two forms agree when their difference simplifies to zero
12 | print(sympy.simplify(fp2 - fp) == 0)  # True
13 | 
14 | # Integrating the derivative with respect to x recovers the original expression
15 | F = sympy.integrate(fp, x)  
16 | print(F) # (x**2 - 2*x)*exp(3 - x)
17 | 


--------------------------------------------------------------------------------
/Chapter 04/analysing-conversion-rates-with-bayesian-techniques.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import scipy as sp
 3 | 
 4 | import matplotlib.pyplot as plt
 5 | 
 6 | from scipy.stats import beta as beta_dist
 7 | beta_pdf = beta_dist.pdf
 8 | 
 9 | # Parameters of the Beta prior over the success rate
10 | prior_alpha = 25
11 | prior_beta = 75
12 | # Prior probability that the success rate exceeds 0.33: integrate the pdf over [0.33, 1]
13 | args = (prior_alpha, prior_beta)
14 | prior_over_33, err = sp.integrate.quad(beta_pdf, 0.33, 1, args=args)
15 | print("Prior probability", prior_over_33)
16 | # 0.037830787030165056
17 | # Observed data
18 | observed_successes = 122
19 | observed_failures = 257
20 | # Posterior Beta parameters: prior parameters plus the observed counts
21 | posterior_alpha = prior_alpha + observed_successes
22 | posterior_beta = prior_beta + observed_failures
23 | # Posterior probability that the success rate exceeds 0.33
24 | args = (posterior_alpha, posterior_beta)
25 | posterior_over_33, err2 = sp.integrate.quad(beta_pdf, 0.33, 1, args=args)
26 | print("Posterior probability", posterior_over_33)
27 | # 0.13686193416281017
28 | # Evaluate both densities on a 500-point grid over [0, 1] for plotting
29 | p = np.linspace(0, 1, 500)
30 | prior_dist = beta_pdf(p, prior_alpha, prior_beta)
31 | posterior_dist = beta_pdf(p, posterior_alpha, posterior_beta)
32 | 
33 | fig, ax = plt.subplots()
34 | ax.plot(p, prior_dist, "k--", label="Prior")
35 | ax.plot(p, posterior_dist, "k", label="Posterior")
36 | ax.legend()
37 | ax.set_xlabel("Success rate")
38 | ax.set_ylabel("Density")
39 | ax.set_title("Prior and posterior distributions for success rate")
40 | 
41 | plt.show()
42 | 


--------------------------------------------------------------------------------
/Chapter 04/changing-the-random-number-generator.py:
--------------------------------------------------------------------------------
 1 | from numpy import random
 2 | 
 3 | seed_seq = random.SeedSequence()
 4 | # No entropy was supplied, so print the value that was chosen (example output below)
 5 | print(seed_seq.entropy)
 6 | # 9219863422733683567749127389169034574
 7 | # Seed an MT19937 bit generator from the seed sequence
 8 | bit_gen = random.MT19937(seed_seq)
 9 | # Wrap the bit generator in a Generator instance
10 | rng = random.Generator(bit_gen)
11 | 


--------------------------------------------------------------------------------
/Chapter 04/estimating-parameters-with-monte-carlo-simulations.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | from numpy.random import default_rng
 5 | rng = default_rng(12345)
 6 | 
 7 | import pymc as pm
 8 | 
 9 | def underlying(x, params):  # quadratic in x with coefficients params[0] (x**2), params[1] (x), params[2] (constant)
10 |     return params[0]*x**2 + params[1]*x + params[2]
11 | 
12 | size = 100
13 | true_params = [2, -7, 6]
14 | # Synthetic data: the quadratic evaluated on [-5, 5] plus normal noise (scale 10)
15 | x_vals = np.linspace(-5, 5, size)
16 | raw_model = underlying(x_vals, true_params)
17 | noise = rng.normal(loc=0.0, scale=10.0, size=size)
18 | sample = raw_model + noise
19 | # Plot the noisy sample against the noise-free model
20 | fig1, ax1 = plt.subplots()
21 | ax1.scatter(x_vals, sample, label="Sampled data", color="k", alpha=0.6)
22 | ax1.plot(x_vals, raw_model, "k--", label="Underlying model")
23 | ax1.set_title("Sampled data")
24 | ax1.set_xlabel("x")
25 | ax1.set_ylabel("y")
26 | # NOTE(review): labels are set above but ax1.legend() is never called, so no legend is drawn
27 | plt.show()
28 | # Probabilistic model: Normal(1, 1) priors on the three coefficients and a Normal likelihood
29 | with pm.Model() as model:
30 |     params = pm.Normal("params", mu=1, sigma=1, shape=3)
31 |     y = underlying(x_vals, params)
32 |     y_obs = pm.Normal("y_obs", mu=y, sigma=2, observed=sample)  # NOTE(review): sigma=2 here vs. scale=10.0 for the generated noise - confirm intended
33 |     trace = pm.sample(cores=4)
34 | 
35 | # Posterior plots, drawn onto the three axes created here
36 | fig2, axs2 = plt.subplots(1, 3, tight_layout=True)
37 | 
38 | pm.plot_posterior(trace, ax=axs2, color="k")
39 | 
40 | plt.show()
41 | # Point estimate: mean of the posterior samples over their first two axes
42 | estimated_params = trace.posterior["params"].mean(axis=(0, 1)).to_numpy()
43 | print("Estimated parameters", estimated_params)
44 | # Evaluate the quadratic with the estimated coefficients
45 | estimated = underlying(x_vals, estimated_params)
46 | # Compare the true and estimated curves
47 | fig3, ax3 = plt.subplots()
48 | ax3.plot(x_vals, raw_model, "k", label="True model")
49 | ax3.plot(x_vals, estimated, "k--", label="Estimated model")
50 | ax3.set_title("Plot of true and estimated models")
51 | ax3.set_xlabel("x")
52 | ax3.set_ylabel("y")
53 | ax3.legend()
54 | 
55 | 
56 | plt.show()
57 | 
58 | 
59 | 
60 | 
61 | 
62 | 
63 | 
64 | 


--------------------------------------------------------------------------------
/Chapter 04/generating-normally-distributed-random-numbers.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | from numpy.random import default_rng
 5 | rng = default_rng(12345)
 6 | 
 7 | mu = 5.0  # mean value
 8 | sigma = 3.0 # standard deviation
 9 | rands = rng.normal(loc=mu, scale=sigma, size=10000)
10 | 
11 | fig, ax = plt.subplots()
12 | ax.hist(rands, bins=20, color="k", alpha=0.6)
13 | ax.set_title("Histogram of normally distributed data")
14 | ax.set_xlabel("Value")
15 | ax.set_ylabel("Density")
16 | 
17 | def normal_dist_curve(x):
18 |     return 10000*np.exp(-0.5*((x-mu)/sigma)**2)/(sigma*np.sqrt(2*np.pi))
19 | 
20 | x_range = np.linspace(-5, 15)
21 | y = normal_dist_curve(x_range)
22 | ax.plot(x_range, y, "k--")
23 | 
24 | 
25 | plt.show()
26 | 


--------------------------------------------------------------------------------
/Chapter 04/generating-random-data.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | from numpy.random import default_rng
 5 | rng = default_rng(12345)  # fixed seed so the sample outputs below are reproducible
 6 | # 5 x 5 array of floats from rng.random (values in the half-open interval [0, 1))
 7 | random_floats = rng.random(size=(5, 5))
 8 | # array([[0.22733602, 0.31675834, 0.79736546, 0.67625467, 0.39110955],
 9 | #        [0.33281393, 0.59830875, 0.18673419, 0.67275604, 0.94180287],
10 | #        [0.24824571, 0.94888115, 0.66723745, 0.09589794, 0.44183967],
11 | #        [0.88647992, 0.6974535 , 0.32647286, 0.73392816, 0.22013496],
12 | #        [0.08159457, 0.1598956 , 0.34010018, 0.46519315, 0.26642103]])
13 | # 10 integers between 1 and 20; endpoint=True makes the upper bound inclusive
14 | random_ints = rng.integers(1, 20, endpoint=True, size=10)
15 | # array([12, 17, 10, 4, 1, 3, 2, 2, 3, 12])
16 | 
17 | 
18 | dist = rng.random(size=1000)
19 | 
20 | 
21 | fig, ax = plt.subplots()
22 | ax.hist(dist, color="k", alpha=0.6)
23 | ax.set_title("Histogram of random numbers")
24 | ax.set_xlabel("Value")
25 | ax.set_ylabel("Density")
26 | 
27 | plt.show()


--------------------------------------------------------------------------------
/Chapter 04/selecting-items-at-random.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | rng = np.random.default_rng(12345)
 4 | # Items to choose from, and the selection probability of each item
 5 | data = np.arange(15)
 6 | probabilities = np.array([0.3, 0.2, 0.1, 0.05, 0.05, 0.05, 0.05, 0.025,
 7 |                           0.025, 0.025, 0.025, 0.025, 0.025, 0.025, 0.025])
 8 | # Sanity check; rounding to 10 places absorbs floating-point error in the sum
 9 | assert round(sum(probabilities), 10) == 1.0, "Probabilities must sum to 1"
10 | # One weighted draw
11 | selected = rng.choice(data, p=probabilities, replace=True)
12 | # 0
13 | # A 5 x 5 array of weighted draws, with replacement
14 | selected_array = rng.choice(data, p=probabilities, replace=True, size=(5, 5))
15 | #array([[ 1, 6, 4, 1, 1],
16 | #       [ 2, 0, 4, 12, 0],
17 | #       [12, 4, 0, 1, 10],
18 | #       [ 4, 1, 5, 0, 0],
19 | #       [ 0, 1, 1, 0, 7]])
20 | 


--------------------------------------------------------------------------------
/Chapter 04/working-with-random-processes.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import numpy as np
 3 | 
 4 | from numpy.random import default_rng
 5 | rng = default_rng(12345)
 6 | 
 7 | rate = 4.0
 8 | inter_arrival_times = rng.exponential(scale=1./rate, size=50)
 9 | 
10 | arrivals = np.add.accumulate(inter_arrival_times)
11 | count = np.arange(50)
12 | 
13 | 
14 | fig1, ax1 = plt.subplots()
15 | # where="post": each count value is held from its arrival time until the next one
16 | ax1.step(arrivals, count, where="post", color="k")
17 | ax1.set_xlabel("Time")
18 | ax1.set_ylabel("Number of arrivals")
19 | ax1.set_title("Arrivals over time")
20 | 
21 | # Write the figure to arrivals.png (relative path)
22 | fig1.savefig("arrivals.png", dpi=300, bbox_inches="tight")
23 | 
24 | from scipy.special import factorial
25 | N = np.arange(15)
26 | # Poisson probability of `events` arrivals within `time`, for arrival rate `param`
27 | def probability(events, time=1, param=rate):  # the default for `param` is the value `rate` has when the function is defined
28 |     return ((param*time)**events/factorial(events))*np.exp(-param*time)
29 | 
30 | 
31 | fig2, ax2 = plt.subplots()
32 | ax2.plot(N, probability(N), "k", label="True distribution")
33 | ax2.set_xlabel("Number of arrivals in 1 time unit")
34 | ax2.set_ylabel("Probability")
35 | ax2.set_title("Probability distribution")
36 | # Estimate the rate from the data: reciprocal of the mean inter-arrival time
37 | estimated_scale = np.mean(inter_arrival_times)
38 | estimated_rate = 1.0/estimated_scale
39 | # Overlay the distribution implied by the estimated rate
40 | ax2.plot(N, probability(N, param=estimated_rate), "k--", label="Estimated distribution")
41 | ax2.legend()
42 | 
43 | 
44 | plt.show()
45 | 


--------------------------------------------------------------------------------
/Chapter 05/coloring-a-network.py:
--------------------------------------------------------------------------------
 1 | import networkx as nx
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | G = nx.complete_graph(3)  # start from the complete graph on nodes 0, 1, 2
 5 | G.add_nodes_from(range(3, 7))  # add nodes 3, 4, 5, 6
 6 | G.add_edges_from([
 7 |     (2, 3), (2, 4), (2, 6), (0, 3), (0, 6), (1, 6),
 8 |     (1, 5), (2, 5), (4, 5)
 9 | ])
10 | 
11 | # Draw the network with the circular layout
12 | fig, ax = plt.subplots()
13 | nx.draw_circular(G, ax=ax, with_labels=True)
14 | ax.set_title("Scheduling network")
15 | 
16 | plt.show()
17 | 
18 | # Greedy colouring; the result maps each node to a colour index
19 | coloring = nx.greedy_color(G)
20 | print("Coloring", coloring)
21 | # Coloring {2: 0, 0: 1, 1: 2, 5: 1, 6: 3, 3: 2, 4: 2}
22 | # The distinct colour indices that were used
23 | different_colors = set(coloring.values())
24 | print("Different colors", different_colors)
25 | # Different colors {0, 1, 2, 3}
26 | 


--------------------------------------------------------------------------------
/Chapter 05/creating-directed-and-weighted-networks.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import networkx as nx
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | G = nx.DiGraph()
 6 | 
 7 | G.add_nodes_from(range(5))
 8 | # Edges are added singly with a weight attribute, or in bulk as (source, target, weight) triples
 9 | G.add_edge(0, 1, weight=1.0)
10 | G.add_weighted_edges_from([
11 |     (1, 2, 0.5), (1, 3, 2.0), (2, 3, 0.3), (3, 2, 0.3),
12 |     (2, 4, 1.2), (3, 4, 0.8)
13 | ])
14 | 
15 | # Draw with explicitly chosen node coordinates
16 | fig, ax = plt.subplots()
17 | pos = {0: (-1, 0), 1: (0, 0), 2: (1, 1), 3: (1, -1), 4: (2, 0)}
18 | nx.draw(G, ax=ax, pos=pos, with_labels=True)
19 | ax.set_title("Weighted, directed network")
20 | 
21 | plt.show()
22 | 
23 | # Dense adjacency matrix: entry [i, j] holds the weight of the edge from i to j (0 where there is none)
24 | adj_mat = nx.adjacency_matrix(G).todense()
25 | print(adj_mat)
26 | # [[0. 1. 0.  0.  0. ]
27 | #  [0. 0. 0.5 2.  0. ]
28 | #  [0. 0. 0.  0.3 1.2]
29 | #  [0. 0. 0.3 0.  0.8]
30 | #  [0. 0. 0.  0.  0. ]]
31 | 


--------------------------------------------------------------------------------
/Chapter 05/creating-networks-in-python.py:
--------------------------------------------------------------------------------
 1 | import networkx as nx
 2 | 
 3 | G = nx.Graph()
 4 | # Nodes can be added one at a time ...
 5 | G.add_node(1)
 6 | G.add_node(2)
 7 | # ... or several at once from an iterable
 8 | G.add_nodes_from([3, 4, 5, 6])
 9 | # Likewise for edges: a single pair of end points, or an iterable of pairs
10 | G.add_edge(1, 2)
11 | G.add_edges_from([(2, 3), (3, 4), (3, 5), (3, 6), (4, 5), (5, 6)])
12 | 
13 | # Show the resulting node and edge collections
14 | print(G.nodes)
15 | print(G.edges)


--------------------------------------------------------------------------------
/Chapter 05/finding-minimum-spanning-trees-and-dominating-sets.py:
--------------------------------------------------------------------------------
 1 | import networkx as nx
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | G = nx.gnm_random_graph(15, 22, seed=12345)  # random graph with 15 nodes and 22 edges, seeded
 5 | 
 6 | # Draw the whole network with dashed edges; the layout is kept for the overlay below
 7 | fig, ax = plt.subplots()
 8 | pos = nx.circular_layout(G)
 9 | nx.draw(G, pos=pos, ax=ax, with_labels=True, style="--")
10 | ax.set_title("Network with minimum spanning tree overlaid")
11 | # Compute a minimum spanning tree of G
12 | min_span_tree = nx.minimum_spanning_tree(G)
13 | print(list(min_span_tree.edges))
14 | # [(0, 13), (0, 7), (0, 5), (1, 13), (1, 11),
15 | #   (2, 5), (2, 9), (2, 8), (2, 3), (2, 12),
16 | #   (3, 4), (4, 6), (5, 14), (8, 10)]
17 | # Redraw only the tree edges, thicker, at the same node positions
18 | nx.draw_networkx_edges(min_span_tree, pos=pos, ax=ax, width=2.)
19 | 
20 | # A dominating set of G
21 | dominating_set = nx.dominating_set(G)
22 | print("Dominating set", dominating_set)
23 | # Dominating set {0, 1, 2, 4, 10, 14}
24 | 
25 | 
26 | plt.show()
27 | 
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/Chapter 05/finding-shortest-paths.py:
--------------------------------------------------------------------------------
 1 | import networkx as nx
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | from numpy.random import default_rng
 5 | rng = default_rng(12345)
 6 | # Random graph with 10 nodes and 17 edges, seeded
 7 | G = nx.gnm_random_graph(10, 17, seed=12345)
 8 | 
 9 | fig, ax = plt.subplots()
10 | nx.draw_circular(G, ax=ax, with_labels=True)
11 | ax.set_title("Random network for shortest path finding")
12 | 
13 | plt.show()
14 | # Give every edge a random integer weight from 5 to 14 (the upper bound 15 is exclusive)
15 | for u, v in G.edges:
16 |     G.edges[u, v]["weight"] = rng.integers(5, 15)
17 | 
18 | # Path from node 7 to node 9 with the smallest total weight
19 | path = nx.shortest_path(G, 7, 9, weight="weight")
20 | print(path)
21 | # [7, 5, 2, 9]
22 | # Total weight along that path
23 | length = nx.shortest_path_length(G, 7, 9, weight="weight")
24 | print("Length", length)
25 | # Length 32
26 | 
27 | 


--------------------------------------------------------------------------------
/Chapter 05/generating-the-adjacency-matrix-for-a-network.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import networkx as nx
 3 | 
 4 | G = nx.dense_gnm_random_graph(5, 5, seed=12345)
 5 | # Dense adjacency matrix: entry [i, j] is 1 when nodes i and j are joined by an edge
 6 | matrix = nx.adjacency_matrix(G).todense()
 7 | print(matrix)
 8 | # [[0 0 1 0 0]
 9 | #  [0 0 1 1 0]
10 | #  [1 1 0 0 1]
11 | #  [0 1 0 0 1]
12 | #  [0 0 1 1 0]]
13 | # Fourth matrix power: entry [i, j] counts the walks of length 4 from i to j
14 | paths_len_4 = np.linalg.matrix_power(matrix, 4)
15 | print(paths_len_4)
16 | # [[ 3 5  0  0 5]
17 | #  [ 5 9  0  0 9]
18 | #  [ 0 0 13 10 0]
19 | #  [ 0 0 10  8 0]
20 | #  [ 5 9  0  0 9]]
21 | 


--------------------------------------------------------------------------------
/Chapter 05/getting-the-basic-characteristics-of-networks.py:
--------------------------------------------------------------------------------
 1 | import networkx as nx
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | G = nx.Graph()
 5 | G.add_nodes_from(range(10))
 6 | G.add_edges_from([
 7 |     (0, 1), (1, 2), (2, 3), (2, 4),
 8 |     (2, 5), (3, 4), (4, 5), (6, 7),
 9 |     (6, 8), (6, 9), (7, 8), (8, 9)
10 | ])
11 | 
12 | fig, ax = plt.subplots()
13 | nx.draw_circular(G, ax=ax, with_labels=True)
14 | ax.set_title("Simple network")
15 | 
16 | plt.show()
17 | # Print a short summary of the graph
18 | print(G)
19 | # Graph with 10 nodes and 12 edges
20 | # NOTE(review): the one-line summary above is what recent NetworkX releases print.
21 | # The report originally recorded here ("Name:", "Type: Graph", "Number of nodes: 10",
22 | # "Number of edges: 12", "Average degree: 2.4000") has the format of the older
23 | # nx.info(G) helper - confirm against the installed NetworkX version.
24 | # Degree = number of edges meeting a node
25 | for i in [0, 2, 7]:
26 |     degree = G.degree[i]
27 |     print(f"Degree of {i}: {degree}")
28 | # Degree of 0: 1
29 | # Degree of 2: 4
30 | # Degree of 7: 2
31 | # Connected components; no edge joins nodes 0-5 to nodes 6-9, so there are two
32 | components = list(nx.connected_components(G))
33 | print(components)
34 | 
35 | # Density: 12 edges out of the 45 possible between 10 nodes
36 | density = nx.density(G)
37 | print("Density", density)
38 | # Density 0.26666666666666666
39 | # check_planarity returns a pair; only the boolean flag is used here
40 | is_planar, _ = nx.check_planarity(G)
41 | print("Is planar", is_planar)
42 | # Is planar True
43 | 
44 | 


--------------------------------------------------------------------------------
/Chapter 05/quantifying-clustering-in-a-network.py:
--------------------------------------------------------------------------------
 1 | import networkx as nx
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | G = nx.Graph()
 5 | complete_part = nx.complete_graph(4)  # complete graph on nodes 0..3
 6 | cycle_part = nx.cycle_graph(range(4, 9))  # cycle through nodes 4..8
 7 | G.update(complete_part)
 8 | G.update(cycle_part)
 9 | G.add_edges_from([(0, 8), (3, 4)])  # two edges joining the two parts
10 | 
11 | fig, ax = plt.subplots()
12 | nx.draw_circular(G, ax=ax, with_labels=True)
13 | ax.set_title("Network with different clustering behavior")
14 | 
15 | plt.show()
16 | # Clustering coefficient of every node, looked up by node below
17 | cluster_coeffs = nx.clustering(G)
18 | 
19 | for i in [0, 2, 6]:
20 |     print(f"Node {i}, clustering {cluster_coeffs[i]}")
21 | # Node 0, clustering 0.5
22 | # Node 2, clustering 1.0
23 | # Node 6, clustering 0
24 | # Average clustering over the whole network
25 | av_clustering = nx.average_clustering(G)
26 | print(av_clustering)
27 | # 0.3333333333333333
28 | 


--------------------------------------------------------------------------------
/Chapter 05/visualising-networks.py:
--------------------------------------------------------------------------------
 1 | import networkx as nx
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | # Graph from "Creating networks" recipe
 5 | G = nx.Graph()
 6 | 
 7 | G.add_nodes_from(range(1, 7))
 8 | G.add_edges_from([
 9 |     (1, 2), (2, 3), (3, 4), (3, 5),
10 |     (3, 6), (4, 5), (5, 6)
11 | ])
12 | 
13 | fig, ax = plt.subplots()
14 | # Compute node positions with the shell layout
15 | layout = nx.shell_layout(G)
16 | # Draw onto the prepared axes using those positions
17 | nx.draw(G, ax=ax, pos=layout, with_labels=True)
18 | ax.set_title("Simple network drawing")
19 | 
20 | 
21 | plt.show()
22 | 


--------------------------------------------------------------------------------
/Chapter 06/creating-interactive-plots-with-Bokeh.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | from bokeh import plotting as bk
 4 | import matplotlib.pyplot as plt
 5 | 
 6 | from numpy.random import default_rng
 7 | rng = default_rng(12345)
 8 | # 50 dates starting 2020-01-01 and a random walk (cumulative sum of normal steps) indexed by them
 9 | date_range = pd.date_range("2020-01-01", periods=50)
10 | data = rng.normal(0, 3, size=50).cumsum()
11 | series = pd.Series(data, index=date_range)
12 | 
13 | # Direct the Bokeh output to sample.html
14 | bk.output_file("sample.html")
15 | # Figure with a datetime x-axis
16 | fig = bk.figure(title="Time series data",
17 |                 x_axis_label="date",
18 |                 x_axis_type="datetime",
19 |                 y_axis_label="value")
20 | # Plot the series as a line against its dates
21 | fig.line(date_range, series)
22 | 
23 | bk.show(fig)
24 | 


--------------------------------------------------------------------------------
/Chapter 06/creating-series-and-dataframes.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | 
 4 | from numpy.random import default_rng
 5 | rng = default_rng(12345)
 6 | # 100 normally distributed steps and their running total
 7 | diff_data = rng.normal(0, 1, size=100)
 8 | cumulative = diff_data.cumsum() 
 9 | # Wrap the array in a Series
10 | data_series = pd.Series(diff_data)
11 | print(data_series)
12 | # Build a DataFrame from a dict mixing a Series and an array; the keys become the column names
13 | data_frame = pd.DataFrame({
14 |     "diffs": data_series, 
15 |     "cumulative": cumulative 
16 | })
17 | 
18 | print(data_frame)
19 | 


--------------------------------------------------------------------------------
/Chapter 06/getting-descriptive-statistics-from-dataframes.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | from numpy.random import default_rng
 6 | rng = default_rng(12345)
 7 | # Three samples of 100 values each: uniform, normal, and two normal halves joined together
 8 | uniform = rng.uniform(1, 5, size=100)
 9 | normal = rng.normal(1, 2.5, size=100)
10 | bimodal = np.concatenate([rng.normal(0, 1, size=50), rng.normal(6, 1, size=50)])
11 | 
12 | df = pd.DataFrame({
13 |     "uniform": uniform,
14 |     "normal": normal,
15 |     "bimodal": bimodal
16 | })
17 | # One histogram per column, side by side
18 | fig, (ax1, ax2, ax3) = plt.subplots(1, 3, tight_layout=True)
19 | 
20 | df["uniform"].plot(kind="hist", title="Uniform", ax=ax1, color="k", alpha=0.6)
21 | df["normal"].plot(kind="hist", title="Normal", ax=ax2, color="k", alpha=0.6)
22 | df["bimodal"].plot(kind="hist", title="Bimodal", ax=ax3, bins=20, color="k", alpha=0.6)
23 | # describe() builds the summary table; kurtosis is appended as an extra row
24 | descriptive = df.describe()
25 | descriptive.loc["kurtosis"] = df.kurtosis()
26 | print(descriptive)
27 | # Look up each column's mean in the summary table
28 | uniform_mean = descriptive.loc["mean", "uniform"]
29 | normal_mean = descriptive.loc["mean", "normal"]
30 | bimodal_mean = descriptive.loc["mean", "bimodal"]
31 | # Mark each mean with a vertical line (the upper ends 20, 25 and 12 are hard-coded)
32 | ax1.vlines(uniform_mean, 0, 20, "k")
33 | ax2.vlines(normal_mean, 0, 25, "k")
34 | ax3.vlines(bimodal_mean, 0, 12,"k")
35 | 
36 | plt.show()
37 | 
38 | 
39 | 
40 | 
41 | 


--------------------------------------------------------------------------------
/Chapter 06/loading-and-storing-data-from-a-dataframe.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | from numpy.random import default_rng
 4 | rng = default_rng(12345)
 5 | 
 6 | # Sample data: 100 normal steps and their cumulative sum
 7 | diffs = rng.normal(0, 1, size=100)
 8 | cumulative = diffs.cumsum()
 9 | 
10 | data_frame = pd.DataFrame({
11 |     "diffs": diffs,
12 |     "cumulative": cumulative
13 | })
14 | print(data_frame)
15 | 
16 | # Write to sample.csv; index=False leaves the row index out of the file
17 | data_frame.to_csv("sample.csv", index=False)
18 | 
19 | # Read the file back; index_col=False stops pandas from using the first column as the index
20 | df = pd.read_csv("sample.csv", index_col=False)
21 | print(df)
22 | 


--------------------------------------------------------------------------------
/Chapter 06/manipulating-data-frames.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | from numpy.random import default_rng
 4 | rng = default_rng(12345)
 5 | three = rng.uniform(-0.2, 1.0, size=100)
 6 | three[three < 0] = np.nan  # replace the negative draws with NaN so the column has missing values
 7 | 
 8 | data_frame = pd.DataFrame({
 9 |     "one": rng.random(size=100),
10 |     "two": rng.normal(0, 1, size=100).cumsum(),
11 |     "three": three
12 | })
13 | # Boolean column derived from column "one"
14 | data_frame["four"] = data_frame["one"] > 0.5
15 | 
16 | def transform_function(row):  # row is indexed by the column names "one", "two" and "four"
17 |     if row["four"]:  # flag set: return half of "two"
18 |         return 0.5*row["two"]
19 |     return row["one"]*row["two"]  # flag not set: return "two" scaled by "one"
20 | 
21 | data_frame["five"] = data_frame.apply(transform_function, axis=1)  # axis=1 passes one row at a time to the function
22 | 
23 | print(data_frame)
24 | # Drop every row that contains a missing value
25 | df = data_frame.dropna()
26 | 
27 | print(df)
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/Chapter 06/performing-operations-on-grouped-data-in-a-dataframe.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | import pandas as pd
 4 | 
 5 | rng = np.random.default_rng(12345)
 6 | from matplotlib.rcsetup import cycler
 7 | plt.rc("axes", prop_cycle=cycler(c=["k"]*3, ls=["-", "--", "-."]))  # all-black lines told apart by line style
 8 | # Two random label columns plus normally distributed data, 50 rows
 9 | labels1 = rng.choice(["A", "B", "C"], size=50)
10 | labels2 = rng.choice([1, 2], size=50)
11 | data = rng.normal(0.0, 2.0, size=50)
12 | 
13 | df = pd.DataFrame({"label1": labels1, "label2": labels2, "data": data})
14 | # Running total of "data" within each label1 group
15 | df["first_group"] = df.groupby("label1")["data"].cumsum()
16 | print(df.head())
17 | 
18 | # Rolling mean over a window of 2 (at least 1 value) of "data" within each (label1, label2) group
19 | grouped = df.groupby(["label1", "label2"])
20 | df["second_group"] = grouped["data"].transform(
21 |     lambda d: d.rolling(2, min_periods=1).mean())
22 | 
23 | print(df.head())
24 | # Inspect the first rows of a single label1 group
25 | print(df[df["label1"] == "C"].head())
26 | 
27 | # Plot the running totals of every label1 group on shared axes
28 | fig, ax = plt.subplots()
29 | df.groupby("label1")["first_group"].plot(ax=ax)
30 | ax.set(title="Grouped data cumulative sums", xlabel="Index", ylabel="value")
31 | ax.legend()
32 | 
33 | 
34 | plt.show()
35 | 


--------------------------------------------------------------------------------
/Chapter 06/plotting-data-from-a-DataFrame.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | from numpy.random import default_rng
 5 | rng = default_rng(12345)
 6 | # Standard-normal steps and the random walk they accumulate to
 7 | diffs = rng.standard_normal(size=100)
 8 | walk = diffs.cumsum()
 9 | df = pd.DataFrame({
10 |     "diffs": diffs,
11 |     "walk": walk
12 | })
13 | # Two axes side by side: the walk on the first, the histogram of steps on the second
14 | fig, (ax1, ax2) = plt.subplots(1, 2, tight_layout=True)
15 | # Each column is plotted onto the axes passed via ax=
16 | df["walk"].plot(ax=ax1, title="Random walk", color="k")
17 | ax1.set_xlabel("Index")
18 | ax1.set_ylabel("Value")
19 | 
20 | df["diffs"].plot(kind="hist", ax=ax2, title="Histogram of diffs", color="k", alpha=0.6)
21 | ax2.set_xlabel("Difference")
22 | 
23 | 
24 | plt.show()
25 | 


--------------------------------------------------------------------------------
/Chapter 06/testing-hypotheses-for-non-parametric-data.py:
--------------------------------------------------------------------------------
 1 | from scipy import stats
 2 | from numpy.random import default_rng
 3 | rng = default_rng(12345)
 4 | 
 5 | # Three samples of 25 values drawn from uniform distributions on different ranges
 6 | sample_A = rng.uniform(2.5, 3.5, size=25)
 7 | sample_B = rng.uniform(3.0, 4.4, size=25)
 8 | sample_C = rng.uniform(3.1, 4.5, size=25)
 9 | 
10 | significance = 0.05
11 | # Kruskal-Wallis test across all three samples
12 | statistic, p_value = stats.kruskal(sample_A, sample_B, sample_C)
13 | print(f"Statistic: {statistic}, p value: {p_value}")
14 | # Statistic: 40.22214736842102, p value: 1.8444703308682906e-09
15 | 
16 | if p_value <= significance:
17 |     print("There are differences between population medians")
18 | else:
19 |     print("Accept H0: all medians equal")
20 | # There are differences between population medians
21 | # Pairwise rank-sum tests to see which samples differ; only the p values are kept
22 | _, p_A_B = stats.ranksums(sample_A, sample_B)
23 | _, p_A_C = stats.ranksums(sample_A, sample_C)
24 | _, p_B_C = stats.ranksums(sample_B, sample_C)
25 | 
26 | if p_A_B <= significance:
27 |     print("Significant differences between A and B, p value", p_A_B)
28 | # Significant differences between A and B, p value 1.0035366080480683e-07
29 | 
30 | if p_A_C <= significance:
31 |     print("Significant differences between A and C, p value", p_A_C)
32 | # Significant differences between A and C, p value 2.428534673701913e-08
33 | 
34 | if p_B_C <= significance:
35 |     print("Significant differences between B and C, p value", p_B_C)
36 | else:
37 |     print("No significant differences between B and C, p value", p_B_C)
38 | # No significant differences between B and C, p value 0.3271631660572756
39 | 


--------------------------------------------------------------------------------
/Chapter 06/testing-hypotheses-using-ANOVA.py:
--------------------------------------------------------------------------------
 1 | from scipy import stats
 2 | from numpy.random import default_rng
 3 | rng = default_rng(12345)
 4 | # Three normal samples with the same spread but different means and sizes
 5 | current = rng.normal(4.0, 2.0, size=40)
 6 | process_a = rng.normal(6.2, 2.0, size=25)
 7 | process_b = rng.normal(4.5, 2.0, size=64)
 8 | 
 9 | significance = 0.05
10 | # One-way ANOVA across the three samples
11 | F_stat, p_value = stats.f_oneway(current, process_a, process_b)
12 | 
13 | print(f"F stat: {F_stat}, p value: {p_value}")
14 | # F stat: 9.949052026027028, p value: 9.732322721019206e-05
15 | 
16 | if p_value <= significance:
17 |     print("Reject H0: there is a difference between means")
18 | else:
19 |     print("Accept H0: all means equal")
20 | # Reject H0: there is a difference between means
21 | 


--------------------------------------------------------------------------------
/Chapter 06/testing-hypotheses-using-t-tests.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from scipy import stats
 3 | 
 4 | sample = pd.Series([
 5 |     2.4, 2.4, 2.9, 2.6, 1.8, 2.7, 2.6, 2.4, 2.8, 2.4, 2.4,
 6 |     2.4, 2.7, 2.7, 2.3, 2.4, 2.4, 3.2, 2.2, 2.5, 2.1, 1.8,
 7 |     2.9, 2.5, 2.5, 3.2, 2. , 2.3, 3. , 1.5, 3.1, 2.5, 3.1,
 8 |     2.4, 3. , 2.5, 2.7, 2.1, 2.3, 2.2, 2.5, 2.6, 2.5, 2.8,
 9 |     2.5, 2.9, 2.1, 2.8, 2.1, 2.3
10 | ])
11 | # Hypothesised population mean and significance level
12 | mu0 = 2.0
13 | significance = 0.05
14 | # One-sample t-test of the sample against the hypothesised mean mu0
15 | t_statistic, p_value = stats.ttest_1samp(sample, mu0)
16 | 
17 | print(f"t stat: {t_statistic}, p value: {p_value}")
18 | # t stat: 9.752368720068665, p value: 4.596949515944238e-13
19 | 
20 | 
21 | if p_value <= significance:
22 |     print("Reject H0 in favour of H1: mu != 2.0")
23 | else:
24 |     print("Accept H0: mu = 2.0")
25 | # Reject H0 in favour of H1: mu != 2.0
26 | 


--------------------------------------------------------------------------------
/Chapter 06/understanding-a-population-using-sampling.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import math
 3 | from scipy import stats
 4 | import matplotlib.pyplot as plt
 5 | 
 6 | sample_data = pd.Series(
 7 |     [172.3, 171.3, 164.7, 162.9, 172.5, 176.3, 174.8, 171.9,
 8 |      176.8, 167.8, 164.5, 179.7, 157.8, 170.6, 189.9, 185. ,
 9 |      172.7, 165.5, 174.5, 171.5]
10 | )
11 | # Mean and standard deviation of the sample
12 | sample_mean = sample_data.mean()
13 | sample_std = sample_data.std()
14 | 
15 | print(f"Mean {sample_mean}, st. dev {sample_std}")
16 | # Mean 172.15, st. dev 7.473778724383846
17 | # Standard error of the mean: standard deviation over sqrt(sample size)
18 | N = sample_data.count()
19 | std_err = sample_std/math.sqrt(N)
20 | # Critical values of Student's t with N-1 degrees of freedom for two-sided 95% and 99% intervals
21 | cv_95, cv_99 = stats.t.ppf([0.975, 0.995], df=N-1)
22 | # Each interval is mean +/- critical value * standard error
23 | pm_95 = cv_95*std_err
24 | conf_interval_95 = [sample_mean - pm_95, sample_mean + pm_95]
25 | pm_99 = cv_99*std_err
26 | conf_interval_99 = [sample_mean - pm_99, sample_mean + pm_99]
27 | 
28 | print("95% confidence", conf_interval_95)
29 | # 95% confidence [168.65216388659374, 175.64783611340627]
30 | print("99% confidence", conf_interval_99)
31 | # 99% confidence [167.36884119608774, 176.93115880391227]
32 | 
33 | 


--------------------------------------------------------------------------------
/Chapter 07/classifying-using-logarithmic-regression.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | import matplotlib.pyplot as plt
 4 | from numpy.random import default_rng
 5 | rng = default_rng(12345)
 6 | from sklearn.linear_model import LogisticRegression
 7 | from sklearn.metrics import classification_report, roc_curve
 8 | 
 9 | # Training features: var1 and var3 each join two normal samples; var2 is uniform and not used in the label rule
10 | df = pd.DataFrame({
11 |     "var1": np.concatenate([rng.normal(3.0, 1.5, size=50), rng.normal(-4.0, 2.0, size=50)]),
12 |     "var2": rng.uniform(size=100),
13 |     "var3": np.concatenate([rng.normal(-2.0, 2.0, size=50), rng.normal(1.5, 0.8, size=50)])
14 | })
15 | 
16 | # True label: whether 4 + var1 - var3 is non-negative
17 | score = 4.0 + df["var1"] - df["var3"]
18 | Y = score >= 0
19 | # Scatter the two classes in the (var1, var3) plane
20 | fig1, ax1 = plt.subplots()
21 | ax1.plot(df.loc[Y, "var1"], df.loc[Y, "var3"], "ko", label="True data")
22 | ax1.plot(df.loc[~Y, "var1"], df.loc[~Y, "var3"], "kx", label="False data")
23 | ax1.legend()
24 | ax1.set_xlabel("var1")
25 | ax1.set_ylabel("var3")
26 | ax1.set_title("Scatter plot of var3 against var1")
27 | 
28 | plt.show()
29 | # Fit the classifier on all three columns
30 | model = LogisticRegression()
31 | model.fit(df, Y)
32 | 
33 | # Test set generated the same way with half as many rows
34 | test_df = pd.DataFrame({
35 |     "var1": np.concatenate([rng.normal(3.0, 1.5, size=25), rng.normal(-4.0, 2.0, size=25)]),
36 |     "var2": rng.uniform(size=50),
37 |     "var3": np.concatenate([rng.normal(-2.0, 2.0, size=25), rng.normal(1.5, 0.8, size=25)])
38 | })
39 | # True test labels from the same rule, then the model's predictions
40 | test_scores = 4.0 + test_df["var1"] - test_df["var3"]
41 | test_Y = test_scores >= 0
42 | 
43 | test_predicts = model.predict(test_df)
44 | # NOTE(review): no figure is created between the previous plt.show() and this one - looks like a leftover
45 | plt.show()
46 | 
47 | print(classification_report(test_Y, test_predicts))
48 | 


--------------------------------------------------------------------------------
/Chapter 07/forecasting-from-time-series-data-using-arima.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | import matplotlib.pyplot as plt
 4 | import statsmodels.api as sm
 5 | 
 6 | from tsdata import generate_sample_data
 7 | 
 8 | from matplotlib.rcsetup import cycler
 9 | plt.rc("axes", prop_cycle=cycler(c="k"))
10 | # generate_sample_data comes from the local tsdata module (not shown); its result is unpacked as (training, test) series
11 | sample_ts, test_ts = generate_sample_data(trend=0.2, undiff=True)
12 | # Plot the training series
13 | ts_fig, ts_ax = plt.subplots(tight_layout=True)
14 | sample_ts.plot(ax=ts_ax, label="Observed")
15 | ts_ax.set_title("Training time series data")
16 | ts_ax.set_xlabel("Date")
17 | ts_ax.set_ylabel("Value")
18 | # First differences of the training series, with missing values dropped
19 | diffs = sample_ts.diff().dropna()
20 | # ACF and PACF plots of the differenced series
21 | ap_fig, (acf_ax, pacf_ax) = plt.subplots(2, 1, tight_layout=True)
22 | sm.graphics.tsa.plot_acf(diffs, ax=acf_ax)
23 | sm.graphics.tsa.plot_pacf(diffs, ax=pacf_ax)
24 | acf_ax.set_ylabel("Value")
25 | acf_ax.set_xlabel("Lag")
26 | pacf_ax.set_xlabel("Lag")
27 | pacf_ax.set_ylabel("Value")
28 | 
29 | # Fit an ARIMA model of order (1, 1, 1) to the undifferenced training series
30 | model = sm.tsa.ARIMA(sample_ts, order=(1,1,1))
31 | fitted = model.fit()
32 | print(fitted.summary())
33 | # 50-step forecast; the summary frame supplies the "mean", "mean_ci_lower" and "mean_ci_upper" columns used below
34 | forecast = fitted.get_forecast(steps=50).summary_frame()
35 | print(forecast)
36 | # Add the forecast and the band between its confidence bounds to the first figure
37 | forecast["mean"].plot(ax=ts_ax, label="Forecast", ls="--")
38 | ts_ax.fill_between(forecast.index, forecast["mean_ci_lower"],
39 |                    forecast["mean_ci_upper"], alpha=0.4)
40 | 
41 | # Add the test series for comparison with the forecast
42 | test_ts.plot(ax=ts_ax, label="Actual", ls="-.")
43 | ts_ax.legend()
44 | 
45 | plt.show()
46 | 


--------------------------------------------------------------------------------
/Chapter 07/forecasting-seasonal-data-with-arima.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | import statsmodels.api as sm
 6 | 
 7 | from tsdata import generate_sample_data
 8 | 
 9 | from matplotlib.rcsetup import cycler
10 | plt.rc("axes", prop_cycle=cycler(c="k"))
11 | 
# Seasonal, undifferenced sample data split into training and held-out parts.
sample_ts, test_ts = generate_sample_data(undiff=True, seasonal=True)

# Figure 1: the observed series; forecasts are overlaid further down.
ts_fig, ts_ax = plt.subplots(tight_layout=True)
sample_ts.plot(ax=ts_ax, title="Time series", label="Observed")
ts_ax.set_xlabel("Date")
ts_ax.set_ylabel("Value")

# Figure 2: ACF/PACF of the raw series.
ap_fig, (acf_ax, pacf_ax) = plt.subplots(2, 1, tight_layout=True)
sm.graphics.tsa.plot_acf(sample_ts, ax=acf_ax)
sm.graphics.tsa.plot_pacf(sample_ts, ax=pacf_ax)
acf_ax.set_xlabel("Lag")
pacf_ax.set_xlabel("Lag")
acf_ax.set_ylabel("Value")
pacf_ax.set_ylabel("Value")

# Figure 3: ACF/PACF after taking first differences.
diffs = sample_ts.diff().dropna()
dap_fig, (dacf_ax, dpacf_ax) = plt.subplots(2, 1, tight_layout=True)
sm.graphics.tsa.plot_acf(diffs, ax=dacf_ax, title="Differenced ACF")
sm.graphics.tsa.plot_pacf(diffs, ax=dpacf_ax, title="Differenced PACF")
dacf_ax.set_xlabel("Lag")
dpacf_ax.set_xlabel("Lag")
dacf_ax.set_ylabel("Value")
dpacf_ax.set_ylabel("Value")

# SARIMAX with order (1, 1, 1) and a seasonal AR(1) term of period 7.
model = sm.tsa.SARIMAX(sample_ts, order=(1, 1, 1), seasonal_order=(1, 0, 0, 7))
fitted_seasonal = model.fit()
print(fitted_seasonal.summary())

forecast_result = fitted_seasonal.get_forecast(steps=50)
forecast = forecast_result.predicted_mean
# Use the forecast's own index for the confidence band instead of a
# hard-coded start date, so the band always lines up with the forecast line
# even if the length of the training data changes.
forecast_index = forecast.index

forecast.plot(ax=ts_ax, label="Forecasts", ls="--")
conf = forecast_result.conf_int()
ts_ax.fill_between(forecast_index, conf["lower y"], conf["upper y"], alpha=0.4)
test_ts.plot(ax=ts_ax, label="Actual future", ls="-.")
ts_ax.legend()

plt.show()
51 | 
52 | 


--------------------------------------------------------------------------------
/Chapter 07/modelling-time-series-data-with-arma.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import statsmodels.api as sm
 3 | 
 4 | from matplotlib.rcsetup import cycler
 5 | plt.rc("axes", prop_cycle=cycler(c="k"))
 6 | 
 7 | from tsdata import generate_sample_data
 8 | 
 9 | sample_ts, _ = generate_sample_data()
10 | 
11 | ts_fig, ts_ax = plt.subplots(tight_layout=True)
12 | sample_ts.plot(ax=ts_ax, label="Observed", ls="--", alpha=0.4)
13 | ts_ax.set_title("Time series data")
14 | ts_ax.set_xlabel("Date")
15 | ts_ax.set_ylabel("Value")
16 | 
17 | adf_results = sm.tsa.adfuller(sample_ts)
18 | adf_pvalue = adf_results[1]
19 | print("Augmented Dickey-Fuller test:\nP-value:", adf_pvalue)
20 | 
21 | ap_fig, (acf_ax, pacf_ax) = plt.subplots(2, 1, tight_layout=True)
22 | sm.graphics.tsa.plot_acf(sample_ts, ax=acf_ax, title="Observed autocorrelation")
23 | sm.graphics.tsa.plot_pacf(sample_ts, ax=pacf_ax, title="Observed partial autocorrelation")
24 | acf_ax.set_xlabel("Lags")
25 | pacf_ax.set_xlabel("Lags")
26 | pacf_ax.set_ylabel("Value")
27 | acf_ax.set_ylabel("Value")
28 | 
29 | arma_model = sm.tsa.ARIMA(sample_ts, order=(1, 0, 1))
30 | 
31 | arma_results = arma_model.fit()
32 | print(arma_results.summary())
33 | 
34 | residuals = arma_results.resid
35 | rap_fig, (racf_ax, rpacf_ax) = plt.subplots(2, 1, tight_layout=True)
36 | sm.graphics.tsa.plot_acf(residuals, ax=racf_ax, title="Residual autocorrelation")
37 | sm.graphics.tsa.plot_pacf(residuals, ax=rpacf_ax, title="Residual partial autocorrelation")
38 | racf_ax.set_xlabel("Lags")
39 | rpacf_ax.set_xlabel("Lags")
40 | rpacf_ax.set_ylabel("Value")
41 | racf_ax.set_ylabel("Value")
42 | 
43 | 
44 | fitted = arma_results.fittedvalues
45 | fitted.plot(ax=ts_ax, label="Fitted")
46 | ts_ax.legend()
47 | 
48 | 
49 | plt.show()
50 | 
51 | 
52 | 


--------------------------------------------------------------------------------
/Chapter 07/tsdata.py:
--------------------------------------------------------------------------------
  1 | from collections import deque
  2 | import numpy as np
  3 | import pandas as pd
  4 | import itertools
  5 | 
  6 | from numpy.random import default_rng
  7 | 
  8 | 
  9 | def _get_n(iterable, n):
 10 |     return list(itertools.islice(iterable, n))
 11 | 
 12 | def generate_ma(*coeffs, std=1.0, seed=12345):
 13 |     rng = default_rng(seed=seed)
 14 |     n = len(coeffs)
 15 |     past_terms = deque(maxlen=n)
 16 |     past_terms.extend([0.0]*n)
 17 | 
 18 |     coeffs = tuple(reversed(coeffs))
 19 | 
 20 |     while True:
 21 |         err = rng.normal(0, std)
 22 |         yield err + sum(c*e for c, e in zip(coeffs, past_terms))
 23 |         past_terms.append(err)
 24 | 
 25 | def generate_ar(*coeffs, const=0.0, start=0.0):
 26 |     n = len(coeffs)
 27 |     past_terms = deque(maxlen=n)
 28 |     past_terms.extend([0.0]*(n-1))
 29 |     past_terms.append(start)
 30 | 
 31 |     coeffs = tuple(reversed(coeffs))
 32 | 
 33 |     while True:
 34 |         curr = const + sum(c*t for c, t in zip(coeffs, past_terms))
 35 |         yield curr
 36 |         past_terms.append(curr)
 37 | 
 38 | 
 39 | def generate_arma(ar_coeffs=(0.9,), const=0.0, start=0.0,
 40 |                   ma_coeffs=(), noise_std=1.0, seed=None):
 41 |     n = len(ar_coeffs)
 42 |     past_terms = deque(maxlen=n)
 43 |     past_terms.extend([0.0]*(n-1))
 44 |     past_terms.append(start)
 45 | 
 46 |     coeffs = tuple(reversed(ar_coeffs))
 47 | 
 48 |     yield start
 49 | 
 50 |     ma_proc = generate_ma(*ma_coeffs, std=noise_std, seed=seed)
 51 | 
 52 |     for err in ma_proc:
 53 |         curr = const + err + sum(c*t for c, t in zip(coeffs, past_terms))
 54 |         yield curr
 55 |         past_terms.append(curr)
 56 | 
 57 | 
 58 | def undifference(iterable):
 59 |     tot = next(iterable)  # first term
 60 |     for cur in iterable:
 61 |         yield tot
 62 |         tot += cur
 63 | 
 64 | def add_season_ar(iterable, period=7, coeffs=(0.7,)):
 65 |     n = len(coeffs)
 66 |     coeffs = tuple(reversed(coeffs))
 67 |     N = n + period - 1
 68 |     past_vals = deque(maxlen=N)
 69 |     past_vals.extend([0.0]*N)
 70 | 
 71 |     for item in iterable:
 72 |         new = item + sum(coeffs[i]*past_vals[i] for i in range(n))
 73 |         yield new
 74 |         past_vals.append(new)
 75 | 
 76 | 
 77 | def generate_sample_data(train=366, test=50, trend=0.0, undiff=False, seasonal=False):
 78 |     gen = generate_arma(seed=12345, const=trend, ar_coeffs=(0.8,), ma_coeffs=(-0.5,))
 79 | 
 80 |     if seasonal:
 81 |         gen = add_season_ar(gen)
 82 | 
 83 |     if undiff:
 84 |         gen = undifference(gen)
 85 | 
 86 |     indices = pd.date_range("2020-01-01", periods=train+test)
 87 |     data = _get_n(gen, train+test)
 88 |     return (pd.Series(data[:-test], index=indices[:-test]),
 89 |             pd.Series(data[-test:], index=indices[-test:]))
 90 | 
 91 | 
 92 | 
 93 | if __name__ == "__main__":
 94 |     import matplotlib.pyplot as plt
 95 | 
 96 |     gen = generate_arma(seed=12345, ar_coeffs=(0.9,), ma_coeffs=(-0.5,))
 97 |     vals = _get_n(gen, 500)
 98 | 
 99 |     plt.plot(vals)
100 |     plt.show()
101 | 
102 | 
103 | 


--------------------------------------------------------------------------------
/Chapter 07/using-linear-regression.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import statsmodels.api as sm
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | from numpy.random import default_rng
 6 | rng = default_rng(12345)
 7 | 
 8 | x = np.linspace(0, 5, 25)
 9 | rng.shuffle(x)
10 | trend = 2.0
11 | shift = 5.0
12 | y1 = trend*x + shift + rng.normal(0, 0.5, size=25)
13 | y2 = trend*x + shift + rng.normal(0, 5, size=25)
14 | 
15 | fig, ax = plt.subplots()
16 | ax.scatter(x, y1, c="k", marker="x", label="Good correlation")
17 | ax.scatter(x, y2, c="k", marker="o", label="Bad correlation")
18 | ax.legend()
19 | ax.set_xlabel("X"),
20 | ax.set_ylabel("Y")
21 | ax.set_title("Scatter plot of data with best fit lines")
22 | 
23 | pred_x = sm.add_constant(x)
24 | 
25 | model1 = sm.OLS(y1, pred_x).fit()
26 | print(model1.summary())
27 | 
28 | model2 = sm.OLS(y2, pred_x).fit()
29 | print(model2.summary())
30 | 
31 | model_x = sm.add_constant(np.linspace(0, 5))
32 | 
33 | 
34 | model_y1 = model1.predict(model_x)
35 | model_y2 = model2.predict(model_x)
36 | 
37 | 
38 | ax.plot(model_x[:, 1], model_y1, 'k')
39 | ax.plot(model_x[:, 1], model_y2, 'k--')
40 | 
41 | 
42 | plt.show()
43 | 


--------------------------------------------------------------------------------
/Chapter 07/using-multilinear-regression.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | import statsmodels.api as sm
 4 | import matplotlib.pyplot as plt
 5 | 
 6 | from numpy.random import default_rng
 7 | rng = default_rng(12345)
 8 | 
 9 | 
# Predictor variables; "const" is a column of ones for the intercept.
N_SAMPLES = 100
p_vars = pd.DataFrame({
    "const": np.ones((N_SAMPLES,)),
    "X1": rng.uniform(0, 15, size=N_SAMPLES),
    "X2": rng.uniform(0, 25, size=N_SAMPLES),
    "X3": rng.uniform(5, 25, size=N_SAMPLES),
})

# The response depends on X1 and X2 only; X3 plays no part in it.
residuals = rng.normal(0.0, 12.0, size=N_SAMPLES)
Y = -10.0 + 5.0*p_vars["X1"] - 2.0*p_vars["X2"] + residuals

# One scatter plot of Y against each predictor, sharing the y axis.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True, tight_layout=True)
ax1.scatter(p_vars["X1"], Y, c="k")
ax2.scatter(p_vars["X2"], Y, c="k")
ax3.scatter(p_vars["X3"], Y, c="k")

ax1.set(title="Y against X1", xlabel="X1", ylabel="Y")
ax2.set(title="Y against X2", xlabel="X2")
ax3.set(title="Y against X3", xlabel="X3")

plt.show()

# First fit: all predictors.
model = sm.OLS(Y, p_vars).fit()
print(model.summary())

# Second fit: label-based slice keeps the columns "const" through "X2".
reduced_vars = p_vars.loc[:, "const":"X2"]
second_model = sm.OLS(Y, reduced_vars).fit()
print(second_model.summary())
40 | 


--------------------------------------------------------------------------------
/Chapter 07/using-prophet-to-model-time-series.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import matplotlib.pyplot as plt
 3 | from prophet import Prophet
 4 | 
 5 | from tsdata import generate_sample_data
 6 | 
 7 | sample_ts, test_ts = generate_sample_data(undiff=True, trend=0.2)
 8 | 
 9 | df_for_prophet = pd.DataFrame({
10 |     "ds": sample_ts.index,   # dates
11 |     "y": sample_ts.values    # values
12 | })
13 | 
14 | model = Prophet()
15 | model.fit(df_for_prophet)
16 | 
17 | forecast_df = model.make_future_dataframe(periods=50)
18 | 
19 | forecast = model.predict(forecast_df)
20 | 
21 | fig, ax = plt.subplots(tight_layout=True)
22 | sample_ts.plot(ax=ax, label="Observed", title="Forecasts", c="k")
23 | forecast.plot(x="ds", y="yhat", ax=ax, c="k", label="Predicted", ls="--")
24 | ax.fill_between(forecast["ds"].values, forecast["yhat_lower"].values,
25 |                 forecast["yhat_upper"].values, color="k", alpha=0.4)
26 | test_ts.plot(ax=ax, c="k", label="Future", ls="-.")
27 | ax.legend()
28 | ax.set_xlabel("Date")
29 | ax.set_ylabel("Value")
30 | 
31 | 
32 | plt.show()
33 | 


--------------------------------------------------------------------------------
/Chapter 07/using-signatures-to-summarize-time-series-data.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import esig
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | from numpy.random import default_rng
 6 | rng = default_rng(12345)
 7 | 
 8 | upper_limit = 2*np.pi
 9 | depth = 2
10 | noise_variance = 0.1 
11 | 
def make_noisy(signal):
    """Return ``signal`` with independent zero-mean Gaussian noise added.

    The noise has the same shape as ``signal`` and is drawn from the
    module-level ``rng``; ``noise_variance`` is passed as the scale argument
    of ``rng.normal``.
    """
    noise = rng.normal(0.0, noise_variance, size=signal.shape)
    return signal + noise
14 | 
15 | 
def signal_a(count):
    """Sample signal a at ``count`` randomly spaced parameter values.

    Parameter values are the cumulative sums of exponential gaps with mean
    ``upper_limit/count``. Returns ``(t, path)`` where ``path`` has one row
    per parameter value and columns ``t/(1+t)**2`` and ``1/(1+t)**2``.
    """
    gaps = rng.exponential(upper_limit/count, size=count)
    t = gaps.cumsum()
    x_component = t/(1.+t)**2
    y_component = 1./(1.+t)**2
    return t, np.column_stack([x_component, y_component])
19 | 
20 | 
def signal_b(count):
    """Sample signal b at ``count`` randomly spaced parameter values.

    Parameter values are the cumulative sums of exponential gaps with mean
    ``upper_limit/count``. Returns ``(t, path)`` where ``path`` has one row
    per parameter value and columns ``cos(t)`` and ``sin(t)``.
    """
    gaps = rng.exponential(upper_limit/count, size=count)
    t = gaps.cumsum()
    x_component = np.cos(t)
    y_component = np.sin(t)
    return t, np.column_stack([x_component, y_component])
24 | 
25 | 
# Noise-free reference samples of each signal.
params_a, true_signal_a = signal_a(100)
params_b, true_signal_b = signal_b(100)

fig, ((ax11, ax12), (ax21, ax22)) = plt.subplots(2, 2, tight_layout=True)

# Top row: components against the parameter. Bottom row: the planar paths.
ax11.plot(params_a, true_signal_a[:, 0], "k")
ax11.plot(params_a, true_signal_a[:, 1], "k--")
ax11.legend(["x", "y"])
ax12.plot(params_b, true_signal_b[:, 0], "k")
ax12.plot(params_b, true_signal_b[:, 1], "k--")
ax12.legend(["x", "y"])
ax21.plot(true_signal_a[:, 0], true_signal_a[:, 1], "k")
ax22.plot(true_signal_b[:, 0], true_signal_b[:, 1], "k")
ax11.set_title("Components of signal a")
ax11.set_xlabel("Parameter")
ax11.set_ylabel("Value")
ax12.set_title("Components of signal b")
ax12.set_xlabel("Parameter")
ax12.set_ylabel("Value")
ax21.set_title("Signal a")
ax21.set_xlabel("x")
ax21.set_ylabel("y")
ax22.set_title("Signal b")
ax22.set_xlabel("x")
ax22.set_ylabel("y")

plt.show()

# Use the shared ``depth`` so the reference signatures have the same length
# as the averaged ones they are compared against below.
signature_a = esig.stream2sig(true_signal_a, depth)
signature_b = esig.stream2sig(true_signal_b, depth)
print(signature_a, signature_b, sep="\n")


# 50 noisy samples of each signal, each with a random number of points
# (rng.integers(50, 100)), stacked one signature per row.
sigs_a = np.vstack([
    esig.stream2sig(make_noisy(signal_a(rng.integers(50, 100))[1]), depth)
    for _ in range(50)
])
sigs_b = np.vstack([
    esig.stream2sig(make_noisy(signal_b(rng.integers(50, 100))[1]), depth)
    for _ in range(50)
])

# Average over the samples to estimate the expected signature.
expected_sig_a = np.mean(sigs_a, axis=0)
expected_sig_b = np.mean(sigs_b, axis=0)
print(expected_sig_a, expected_sig_b, sep="\n")

# Term-by-term gap between the two expected signatures.
diff = np.abs(expected_sig_a - expected_sig_b)

print("Signal a", np.max(np.abs(expected_sig_a - signature_a)))
print("Signal b", np.max(np.abs(expected_sig_b - signature_b)))
print("Signal a vs signal b", np.max(diff))
71 | 
72 | 


--------------------------------------------------------------------------------
/Chapter 08/computing-convex-hulls.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib as mpl
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | from numpy.random import default_rng
 6 | rng = default_rng(12345)
 7 | 
 8 | from shapely.geometry import MultiPoint
 9 | 
# 50 random points in the square [-1, 1) x [-1, 1).
raw_points = rng.uniform(-1.0, 1.0, size=(50, 2))

fig, ax = plt.subplots()
xs, ys = raw_points[:, 0], raw_points[:, 1]
ax.plot(xs, ys, "kx")
ax.set_axis_off()

# Shapely computes the convex hull of the point set.
points = MultiPoint(raw_points)
convex_hull = points.convex_hull

# Draw the hull as a translucent grey polygon with a black edge.
hull_style = dict(ec=(0,0,0,1), fc=(0.5,0.5,0.5,0.4), lw=1.2)
patch = mpl.patches.Polygon(convex_hull.exterior.coords, **hull_style)
ax.add_patch(patch)

plt.show()
25 | 


--------------------------------------------------------------------------------
/Chapter 08/constructing-bezier-curves.py:
--------------------------------------------------------------------------------
 1 | from math import comb as binom
 2 | import matplotlib.pyplot as plt
 3 | import numpy as np
 4 | 
 5 | 
 6 | class Bezier:
 7 | 
 8 |     def __init__(self, *points):
 9 |         self.points = points
10 |         self.nodes = n = len(points) - 1
11 |         self.degree = l = points[0].size
12 | 
13 |         self.coeffs = [binom(n, i)*p.reshape((l, 1)) for i, p in enumerate(points)]
14 | 
15 |     def __call__(self, t):
16 |         n = self.nodes
17 |         t = t.reshape((1, t.size))
18 |         vals = [c @ (t**i)*(1-t)**(n-i) for i, c in enumerate(self.coeffs)]
19 |         return np.sum(vals, axis=0)
20 | 
21 | 
# Control points of the example curve.
p1 = np.array([0.0, 0.0])
p2 = np.array([0.0, 1.0])
p3 = np.array([1.0, 1.0])
p4 = np.array([1.0, 3.0])

fig, ax = plt.subplots()
# Dashed polygon through the control points, marked with stars.
control_x = [0.0, 0.0, 1.0, 1.0]
control_y = [0.0, 1.0, 1.0, 3.0]
ax.plot(control_x, control_y, "*--k")
ax.set(xlabel="x", ylabel="y", title="Bezier curve with 4 nodes, degree 3")

b_curve = Bezier(p1, p2, p3, p4)

# Evaluate along [0, 1]; row 0 of the result is x, row 1 is y.
t = np.linspace(0, 1)
v = b_curve(t)

ax.plot(v[0,:], v[1, :], "k")
plt.show()


--------------------------------------------------------------------------------
/Chapter 08/finding-edges-in-images.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | from skimage.io import imread
 3 | from skimage.feature import canny
 4 | 
 5 | image = imread("mandelbrot.png", as_gray=True)
 6 | 
 7 | edges = canny(image, sigma=0.5)
 8 | 
 9 | fig, ax = plt.subplots()
10 | ax.imshow(edges, cmap="gray_r")
11 | ax.set_axis_off()
12 | 
13 | 
14 | plt.show()


--------------------------------------------------------------------------------
/Chapter 08/finding-interior-points.py:
--------------------------------------------------------------------------------
 1 | import matplotlib as mpl
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | from shapely.geometry import Polygon, Point
 5 | 
 6 | polygon = Polygon(
 7 |     [(0, 2), (-1, 1), (-0.5, -1), (0.5, -1), (1, 1)],
 8 | )
 9 | 
10 | fig, ax = plt.subplots()
11 | poly_patch = mpl.patches.Polygon(polygon.exterior.coords, ec=(0,0,0,1), fc=(0.5,0.5,0.5,0.4))
12 | ax.add_patch(poly_patch)
13 | ax.set(xlim=(-1.05, 1.05), ylim=(-1.05, 2.05))
14 | ax.set_axis_off()
15 | 
16 | p1 = Point(0.0, 0.0)
17 | p2 = Point(-1.0, -0.75)
18 | 
19 | ax.plot(0.0, 0.0, "k*")
20 | ax.annotate("p1", (0.0, 0.0), (0.05, 0.0))
21 | ax.plot(-0.8, -0.75, "k*")
22 | ax.annotate("p2", (-0.8, -0.75), (-0.8 + 0.05, -0.75))
23 | 
24 | plt.show()
25 | 
26 | print("p1 inside polygon?", polygon.contains(p1))
27 | print("p2 inside polygon?", polygon.contains(p2))
28 | 


--------------------------------------------------------------------------------
/Chapter 08/mandelbrot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Applying-Math-with-Python-2nd-Edition/76a99ce637d8f97390682ff72e64b8e7146280f2/Chapter 08/mandelbrot.png


--------------------------------------------------------------------------------
/Chapter 08/triangulating-polygonal-regions.py:
--------------------------------------------------------------------------------
 1 | import matplotlib as mpl
 2 | import matplotlib.pyplot as plt
 3 | import numpy as np
 4 | 
 5 | from shapely.geometry import Polygon
 6 | from shapely.ops import triangulate
 7 | 
 8 | polygon = Polygon(
 9 |     [(2.0, 1.0), (2.0, 1.5), (-4.0, 1.5), (-4.0, 0.5), (-3.0, -1.5),
10 |      (0.0, -1.5), (1.0, -2.0), (1.0, -0.5), (0.0, -1.0), (-0.5, -1.0),
11 |      (-0.5, 1.0)],
12 |     holes=[np.array([[-1.5, -0.5], [-1.5, 0.5], [-2.5, 0.5], [-2.5, -0.5]])]
13 | )
14 | 
15 | fig, ax = plt.subplots()
16 | plt_poly = mpl.patches.Polygon(polygon.exterior.coords, ec=(0,0,0,1), fc=(0.5,0.5,0.5,0.4), zorder=0)
17 | ax.add_patch(plt_poly)
18 | plt_hole = mpl.patches.Polygon(polygon.interiors[0].coords, ec="k", fc="w")
19 | ax.add_patch(plt_hole)
20 | ax.set(xlim=(-4.05, 2.05), ylim=(-2.05, 1.55))
21 | ax.set_axis_off()
22 | 
23 | 
24 | triangles = triangulate(polygon)
25 | 
26 | filtered = filter(lambda p: polygon.contains(p), triangles)
27 | 
28 | patches = map(lambda p: mpl.patches.Polygon(p.exterior.coords), filtered)
29 | col = mpl.collections.PatchCollection(patches, fc="none", ec="k")
30 | 
31 | ax.add_collection(col)
32 | 
33 | plt.show()
34 | 


--------------------------------------------------------------------------------
/Chapter 08/visualizing-two-dimensional-geometric-figures.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | from matplotlib.patches import Circle
 4 | from matplotlib.collections import PatchCollection
 5 | 
 6 | data = np.loadtxt("swisscheese-grid-10411.csv")
 7 | 
 8 | fig, ax = plt.subplots()
 9 | 
10 | outer = Circle((0.0, 0.0), 1.0, zorder=0, fc="k")
11 | ax.add_patch(outer)
12 | 
13 | 
14 | col = PatchCollection(
15 |     (Circle((x, y), r) for x, y, r in data),
16 |     facecolor="white", zorder=1, linewidth=0.2,
17 |     ls="-", ec="k"
18 | )
19 | ax.add_collection(col)
20 | 
21 | ax.set_xlim((-1.1, 1.1))
22 | ax.set_ylim((-1.1, 1.1))
23 | ax.set_axis_off()
24 | 
25 | 
26 | plt.show()


--------------------------------------------------------------------------------
/Chapter 09/analyzing-simple-two-player-games.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import nashpy as nash
 3 | 
 4 | you = np.array([[1, 3], [1, 4]])
 5 | colleague = np.array([[3, 2], [2, 2]])
 6 | dilemma = nash.Game(you, colleague)
 7 | 
 8 | 
 9 | print(dilemma[[1, 0], [1, 0]])  # [1 3]
10 | print(dilemma[[1, 0], [0, 1]])  # [3 2]
11 | print(dilemma[[0, 1], [1, 0]])  # [1 2]
12 | print(dilemma[[0, 1], [0, 1]])  # [4 2]
13 | 
14 | 
15 | print(dilemma[[0.1, 0.9], [0.5, 0.5]])  # [2.45 2.05]


--------------------------------------------------------------------------------
/Chapter 09/computing-nash-equilibria.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import nashpy as nash
 3 | 
 4 | rps_p1 = np.array([
 5 |     [ 0, -1,  1],  # rock payoff
 6 |     [ 1,  0, -1],  # papper payoff
 7 |     [-1,  1,  0]   # scissors payoff
 8 | ])
 9 | 
10 | rps_p2 = rps_p1.transpose()
11 | 
12 | rock_paper_scissors = nash.Game(rps_p1, rps_p2)
13 | 
14 | equilibria = rock_paper_scissors.support_enumeration()
15 | 
16 | for p1, p2 in equilibria:
17 |     print("Player 1", p1)
18 |     print("Player 2", p2)


--------------------------------------------------------------------------------
/Chapter 09/minimising-a-non-linear-system.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | from mpl_toolkits.mplot3d import Axes3D
 4 | from scipy import optimize
 5 | 
 6 | 
 7 | def func(x):
 8 |     return ((x[0] - 0.5)**2 + (x[1] + 0.5)**2)*np.cos(0.5*x[0]*x[1])
 9 | 
# Grid over [-1, 1] x [-2, 2] on which the objective is plotted.
x_r = np.linspace(-1, 1)
y_r = np.linspace(-2, 2)
x, y = np.meshgrid(x_r, y_r)
z = func([x, y])

fig = plt.figure(tight_layout=True)
ax = fig.add_subplot(projection="3d")
ax.tick_params(axis="both", which="major", labelsize=9)
ax.set(xlabel="x", ylabel="y", zlabel="z")
ax.set_title("Objective function")
ax.plot_surface(x, y, z, cmap="gray", vmax=8.0, alpha=0.5)

# Starting point for the search, marked with a star.
x0 = np.array([-0.5, 1.0])
ax.plot([x0[0]], [x0[1]], func(x0), "k*")

# Nelder-Mead minimisation starting from x0.
result = optimize.minimize(func, x0, method="Nelder-Mead", tol=1e-6)
print(result)

# Mark the minimum that was found with a cross.
found_x, found_y = result.x[0], result.x[1]
ax.plot([found_x], [found_y], [result.fun], "kx")

plt.show()
36 | 


--------------------------------------------------------------------------------
/Chapter 09/minimising-simple-linear-systems.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from scipy import optimize
 3 | import matplotlib.pyplot as plt
 4 | from mpl_toolkits.mplot3d import Axes3D
 5 | 
 6 | 
 7 | A = np.array([
 8 |     [2, 1],     # 2*x0 + x1 <= 6
 9 |     [-1, -1]    # -x0 - x1 <= -4
10 | ])
11 | b = np.array([6, -4])
12 | 
13 | 
14 | x0_bounds = (-3, 14)  # -3 <= x0 <= 14
15 | x1_bounds = (2, 12)   #  2 <= x1 <= 12
16 | 
17 | c = np.array([1, 5])
18 | 
19 | 
def func(x):
    """Evaluate the linear objective ``c[0]*x[0] + c[1]*x[1]``.

    ``x`` holds the two variables along its first axis; each may be a
    scalar or an array, and the contraction with the module-level ``c`` is
    over that first axis only.
    """
    objective = np.tensordot(c, x, axes=1)
    return objective
22 | 
23 | 
fig = plt.figure()
ax = fig.add_subplot(projection="3d")
ax.set(xlabel="x0", ylabel="x1", zlabel="func")
ax.set_title("Values in feasible region")

# Objective values over the whole bounding box.
X0 = np.linspace(*x0_bounds)
X1 = np.linspace(*x1_bounds)
x0, x1 = np.meshgrid(X0, X1)
z = func([x0, x1])

ax.plot_surface(x0, x1, z, cmap="gray", vmax=100.0, alpha=0.3)

# For each constraint row, draw the boundary line a0*x0 + a1*x1 = rhs,
# restricted to the part that lies within the x1 bounds.
for (a0, a1), rhs in zip(A, b):
    Y = (rhs - a0*X0) / a1
    I = np.logical_and(Y >= x1_bounds[0], Y <= x1_bounds[1])
    ax.plot(X0[I], Y[I], func([X0[I], Y[I]]), "k", lw=1.5, alpha=0.6)

# Shade the grid points that satisfy both inequality constraints.
B = np.tensordot(A, np.array([x0, x1]), axes=1)
II = np.logical_and(B[0, ...] <= b[0], B[1, ...] <= b[1])
ax.plot_trisurf(x0[II], x1[II], z[II], color="k", alpha=0.5)

# Solve the linear programme and mark the optimum with a cross.
res = optimize.linprog(c, A_ub=A, b_ub=b, bounds=(x0_bounds, x1_bounds))
print(res)

ax.plot([res.x[0]], [res.x[1]], [res.fun], "kx")

plt.show()
58 | 


--------------------------------------------------------------------------------
/Chapter 09/using-gradient-descent-methods.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | from mpl_toolkits.mplot3d import Axes3D
 4 | 
 5 | def descend(func, x0, grad, bounds, tol=1e-8, max_iter=100):
 6 |     xn = x0
 7 |     previous = np.inf
 8 |     grad_xn = grad(x0)
 9 | 
10 |     for i in range(max_iter):
11 | 
12 |         if np.linalg.norm(xn - previous) < tol:
13 |             break
14 | 
15 |         direction = -grad_xn
16 | 
17 |         previous = xn
18 |         xn = xn + 0.2*direction
19 |         grad_xn = grad(xn)
20 |         yield i, xn, func(xn), grad_xn
21 | 
22 | 
def func(x):
    """Objective function of two variables, given as ``x[0]`` and ``x[1]``.

    Returns ``((x0 - 0.5)**2 + (x1 + 0.5)**2) * cos(0.5*x0*x1)``. The
    components may be scalars or arrays of a common shape.
    """
    bowl = (x[0] - 0.5)**2 + (x[1] + 0.5)**2
    ripple = np.cos(0.5*x[0]*x[1])
    return bowl*ripple
25 | 
26 | 
# Grid over [-1, 1] x [-2, 2] on which the objective is evaluated.
x_r = np.linspace(-1, 1)
y_r = np.linspace(-2, 2)
x, y = np.meshgrid(x_r, y_r)
z = func([x, y])

# Surface plot of the objective function.
surf_fig = plt.figure(tight_layout=True)
surf_ax = surf_fig.add_subplot(projection="3d")
surf_ax.tick_params(axis="both", which="major", labelsize=9)
surf_ax.set(xlabel="x", ylabel="y", zlabel="z")
surf_ax.set_title("Objective function")
surf_ax.plot_surface(x, y, z, cmap="gray", vmax=8.0, alpha=0.5)

# Starting point of the descent, marked with a star on the surface.
x0 = np.array([-0.8, 1.3])
surf_ax.plot([x0[0]], [x0[1]], func(x0), "k*")
46 | 
def grad(x):
    """Gradient of ``((x0 - 0.5)**2 + (x1 + 0.5)**2) * cos(0.5*x0*x1)``.

    Returns an array whose first entry is the partial derivative with
    respect to ``x[0]`` and whose second is that with respect to ``x[1]``.
    """
    # Expanded form of (x0 - 0.5)**2 + (x1 + 0.5)**2.
    c1 = x[0]**2 - x[0] + x[1]**2 + x[1] + 0.5
    half_product = 0.5*x[0]*x[1]
    cos_t = np.cos(half_product)
    sin_t = np.sin(half_product)
    # Product rule: derivative of the quadratic factor times the cosine,
    # minus the quadratic factor times the derivative of the cosine argument.
    d_dx0 = (2*x[0]-1)*cos_t - 0.5*x[1]*c1*sin_t
    d_dx1 = (2*x[1]+1)*cos_t - 0.5*x[0]*c1*sin_t
    return np.array([d_dx0, d_dx1])
55 | 
56 | 
cont_fig, cont_ax = plt.subplots()
cont_ax.set(xlabel="x", ylabel="y")
cont_ax.set_title("Contour plot with iterates")
# Transparency is set with ``alpha``; ``opacity`` is not a contour keyword.
cont_ax.contour(x, y, z, levels=25, cmap="gray", vmax=8.0, alpha=0.6)

# Passed through to descend(), which accepts but does not use it.
bounds = ((-1, 1), (-2, 2))

# Join each iterate to the previous one with a dashed, starred segment.
xnm1 = x0
for i, xn, fxn, grad_xn in descend(func, x0, grad, bounds):
    cont_ax.plot([xnm1[0], xn[0]], [xnm1[1], xn[1]], "k*--")
    xnm1 = xn

# Report the last iterate produced by the descent.
print(f"iterations={i}")
print(f"min val at {xn}")
print(f"min func value = {fxn}")


plt.show()
75 | 


--------------------------------------------------------------------------------
/Chapter 09/using-least-squares-to-fit-a-curve-to-data.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | from numpy.random import default_rng
 5 | rng = default_rng(12345)
 6 | 
 7 | from scipy.optimize import curve_fit
 8 | 
 9 | 
# Number of sample points.
SIZE = 100
x_data = rng.uniform(-3.0, 3.0, size=SIZE)
noise = rng.normal(0.0, 0.8, size=SIZE)

# Noisy samples of the quadratic 2*x**2 - 4*x.
y_data = 2.0*x_data**2 - 4*x_data + noise

fig, ax = plt.subplots()
ax.scatter(x_data, y_data, color="k", marker="x", alpha=0.5)
ax.set(title="Scatter plot of sample data", xlabel="x", ylabel="y")
19 | 
20 | 
def func(x, a, b, c):
    """Evaluate the quadratic ``a*x**2 + b*x + c`` at ``x``."""
    quadratic_term = a*x**2
    linear_term = b*x
    return quadratic_term + linear_term + c
23 | 
# Fit a, b, c of the quadratic model; the second return value is discarded.
coeffs, _ = curve_fit(func, x_data, y_data)
print(coeffs)
# [ 1.99611157 -3.97522213  0.04546998]

# Draw the fitted curve over the sampled range.
x = np.linspace(-3.0, 3.0, SIZE)
fit_a, fit_b, fit_c = coeffs[0], coeffs[1], coeffs[2]
y = func(x, fit_a, fit_b, fit_c)
ax.plot(x, y, "k")

plt.show()
34 | 


--------------------------------------------------------------------------------
/Chapter 10/accelerating-code-with-cython/mandelbrot/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Applying-Math-with-Python-2nd-Edition/76a99ce637d8f97390682ff72e64b8e7146280f2/Chapter 10/accelerating-code-with-cython/mandelbrot/__init__.py


--------------------------------------------------------------------------------
/Chapter 10/accelerating-code-with-cython/mandelbrot/python_mandel.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | def in_mandel(cx, cy, max_iter):
 4 |     x = cx
 5 |     y = cy
 6 |     for i in range(max_iter):
 7 |         x2 = x**2
 8 |         y2 = y**2
 9 |         if (x2 + y2) >= 4:
10 |             return i
11 |         y = 2.0*x*y + cy
12 |         x = x2 - y2 + cx
13 |     return max_iter
14 | 
def compute_mandel(N_x, N_y, N_iter):
    """Compute escape iterations on a grid over [-2.5, 0.5] x [-1.2, 1.2].

    Returns an int64 array of shape ``(N_x, N_y)`` whose ``[i, j]`` entry is
    ``in_mandel`` evaluated at the i-th x value and j-th y value with at
    most ``N_iter`` iterations.
    """
    x_bounds = (-2.5, 0.5)
    y_bounds = (-1.2, 1.2)
    x_vals = np.linspace(x_bounds[0], x_bounds[1], N_x, dtype=np.float64)
    y_vals = np.linspace(y_bounds[0], y_bounds[1], N_y, dtype=np.float64)

    height = np.empty((N_x, N_y), dtype=np.int64)
    for i, cx in enumerate(x_vals):
        for j, cy in enumerate(y_vals):
            height[i, j] = in_mandel(cx, cy, N_iter)
    return height


--------------------------------------------------------------------------------
/Chapter 10/accelerating-code-with-cython/mandelbrot/setup.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from setuptools import setup, Extension
 3 | from Cython.Build import cythonize
 4 | 
 5 | hybrid = Extension(
 6 |     "hybrid_mandel",
 7 |     sources=["python_mandel.py"],
 8 |     include_dirs=[np.get_include()],
 9 |     define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")]
10 | )
11 | 
12 | cython = Extension(
13 |     "cython_mandel",
14 |     sources=["cython_mandel.pyx"],
15 |     include_dirs=[np.get_include()],
16 |     define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")]
17 | )
18 | 
19 | extensions = [hybrid, cython]
20 | 
21 | setup(
22 |     ext_modules = cythonize(extensions, compiler_directives={"language_level": "3"}),
23 | )


--------------------------------------------------------------------------------
/Chapter 10/accelerating-code-with-cython/run.py:
--------------------------------------------------------------------------------
 1 | from time import time
 2 | from functools import wraps
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | from mandelbrot.python_mandel import compute_mandel as compute_mandel_py
 6 | from mandelbrot.hybrid_mandel import compute_mandel as compute_mandel_hy
 7 | from mandelbrot.cython_mandel import compute_mandel as compute_mandel_cy
 8 | 
 9 | def timer(func, name):
10 |     @wraps(func)
11 |     def wrapper(*args, **kwargs):
12 |         t_start = time()
13 |         val = func(*args, **kwargs)
14 |         t_end = time()
15 |         print(f"Time taken for {name}: {t_end - t_start}")
16 |         return val
17 |     return wrapper
18 | 
# Timed versions of the three implementations.
mandel_py = timer(compute_mandel_py, "Python")
mandel_hy = timer(compute_mandel_hy, "Hybrid")
mandel_cy = timer(compute_mandel_cy, "Cython")

# Grid size and iteration limit shared by all three runs.
Nx, Ny, steps = 320, 240, 255
run_args = (Nx, Ny, steps)

# The first two runs are for timing only; the last result is plotted.
mandel_py(*run_args)
mandel_hy(*run_args)
vals = mandel_cy(*run_args)

fig, ax = plt.subplots()
# Transposed so the first array axis (x) runs horizontally.
plot_extent = (-2.5, 0.5, -1.2, 1.2)
ax.imshow(vals.T, extent=plot_extent, cmap="Greys")

plt.show()
35 | 


--------------------------------------------------------------------------------
/Chapter 10/accouting-for-uncertainty-in-calculations.py:
--------------------------------------------------------------------------------
 1 | from uncertainties import ufloat, umath
 2 | 
 3 | seconds = ufloat(3.0, 0.4)
 4 | print(seconds)  # 3.0+/-0.4
 5 | 
 6 | 
 7 | depth = 0.5*9.81*seconds*seconds
 8 | print(depth)  # 44+/-12
 9 | 
10 | other_depth = ufloat(44, 12)
11 | time = umath.sqrt(2.0*other_depth/9.81)
12 | print("Estimated time", time)
13 | # Estimated time 3.0+/-0.4
14 | 
15 | 


--------------------------------------------------------------------------------
/Chapter 10/distributing-computations-with-dask.py:
--------------------------------------------------------------------------------
 1 | import dask.dataframe as dd
 2 | 
 3 | 
 4 | data = dd.read_csv("sample.csv", dtype={"number": "object"})
 5 | print(data.head())
 6 | 
 7 | 
 8 | sum_data = data.lower + data.upper
 9 | print(sum_data)
10 | 
11 | result = sum_data.compute()
12 | print(result.head())
13 | 
14 | 
15 | means = data[["lower", "upper"]].mean().compute()
16 | print(means)
17 | 


--------------------------------------------------------------------------------
/Chapter 10/keeping-track-of-units-with-pint.py:
--------------------------------------------------------------------------------
 1 | import pint
 2 | 
 3 | ureg = pint.UnitRegistry(system="mks")
 4 | 
 5 | 
 6 | distance = 5280 * ureg.feet
 7 | print(distance.to("miles"))
 8 | print(distance.to_base_units())
 9 | print(distance.to_base_units().to_compact())
10 | 
11 | @ureg.wraps(ureg.meter, ureg.second)
12 | def calc_depth(dropping_time):
13 |     # s = u*t + 0.5*a*t*t
14 |     # u = 0, a = 9.81
15 |     return 0.5*9.81*dropping_time*dropping_time
16 | 
17 | 
18 | depth = calc_depth(0.05 * ureg.minute)
19 | print("Depth", depth)
20 | # Depth 44.144999999999996 meter


--------------------------------------------------------------------------------
/Chapter 10/loading-and-storing-data-from-netcdf.py:
--------------------------------------------------------------------------------
 1 | import xarray as xr
 2 | import pandas as pd
 3 | import numpy as np
 4 | import matplotlib.pyplot as plt
 5 | 
 6 | from numpy.random import default_rng
 7 | rng = default_rng(12345)
 8 | 
 9 | dates = pd.date_range("2020-01-01", periods=365, name="date")
10 | locations = list(range(25))
11 | steps = rng.normal(0, 1, size=(365,25))
12 | accumulated = np.add.accumulate(steps)
13 | 
14 | data_array = xr.Dataset({
15 |     "steps": (("date", "location"), steps),
16 |     "accumulated": (("date", "location"), accumulated)
17 |     },
18 |     {"location": locations, "date": dates}
19 | )
20 | 
21 | print(data_array)
22 | # <xarray.Dataset>
23 | # Dimensions:      (date: 365, location: 25)
24 | # Coordinates:
25 | #   * location     (location) int64 0 1 2 3 4 5 6 7 8 ... 17 18 19 20 21 22 23 24
26 | #   * date         (date) datetime64[ns] 2020-01-01 2020-01-02 ... 2020-12-30
27 | # Data variables:
28 | #     steps        (date, location) float64 -1.424 1.264 ... -0.4547 -0.4873
29 | #     accumulated  (date, location) float64 -1.424 1.264 -0.8707 ... 8.935 -3.525
30 | 
31 | means = data_array.mean(dim="location")
32 | 
33 | fig, ax = plt.subplots(tight_layout=True)
34 | means["accumulated"].to_dataframe().plot(ax=ax, color="k")
35 | ax.set(title="Mean accumulated values", xlabel="date", ylabel="value")
36 | 
37 | plt.show()
38 | 
39 | data_array.to_netcdf("data.nc")
40 | 
41 | new_data = xr.load_dataset("data.nc")
42 | print(new_data)
43 | # <xarray.Dataset>
44 | # Dimensions:      (date: 365, location: 25)
45 | # Coordinates:
46 | #   * location     (location) int64 0 1 2 3 4 5 6 7 8 ... 17 18 19 20 21 22 23 24
47 | #   * date         (date) datetime64[ns] 2020-01-01 2020-01-02 ... 2020-12-30
48 | # Data variables:
49 | #     steps        (date, location) float64 -1.424 1.264 ... -0.4547 -0.4873
50 | #     accumulated  (date, location) float64 -1.424 1.264 -0.8707 ... 8.935 -3.525
51 | 


--------------------------------------------------------------------------------
/Chapter 10/sample.csv:
--------------------------------------------------------------------------------
  1 | id,number,lower,upper
  2 | row0,0,-0.5453279550656607,-0.36648332058049427
  3 | row1,1,0.5947309146654682,0.3525093415019491
  4 | row2,2,-0.217780898796182,-0.33437214426723094
  5 | row3,3,0.19661750717437965,-0.6265316287925733
  6 | row4,4,0.3455120880292426,0.8836057305398743
  7 | row5,5,-0.503508570740858,0.8977623036666365
  8 | row6,6,0.3344749062007448,-0.8082041288117758
  9 | row7,7,-0.11632066766437443,0.7729598386550354
 10 | row8,8,0.3949069997640442,-0.3470542718597758
 11 | row9,9,0.467856326660133,-0.5597300889090275
 12 | row10,10,-0.8368108609155838,-0.680208797849905
 13 | row11,11,-1.3197996300905894,-0.06961369259589811
 14 | row12,12,-0.46715794341845807,0.6315528068496139
 15 | row13,13,-0.613411221421011,-0.7410618476455995
 16 | row14,14,-0.8166704969101282,0.19713602732982638
 17 | row15,15,0.7094838087480027,0.20324248338742623
 18 | row16,16,0.863976722271967,0.44956272218404014
 19 | row17,17,0.7211026347865848,0.8586756031506326
 20 | row18,None,0.09237201816470608,0.8753459175355138
 21 | row19,19,-0.010024119842351409,-0.452453635020025
 22 | row20,20,-0.09644258505047865,0.3300778467990606
 23 | row21,21,-0.3382181390658907,0.8069080136164781
 24 | row22,22,-0.4858516494469314,-0.32034332477936034
 25 | row23,23,-0.48229320271414533,-0.28910704011142796
 26 | row24,24,-0.9899553325657364,0.2572090881993574
 27 | row25,25,-0.43523458514976343,-0.8638246210241085
 28 | row26,26,0.23365795451276106,-0.6473473594375931
 29 | row27,27,-0.3912232255608208,-0.11822637824776394
 30 | row28,28,-0.6995953178745984,-0.56414227382913
 31 | row29,29,-0.051333769332911006,-0.04726228983761627
 32 | row30,30,-0.48953529236099946,-0.40486946370390386
 33 | row31,31,-0.4418657603724667,-0.4788415750174049
 34 | row32,32,-0.034476814401368516,-1.5760419272969788
 35 | row33,33,-0.008738806653918685,-0.5074773483385249
 36 | row34,34,0.6769653049338895,-0.6397388198099299
 37 | row35,35,0.7243125830184729,-0.6434011103096251
 38 | row36,36,0.5010626638744882,0.2222408076611304
 39 | row37,37,-0.5816899301427854,0.5197448422479904
 40 | row38,38,-0.501478860930175,-0.828856536026884
 41 | row39,39,0.2361134446361819,0.07393666206467109
 42 | row40,40,0.2690534224305514,-0.6512517826172235
 43 | row41,41,-0.5036710202870951,0.36964596927879834
 44 | row42,42,-0.838256707498185,0.7501472015122523
 45 | row43,43,-0.14261123692001632,0.23678839079475567
 46 | row44,44,-0.3737889916297603,-0.6420742894142648
 47 | row45,45,-0.9805757444090948,-0.579914083103094
 48 | row46,46,0.7400013575433042,0.9456596047951173
 49 | row47,47,-0.11641531361779522,-0.2425010103814933
 50 | row48,48,-0.4481058374636997,0.9322082184688836
 51 | row49,49,-0.883594789468312,-0.1825322022762903
 52 | row50,50,-0.6627423109430279,-0.5197118831966243
 53 | row51,51,0.5600157126684238,-0.5924648076887753
 54 | row52,52,0.10410190952324339,-0.26601171589666506
 55 | row53,53,0.014563442784713665,-0.33312440660742837
 56 | row54,54,-0.4345566517842432,-0.43633939738958594
 57 | row55,55,-0.8292374157574245,-0.03637268843862951
 58 | row56,56,0.7666857890291086,0.8944555320200425
 59 | row57,57,-0.9452325561457624,0.8355044890051131
 60 | row58,58,-0.7569509410291808,0.4956955195716175
 61 | row59,59,0.7930414840993898,-0.6641404001256874
 62 | row60,60,-0.3370735614663771,-0.2436867487683949
 63 | row61,61,-0.306302084651618,0.03251140223072535
 64 | row62,62,-0.9820119452573393,-0.15464359103523306
 65 | row63,63,1.7553154590232336,-0.8251896942950534
 66 | row64,64,-0.031830365083926226,-0.03754454531893958
 67 | row65,65,0.5651429806397303,0.9291191502767817
 68 | row66,66,0.4141928832710864,-0.45252655816900744
 69 | row67,67,0.34022660093971635,-0.3049303945035231
 70 | row68,68,0.5362556721098188,0.3515428325049723
 71 | row69,69,0.9550640560545192,0.7334195791892422
 72 | row70,70,-0.9077839707081079,-0.4193525738247359
 73 | row71,71,0.7247821953781601,0.20169566808153072
 74 | row72,72,-0.3114840830298182,-0.8887948464221092
 75 | row73,73,0.5257453399641963,-0.9969624245218904
 76 | row74,74,-0.814312921521728,0.9870083196112802
 77 | row75,75,0.5520261544573772,0.7365417258524953
 78 | row76,76,-0.04568349682654338,-0.6623027191553501
 79 | row77,77,0.06795464229033121,-0.1483362463468949
 80 | row78,78,-0.6739408067896355,-0.6682448575985709
 81 | row79,79,-0.48369966334835657,0.8861831397315372
 82 | row80,80,0.9736138836952428,0.40733282972103124
 83 | row81,81,0.4463148267419048,0.4649956144843359
 84 | row82,82,0.03479103702911046,-0.6451473834892649
 85 | row83,83,0.7552924138551387,0.7607807114991783
 86 | row84,84,0.419069105579164,0.8668576143141729
 87 | row85,85,0.9599963207988473,0.006951564264227272
 88 | row86,86,0.5151390139169887,0.23323704687810198
 89 | row87,87,-0.7702218635789513,-0.36802303975273887
 90 | row88,88,-0.8645322049402049,0.7737423562224535
 91 | row89,89,-0.9558592022501617,0.07457340680881064
 92 | row90,90,-0.6447757870185391,-0.7060101812706867
 93 | row91,91,-0.6683110398703005,0.7539079578178016
 94 | row92,92,-0.011059748841883543,-0.31819900933172107
 95 | row93,93,0.9984196517634787,0.09194979150319749
 96 | row94,94,-0.39333774878924466,-0.25869659832069414
 97 | row95,95,0.3917337561065648,-0.7595068627225243
 98 | row96,96,0.5164423763375303,0.7675352136978346
 99 | row97,97,-0.04973771044592468,-0.1253905900675356
100 | row98,98,-0.7137704534622713,-0.15451548318042696
101 | row99,99,0.5288233887583287,-0.1720334171281932
102 | 


--------------------------------------------------------------------------------
/Chapter 10/sample.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "# Sample Jupyter notebook\n",
 8 |     "This is a sample notebook."
 9 |    ]
10 |   },
11 |   {
12 |    "cell_type": "code",
13 |    "execution_count": null,
14 |    "metadata": {},
15 |    "outputs": [],
16 |    "source": [
17 |     "import matplotlib.pyplot as plt\n",
18 |     "from numpy.random import default_rng\n",
19 |     "rng = default_rng(12345)"
20 |    ]
21 |   },
22 |   {
23 |    "cell_type": "code",
24 |    "execution_count": null,
25 |    "metadata": {},
26 |    "outputs": [],
27 |    "source": [
28 |     "uniform_data = rng.uniform(-5, 5, size=(2, 100))"
29 |    ]
30 |   },
31 |   {
32 |    "cell_type": "code",
33 |    "execution_count": null,
34 |    "metadata": {},
35 |    "outputs": [],
36 |    "source": [
37 |     "fig, ax = plt.subplots(tight_layout=True)\n",
38 |     "ax.scatter(uniform_data[0, :], uniform_data[1, :], color=\"k\")\n",
39 |     "ax.set(title=\"Scatter plot\", xlabel=\"x\", ylabel=\"y\")"
40 |    ]
41 |   }
42 |  ],
43 |  "metadata": {
44 |   "kernelspec": {
45 |    "display_name": "Python 3 (ipykernel)",
46 |    "language": "python",
47 |    "name": "python3"
48 |   },
49 |   "language_info": {
50 |    "codemirror_mode": {
51 |     "name": "ipython",
52 |     "version": 3
53 |    },
54 |    "file_extension": ".py",
55 |    "mimetype": "text/x-python",
56 |    "name": "python",
57 |    "nbconvert_exporter": "python",
58 |    "pygments_lexer": "ipython3",
59 |    "version": "3.10.1+"
60 |   }
61 |  },
62 |  "nbformat": 4,
63 |  "nbformat_minor": 2
64 | }
65 | 


--------------------------------------------------------------------------------
/Chapter 10/validating-data.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | import cerberus
 3 | 
 4 | float_schema = {"type": "float", "coerce": float, "min": -1.0, "max": 1.0}
 5 | 
 6 | item_schema = {
 7 |     "type": "dict",
 8 |     "schema": {
 9 |         "id": {"type": "string"},
10 |         "number": {"type": "integer", "coerce": int},
11 |         "lower": float_schema,
12 |         "upper": float_schema,
13 |     }
14 | }
15 | 
16 | schema = {
17 |     "rows": {
18 |         "type": "list",
19 |         "schema": item_schema
20 |     }
21 | }
22 | 
23 | 
24 | validator = cerberus.Validator(schema)
25 | 
26 | 
27 | with open("sample.csv") as f:
28 |     dr = csv.DictReader(f)
29 |     document = {"rows": list(dr)}
30 | 
31 | 
32 | validator.validate(document)
33 | 
34 | errors = validator.errors["rows"][0]
35 | 
36 | for row_n, errs in errors.items():
37 |     print(f"row {row_n}: {errs}")
38 | 
39 | 


--------------------------------------------------------------------------------
/Chapter 10/working-with-geographical-data.py:
--------------------------------------------------------------------------------
 1 | import geopandas
 2 | import geoplot
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | world = geopandas.read_file(
 6 |         geopandas.datasets.get_path("naturalearth_lowres")
 7 | )
 8 | 
 9 | cities = geopandas.read_file(
10 |         geopandas.datasets.get_path("naturalearth_cities")
11 | )
12 | 
13 | fig, ax = plt.subplots()
14 | geoplot.polyplot(world, ax=ax, alpha=0.7)
15 | 
16 | 
17 | geoplot.pointplot(cities, ax=ax, fc="k", marker="2")
18 | ax.axis((-180, 180, -90, 90))
19 | 
20 | 
21 | plt.show()
22 | 


--------------------------------------------------------------------------------
/Chapter 10/writing-reproducible-code-for-data-science.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | 
 4 | import matplotlib.pyplot as plt
 5 | 
 6 | from sklearn.metrics import ConfusionMatrixDisplay, accuracy_score
 7 | from sklearn.model_selection import train_test_split
 8 | from sklearn.tree import DecisionTreeClassifier
 9 | 
10 | rng = np.random.default_rng(12345)
11 | 
12 | 
13 | def get_data():
14 |     permute = rng.permutation(200)
15 | 
16 |     data = np.vstack([
17 |         rng.normal((1.0, 2.0, -3.0), 1.0, size=(50, 3)),
18 |         rng.normal((-1.0, 1.0, 1.0), 1.0, size=(50, 3)),
19 |         rng.normal((0.0, -1.0, -1.0), 1.0, size=(50, 3)),
20 |         rng.normal((-1.0, -1.0, -2.0), 1.0, size=(50, 3))
21 |     ])
22 |     labels = np.hstack([
23 |         [1]*50, [2]*50, [3]*50, [4]*50
24 |     ])
25 | 
26 |     X = pd.DataFrame(np.take(data, permute, axis=0), columns=["A", "B", "C"])
27 |     y = pd.Series(np.take(labels, permute, axis=0))
28 |     return X, y
29 | 
30 | 
31 | data, labels = get_data()
32 | 
33 | data.to_csv("data.csv")
34 | labels.to_csv("labels.csv")
35 | 
36 | X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=23456)
37 | 
38 | print(X_train.index.size, X_test.index.size)
39 | 
40 | X_train.index.to_series().to_csv("train_index.csv", index=False, header=False)
41 | X_test.index.to_series().to_csv("test_index.csv", index=False, header=False)
42 | 
43 | classifier  = DecisionTreeClassifier(random_state=34567)
44 | classifier.fit(X_train, y_train)
45 | 
46 | feature_importance = pd.DataFrame(classifier.feature_importances_, index=classifier.feature_names_in_, columns=["Importance"])
47 | feature_importance.to_csv("feature_importance.csv")
48 | 
49 | train_predictions = classifier.predict(X_train)
50 | test_predictions = classifier.predict(X_test)
51 | 
52 | pd.Series(train_predictions, index=X_train.index, name="Predicted labels").to_csv("train_predictions.csv")
53 | pd.Series(test_predictions, index=X_test.index, name="Predicted labels").to_csv("test_predictions.csv")
54 | 
55 | fig, (ax1, ax2) = plt.subplots(1, 2, tight_layout=True)
56 | ax1.set_title("Confusion matrix for training data")
57 | ax2.set_title("Confusion matrix for test data")
58 | ConfusionMatrixDisplay.from_predictions(y_train, train_predictions, ax=ax1, cmap="Greys", colorbar=False)
59 | ConfusionMatrixDisplay.from_predictions(y_test, test_predictions, ax=ax2, cmap="Greys", colorbar=False)
60 | 
61 | print(f"Train accuracy {accuracy_score(y_train, train_predictions)}",
62 |       f"Test accuracy {accuracy_score(y_test, test_predictions)}", sep="\n")
63 | 
64 | plt.show()


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Packt
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ### [Packt Conference : Put Generative AI to work on Oct 11-13 (Virtual)](https://packt.link/JGIEY)
 3 | 
 4 | <b><p align='center'>[![Packt Conference](https://hub.packtpub.com/wp-content/uploads/2023/08/put-generative-ai-to-work-packt.png)](https://packt.link/JGIEY)</p></b> 
 5 | 3 Days, 20+ AI Experts, 25+ Workshops and Power Talks 
 6 | 
 7 | Code: <b>USD75OFF</b>
 8 | 
 9 | 
10 | 
11 | 
12 | # Applying Math with Python - Second Edition
13 | <a href="https://www.packtpub.com/product/applying-math-with-python-second-edition/9781804618370"><img src="https://static.packt-cdn.com/products/9781804618370/cover/smaller" alt="Applying Math with Python - Second Edition" height="256px" align="right"></a>
14 | 
15 | This is the code repository for [Applying Math with Python - Second Edition](https://www.packtpub.com/product/applying-math-with-python-second-edition/9781804618370), published by Packt.
16 | 
17 | **Over 70 practical recipes for solving real-world computational math problems**
18 | 
19 | ## What is this book about?
20 | The updated edition of Applying Math with Python will help you solve complex problems in a wide variety of mathematical fields in simple and efficient ways. Old recipes have been revised for new libraries and several recipes have been added to demonstrate new tools such as JAX.
21 | You'll start by refreshing your knowledge of several core mathematical fields and learn about packages covered in Python's scientific stack, including NumPy, SciPy, and Matplotlib. As you progress, you'll gradually get to grips with more advanced topics of calculus, probability, and networks (graph theory). Once you’ve developed a solid base in these topics, you’ll have the confidence to set out on math adventures with Python as you explore Python's applications in data science and statistics, forecasting, geometry, and optimization. The final chapters will take you through a collection of miscellaneous problems, including working with specific data formats and accelerating code.
22 | By the end of this book, you'll have an arsenal of practical coding solutions that can be used and modified to solve a wide range of practical problems in computational mathematics and data science.
23 | 
24 | This book covers the following exciting features: 
25 | * Become familiar with basic Python packages, tools, and libraries for solving mathematical problems
26 | * Explore real-world applications of mathematics to reduce a problem in optimization
27 | * Understand the core concepts of applied mathematics and their application in computer science
28 | * Find out how to choose the most suitable package, tool, or technique to solve a problem
29 | * Implement basic mathematical plotting, change plot styles, and add labels to plots using Matplotlib
30 | * Get to grips with probability theory using Bayesian inference and Markov Chain Monte Carlo (MCMC) methods
31 | 
32 | If you feel this book is for you, get your [copy](https://www.amazon.com/dp/1804618373) today!
33 | 
34 | <a href="https://www.packtpub.com/?utm_source=github&utm_medium=banner&utm_campaign=GitHubBanner"><img src="https://raw.githubusercontent.com/PacktPublishing/GitHub/master/GitHub.png" alt="https://www.packtpub.com/" border="5" /></a>
35 | 
36 | ## Instructions and Navigation
37 | All of the code is organized into folders.
38 | 
39 | The code will look like the following:
40 | ```
41 | from decimal import Decimal, getcontext
42 | ctx = getcontext()
43 | num = Decimal('1.1')
44 | num**4 # Decimal('1.4641')
45 | ctx.prec=4 # set the new precision
46 | num**4 # Decimal('1.464')
47 | ```
48 | 
49 | **Following is what you need for this book:**
50 | Whether you are a professional programmer or a student looking to solve mathematical problems computationally using Python, this is the book for you. Advanced mathematics proficiency is not a prerequisite, but basic knowledge of mathematics will help you to get the most out of this Python math book. Familiarity with the concepts of data structures in Python is assumed.
51 | 
52 | With the following software and hardware list, you can run all of the code files present in the book (Chapters 1-10).
53 | 
54 | ### Software and Hardware List
55 | 
56 | | Chapter  | Software required                                                                    | OS required                        |
57 | | -------- | -------------------------------------------------------------------------------------| -----------------------------------|
58 | |  1-10		 | Python 3.6 or higher							                                            			  | Windows, Mac OS X, and Linux (Any) |
59 | 
60 | We also provide a PDF file that has color images of the screenshots/diagrams used in this book. [Click here to download it](http://packt.link/OxkXD).
61 | 
62 | 
63 | ### Related products (Other books you may enjoy)
64 | * Hands-On Mathematics for Deep Learning [[Packt]](https://www.packtpub.com/product/hands-on-mathematics-for-deep-learning/9781838647292) [[Amazon]](https://www.amazon.com/dp/1838647295)
65 | 
66 | * Essential Mathematics for Quantum Computing [[Packt]](https://www.packtpub.com/product/essential-mathematics-for-quantum-computing/9781801073141?_ga=2.176321189.1855619319.1669697710-1347501151.1654864057) [[Amazon]](https://www.amazon.com/dp/1801073147)
67 | 
68 | ## Get to Know the Author
69 | **Sam Morley**  is an experienced lecturer in mathematics and a researcher in pure mathematics. He is currently a research software engineer at the University of Oxford working on the DataSig project. He was previously a lecturer in mathematics at the University of East Anglia and Nottingham Trent University. His research interests lie in functional analysis, especially Banach algebras. Sam has a firm commitment to providing high-quality, inclusive, and enjoyable teaching, with the aim of inspiring his students and spreading his enthusiasm for mathematics.
70 | 
71 | ### Download a free PDF
72 | 
73 |  <i>If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost.<br>Simply click on the link to claim your free PDF.</i>
74 | <p align="center"> <a href="https://packt.link/free-ebook/9781804618370">https://packt.link/free-ebook/9781804618370 </a> </p>
75 | 


--------------------------------------------------------------------------------