├── LICENSE
├── README.md
├── chapter_01
│   └── c01_sklearn.py
├── chapter_02
│   ├── birthday.py
│   ├── boston.py
│   └── coin_flips.py
├── chapter_03
│   ├── continuous.py
│   ├── discrete.py
│   └── ricky.py
├── chapter_04
│   ├── correlation.py
│   ├── exams.npy
│   ├── hypothesis.py
│   ├── missing.py
│   └── quantiles.py
├── chapter_05
│   ├── matrixmul.py
│   └── numpy_matmul.py
├── chapter_06
│   ├── bc_mahalanobis.py
│   ├── iris_pca.py
│   └── kl_divergence.py
├── chapter_07
│   └── c07_figure.py
├── chapter_08
│   ├── newton_1d.py
│   ├── newton_2d.py
│   └── spiral.py
├── chapter_09
│   ├── c09_nn.py
│   └── convolve_example.py
├── chapter_10
│   ├── NN.py
│   ├── build_dataset.py
│   ├── iris.py
│   ├── mnist.py
│   └── nn_by_hand.py
├── chapter_11
│   ├── NN.py
│   ├── NNm.py
│   ├── fmnist.py
│   ├── fmnist_analyze.py
│   ├── fmnist_momentum.py
│   ├── fmnist_no_momentum_runs.npy
│   ├── fmnist_repeat.py
│   ├── fmnist_w_momentum_runs.npy
│   ├── gd_1d.py
│   ├── gd_1d_momentum.py
│   ├── gd_2d.py
│   ├── gd_momentum.py
│   ├── gd_multiple.py
│   └── gd_nesterov.py
├── dataset
│   ├── cifar10_test_images.npy
│   ├── cifar10_test_labels.npy
│   ├── fmnist_test_images_small.npy
│   ├── fmnist_test_labels.npy
│   ├── fmnist_train_images_small.npy
│   ├── fmnist_train_labels.npy
│   ├── fmnist_train_labels_vector.npy
│   ├── test_images_full.npy
│   ├── test_images_small.npy
│   ├── test_labels.npy
│   ├── train_images_full.npy
│   ├── train_images_small.npy
│   ├── train_labels.npy
│   └── train_labels_vector.npy
└── tutorial.pdf

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2021 rkneusel9
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MathForDeepLearning
2 | Source code for the book "Math for Deep Learning"
3 | 
4 | Source code is organized by chapter. If you have questions
5 | or comments, please contact me:
6 | 
7 | rkneuselbooks@gmail.com
8 | 
9 | **Updates**
10 | - p 300, the last sentence of the penultimate paragraph should read "Here, t, an integer starting at *one*, is the timestep."
11 | - The file *boston.py* in Chapter 2 sometimes sampled the same person more than once (thanks to ikimmit for the catch!)
12 | - The file *tutorial.pdf* is a beginner's guide to NumPy, SciPy, Matplotlib, and Pillow.
13 | - p 29, the upper limit on randint should be 365, not 364 (code updated).
14 | - p 198, the derivative of a matrix function should be scalar $\partial x$, not $\partial\mathbf{x}$.
15 | - p 257, the line above Equation 10.10 should be $\left[\frac{\partial E}{\partial y_0}\sigma'(x_0)\ \frac{\partial E}{\partial y_1}\sigma'(x_1)\ \ldots\ \right]^\top$.
16 | - Tweaked the Ch 10 code in *build_dataset.py* to conform to newer Keras versions
17 | 
18 | 

--------------------------------------------------------------------------------
/chapter_01/c01_sklearn.py:
--------------------------------------------------------------------------------
1 | # c01_sklearn.py
2 | 
3 | import numpy as np
4 | from sklearn.datasets import load_digits
5 | from sklearn.neural_network import MLPClassifier
6 | 
7 | d = load_digits()
8 | digits = d["data"]
9 | labels = d["target"]
10 | 
11 | N = 200
12 | idx = np.argsort(np.random.random(len(labels)))
13 | x_test, y_test = digits[idx[:N]], labels[idx[:N]]
14 | x_train, y_train = digits[idx[N:]], labels[idx[N:]]
15 | 
16 | clf = MLPClassifier(hidden_layer_sizes=(128,))
17 | clf.fit(x_train, y_train)
18 | score = clf.score(x_test, y_test)
19 | pred = clf.predict(x_test)
20 | err = np.where(y_test != pred)[0]
21 | print()
22 | print("score : ", score)
23 | print("errors:")
24 | print(" actual : ", y_test[err])
25 | print(" predicted: ", pred[err])
26 | print()
27 | 
28 | 

--------------------------------------------------------------------------------
/chapter_02/birthday.py:
--------------------------------------------------------------------------------
1 | #
2 | # file: birthday.py
3 | #
4 | # Simulate the birthday paradox
5 | #
6 | # RTK, 04-Jun-2020
7 | # Last update: 04-Jun-2020
8 | #
9 | ################################################################
10 | 
11 | import numpy as np
12 | 
13 | # Simulate picking two people at random, probability of sharing a birthday
14 | N = 100000
15 | match = 0
16 | for i in range(N):
17 |     a = np.random.randint(0,365)
18 |     b = np.random.randint(0,365)
19 |     if (a == b):
20 |         match += 1
21 | print()
22 | print("Probability of a random match = %0.6f" % (match/N,))
23 | print()
24 | 
25 | # Simulate people in a room, N tests per M
26 | M = 30
27 | N = 100000
28 | for m in range(2,M+1):
29 |     matches = 0
30 |     for n in range(N):
31 |         match = 0
32 |         b = np.random.randint(0,365,m)
33 |         for i in range(m):
34 |             for j in range(m):
35 |                 if (i != j) and (b[i] == b[j]):
36 |                     match += 1
37 |         if (match != 0):
38 |             matches += 1
39 |     print("%2d people: probability of at least one match %0.6f" % (m, matches/N))
40 | 
41 | 

--------------------------------------------------------------------------------
/chapter_02/boston.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | P = 50
3 | B = 4
4 | M = 3
5 | N = 100000
6 | 
7 | nb = 0
8 | 
9 | for i in range(N):
10 |     s = np.random.choice(P,M, replace=False)
11 |     fail = False
12 |     for t in range(M):
13 |         if (s[t] < B):
14 |             fail = True
15 |     if (not fail):
16 |         nb += 1
17 | 
18 | print()
19 | print("Prob no Boston in the fall = %0.4f" % (nb/N,))
20 | print()
21 | 

--------------------------------------------------------------------------------
/chapter_02/coin_flips.py:
--------------------------------------------------------------------------------
1 | #
2 | # file: coin_flips.py
3 | #
4 | # Probability of getting 0,1,2,3, or 4 heads
5 | # in four flips of a coin.
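# For reference, the exact values this simulation should approach:
# with M fair flips, P(h heads) = C(M,h)/2**M, so M = 4 gives
# 1/16, 4/16, 6/16, 4/16, 1/16.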
6 | # 7 | # RTK, 05-Jun-2020 8 | # Last update: 05-Jun-2020 9 | # 10 | ################################################################ 11 | 12 | import numpy as np 13 | 14 | N = 1000000 15 | M = 4 16 | 17 | heads = np.zeros(M+1) 18 | 19 | for i in range(N): 20 | flips = np.random.randint(0,2,M) 21 | h, _ = np.bincount(flips, minlength=2) 22 | heads[h] += 1 23 | 24 | prob = heads / N 25 | 26 | print() 27 | print("Probabilities: %s" % np.array2string(prob)) 28 | print() 29 | 30 | -------------------------------------------------------------------------------- /chapter_03/continuous.py: -------------------------------------------------------------------------------- 1 | # plots of key continuous distributions 2 | import numpy as np 3 | import matplotlib.pylab as plt 4 | 5 | N = 10000000 6 | B = 100 7 | x = np.arange(B)/B 8 | 9 | # uniform 10 | t = np.random.random(N) 11 | u = np.histogram(t, bins=B)[0] 12 | u = u / u.sum() 13 | 14 | # normal 15 | t = np.random.normal(0, 1, size=N) 16 | n = np.histogram(t, bins=B)[0] 17 | n = n / n.sum() 18 | 19 | # gamma 20 | t = np.random.gamma(5.0, size=N) 21 | g = np.histogram(t, bins=B)[0] 22 | g = g / g.sum() 23 | 24 | # beta 25 | t = np.random.beta(5,2, size=N) 26 | b = np.histogram(t, bins=B)[0] 27 | b = b / b.sum() 28 | 29 | plt.plot(x,u,color='k',linestyle='solid') 30 | plt.plot(x,n,color='k',linestyle='dotted') 31 | plt.plot(x,g,color='k',linestyle='dashed') 32 | plt.plot(x,b,color='k',linestyle='dashdot') 33 | plt.ylabel("Probability") 34 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 35 | #plt.savefig("continuous.png", dpi=300) 36 | plt.show() 37 | plt.close() 38 | 39 | # central limit theorem 40 | M = 10000 41 | m = np.zeros(M) 42 | for i in range(M): 43 | t = np.random.beta(5,2,size=M) 44 | m[i] = t.mean() 45 | print("Mean of the means = %0.7f" % m.mean()) 46 | 47 | h,x = np.histogram(m, bins=B) 48 | h = h / h.sum() 49 | plt.bar(x[:-1]+0.5*(x[1]-x[0]), h, width=0.8*(x[1]-x[0])) 50 | plt.xlabel("Mean") 51 | plt.ylabel("Probability") 52 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 53 | #plt.savefig("central_limit.png", dpi=300) 54 | plt.show() 55 | plt.close() 56 | 57 | from fldrf import fldr_preprocess_float_c 58 | from fldr import fldr_sample 59 | 60 | z = fldr_preprocess_float_c([0.1,0.6,0.1,0.1,0.1]) 61 | m = np.zeros(M) 62 | for i in range(M): 63 | t = np.array([fldr_sample(z) for i in range(M)]) 64 | m[i] = t.mean() 65 | print("Mean of the means = %0.7f" % m.mean()) 66 | 67 | h,x = np.histogram(m, bins=B) 68 | h = h / h.sum() 69 | plt.bar(x[:-1]+0.5*(x[1]-x[0]), h, width=0.8*(x[1]-x[0])) 70 | plt.xlabel("Mean") 71 | plt.ylabel("Probability") 72 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 73 | #plt.savefig("central_limit_fldr.png", dpi=300) 74 | plt.show() 75 | plt.close() 76 | 77 | t = np.array([fldr_sample(z) for i in range(M)]) 78 | h = np.bincount(t) 79 | h = h / h.sum() 80 | plt.bar(np.arange(5),h, width=0.8) 81 | plt.xlabel("Value") 82 | plt.ylabel("Probability") 83 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 84 | #plt.savefig("pmf_fldr.png", dpi=300) 85 | plt.show() 86 | plt.close() 87 | 88 | # Law of large numbers 89 | m = [] 90 | for n in np.linspace(1,8,30): 91 | t = np.random.normal(1,1,size=int(10**n)) 92 | m.append(t.mean()) 93 | 94 | plt.plot(np.linspace(1,8,30), m) 95 | plt.plot([1,8],[1,1], linestyle="--", color='k') 96 | plt.xlabel("Exponent $10^n$") 97 | plt.ylabel("Single sample mean") 98 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 99 | #plt.savefig("large_numbers.png", dpi=300) 100 | plt.show() 101 | 102 | 103 | 
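The central limit theorem sections above print only the mean of the sample means. The CLT also fixes the spread: for samples of size n, the sample means should be approximately Normal(mu, sigma/sqrt(n)), with mu and sigma the mean and standard deviation of the underlying distribution. A minimal numerical check against the exact Beta(5,2) moments (an editorial sketch with illustrative sample sizes, not part of the book's code):

import numpy as np

# Exact moments of Beta(a,b): mean a/(a+b), variance a*b/((a+b)**2 * (a+b+1))
a, b = 5.0, 2.0
mu = a / (a + b)
sigma = np.sqrt(a * b / ((a + b)**2 * (a + b + 1)))

n, M = 1000, 5000   # n draws per sample mean, M sample means
means = np.random.beta(a, b, size=(M, n)).mean(axis=1)

# CLT prediction: means ~ Normal(mu, sigma/sqrt(n))
print("mean of means: expected %0.5f, observed %0.5f" % (mu, means.mean()))
print("std of means : expected %0.5f, observed %0.5f" %
      (sigma / np.sqrt(n), means.std(ddof=1)))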
-------------------------------------------------------------------------------- /chapter_03/discrete.py: -------------------------------------------------------------------------------- 1 | # discrete probability distributions 2 | import numpy as np 3 | import matplotlib.pylab as plt 4 | from scipy.datasets import face 5 | from fldr import * 6 | from fldrf import * 7 | 8 | # binomial 9 | q = np.random.binomial(10, 0.7, 1000) 10 | h = np.histogram(q, bins=q.max()-q.min()+1)[0] 11 | h = h / h.sum() 12 | x = np.arange(q.min(), q.max()+1) 13 | plt.bar(x,h,width=0.8) 14 | q = np.random.binomial(10, 0.3, 1000) 15 | h = np.histogram(q, bins=q.max()-q.min()+1)[0] 16 | h = h / h.sum() 17 | x = np.arange(q.min(), q.max()+1) 18 | plt.bar(x,h,width=0.8) 19 | plt.show() 20 | 21 | # FLDR 22 | im = face(True) 23 | b = np.bincount(im.ravel(), minlength=256) 24 | b = b / b.sum() 25 | x = fldr_preprocess_float_c(list(b)) 26 | t = [fldr_sample(x) for i in range(500000)] 27 | q = np.bincount(t, minlength=256) 28 | q = q / q.sum() 29 | 30 | plt.plot(b, color='k') 31 | plt.plot(q, linestyle=(0, (1,1)), color='k') 32 | plt.xlabel("Sample") 33 | plt.ylabel("Probability") 34 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 35 | #plt.savefig("fldr_samples.png", dpi=300) 36 | plt.show() 37 | 38 | -------------------------------------------------------------------------------- /chapter_03/ricky.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pylab as plt 3 | import scipy.datasets 4 | from PIL import Image 5 | 6 | im = scipy.datasets.face(True)[:512,512:] 7 | Image.fromarray(im).save("ricky.png") 8 | hr,xr = np.histogram(im, bins=256) 9 | hr = hr/hr.sum() 10 | im = scipy.datasets.ascent().astype("uint8") 11 | Image.fromarray(im).save("ascent.png") 12 | ha,xa = np.histogram(im, bins=256) 13 | ha = ha/ha.sum() 14 | plt.plot(xr[:-1],hr, color='k', label="Face") 15 | plt.plot(xa[:-1],ha, linestyle=(0,(1,1)), color='k', label="Ascent") 16 | plt.legend(loc="upper right") 17 | plt.xlabel("Gray level") 18 | plt.ylabel("Probability") 19 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 20 | plt.savefig("ricky_probability.png", dpi=300) 21 | plt.show() 22 | plt.close() 23 | 24 | 25 | -------------------------------------------------------------------------------- /chapter_04/correlation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pylab as plt 3 | 4 | np.random.seed(8675309) 5 | 6 | N = 100 7 | x = np.linspace(0,1,N) + (np.random.random(N)-0.5) 8 | y = np.random.random(N)*x 9 | z = -0.1*np.random.random(N)*x 10 | 11 | plt.plot(np.linspace(0,1,N),x,color='r') 12 | plt.plot(np.linspace(0,1,N),y,color='g') 13 | plt.plot(np.linspace(0,1,N),z,color='b') 14 | plt.plot(np.linspace(0,1,N)[::5],x[::5],color='r',marker='o',linestyle='none',label='X') 15 | plt.plot(np.linspace(0,1,N)[::5],y[::5],color='g',marker='s',linestyle='none',label='Y') 16 | plt.plot(np.linspace(0,1,N)[::5],z[::5],color='b',marker='*',linestyle='none',label='Z') 17 | plt.legend(loc="upper left") 18 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 19 | plt.savefig("correlation_xyz_plot.png", dpi=300) 20 | plt.close() 21 | 22 | plt.plot(x,y,marker='o',linestyle='none',color='r',label="X,Y") 23 | plt.plot(x,z,marker='s',linestyle='none',color='g',label="X,Z") 24 | plt.plot(y,z,marker='*',linestyle='none',color='b',label="Y,Z") 25 | plt.legend(loc="upper left") 26 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 27 | 
plt.savefig("correlation_scatter_plot.png", dpi=300) 28 | plt.close() 29 | 30 | 31 | from scipy.stats import pearsonr, spearmanr 32 | 33 | print("Pearson(x,y) :", pearsonr(x,y)[0]) 34 | print("Spearman(x,y):", spearmanr(x,y)[0]) 35 | print() 36 | print("Pearson(x,z) :", pearsonr(x,z)[0]) 37 | print("Spearman(x,z):", spearmanr(x,z)[0]) 38 | print() 39 | print("Pearson(y,z) :", pearsonr(y,z)[0]) 40 | print("Spearman(y,z):", spearmanr(y,z)[0]) 41 | print() 42 | 43 | def pearson(x,y): 44 | exy = (x*y).mean() 45 | ex = x.mean() 46 | ey = y.mean() 47 | exx = (x*x).mean() 48 | ex2 = x.mean()**2 49 | eyy = (y*y).mean() 50 | ey2 = y.mean()**2 51 | return (exy - ex*ey)/(np.sqrt(exx-ex2)*np.sqrt(eyy-ey2)) 52 | 53 | print("pearson(x,y):", pearson(x,y)) 54 | print("pearson(x,z):", pearson(x,z)) 55 | print("pearson(y,z):", pearson(y,z)) 56 | print() 57 | 58 | d = np.vstack((x,y,z)) 59 | print(np.corrcoef(d)) 60 | print() 61 | 62 | from sklearn.datasets import load_sample_image 63 | china = load_sample_image('china.jpg') 64 | a = china[230,:,1].astype("float64") 65 | b = china[231,:,1].astype("float64") 66 | c = china[400,:,1].astype("float64") 67 | d = np.random.random(640) 68 | print("china(a,b): ", pearson(a,b)) 69 | print("china(a,c): ", pearson(a,c)) 70 | print("china(a,d): ", pearson(a,d)) 71 | print() 72 | 73 | # spearman 74 | def spearman(x,y): 75 | n = len(x) 76 | t = x[np.argsort(x)] 77 | rx = [] 78 | for i in range(n): 79 | rx.append(np.where(x[i] == t)[0][0]) 80 | rx = np.array(rx, dtype="float64") 81 | t = y[np.argsort(y)] 82 | ry = [] 83 | for i in range(n): 84 | ry.append(np.where(y[i] == t)[0][0]) 85 | ry = np.array(ry, dtype="float64") 86 | d = rx - ry 87 | return 1.0 - (6.0/(n*(n*n-1)))*(d**2).sum() 88 | 89 | print(spearman(x,y), spearmanr(x,y)[0]) 90 | print(spearman(x,z), spearmanr(x,z)[0]) 91 | print(spearman(y,z), spearmanr(y,z)[0]) 92 | print() 93 | 94 | a = np.linspace(-20,20,1000) 95 | b = 1.0 / (1.0 + np.exp(-a)) 96 | print(pearson(a,b)) 97 | print(spearman(a,b)) 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /chapter_04/exams.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/chapter_04/exams.npy -------------------------------------------------------------------------------- /chapter_04/hypothesis.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pylab as plt 3 | 4 | np.random.seed(65535) 5 | a = np.random.normal(85,6,50).astype("int32") 6 | a[np.where(a > 100)] = 100 7 | b = np.random.normal(82,7,50).astype("int32") 8 | b[np.where(b > 100)] = 100 9 | 10 | print(a) 11 | print() 12 | print(b) 13 | print() 14 | 15 | print("With means of 82 & 85:") 16 | from scipy.stats import ttest_ind 17 | t,p = ttest_ind(a,b,equal_var=False) 18 | print("(t=%0.5f, p=%0.5f)" % (t,p)) 19 | 20 | from scipy.stats import mannwhitneyu 21 | u,p = mannwhitneyu(a,b) 22 | print("(U=%0.5f, p=%0.5f)" % (u,p)) 23 | 24 | plt.boxplot((a,b)) 25 | plt.xlabel("Group") 26 | plt.ylabel("Test score") 27 | plt.savefig("hypothesis_box_plot.png", dpi=300) 28 | plt.close() 29 | 30 | h,x = np.histogram(a, bins=10) 31 | plt.bar(x[:-1],h, width=0.4*(x[1]-x[0]), label='Group A') 32 | h,y = np.histogram(b, bins=10) 33 | plt.bar(y[:-1]+(x[1]-x[0])/2, h, width=0.4*(x[1]-x[0]), label='Group B') 34 | plt.legend(loc='upper left') 35 | plt.ylabel('Counts') 36 
| plt.tight_layout(pad=0, w_pad=0, h_pad=0) 37 | #plt.savefig("hypothesis_bar_plot.png", dpi=300) 38 | plt.close() 39 | 40 | # CI for Welch's t-test 41 | from scipy import stats 42 | 43 | def CI(a, b, alpha=0.05): 44 | n1, n2 = len(a), len(b) 45 | s1, s2 = np.std(a, ddof=1)**2, np.std(b, ddof=1)**2 46 | df = (s1/n1 + s2/n2)**2 / ((s1/n1)**2/(n1-1) + (s2/n2)**2/(n2-1)) 47 | tc = stats.t.ppf(1 - alpha/2, df) 48 | lo = (a.mean()-b.mean()) - tc*np.sqrt(s1/n1 + s2/n2) 49 | hi = (a.mean()-b.mean()) + tc*np.sqrt(s1/n1 + s2/n2) 50 | return lo, hi 51 | 52 | lo, hi = CI(a, b) 53 | print("CI95 = (%0.5f, %0.5f)" % (lo,hi)) 54 | 55 | # Cohen's d 56 | def Cohen_d(a,b): 57 | s1 = np.std(a, ddof=1)**2 58 | s2 = np.std(b, ddof=1)**2 59 | return (a.mean() - b.mean()) / np.sqrt(0.5*(s1+s2)) 60 | 61 | print("Cohen's d = %0.5f" % Cohen_d(a,b)) 62 | 63 | # change the means to be one step closer 64 | np.random.seed(65535) 65 | a = np.random.normal(85,6,50).astype("int32") 66 | a[np.where(a > 100)] = 100 67 | b = np.random.normal(83,7,50).astype("int32") 68 | b[np.where(b > 100)] = 100 69 | 70 | print("With means of 83 & 85:") 71 | t,p = ttest_ind(a,b,equal_var=False) 72 | print("(t=%0.5f, p=%0.5f)" % (t,p)) 73 | u,p = mannwhitneyu(a,b) 74 | print("(U=%0.5f, p=%0.5f)" % (u,p)) 75 | 76 | # means one step further apart 77 | np.random.seed(65535) 78 | a = np.random.normal(85,6,50).astype("int32") 79 | a[np.where(a > 100)] = 100 80 | b = np.random.normal(81,7,50).astype("int32") 81 | b[np.where(b > 100)] = 100 82 | 83 | print("With means of 81 & 85:") 84 | t,p = ttest_ind(a,b,equal_var=False) 85 | print("(t=%0.5f, p=%0.5f)" % (t,p)) 86 | u,p = mannwhitneyu(a,b) 87 | print("(U=%0.5f, p=%0.5f)" % (u,p)) 88 | 89 | # Effect of sample size 90 | np.random.seed(65535) 91 | pt = [] 92 | et = [] 93 | pm = [] 94 | em = [] 95 | M = 25 96 | n = [20,40,60,80,100,120,140,160,180,200,250,300,350,400,450,500,750,1000] 97 | for i in n: 98 | p = [] 99 | t = [] 100 | for j in range(M): 101 | a = np.random.normal(85,6,i).astype("int32") 102 | a[np.where(a > 100)] = 100 103 | b = np.random.normal(84,7,i).astype("int32") 104 | b[np.where(b > 100)] = 100 105 | t.append(ttest_ind(a,b,equal_var=False)[1]) 106 | p.append(mannwhitneyu(a,b)[1]) 107 | pt.append(np.array(t).mean()) 108 | et.append(np.array(t).std(ddof=1)/np.sqrt(M)) 109 | pm.append(np.array(p).mean()) 110 | em.append(np.array(p).std(ddof=1)/np.sqrt(M)) 111 | if (i==1000): 112 | print("n=1000 Cohen's d = %0.5f" % Cohen_d(a,b)) 113 | pt = np.array(pt) 114 | pm = np.array(pm) 115 | et = np.array(et) 116 | em = np.array(em) 117 | plt.errorbar(n,pt,et,marker='o',label='t-test') 118 | plt.errorbar(n,pm,em,marker='s',label='Mann-Whitney U') 119 | plt.xlabel('Sample size') 120 | plt.ylabel("$p$-value") 121 | plt.legend(loc="upper right") 122 | plt.tight_layout(pad=0,w_pad=0,h_pad=0) 123 | plt.savefig("hypothesis_pvalue_plot.png", dpi=300) 124 | 125 | -------------------------------------------------------------------------------- /chapter_04/missing.py: -------------------------------------------------------------------------------- 1 | # Missing data example 2 | import numpy as np 3 | import matplotlib.pylab as plt 4 | 5 | N = 1000 6 | np.random.seed(73939133) 7 | x = np.zeros((N,4)) 8 | x[:,0] = 5*np.random.random(N) 9 | x[:,1] = np.random.normal(10,1,size=N) 10 | x[:,2] = 3*np.random.beta(5,2,N) 11 | x[:,3] = 0.3*np.random.lognormal(size=N) 12 | 13 | plt.boxplot(x) 14 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 15 | plt.savefig("missing_box_plot.png", dpi=300) 16 | plt.close() 17 | 18 
| # Make 5% of the values NaN 19 | i = np.random.randint(0,N, size=int(0.05*N)) 20 | x[i,0] = np.nan 21 | i = np.random.randint(0,N, size=int(0.05*N)) 22 | x[i,1] = np.nan 23 | i = np.random.randint(0,N, size=int(0.05*N)) 24 | x[i,2] = np.nan 25 | i = np.random.randint(0,N, size=int(0.05*N)) 26 | x[i,3] = np.nan 27 | 28 | # Do we have NaNs in feature 2? 29 | if (np.isnan(x[:,2]).sum() != 0): 30 | print("NaNs present") 31 | i = np.where(np.isnan(x[:,2]) == False) 32 | z = x[i,2] 33 | mn,md,s = z.mean(), np.median(z), z.std(ddof=1) 34 | hh,xx = np.histogram(z, bins=40) 35 | plt.bar(xx[:-1],hh, width=0.8*(xx[1]-xx[0])) 36 | plt.xlabel("x") 37 | plt.ylabel("Count") 38 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 39 | plt.savefig("missing_feature_2_plot.png", dpi=300) 40 | plt.close() 41 | 42 | i = np.where(np.isnan(x[:,2]) == True) 43 | x[i,2] = md # replace w/median 44 | 45 | print("non-NaN mean, std = ", z.mean(), z.std(ddof=1)) 46 | print("updated mean, std = ", x[:,2].mean(), x[:,2].std(ddof=1)) 47 | 48 | hh,xx = np.histogram(x[:,2], bins=40) 49 | plt.bar(xx[:-1],hh, width=0.8*(xx[1]-xx[0])) 50 | plt.xlabel("x") 51 | plt.ylabel("Count") 52 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 53 | plt.savefig("missing_feature_2_updated_plot.png", dpi=300) 54 | plt.close() 55 | 56 | # Do the same to the others 57 | i = np.where(np.isnan(x[:,0]) == False) 58 | m = np.median(x[i,0]) 59 | i = np.where(np.isnan(x[:,0]) == True) 60 | x[i,0] = m 61 | 62 | i = np.where(np.isnan(x[:,1]) == False) 63 | m = np.median(x[i,1]) 64 | i = np.where(np.isnan(x[:,1]) == True) 65 | x[i,1] = m 66 | 67 | i = np.where(np.isnan(x[:,3]) == False) 68 | m = np.median(x[i,3]) 69 | i = np.where(np.isnan(x[:,3]) == True) 70 | x[i,3] = m 71 | 72 | plt.boxplot(x) 73 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 74 | plt.savefig("missing_updated_box_plot.png", dpi=300) 75 | plt.close() 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /chapter_04/quantiles.py: -------------------------------------------------------------------------------- 1 | # 2 | # Use a synthetic exam dataset to illustrate quantiles 3 | # 4 | # RTK, 03-Jul-2020 5 | # Last update: 03-Jul-2020 6 | # 7 | ################################################################ 8 | 9 | import numpy as np 10 | import matplotlib.pylab as plt 11 | 12 | d = np.load("exams.npy") 13 | p = d[:,0].astype("uint32") 14 | q = np.quantile(p, [0.0, 0.25, 0.5, 0.75, 1.0]) 15 | 16 | print() 17 | print("Quartiles: ", q) 18 | print() 19 | print("Counts by quartile:") 20 | print(" %d" % ((q[0] <= p) & (p < q[1])).sum()) 21 | print(" %d" % ((q[1] <= p) & (p < q[2])).sum()) 22 | print(" %d" % ((q[2] <= p) & (p < q[3])).sum()) 23 | print(" %d" % ((q[3] <= p) & (p < q[4])).sum()) 24 | print() 25 | 26 | h = np.bincount(p, minlength=100) 27 | x = np.arange(101) 28 | plt.bar(x,h, width=0.8*(x[1]-x[0])) 29 | n = 1.1*h.max() 30 | plt.plot([q[1],q[1]],[0,n], linewidth=3, color='k') 31 | plt.plot([q[2],q[2]],[0,n], linewidth=3, color='k') 32 | plt.plot([q[3],q[3]],[0,n], linewidth=3, color='k') 33 | plt.xlim((p.min()-1,p.max()+1)) 34 | plt.ylabel("Count") 35 | plt.tight_layout(pad=0,w_pad=0,h_pad=0) 36 | plt.savefig("quantiles_plot.png", dpi=300) 37 | #plt.show() 38 | plt.close() 39 | 40 | # box plot 41 | plt.boxplot(d) 42 | plt.xlabel("Test") 43 | plt.ylabel("Scores") 44 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 45 | plt.savefig("box_plot.png", dpi=300) 46 | #plt.show() 47 | plt.close() 48 | 49 | plt.boxplot(p) 50 | plt.ylabel("Scores") 51 | 
plt.tight_layout(pad=0, w_pad=0, h_pad=0) 52 | plt.savefig("box_plot_1.png", dpi=300) 53 | plt.show() 54 | plt.close() 55 | 56 | -------------------------------------------------------------------------------- /chapter_05/matrixmul.py: -------------------------------------------------------------------------------- 1 | # O(n^3) matrix multiplication 2 | 3 | import time 4 | import numpy as np 5 | 6 | def matrixmul(A,B): 7 | I,K = A.shape 8 | J = B.shape[1] 9 | C = np.zeros((I,J), dtype=A.dtype) 10 | for i in range(I): 11 | for j in range(J): 12 | for k in range(K): 13 | C[i,j] += A[i,k]*B[k,j] 14 | return C 15 | 16 | A = np.array([[1,2,3],[4,5,6],[7,8,9],[10,11,12]]) 17 | B = np.array([[1,2],[3,4],[5,6]]) 18 | N = 100000 19 | 20 | s = time.time() 21 | for i in range(N): 22 | C = np.matmul(A,B) 23 | e = time.time() 24 | print("np.matmul: %0.6f" % (e-s,)) 25 | 26 | s = time.time() 27 | for i in range(N): 28 | C = matrixmul(A,B) 29 | e = time.time() 30 | print("matrixmul: %0.6f" % (e-s,)) 31 | 32 | -------------------------------------------------------------------------------- /chapter_05/numpy_matmul.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: numpy_matmul.py 3 | # 4 | # NumPy matrix multiplication examples 5 | # 6 | # RTK, 12-Apr-2020 7 | # Last update: 12-Apr-2020 8 | # 9 | ################################################################ 10 | 11 | import numpy as np 12 | 13 | def dot(a,b): 14 | try: 15 | return np.dot(a,b) 16 | except: 17 | return "fails" 18 | 19 | def matmul(a,b): 20 | try: 21 | return np.matmul(a,b) 22 | except: 23 | return "fails" 24 | 25 | # the different vectors and matrices 26 | a1 = np.array([1,2,3]) 27 | ar = a1.reshape((1,3)) 28 | ac = a1.reshape((3,1)) 29 | b1 = np.array([1,2,3]) 30 | br = b1.reshape((1,3)) 31 | bc = b1.reshape((3,1)) 32 | A = np.array([[1,2,3],[4,5,6],[7,8,9]]) 33 | B = np.array([[9,8,7],[6,5,4],[3,2,1]]) 34 | 35 | print() 36 | print("np.dot examples:") 37 | print("dot(a1,b1):"); print(dot(a1,b1)) 38 | print("dot(a1,br):"); print(dot(a1,br)) 39 | print("dot(a1,bc):"); print(dot(a1,bc)) 40 | print("dot(ar,b1):"); print(dot(ar,b1)) 41 | print("dot(ar,br):"); print(dot(ar,br)) 42 | print("dot(ar,bc):"); print(dot(ar,bc)) 43 | print("dot(ac,b1):"); print(dot(ac,b1)) 44 | print("dot(ac,br):"); print(dot(ac,br)) 45 | print("dot(ac,bc):"); print(dot(ac,bc)) 46 | print("dot(A,a1):"); print(dot(A,a1)) 47 | print("dot(A,ar):"); print(dot(A,ar)) 48 | print("dot(A,ac):"); print(dot(A,ac)) 49 | print("dot(a1,A):"); print(dot(a1,A)) 50 | print("dot(ar,A):"); print(dot(ar,A)) 51 | print("dot(ac,A):"); print(dot(ac,A)) 52 | print("dot(A,B):"); print(dot(A,B)) 53 | print() 54 | 55 | print() 56 | print("np.matmul examples:") 57 | print("matmul(a1,b1):"); print(matmul(a1,b1)) 58 | print("matmul(a1,br):"); print(matmul(a1,br)) 59 | print("matmul(a1,bc):"); print(matmul(a1,bc)) 60 | print("matmul(ar,b1):"); print(matmul(ar,b1)) 61 | print("matmul(ar,br):"); print(matmul(ar,br)) 62 | print("matmul(ar,bc):"); print(matmul(ar,bc)) 63 | print("matmul(ac,b1):"); print(matmul(ac,b1)) 64 | print("matmul(ac,br):"); print(matmul(ac,br)) 65 | print("matmul(ac,bc):"); print(matmul(ac,bc)) 66 | print("matmul(A,a1):"); print(matmul(A,a1)) 67 | print("matmul(A,ar):"); print(matmul(A,ar)) 68 | print("matmul(A,ac):"); print(matmul(A,ac)) 69 | print("matmul(a1,A):"); print(matmul(a1,A)) 70 | print("matmul(ar,A):"); print(matmul(ar,A)) 71 | print("matmul(ac,A):"); print(matmul(ac,A)) 72 | print("matmul(A,B):"); 
print(matmul(A,B)) 73 | print() 74 | 75 | -------------------------------------------------------------------------------- /chapter_06/bc_mahalanobis.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn import datasets 3 | from scipy.spatial.distance import mahalanobis 4 | 5 | bc = datasets.load_breast_cancer() 6 | d = bc.data 7 | l = bc.target 8 | i = np.argsort(np.random.random(len(d))) 9 | d = d[i] 10 | l = l[i] 11 | xtrn, ytrn = d[:400], l[:400] 12 | xtst, ytst = d[400:], l[400:] 13 | 14 | i = np.where(ytrn == 0) 15 | m0 = xtrn[i].mean(axis=0) 16 | i = np.where(ytrn == 1) 17 | m1 = xtrn[i].mean(axis=0) 18 | S = np.cov(xtrn, rowvar=False) 19 | SI= np.linalg.inv(S) 20 | 21 | def score(xtst, ytst, m, SI): 22 | nc = 0 23 | for i in range(len(ytst)): 24 | d = np.array([mahalanobis(xtst[i],m[0],SI), 25 | mahalanobis(xtst[i],m[1],SI)]) 26 | c = np.argmin(d) 27 | if (c == ytst[i]): 28 | nc += 1 29 | return nc / len(ytst) 30 | 31 | mscore = score(xtst, ytst, [m0,m1], SI) 32 | escore = score(xtst, ytst, [m0,m1], np.identity(30)) 33 | print("Mahalanobis score = %0.4f" % mscore) 34 | print("Euclidean score = %0.4f" % escore) 35 | 36 | -------------------------------------------------------------------------------- /chapter_06/iris_pca.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pylab as plt 3 | from sklearn.decomposition import PCA, TruncatedSVD 4 | from sklearn.datasets import load_iris 5 | from scipy.linalg import svd as SVD 6 | 7 | iris = load_iris().data.copy() 8 | labels = load_iris().target.copy() 9 | m = iris.mean(axis=0) 10 | s = iris.std(axis=0) 11 | ir = iris - m 12 | cv = np.cov(ir, rowvar=False) 13 | val, vec = np.linalg.eig(cv) 14 | val = np.abs(val) 15 | idx = np.argsort(val)[::-1] 16 | ex = val[idx] / val.sum() 17 | print("fraction explained: ", ex) 18 | w = np.vstack((vec[:,idx[0]],vec[:,idx[1]])) 19 | d = np.zeros((ir.shape[0],2)) 20 | for i in range(ir.shape[0]): 21 | d[i,:] = np.dot(w,ir[i]) 22 | 23 | markers = np.array(["o","s","+"])[labels] 24 | for i in range(len(labels)): 25 | plt.plot(d[i,0], d[i,1], marker=markers[i], color='k', linestyle='none') 26 | plt.xlabel("$x_0$") 27 | plt.ylabel("$x_1$") 28 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 29 | plt.savefig("iris_pca.png", dpi=300) 30 | plt.close() 31 | 32 | pca = PCA(n_components=2) 33 | pca.fit(ir) 34 | dd = pca.fit_transform(ir) 35 | for i in range(len(labels)): 36 | plt.plot(dd[i,0], dd[i,1], marker=markers[i], color='k', linestyle='none') 37 | plt.xlabel("$x_0$") 38 | plt.ylabel("$x_1$") 39 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 40 | plt.savefig("iris_pca_sklearn.png", dpi=300) 41 | plt.close() 42 | 43 | svd = TruncatedSVD(n_components=2) 44 | svd.fit(ir) 45 | s = svd.fit_transform(ir) 46 | for i in range(len(labels)): 47 | plt.plot(s[i,0], s[i,1], marker=markers[i], color='k', linestyle='none') 48 | plt.xlabel("$x_0$") 49 | plt.ylabel("$x_1$") 50 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 51 | plt.savefig("iris_pca_sklearn_svd.png", dpi=300) 52 | plt.close() 53 | 54 | # truncate manually - exact result as sklearn PCA 55 | n_elements = 2 56 | u,s,vt = SVD(ir) 57 | S = np.zeros((ir.shape[0], ir.shape[1])) 58 | for i in range(4): 59 | S[i,i] = s[i] 60 | S = S[:, :n_elements] 61 | T = u @ S 62 | for i in range(len(labels)): 63 | plt.plot(T[i,0], T[i,1], marker=markers[i], color='k', linestyle='none') 64 | plt.xlabel("$x_0$") 65 | plt.ylabel("$x_1$") 66 | 
plt.tight_layout(pad=0, w_pad=0, h_pad=0) 67 | plt.savefig("iris_pca_truncated_svd.png", dpi=300) 68 | 69 | 70 | -------------------------------------------------------------------------------- /chapter_06/kl_divergence.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.special import rel_entr 3 | import matplotlib.pylab as plt 4 | 5 | N = 1000000 6 | p = np.random.randint(0,13,size=N) 7 | p = np.bincount(p) 8 | p = p / p.sum() 9 | q = np.random.binomial(12,0.9,size=N) 10 | q = np.bincount(q) 11 | q = q / q.sum() 12 | w = np.random.binomial(12,0.4,size=N) 13 | w = np.bincount(w) 14 | w = w / w.sum() 15 | print(rel_entr(q,p).sum()) 16 | print(rel_entr(w,p).sum()) 17 | plt.bar(np.arange(13),p,0.333,hatch="///",edgecolor='k') 18 | plt.bar(np.arange(13)+0.333,q,0.333,hatch="---",edgecolor='k') 19 | plt.bar(np.arange(13)+0.666,w,0.333,hatch="\\\\",edgecolor='k') 20 | plt.xlabel("Value") 21 | plt.ylabel("Proportion") 22 | plt.tight_layout(pad=0,h_pad=0,w_pad=0) 23 | plt.savefig("kl_divergence.png", dpi=300) 24 | plt.show() 25 | 26 | -------------------------------------------------------------------------------- /chapter_07/c07_figure.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: c07_figure.py 3 | # 4 | # Plot of x^2+xy+y^2 and gradient field. 5 | # 6 | # RTK, 25-Mar-2020 7 | # Last update: 26-Mar-2020 8 | # 9 | ################################################################ 10 | 11 | import numpy as np 12 | from mpl_toolkits.mplot3d import Axes3D 13 | import matplotlib.pylab as plt 14 | 15 | # Function plot 16 | x = np.linspace(-1.0,1.0,50) 17 | y = np.linspace(-1.0,1.0,50) 18 | xx = [] 19 | yy = [] 20 | zz = [] 21 | 22 | for i in range(50): 23 | for j in range(50): 24 | xx.append(x[i]) 25 | yy.append(y[j]) 26 | zz.append(x[i]*x[i]+x[i]*y[j]+y[j]*y[j]) 27 | x = np.array(xx) 28 | y = np.array(yy) 29 | z = np.array(zz) 30 | 31 | fig = plt.figure() 32 | ax = fig.add_subplot(111, projection='3d') 33 | ax.scatter(x, y, z, marker='.', s=2, color='b') 34 | ax.view_init(30, 50) 35 | ax.set_xlabel("$x$") 36 | ax.set_ylabel("$y$") 37 | ax.set_zlabel("$z$") 38 | plt.draw() 39 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 40 | plt.savefig("c05fig03a.png", dpi=300) 41 | ax.view_init(30,20) 42 | ax.set_xlabel("$x$") 43 | ax.set_ylabel("$y$") 44 | ax.set_zlabel("$z$") 45 | plt.draw() 46 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 47 | plt.savefig("c05fig03b.png", dpi=300) 48 | plt.close() 49 | 50 | # Quiver plot - 2D 51 | fig = plt.figure() 52 | ax = fig.add_subplot(111) 53 | x = np.linspace(-1.0,1.0,20) 54 | y = np.linspace(-1.0,1.0,20) 55 | xv, yv = np.meshgrid(x, y, indexing='ij', sparse=False) 56 | dx = 2*xv + yv 57 | dy = 2*yv + xv 58 | ax.quiver(xv, yv, dx, dy, color='b') 59 | ax.set_xlabel("$x$") 60 | ax.set_ylabel("$y$") 61 | plt.axis('equal') 62 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 63 | plt.savefig("c05fig03c.png", dpi=300) 64 | plt.close() 65 | 66 | 67 | -------------------------------------------------------------------------------- /chapter_08/newton_1d.py: -------------------------------------------------------------------------------- 1 | # Newton's method in 1D 2 | import numpy as np 3 | 4 | def f(x): 5 | return 2.0 - x*x 6 | 7 | def d(x): 8 | return -2.0*x 9 | 10 | x = 1.0 11 | 12 | for i in range(5): 13 | x = x - f(x)/d(x) 14 | print("%2d: %0.16f" % (i+1,x)) 15 | 16 | print() 17 | print("NumPy says sqrt(2) = %0.16f for a deviation of %0.16f" % (np.sqrt(2), 
np.abs(np.sqrt(2)-x)))
18 | print()
19 | 
20 | 

--------------------------------------------------------------------------------
/chapter_08/newton_2d.py:
--------------------------------------------------------------------------------
1 | # 2D Newton's method
2 | import numpy as np
3 | 
4 | def f(x):
5 |     x0,x1 = x[0,0],x[1,0]
6 |     return np.array([[4*x0-2*x0*x1],[2*x1+x0*x1-2*x1**2]])
7 | 
8 | def JI(x):
9 |     x0,x1 = x[0,0],x[1,0]
10 |     d = (4-2*x1)*(2+x0-4*x1)+2*x0*x1   # determinant of the Jacobian of f
11 |     return (1/d)*np.array([[2+x0-4*x1,2*x0],[-x1,4-2*x1]])   # inverse Jacobian
12 | 
13 | x0 = float(input("x0: "))
14 | x1 = float(input("x1: "))
15 | x = np.array([[x0],[x1]])
16 | 
17 | N = 20
18 | for i in range(N):
19 |     x = x - JI(x) @ f(x)
20 |     if (i > (N-10)):
21 |         print("%4d: (%0.8f, %0.8f)" % (i, x[0,0],x[1,0]))
22 | 

--------------------------------------------------------------------------------
/chapter_08/spiral.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from mpl_toolkits.mplot3d import Axes3D
3 | import matplotlib.pylab as plt
4 | 
5 | t = np.linspace(0,50,1000)
6 | x = t*np.cos(t)
7 | y = t*np.sin(t)
8 | z = t
9 | 
10 | fig = plt.figure()
11 | ax = fig.add_subplot(111, projection='3d')
12 | ax.plot(x, y, z, color='k')
13 | ax.set_xlabel("$x$")
14 | ax.set_ylabel("$y$")
15 | ax.set_zlabel("$z$")
16 | plt.tight_layout(pad=0, w_pad=0, h_pad=0)
17 | plt.savefig("spiral.png", dpi=300)
18 | plt.show()
19 | 
20 | 

--------------------------------------------------------------------------------
/chapter_09/c09_nn.py:
--------------------------------------------------------------------------------
1 | #
2 | # Simple matrix-vector operations example
3 | #
4 | # RTK, 11-Apr-2020 (Happy bday, Peter!)
5 | # Last update: 11-Apr-2020
6 | #
7 | ################################################################
8 | 
9 | import matplotlib.pylab as plt
10 | import numpy as np
11 | from sklearn.neural_network import MLPClassifier
12 | 
13 | # Build the dataset
14 | np.random.seed(8675309)
15 | x0 = np.random.random(50)-0.3
16 | y0 = np.random.random(50)+0.3
17 | x1 = np.random.random(50)+0.3
18 | y1 = np.random.random(50)-0.3
19 | print("x0,y0: %0.6f, %0.6f" % (x0.mean(), y0.mean()))
20 | print("x1,y1: %0.6f, %0.6f" % (x1.mean(), y1.mean()))
21 | print()
22 | x = np.zeros((100,2))
23 | x[:50,0] = x0; x[:50,1] = y0
24 | x[50:,0] = x1; x[50:,1] = y1
25 | y = np.array([0]*50+[1]*50)
26 | 
27 | # Randomize and make train/test split
28 | idx = np.argsort(np.random.random(100))
29 | x = x[idx]
30 | y = y[idx]
31 | x_train = x[:75]
32 | y_train = y[:75]
33 | x_test = x[75:]
34 | y_test = y[75:]
35 | 
36 | # Show the dataset
37 | plt.plot(x0,y0,marker='o',linestyle='none')
38 | plt.plot(x1,y1,marker='s',linestyle='none')
39 | plt.xlabel(r'$x_0$')
40 | plt.ylabel(r'$x_1$')
41 | plt.tight_layout(pad=0, w_pad=0, h_pad=0)
42 | plt.savefig("c04_nn.png", dpi=300)
43 | 
44 | # Train a simple model
45 | clf = MLPClassifier(hidden_layer_sizes=(5,))
46 | clf.fit(x_train, y_train)
47 | score = clf.score(x_test, y_test)
48 | prob = clf.predict_proba(x_test)
49 | print("Model accuracy on test set: %0.4f" % score)
50 | W0 = clf.coefs_[0].T
51 | b0 = clf.intercepts_[0].reshape((5,1))
52 | W1 = clf.coefs_[1].T
53 | b1 = clf.intercepts_[1]
54 | 
55 | print("Weights and biases:")
56 | print(W0)
57 | print(b0)
58 | print()
59 | print(W1)
60 | print(b1)
61 | print()
62 | 
63 | z = x_test[0].reshape((2,1))
64 | print("x_test:", z)
65 | print("W0 @ z + b0", W0 @ z + b0)
66 | print("a0 = relu(W0 @ z + b0)", 
np.maximum(0,W0@z+b0)) 67 | a0 = np.maximum(0,W0@z+b0) 68 | print("a1 = W1@a0 + b1", W1@a0+b1) 69 | a1 = W1@a0+b1 70 | print("sigmoid(a1)", 1.0/(1.0+np.exp(-a1))) 71 | print() 72 | print("prob: ", prob[0][1]) 73 | print("y_test: ", y_test[0]) 74 | print() 75 | 76 | -------------------------------------------------------------------------------- /chapter_09/convolve_example.py: -------------------------------------------------------------------------------- 1 | # 2 | # Illustrate how different NumPy and SciPy convolution 3 | # and correlation routines work. 4 | # 5 | import numpy as np 6 | from scipy.signal import convolve2d 7 | from scipy.datasets import face 8 | from PIL import Image 9 | 10 | # Get Ricky's face 11 | img = face(True) 12 | img = img[:512,(img.shape[1]-612):(img.shape[1]-100)] 13 | 14 | # An asymmetric kernel 15 | k = np.array([[1,0,0],[0,-8,0],[0,0,3]]) 16 | c = convolve2d(img, k, mode='same') 17 | 18 | # Results 19 | print("Original:") 20 | print(img[:8,:8]) 21 | print() 22 | print("Kernel:") 23 | print(k) 24 | print() 25 | print("convolve2d(img,k,mode='same'):") 26 | print(c[1:8,1:8]) 27 | print() 28 | 29 | if (c.min() < 0): 30 | c = c + np.abs(c.min()) 31 | c = (255*(c / c.max())).astype("uint8") 32 | 33 | Image.fromarray(c).save("ricky_convol.png") 34 | Image.fromarray(img).save("ricky_orig.png") 35 | 36 | -------------------------------------------------------------------------------- /chapter_10/NN.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: nn.py 3 | # 4 | # Generic fully connected neural network code using NumPy. 5 | # 6 | # Based on code by Omar Aflak, 7 | # 8 | # https://github.com/OmarAflak/Medium-Python-Neural-Network 9 | # 10 | # used and modified with his permission. 
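# The three classes below form a tiny sequential-network API:
# FullyConnectedLayer holds weights and biases and accumulates
# gradients over a minibatch, ActivationLayer applies the sigmoid,
# and Network chains them via add(), fit(), and predict().  A usage
# sketch mirroring iris.py later in this chapter (layer sizes are
# illustrative):
#
#   net = Network()
#   net.add(FullyConnectedLayer(2, 2))
#   net.add(ActivationLayer())
#   net.add(FullyConnectedLayer(2, 2))
#   net.fit(x_train, y_train, minibatches=4000, learning_rate=0.1)
#   predictions = net.predict(x_test)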
11 | # 12 | # RTK, 03-Feb-2021 13 | # Last update: 06-Feb-2021 14 | # 15 | ################################################################ 16 | 17 | import numpy as np 18 | 19 | # Activation function and derivative 20 | def sigmoid(x): 21 | return 1.0 / (1.0 + np.exp(-x)) 22 | 23 | def sigmoid_prime(x): 24 | return sigmoid(x)*(1.0 - sigmoid(x)) 25 | 26 | # Loss function and derivative 27 | def mse(y_true, y_pred): 28 | return (0.5*(y_true - y_pred)**2).mean() 29 | 30 | def mse_prime(y_true, y_pred): 31 | return y_pred - y_true 32 | 33 | 34 | ################################################################ 35 | # ActivationLayer 36 | # 37 | class ActivationLayer: 38 | def forward(self, input_data): 39 | self.input = input_data 40 | return sigmoid(input_data) 41 | 42 | def backward(self, output_error): 43 | return sigmoid_prime(self.input) * output_error 44 | 45 | def step(self, eta): 46 | return 47 | 48 | 49 | ################################################################ 50 | # FullyConnectedLayer 51 | # 52 | class FullyConnectedLayer: 53 | def __init__(self, input_size, output_size): 54 | # for accumulating error over a minibatch 55 | self.delta_w = np.zeros((input_size, output_size)) 56 | self.delta_b = np.zeros((1,output_size)) 57 | self.passes = 0 58 | 59 | # initialize the weights and biases w/small random values 60 | self.weights = np.random.rand(input_size, output_size) - 0.5 61 | self.bias = np.random.rand(1, output_size) - 0.5 62 | 63 | def forward(self, input_data): 64 | self.input = input_data 65 | return np.dot(self.input, self.weights) + self.bias 66 | 67 | def backward(self, output_error): 68 | input_error = np.dot(output_error, self.weights.T) 69 | weights_error = np.dot(self.input.T, output_error) 70 | 71 | # accumulate the error over the minibatch 72 | self.delta_w += weights_error 73 | self.delta_b += output_error 74 | self.passes += 1 75 | return input_error 76 | 77 | def step(self, eta): 78 | # update the weights and biases by the mean error 79 | # over the minibatch 80 | self.weights -= eta * self.delta_w / self.passes 81 | self.bias -= eta * self.delta_b / self.passes 82 | 83 | # reset for the next minibatch 84 | self.delta_w = np.zeros(self.weights.shape) 85 | self.delta_b = np.zeros(self.bias.shape) 86 | self.passes = 0 87 | 88 | 89 | ################################################################ 90 | # Network 91 | # 92 | class Network: 93 | def __init__(self, verbose=True): 94 | self.verbose = verbose 95 | self.layers = [] 96 | 97 | def add(self, layer): 98 | self.layers.append(layer) 99 | 100 | def predict(self, input_data): 101 | result = [] 102 | for i in range(input_data.shape[0]): 103 | output = input_data[i] 104 | for layer in self.layers: 105 | output = layer.forward(output) 106 | result.append(output) 107 | return result 108 | 109 | def fit(self, x_train, y_train, minibatches, learning_rate, batch_size=64): 110 | for i in range(minibatches): 111 | err = 0 112 | 113 | # select a random minibatch 114 | idx = np.argsort(np.random.random(x_train.shape[0]))[:batch_size] 115 | x_batch = x_train[idx] 116 | y_batch = y_train[idx] 117 | 118 | for j in range(batch_size): 119 | # forward propagation 120 | output = x_batch[j] 121 | for layer in self.layers: 122 | output = layer.forward(output) 123 | 124 | # accumulate loss 125 | err += mse(y_batch[j], output) 126 | 127 | # backward propagation 128 | error = mse_prime(y_batch[j], output) 129 | for layer in reversed(self.layers): 130 | error = layer.backward(error) 131 | 132 | # update weights and biases 133 | for 
layer in self.layers: 134 | layer.step(learning_rate) 135 | 136 | # report mean loss over minibatch 137 | if (self.verbose) and ((i%10) == 0): 138 | err /= batch_size 139 | print('minibatch %5d/%d error=%0.9f' % (i, minibatches, err)) 140 | 141 | # end NN.py 142 | 143 | -------------------------------------------------------------------------------- /chapter_10/build_dataset.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: build_dataset.py 3 | # 4 | # Build the small MNIST dataset. 5 | # 6 | # RTK, 03-Feb-2021 7 | # Last update: 24-Mar-2024 8 | # 9 | ################################################################ 10 | 11 | import cv2 12 | import numpy as np 13 | from keras.datasets import mnist 14 | from keras.utils import to_categorical 15 | 16 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 17 | ytrn = to_categorical(y_train) 18 | 19 | np.save("dataset/train_images_full.npy", x_train) 20 | np.save("dataset/test_images_full.npy", x_test) 21 | np.save("dataset/train_labels_vector.npy", ytrn) 22 | np.save("dataset/train_labels.npy", y_train) 23 | np.save("dataset/test_labels.npy", y_test) 24 | 25 | # Build 14x14 versions 26 | xtrn = np.zeros((60000,14,14), dtype="float32") 27 | for i in range(60000): 28 | xtrn[i,:,:] = cv2.resize(x_train[i], (14,14), interpolation=cv2.INTER_LINEAR) 29 | xtst = np.zeros((10000,14,14), dtype="float32") 30 | for i in range(10000): 31 | xtst[i,:,:] = cv2.resize(x_test[i], (14,14), interpolation=cv2.INTER_LINEAR) 32 | 33 | np.save("dataset/train_images_small.npy", xtrn) 34 | np.save("dataset/test_images_small.npy", xtst) 35 | 36 | -------------------------------------------------------------------------------- /chapter_10/iris.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: iris.py 3 | # 4 | # Train and test the 2-feature iris dataset 5 | # 6 | # RTK, 06-Feb-2021 7 | # Last update: 06-Feb-2021 8 | # 9 | ################################################################ 10 | 11 | import numpy as np 12 | from NN import * 13 | from sklearn.datasets import load_iris 14 | 15 | def BuildDataset(): 16 | """Create the dataset""" 17 | 18 | # Get the dataset keeping the first two features 19 | iris = load_iris() 20 | x = iris["data"][:,:2] 21 | y = iris["target"] 22 | 23 | # Standardize and keep only classes 0 and 1 24 | x = (x - x.mean(axis=0)) / x.std(axis=0) 25 | i0 = np.where(y == 0)[0] 26 | i1 = np.where(y == 1)[0] 27 | x = np.vstack((x[i0],x[i1])) 28 | 29 | # Train and test data 30 | xtrn = np.vstack((x[:35],x[50:85])) 31 | ytrn = np.array([0]*35 + [1]*35) 32 | xtst = np.vstack((x[35:50],x[85:])) 33 | ytst = np.array([0]*15+[1]*15) 34 | 35 | idx = np.argsort(np.random.random(70)) 36 | xtrn = xtrn[idx] 37 | ytrn = ytrn[idx] 38 | idx = np.argsort(np.random.random(30)) 39 | xtst = xtst[idx] 40 | ytst = ytst[idx] 41 | 42 | y_train = np.zeros((len(ytrn),2)) 43 | for i in range(len(ytrn)): 44 | if (ytrn[i] == 1): 45 | y_train[i,:] = [0,1] 46 | else: 47 | y_train[i,:] = [1,0] 48 | 49 | y_test = np.zeros((len(ytst),2)) 50 | for i in range(len(ytst)): 51 | if (ytst[i] == 1): 52 | y_test[i,:] = [0,1] 53 | else: 54 | y_test[i,:] = [1,0] 55 | 56 | return (xtrn.reshape((xtrn.shape[0],1,2)), y_train, 57 | xtst.reshape((xtst.shape[0],1,2)), y_test) 58 | 59 | 60 | def main(): 61 | """Train a model""" 62 | 63 | x_train, y_train, x_test, y_test = BuildDataset() 64 | 65 | # Build the network using sigmoid activations 66 | net = Network() 67 | 
net.add(FullyConnectedLayer(2,2)) 68 | net.add(ActivationLayer()) 69 | net.add(FullyConnectedLayer(2,2)) 70 | 71 | # Loss and train 72 | net.fit(x_train, y_train, minibatches=4000, learning_rate=0.1, batch_size=len(y_train)) 73 | 74 | # Build the confusion matrix using the test set predictions 75 | out = net.predict(x_test) 76 | cm = np.zeros((2,2), dtype="uint32") 77 | for i in range(len(y_test)): 78 | cm[np.argmax(y_test[i]),np.argmax(out[i])] += 1 79 | 80 | # Show the results 81 | print() 82 | print(np.array2string(cm)) 83 | print() 84 | print("accuracy = %0.7f" % (np.diag(cm).sum() / cm.sum(),)) 85 | print() 86 | 87 | 88 | if (__name__ == "__main__"): 89 | main() 90 | 91 | -------------------------------------------------------------------------------- /chapter_10/mnist.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: mnist.py 3 | # 4 | # Train and test the small 14x14 MNIST dataset. 5 | # 6 | # RTK, 03-Feb-2021 7 | # Last update: 06-Feb-2021 8 | # 9 | ################################################################ 10 | 11 | import numpy as np 12 | from NN import * 13 | 14 | # Load, reshape, and scale the data 15 | x_train = np.load("../dataset/train_images_small.npy") 16 | x_test = np.load("../dataset/test_images_small.npy") 17 | y_train = np.load("../dataset/train_labels_vector.npy") 18 | y_test = np.load("../dataset/test_labels.npy") 19 | 20 | x_train = x_train.reshape(x_train.shape[0], 1, 14*14) 21 | x_train /= 255 22 | x_test = x_test.reshape(x_test.shape[0], 1, 14*14) 23 | x_test /= 255 24 | 25 | # Build the network using sigmoid activations 26 | net = Network() 27 | net.add(FullyConnectedLayer(14*14, 100)) 28 | net.add(ActivationLayer()) 29 | net.add(FullyConnectedLayer(100, 50)) 30 | net.add(ActivationLayer()) 31 | net.add(FullyConnectedLayer(50, 10)) 32 | net.add(ActivationLayer()) 33 | 34 | # Loss and train 35 | net.fit(x_train, y_train, minibatches=40000, learning_rate=1.0) 36 | 37 | # Build the confusion matrix using the test set predictions 38 | out = net.predict(x_test) 39 | cm = np.zeros((10,10), dtype="uint32") 40 | for i in range(len(y_test)): 41 | cm[y_test[i],np.argmax(out[i])] += 1 42 | 43 | # Show the results 44 | print() 45 | print(np.array2string(cm)) 46 | print() 47 | print("accuracy = %0.7f" % (np.diag(cm).sum() / cm.sum(),)) 48 | print() 49 | 50 | -------------------------------------------------------------------------------- /chapter_10/nn_by_hand.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: nn_by_hand.py 3 | # 4 | # Implement a simple feedforward neural network with 5 | # backprop and gradient descent. 
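# The network is fixed at two inputs, two sigmoid hidden nodes, and
# one linear output:
#
#   a0  = sigmoid(w0*x[0] + w2*x[1] + b0)
#   a1  = sigmoid(w1*x[0] + w3*x[1] + b1)
#   out = w4*a0 + w5*a1 + b2
#
# GradientDescent() below updates each parameter with the average of
# the per-sample gradients, e.g. w4 <- w4 - eta * sum_k((a2 - y_k)*a0) / m.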
6 | # 7 | # RTK, 02-Feb-2021 8 | # Last update: 02-Feb-2021 9 | # 10 | ################################################################ 11 | 12 | import numpy as np 13 | from sklearn.datasets import load_iris 14 | 15 | def BuildDataset(): 16 | """Create the dataset""" 17 | 18 | # Get the dataset keeping the first two features 19 | iris = load_iris() 20 | x = iris["data"][:,:2] 21 | y = iris["target"] 22 | 23 | # Standardize and keep only classes 0 and 1 24 | x = (x - x.mean(axis=0)) / x.std(axis=0) 25 | i0 = np.where(y == 0)[0] 26 | i1 = np.where(y == 1)[0] 27 | x = np.vstack((x[i0],x[i1])) 28 | 29 | # Train and test data 30 | xtrn = np.vstack((x[:35],x[50:85])) 31 | ytrn = np.array([0]*35 + [1]*35) 32 | xtst = np.vstack((x[35:50],x[85:])) 33 | ytst = np.array([0]*15+[1]*15) 34 | 35 | idx = np.argsort(np.random.random(70)) 36 | xtrn = xtrn[idx] 37 | ytrn = ytrn[idx] 38 | idx = np.argsort(np.random.random(30)) 39 | xtst = xtst[idx] 40 | ytst = ytst[idx] 41 | 42 | return xtrn, ytrn, xtst, ytst 43 | 44 | 45 | ################################################################ 46 | # sigmoid 47 | # 48 | def sigmoid(x): 49 | return 1.0 / (1.0 + np.exp(-x)) 50 | 51 | 52 | ################################################################ 53 | # Forward 54 | # 55 | def Forward(net, x): 56 | """Pass the data through the network""" 57 | 58 | out = np.zeros(x.shape[0]) 59 | 60 | for k in range(x.shape[0]): 61 | z0 = net["w0"]*x[k,0] + net["w2"]*x[k,1] + net["b0"] 62 | a0 = sigmoid(z0) 63 | z1 = net["w1"]*x[k,0] + net["w3"]*x[k,1] + net["b1"] 64 | a1 = sigmoid(z1) 65 | out[k] = net["w4"]*a0 + net["w5"]*a1 + net["b2"] 66 | 67 | return out 68 | 69 | 70 | ################################################################ 71 | # Evaluate 72 | # 73 | def Evaluate(net, x, y): 74 | """Evaluate the network""" 75 | 76 | out = Forward(net, x) 77 | tn = fp = fn = tp = 0 78 | pred = [] 79 | 80 | for i in range(len(y)): 81 | c = 0 if (out[i] < 0.5) else 1 82 | pred.append(c) 83 | if (c == 0) and (y[i] == 0): 84 | tn += 1 85 | elif (c == 0) and (y[i] == 1): 86 | fn += 1 87 | elif (c == 1) and (y[i] == 0): 88 | fp += 1 89 | else: 90 | tp += 1 91 | 92 | return tn,fp,fn,tp,pred 93 | 94 | 95 | 96 | ################################################################ 97 | # GradientDescent 98 | # 99 | def GradientDescent(net, x, y, epochs, eta): 100 | """Perform gradient descent""" 101 | 102 | for e in range(epochs): 103 | # Pass over training set accumulating deltas 104 | dw0 = dw1 = dw2 = dw3 = dw4 = dw5 = db0 = db1 = db2 = 0.0 105 | 106 | for k in range(len(y)): 107 | # Forward pass 108 | z0 = net["w0"]*x[k,0] + net["w2"]*x[k,1] + net["b0"] 109 | a0 = sigmoid(z0) 110 | z1 = net["w1"]*x[k,0] + net["w3"]*x[k,1] + net["b1"] 111 | a1 = sigmoid(z1) 112 | a2 = net["w4"]*a0 + net["w5"]*a1 + net["b2"] 113 | 114 | # Backward pass 115 | db2 += a2 - y[k] 116 | dw4 += (a2 - y[k]) * a0 117 | dw5 += (a2 - y[k]) * a1 118 | db1 += (a2 - y[k]) * net["w5"] * a1 * (1 - a1) 119 | dw1 += (a2 - y[k]) * net["w5"] * a1 * (1 - a1) * x[k,0] 120 | dw3 += (a2 - y[k]) * net["w5"] * a1 * (1 - a1) * x[k,1] 121 | db0 += (a2 - y[k]) * net["w4"] * a0 * (1 - a0) 122 | dw0 += (a2 - y[k]) * net["w4"] * a0 * (1 - a0) * x[k,0] 123 | dw2 += (a2 - y[k]) * net["w4"] * a0 * (1 - a0) * x[k,1] 124 | 125 | # Use average deltas to update the network 126 | m = len(y) 127 | net["b2"] = net["b2"] - eta * db2 / m 128 | net["w4"] = net["w4"] - eta * dw4 / m 129 | net["w5"] = net["w5"] - eta * dw5 / m 130 | net["b1"] = net["b1"] - eta * db1 / m 131 | net["w1"] = net["w1"] - eta 
* dw1 / m 132 | net["w3"] = net["w3"] - eta * dw3 / m 133 | net["b0"] = net["b0"] - eta * db0 / m 134 | net["w0"] = net["w0"] - eta * dw0 / m 135 | net["w2"] = net["w2"] - eta * dw2 / m 136 | 137 | # Training done, return the updated network 138 | return net 139 | 140 | 141 | ################################################################ 142 | # main 143 | # 144 | def main(): 145 | """Build and train a simple neural network""" 146 | 147 | epochs = 1000 # training epochs 148 | eta = 0.1 # learning rate 149 | 150 | # Get the train/test data 151 | xtrn, ytrn, xtst, ytst = BuildDataset() 152 | 153 | # Initialize the network 154 | net = {} 155 | net["b2"] = 0.0 156 | net["b1"] = 0.0 157 | net["b0"] = 0.0 158 | net["w5"] = 0.0001*(np.random.random() - 0.5) 159 | net["w4"] = 0.0001*(np.random.random() - 0.5) 160 | net["w3"] = 0.0001*(np.random.random() - 0.5) 161 | net["w2"] = 0.0001*(np.random.random() - 0.5) 162 | net["w1"] = 0.0001*(np.random.random() - 0.5) 163 | net["w0"] = 0.0001*(np.random.random() - 0.5) 164 | 165 | # Do a forward pass to get initial performance 166 | tn0,fp0,fn0,tp0,pred0 = Evaluate(net, xtst, ytst) 167 | 168 | # Gradient descent 169 | net = GradientDescent(net, xtrn, ytrn, epochs, eta) 170 | 171 | # Final model performance 172 | tn,fp,fn,tp,pred = Evaluate(net, xtst, ytst) 173 | 174 | # Summarize performance 175 | print() 176 | print("Training for %d epochs, learning rate %0.5f" % (epochs, eta)) 177 | print() 178 | print("Before training:") 179 | print(" TN:%3d FP:%3d" % (tn0, fp0)) 180 | print(" FN:%3d TP:%3d" % (fn0, tp0)) 181 | print() 182 | print("After training:") 183 | print(" TN:%3d FP:%3d" % (tn, fp)) 184 | print(" FN:%3d TP:%3d" % (fn, tp)) 185 | print() 186 | 187 | 188 | if (__name__ == "__main__"): 189 | main() 190 | 191 | 192 | -------------------------------------------------------------------------------- /chapter_11/NN.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: nn.py 3 | # 4 | # Generic fully connected neural network code using NumPy. 5 | # 6 | # Based on code by Omar Aflak, 7 | # 8 | # https://github.com/OmarAflak/Medium-Python-Neural-Network 9 | # 10 | # used and modified with his permission. 
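# This file repeats the chapter 10 network; its step() applies the
# plain averaged-minibatch update
#
#   w <- w - eta * delta_w / passes
#
# NNm.py, next in this chapter, replaces it with the momentum update
#
#   v <- momentum * v - eta * delta_w / passes
#   w <- w + v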
11 | # 12 | # RTK, 03-Feb-2021 13 | # Last update: 06-Feb-2021 14 | # 15 | ################################################################ 16 | 17 | import numpy as np 18 | 19 | # Activation function and derivative 20 | def sigmoid(x): 21 | return 1.0 / (1.0 + np.exp(-x)) 22 | 23 | def sigmoid_prime(x): 24 | return sigmoid(x)*(1.0 - sigmoid(x)) 25 | 26 | # Loss function and derivative 27 | def mse(y_true, y_pred): 28 | return (0.5*(y_true - y_pred)**2).mean() 29 | 30 | def mse_prime(y_true, y_pred): 31 | return y_pred - y_true 32 | 33 | 34 | ################################################################ 35 | # ActivationLayer 36 | # 37 | class ActivationLayer: 38 | def forward(self, input_data): 39 | self.input = input_data 40 | return sigmoid(input_data) 41 | 42 | def backward(self, output_error): 43 | return sigmoid_prime(self.input) * output_error 44 | 45 | def step(self, eta): 46 | return 47 | 48 | 49 | ################################################################ 50 | # FullyConnectedLayer 51 | # 52 | class FullyConnectedLayer: 53 | def __init__(self, input_size, output_size): 54 | # for accumulating error over a minibatch 55 | self.delta_w = np.zeros((input_size, output_size)) 56 | self.delta_b = np.zeros((1,output_size)) 57 | self.passes = 0 58 | 59 | # initialize the weights and biases w/small random values 60 | self.weights = np.random.rand(input_size, output_size) - 0.5 61 | self.bias = np.random.rand(1, output_size) - 0.5 62 | 63 | def forward(self, input_data): 64 | self.input = input_data 65 | return np.dot(self.input, self.weights) + self.bias 66 | 67 | def backward(self, output_error): 68 | input_error = np.dot(output_error, self.weights.T) 69 | weights_error = np.dot(self.input.T, output_error) 70 | 71 | # accumulate the error over the minibatch 72 | self.delta_w += weights_error 73 | self.delta_b += output_error 74 | self.passes += 1 75 | return input_error 76 | 77 | def step(self, eta): 78 | # update the weights and biases by the mean error 79 | # over the minibatch 80 | self.weights -= eta * self.delta_w / self.passes 81 | self.bias -= eta * self.delta_b / self.passes 82 | 83 | # reset for the next minibatch 84 | self.delta_w = np.zeros(self.weights.shape) 85 | self.delta_b = np.zeros(self.bias.shape) 86 | self.passes = 0 87 | 88 | 89 | ################################################################ 90 | # Network 91 | # 92 | class Network: 93 | def __init__(self, verbose=True): 94 | self.verbose = verbose 95 | self.layers = [] 96 | 97 | def add(self, layer): 98 | self.layers.append(layer) 99 | 100 | def predict(self, input_data): 101 | result = [] 102 | for i in range(input_data.shape[0]): 103 | output = input_data[i] 104 | for layer in self.layers: 105 | output = layer.forward(output) 106 | result.append(output) 107 | return result 108 | 109 | def fit(self, x_train, y_train, minibatches, learning_rate, batch_size=64): 110 | for i in range(minibatches): 111 | err = 0 112 | 113 | # select a random minibatch 114 | idx = np.argsort(np.random.random(x_train.shape[0]))[:batch_size] 115 | x_batch = x_train[idx] 116 | y_batch = y_train[idx] 117 | 118 | for j in range(batch_size): 119 | # forward propagation 120 | output = x_batch[j] 121 | for layer in self.layers: 122 | output = layer.forward(output) 123 | 124 | # accumulate loss 125 | err += mse(y_batch[j], output) 126 | 127 | # backward propagation 128 | error = mse_prime(y_batch[j], output) 129 | for layer in reversed(self.layers): 130 | error = layer.backward(error) 131 | 132 | # update weights and biases
133 | for layer in self.layers: 134 | layer.step(learning_rate) 135 | 136 | # report mean loss over minibatch 137 | if (self.verbose) and ((i%10) == 0): 138 | err /= batch_size 139 | print('minibatch %5d/%d error=%0.9f' % (i, minibatches, err)) 140 | 141 | # end NN.py 142 | 143 | -------------------------------------------------------------------------------- /chapter_11/NNm.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: NNm.py (w/momentum) 3 | # 4 | # Generic fully connected neural network code using NumPy. 5 | # 6 | # Based on code by Omar Aflak, 7 | # 8 | # https://github.com/OmarAflak/Medium-Python-Neural-Network 9 | # 10 | # used and modified with his permission. 11 | # 12 | # RTK, 03-Feb-2021 13 | # Last update: 18-Feb-2021 14 | # 15 | ################################################################ 16 | 17 | import numpy as np 18 | 19 | # Activation function and derivative 20 | def sigmoid(x): 21 | return 1.0 / (1.0 + np.exp(-x)) 22 | 23 | def sigmoid_prime(x): 24 | return sigmoid(x)*(1.0 - sigmoid(x)) 25 | 26 | # Loss function and derivative 27 | def mse(y_true, y_pred): 28 | return (0.5*(y_true - y_pred)**2).mean() 29 | 30 | def mse_prime(y_true, y_pred): 31 | return y_pred - y_true 32 | 33 | 34 | ################################################################ 35 | # ActivationLayer 36 | # 37 | class ActivationLayer: 38 | def forward(self, input_data): 39 | self.input = input_data 40 | return sigmoid(input_data) 41 | 42 | def backward(self, output_error): 43 | return sigmoid_prime(self.input) * output_error 44 | 45 | def step(self, eta): 46 | return 47 | 48 | 49 | ################################################################ 50 | # FullyConnectedLayer 51 | # 52 | class FullyConnectedLayer: 53 | def __init__(self, input_size, output_size, momentum=0.0): 54 | # for accumulating error over a minibatch 55 | self.delta_w = np.zeros((input_size, output_size)) 56 | self.delta_b = np.zeros((1,output_size)) 57 | self.passes = 0 58 | 59 | # initialize the weights and biases w/small random values 60 | self.weights = np.random.rand(input_size, output_size) - 0.5 61 | self.bias = np.random.rand(1, output_size) - 0.5 62 | 63 | # initial velocities 64 | self.vw = np.zeros((input_size, output_size)) 65 | self.vb = np.zeros((1, output_size)) 66 | self.momentum = momentum 67 | 68 | def forward(self, input_data): 69 | self.input = input_data 70 | return np.dot(self.input, self.weights) + self.bias 71 | 72 | def backward(self, output_error): 73 | input_error = np.dot(output_error, self.weights.T) 74 | weights_error = np.dot(self.input.T, output_error) 75 | 76 | # accumulate the error over the minibatch 77 | self.delta_w += weights_error 78 | self.delta_b += output_error 79 | self.passes += 1 80 | return input_error 81 | 82 | def step(self, eta): 83 | # update the weights and biases by the mean error 84 | # over the minibatch 85 | self.vw = self.momentum * self.vw - eta * self.delta_w / self.passes 86 | self.vb = self.momentum * self.vb - eta * self.delta_b / self.passes 87 | self.weights = self.weights + self.vw 88 | self.bias = self.bias + self.vb 89 | 90 | # reset for the next minibatch 91 | self.delta_w = np.zeros(self.weights.shape) 92 | self.delta_b = np.zeros(self.bias.shape) 93 | self.passes = 0 94 | 95 | 96 | ################################################################ 97 | # Network 98 | # 99 | class Network: 100 | def __init__(self, verbose=True): 101 | self.verbose = verbose 102 | self.layers =
[] 103 | 104 | def add(self, layer): 105 | self.layers.append(layer) 106 | 107 | def predict(self, input_data): 108 | result = [] 109 | for i in range(input_data.shape[0]): 110 | output = input_data[i] 111 | for layer in self.layers: 112 | output = layer.forward(output) 113 | result.append(output) 114 | return result 115 | 116 | def fit(self, x_train, y_train, minibatches, learning_rate, batch_size=64): 117 | for i in range(minibatches): 118 | err = 0 119 | 120 | # select a random minibatch 121 | idx = np.argsort(np.random.random(x_train.shape[0]))[:batch_size] 122 | x_batch = x_train[idx] 123 | y_batch = y_train[idx] 124 | 125 | for j in range(batch_size): 126 | # forward propagation 127 | output = x_batch[j] 128 | for layer in self.layers: 129 | output = layer.forward(output) 130 | 131 | # accumulate loss 132 | err += mse(y_batch[j], output) 133 | 134 | # backward propagation 135 | error = mse_prime(y_batch[j], output) 136 | for layer in reversed(self.layers): 137 | error = layer.backward(error) 138 | 139 | # update weights and biases 140 | for layer in self.layers: 141 | layer.step(learning_rate) 142 | 143 | # report mean loss over minibatch 144 | if (self.verbose) and ((i%10) == 0): 145 | err /= batch_size 146 | print('minibatch %5d/%d error=%0.9f' % (i, minibatches, err)) 147 | 148 | # end NNm.py 149 | 150 | -------------------------------------------------------------------------------- /chapter_11/fmnist.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: fmnist.py 3 | # 4 | # Train and test the small 14x14 FMNIST dataset. 5 | # 6 | # RTK, 03-Feb-2021 7 | # Last update: 19-Feb-2021 8 | # 9 | ################################################################ 10 | 11 | from sklearn.metrics import matthews_corrcoef 12 | import numpy as np 13 | from NN import * 14 | 15 | # Load, reshape, and scale the data 16 | x_train = np.load("../dataset/fmnist_train_images_small.npy")/255 17 | x_test = np.load("../dataset/fmnist_test_images_small.npy")/255 18 | y_train = np.load("../dataset/fmnist_train_labels_vector.npy") 19 | y_test = np.load("../dataset/fmnist_test_labels.npy") 20 | 21 | x_train = x_train.reshape(x_train.shape[0], 1, 14*14) 22 | x_test = x_test.reshape(x_test.shape[0], 1, 14*14) 23 | 24 | # Build the network using sigmoid activations 25 | net = Network(verbose=True) 26 | net.add(FullyConnectedLayer(14*14, 100)) 27 | net.add(ActivationLayer()) 28 | net.add(FullyConnectedLayer(100, 50)) 29 | net.add(ActivationLayer()) 30 | net.add(FullyConnectedLayer(50, 10)) 31 | net.add(ActivationLayer()) 32 | 33 | # Loss and train 34 | net.fit(x_train, y_train, minibatches=40000, learning_rate=1.0) 35 | 36 | # Build the confusion matrix using the test set predictions 37 | out = net.predict(x_test) 38 | pred = np.array(out)[:,0,:] 39 | cm = np.zeros((10,10), dtype="uint32") 40 | for i in range(len(y_test)): 41 | cm[y_test[i],np.argmax(out[i])] += 1 42 | 43 | # Show the results 44 | print() 45 | print(np.array2string(cm)) 46 | print() 47 | print("accuracy = %0.7f" % (np.diag(cm).sum() / cm.sum(),)) 48 | print("MCC = %0.7f" % matthews_corrcoef(y_test, np.argmax(pred, axis=1))) 49 | print() 50 | 51 | -------------------------------------------------------------------------------- /chapter_11/fmnist_analyze.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pylab as plt 3 | from scipy.stats import ttest_ind, mannwhitneyu 4 | 5 | def Cohen_d(a,b): 6 | s1 = np.std(a, 
ddof=1)**2 7 | s2 = np.std(b, ddof=1)**2 8 | return (a.mean() - b.mean()) / np.sqrt(0.5*(s1+s2)) 9 | 10 | # Load the MCC for repeated trainings 11 | m_no = np.load("fmnist_no_momentum_runs.npy") 12 | m_w = np.load("fmnist_w_momentum_runs.npy") 13 | 14 | hn,xn = np.histogram(m_no, bins=5) 15 | hw,xw = np.histogram(m_w, bins=5) 16 | b = plt.bar(xn[:-1], hn, width=0.8*(xn[1]-xn[0]), hatch="/", color="#5f5f5f") 17 | b = plt.bar(xw[:-1], hw, width=0.8*(xn[1]-xn[0]), hatch="\\", color="#7f7f7f") 18 | plt.xlabel("MCC") 19 | plt.ylabel("Count") 20 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 21 | plt.savefig("fmnist_mcc_plot.png", dpi=300) 22 | plt.show() 23 | 24 | print() 25 | print("no momentum: %0.5f +/- %0.5f" % (m_no.mean(), m_no.std(ddof=1)/np.sqrt(len(m_no)))) 26 | print("momentum : %0.5f +/- %0.5f" % (m_w.mean(), m_w.std(ddof=1)/np.sqrt(len(m_w)))) 27 | print() 28 | t,p = ttest_ind(m_w, m_no) 29 | print("t-test momentum vs no (t,p): (%0.8f, %0.8f)" % (t,p)) 30 | U,p = mannwhitneyu(m_w, m_no) 31 | print("Mann-Whitney U : (%0.8f, %0.8f)" % (U,p)) 32 | print("Cohen's d : %0.5f" % Cohen_d(m_w, m_no)) 33 | print() 34 | 35 | -------------------------------------------------------------------------------- /chapter_11/fmnist_momentum.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: fmnist_momentum.py 3 | # 4 | # Train and test the small 14x14 FMNIST dataset. 5 | # 6 | # RTK, 03-Feb-2021 7 | # Last update: 19-Feb-2021 8 | # 9 | ################################################################ 10 | 11 | from sklearn.metrics import matthews_corrcoef 12 | import numpy as np 13 | from NNm import * 14 | 15 | # Load, reshape, and scale the data 16 | x_train = np.load("../dataset/fmnist_train_images_small.npy")/255 17 | x_test = np.load("../dataset/fmnist_test_images_small.npy")/255 18 | y_train = np.load("../dataset/fmnist_train_labels_vector.npy") 19 | y_test = np.load("../dataset/fmnist_test_labels.npy") 20 | 21 | x_train = x_train.reshape(x_train.shape[0], 1, 14*14) 22 | x_test = x_test.reshape(x_test.shape[0], 1, 14*14) 23 | 24 | # Build the network using sigmoid activations 25 | net = Network(verbose=True) 26 | net.add(FullyConnectedLayer(14*14, 100, momentum=0.9)) 27 | net.add(ActivationLayer()) 28 | net.add(FullyConnectedLayer(100, 50, momentum=0.9)) 29 | net.add(ActivationLayer()) 30 | net.add(FullyConnectedLayer(50, 10, momentum=0.9)) 31 | net.add(ActivationLayer()) 32 | 33 | # Loss and train 34 | net.fit(x_train, y_train, minibatches=40000, learning_rate=0.2) 35 | 36 | # Build the confusion matrix using the test set predictions 37 | out = net.predict(x_test) 38 | pred = np.array(out)[:,0,:] 39 | cm = np.zeros((10,10), dtype="uint32") 40 | for i in range(len(y_test)): 41 | cm[y_test[i],np.argmax(out[i])] += 1 42 | 43 | # Show the results 44 | print() 45 | print(np.array2string(cm)) 46 | print() 47 | print("accuracy = %0.7f" % (np.diag(cm).sum() / cm.sum(),)) 48 | print("MCC = %0.7f" % matthews_corrcoef(y_test, np.argmax(pred, axis=1))) 49 | print() 50 | 51 | -------------------------------------------------------------------------------- /chapter_11/fmnist_no_momentum_runs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/chapter_11/fmnist_no_momentum_runs.npy -------------------------------------------------------------------------------- /chapter_11/fmnist_repeat.py: 
-------------------------------------------------------------------------------- 1 | # 2 | # file: fmnist_repeat.py 3 | # 4 | # Train and test the small 14x14 FMNIST dataset. 5 | # 6 | # RTK, 03-Feb-2021 7 | # Last update: 21-Feb-2021 8 | # 9 | ################################################################ 10 | 11 | from sklearn.metrics import matthews_corrcoef 12 | import numpy as np 13 | from NNm import * 14 | 15 | # Load, reshape, and scale the data 16 | x_train = np.load("../dataset/fmnist_train_images_small.npy")/255 17 | x_test = np.load("../dataset/fmnist_test_images_small.npy")/255 18 | y_train = np.load("../dataset/fmnist_train_labels_vector.npy") 19 | y_test = np.load("../dataset/fmnist_test_labels.npy") 20 | 21 | x_train = x_train.reshape(x_train.shape[0], 1, 14*14) 22 | x_test = x_test.reshape(x_test.shape[0], 1, 14*14) 23 | 24 | def train_test(x_train, x_test, y_train, y_test): 25 | # Build the network using sigmoid activations 26 | net = Network(verbose=False) 27 | net.add(FullyConnectedLayer(14*14, 100, momentum=0.9)) 28 | net.add(ActivationLayer()) 29 | net.add(FullyConnectedLayer(100, 50, momentum=0.9)) 30 | net.add(ActivationLayer()) 31 | net.add(FullyConnectedLayer(50, 10, momentum=0.9)) 32 | net.add(ActivationLayer()) 33 | 34 | # Loss and train 35 | net.fit(x_train, y_train, minibatches=10000, learning_rate=0.2) 36 | 37 | out = net.predict(x_test) 38 | pred = np.array(out)[:,0,:] 39 | return matthews_corrcoef(y_test, np.argmax(pred, axis=1)) 40 | 41 | 42 | M = 100 43 | mcc = np.zeros(M) 44 | 45 | for i in range(M): 46 | mcc[i] = train_test(x_train, x_test, y_train, y_test) 47 | print("%03d: MCC = %0.8f" % (i, mcc[i]), flush=True) 48 | 49 | np.save("fmnist_repeat_mcc.npy", mcc) 50 | 51 | print() 52 | print("Overall MCC %0.6f +/- %0.6f" % (mcc.mean(), mcc.std(ddof=1)/np.sqrt(M))) 53 | print() 54 | 55 | -------------------------------------------------------------------------------- /chapter_11/fmnist_w_momentum_runs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/chapter_11/fmnist_w_momentum_runs.npy -------------------------------------------------------------------------------- /chapter_11/gd_1d.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: gd_1d.py 3 | # 4 | # 1D example of GD 5 | # 6 | # RTK, 14-Feb-2021 7 | # Last update: 14-Feb-2021 8 | # 9 | ################################################################ 10 | 11 | import sys 12 | import os 13 | import numpy as np 14 | import matplotlib.pylab as plt 15 | 16 | # The function and its derivative 17 | def f(x): 18 | return 6*x**2 - 12*x + 3 19 | 20 | def d(x): 21 | return 12*x - 12 22 | 23 | 24 | # Show the function, derivative, and minimum 25 | x = np.linspace(-1,3,1000) 26 | y = f(x) 27 | plt.plot(x,y,color='#1f77b4') 28 | x = np.linspace(0,3,10) 29 | z = d(x) 30 | plt.plot(x,z,color='#ff7f0e') 31 | plt.plot([-1,3],[0,0],linestyle=(0,(1,1)),color='k') 32 | plt.plot([1,1],[-10,25],linestyle=(0,(1,1)),color='k') 33 | plt.plot([1,1],[f(1),f(1)],marker='o',color='#1f77b4') 34 | plt.xlabel("$x$") 35 | plt.ylabel("$y$") 36 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 37 | plt.savefig("gd_1d_plot.png", dpi=300) 38 | #plt.show() 39 | plt.close() 40 | 41 | # Show a series of gradient descent steps 42 | x = np.linspace(-1,3,1000) 43 | plt.plot(x,f(x)) 44 | 45 | x = -0.9 46 | eta = 0.03 47 | for i in range(15): 48 | plt.plot(x, 
f(x), marker='o', color='r') 49 | x = x - eta * d(x) 50 | 51 | plt.xlabel("$x$") 52 | plt.ylabel("$y$") 53 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 54 | plt.savefig("gd_1d_steps.png", dpi=300) 55 | #plt.show() 56 | plt.close() 57 | print("Minimum at (%0.6f, %0.6f)" % (x, f(x))) 58 | 59 | # Show oscillation if step size too large 60 | x = np.linspace(0.75,1.25,1000) 61 | plt.plot(x,f(x)) 62 | x = xold = 0.75 63 | for i in range(14): 64 | plt.plot([xold,x], [f(xold),f(x)], marker='o', linestyle='dotted', color='r') 65 | xold = x 66 | x = x - 0.15 * d(x) 67 | 68 | plt.xlabel("$x$") 69 | plt.ylabel("$y$") 70 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 71 | plt.savefig("gd_1d_oscillating.png", dpi=300) 72 | #plt.show() 73 | 74 | -------------------------------------------------------------------------------- /chapter_11/gd_1d_momentum.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: gd_1d_momentum.py 3 | # 4 | # 1D example of GD 5 | # 6 | # RTK, 14-Feb-2021 7 | # Last update: 14-Feb-2021 8 | # 9 | ################################################################ 10 | 11 | import sys 12 | import os 13 | import numpy as np 14 | import matplotlib.pylab as plt 15 | 16 | # The function and its derivative 17 | def f(x): 18 | return 6*x**2 - 12*x + 3 19 | 20 | def d(x): 21 | return 12*x - 12 22 | 23 | m = ['o','s','>','<','*','+','p','h','P','D'] 24 | x = np.linspace(0.75,1.25,1000) 25 | plt.plot(x,f(x)) 26 | x = xold = 0.75 27 | eta = 0.09 28 | mu = 0.8 29 | v = 0.0 30 | for i in range(10): 31 | plt.plot([xold,x], [f(xold),f(x)], marker=m[i], linestyle='dotted', color='r') 32 | xold = x 33 | v = mu*v - eta * d(x) 34 | x = x + v 35 | for i in range(40): 36 | v = mu*v - eta * d(x) 37 | x = x + v 38 | plt.plot(x,f(x),marker='X', color='k') 39 | 40 | plt.xlabel("$x$") 41 | plt.ylabel("$y$") 42 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 43 | plt.savefig("gd_1d_momentum.png", dpi=300) 44 | plt.show() 45 | 46 | -------------------------------------------------------------------------------- /chapter_11/gd_2d.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: gd_2d.py 3 | # 4 | # 2D example of gradient descent 5 | # 6 | # RTK, 14-Feb-2021 7 | # Last update: 14-Feb-2021 8 | # 9 | ################################################################ 10 | 11 | import numpy as np 12 | import matplotlib.pylab as plt 13 | 14 | # Function and partial derivatives 15 | def f(x,y): 16 | return 6*x**2 + 9*y**2 - 12*x - 14*y + 3 17 | 18 | def dx(x): 19 | return 12*x - 12 20 | 21 | def dy(y): 22 | return 18*y - 14 23 | 24 | # Gradient descent steps 25 | N = 100 26 | x,y = np.meshgrid(np.linspace(-1,3,N), np.linspace(-1,3,N)) 27 | z = f(x,y) 28 | plt.contourf(x,y,z,10, cmap="Greys") 29 | plt.contour(x,y,z,10, colors='k', linewidths=1) 30 | plt.plot([0,0],[-1,3],color='k',linewidth=1) 31 | plt.plot([-1,3],[0,0],color='k',linewidth=1) 32 | plt.plot(1,0.7777778,color='k',marker='+') 33 | 34 | x = xold = -0.5 35 | y = yold = 2.9 36 | for i in range(12): 37 | plt.plot([xold,x],[yold,y], marker='o', linestyle='dotted', color='k') 38 | xold = x 39 | yold = y 40 | x = x - 0.02 * dx(x) 41 | y = y - 0.02 * dy(y) 42 | 43 | x = xold = 1.5 44 | y = yold = -0.8 45 | for i in range(12): 46 | plt.plot([xold,x],[yold,y], marker='s', linestyle='dotted', color='k') 47 | xold = x 48 | yold = y 49 | x = x - 0.02 * dx(x) 50 | y = y - 0.02 * dy(y) 51 | 52 | x = xold = 2.7 53 | y = yold = 2.3 54 | for i in range(12): 55 | 
plt.plot([xold,x],[yold,y], marker='<', linestyle='dotted', color='k') 56 | xold = x 57 | yold = y 58 | x = x - 0.02 * dx(x) 59 | y = y - 0.02 * dy(y) 60 | 61 | plt.xlabel("$x$") 62 | plt.ylabel("$y$") 63 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 64 | plt.savefig("gd_2d_steps.png", dpi=300) 65 | plt.show() 66 | plt.close() 67 | 68 | # New function and partial derivatives 69 | def f(x,y): 70 | return 6*x**2 + 40*y**2 - 12*x - 30*y + 3 71 | 72 | def dx(x): 73 | return 12*x - 12 74 | 75 | def dy(y): 76 | return 80*y - 30 77 | 78 | # Large stepsize 79 | N = 100 80 | x,y = np.meshgrid(np.linspace(-1,3,N), np.linspace(-1,3,N)) 81 | z = f(x,y) 82 | plt.contourf(x,y,z,10, cmap="Greys") 83 | plt.contour(x,y,z,10, colors='k', linewidths=1) 84 | plt.plot([0,0],[-1,3],color='k',linewidth=1) 85 | plt.plot([-1,3],[0,0],color='k',linewidth=1) 86 | plt.plot(1,0.375,color='k',marker='+') 87 | 88 | x = xold = -0.5 89 | y = yold = 2.3 90 | for i in range(14): 91 | plt.plot([xold,x],[yold,y], marker='o', linestyle='dotted', color='k') 92 | xold = x 93 | yold = y 94 | x = x - 0.02 * dx(x) 95 | y = y - 0.02 * dy(y) 96 | 97 | x = xold = 2.3 98 | y = yold = 2.3 99 | for i in range(14): 100 | plt.plot([xold,x],[yold,y], marker='s', linestyle='dotted', color='k') 101 | xold = x 102 | yold = y 103 | x = x - 0.01 * dx(x) 104 | y = y - 0.01 * dy(y) 105 | 106 | plt.xlabel("$x$") 107 | plt.ylabel("$y$") 108 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 109 | plt.savefig("gd_2d_oscillating.png", dpi=300) 110 | plt.show() 111 | plt.close() 112 | 113 | -------------------------------------------------------------------------------- /chapter_11/gd_momentum.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: gd_momentum.py 3 | # 4 | # 2D example of gradient descent for a function 5 | # with more than one minimum with momentum 6 | # 7 | # RTK, 14-Feb-2021 8 | # Last update: 21-Feb-2021 9 | # 10 | ################################################################ 11 | 12 | import numpy as np 13 | import matplotlib.pylab as plt 14 | 15 | # Function and partial derivatives 16 | def f(x,y): 17 | return -2*np.exp(-0.5*((x+1)**2+(y-1)**2)) + \ 18 | -np.exp(-0.5*((x-1)**2+(y+1)**2)) 19 | 20 | def dx(x,y): 21 | return 2*(x+1)*np.exp(-0.5*((x+1)**2+(y-1)**2)) + \ 22 | (x-1)*np.exp(-0.5*((x-1)**2+(y+1)**2)) 23 | 24 | def dy(x,y): 25 | return (y+1)*np.exp(-0.5*((x-1)**2+(y+1)**2)) + \ 26 | 2*(y-1)*np.exp(-0.5*((x+1)**2+(y-1)**2)) 27 | 28 | # Gradient descent steps 29 | N = 100 30 | x,y = np.meshgrid(np.linspace(-2,2,N), np.linspace(-2,2,N)) 31 | z = f(x,y) 32 | plt.contourf(x,y,z,10, cmap="Greys") 33 | plt.contour(x,y,z,10, colors='k', linewidths=1) 34 | plt.plot([0,0],[-2,2],color='k',linewidth=1) 35 | plt.plot([-2,2],[0,0],color='k',linewidth=1) 36 | 37 | def gd(x,y, eta,mu, steps, marker): 38 | xold = x 39 | yold = y 40 | vx = vy = 0.0 41 | for i in range(steps): 42 | plt.plot([xold,x],[yold,y], marker=marker, linestyle='dotted', color='k') 43 | xold = x 44 | yold = y 45 | vx = mu*vx - eta * dx(x,y) 46 | vy = mu*vy - eta * dy(x,y) 47 | x = x + vx 48 | y = y + vy 49 | 50 | return x,y 51 | 52 | 53 | #gd(-1.5, 1.2,20, 'o') 54 | #gd( 1.5,-1.8,40, 's') 55 | #gd( 0.0, 0.0,30, '<') 56 | print("(x,y) = (%0.8f, %0.8f)" % gd( 0.7,-0.2, 0.1, 0.9, 25, '>')) 57 | print("(x,y) = (%0.8f, %0.8f)" % gd( 1.5, 1.5, 0.02, 0.9, 90, '*')) 58 | 59 | plt.xlabel("$x$") 60 | plt.ylabel("$y$") 61 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 62 | plt.savefig("gd_momentum_steps.png", dpi=300) 63 | plt.show() 64 | 
plt.close() 65 | 66 | -------------------------------------------------------------------------------- /chapter_11/gd_multiple.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: gd_multiple.py 3 | # 4 | # 2D example of gradient descent for a function 5 | # with more than one minimum 6 | # 7 | # RTK, 14-Feb-2021 8 | # Last update: 14-Feb-2021 9 | # 10 | ################################################################ 11 | 12 | import numpy as np 13 | import matplotlib.pylab as plt 14 | 15 | # Function and partial derivatives 16 | def f(x,y): 17 | return -2*np.exp(-0.5*((x+1)**2+(y-1)**2)) + \ 18 | -np.exp(-0.5*((x-1)**2+(y+1)**2)) 19 | 20 | def dx(x,y): 21 | return 2*(x+1)*np.exp(-0.5*((x+1)**2+(y-1)**2)) + \ 22 | (x-1)*np.exp(-0.5*((x-1)**2+(y+1)**2)) 23 | 24 | def dy(x,y): 25 | return (y+1)*np.exp(-0.5*((x-1)**2+(y+1)**2)) + \ 26 | 2*(y-1)*np.exp(-0.5*((x+1)**2+(y-1)**2)) 27 | 28 | # Gradient descent steps 29 | N = 100 30 | x,y = np.meshgrid(np.linspace(-2,2,N), np.linspace(-2,2,N)) 31 | z = f(x,y) 32 | plt.contourf(x,y,z,10, cmap="Greys") 33 | plt.contour(x,y,z,10, colors='k', linewidths=1) 34 | plt.plot([0,0],[-2,2],color='k',linewidth=1) 35 | plt.plot([-2,2],[0,0],color='k',linewidth=1) 36 | 37 | eta = 0.4 38 | 39 | x = xold = -1.5 40 | y = yold = 1.2 41 | for i in range(9): 42 | plt.plot([xold,x],[yold,y], marker='o', linestyle='dotted', color='k') 43 | xold = x 44 | yold = y 45 | x = x - eta * dx(x,y) 46 | y = y - eta * dy(x,y) 47 | 48 | x = xold = 1.5 49 | y = yold = -1.8 50 | for i in range(9): 51 | plt.plot([xold,x],[yold,y], marker='s', linestyle='dotted', color='k') 52 | xold = x 53 | yold = y 54 | x = x - eta * dx(x,y) 55 | y = y - eta * dy(x,y) 56 | 57 | x = xold = 0.0 58 | y = yold = 0.0 59 | for i in range(20): 60 | plt.plot([xold,x],[yold,y], marker='+', linestyle='dotted', color='k') 61 | xold = x 62 | yold = y 63 | x = x - eta * dx(x,y) 64 | y = y - eta * dy(x,y) 65 | 66 | x = xold = 0.7 67 | y = yold = -0.2 68 | for i in range(20): 69 | plt.plot([xold,x],[yold,y], marker='>', linestyle='dotted', color='k') 70 | xold = x 71 | yold = y 72 | x = x - eta * dx(x,y) 73 | y = y - eta * dy(x,y) 74 | 75 | x = xold = 1.5 76 | y = yold = 1.5 77 | for i in range(30): 78 | plt.plot([xold,x],[yold,y], marker='*', linestyle='dotted', color='k') 79 | xold = x 80 | yold = y 81 | x = x - eta * dx(x,y) 82 | y = y - eta * dy(x,y) 83 | 84 | plt.xlabel("$x$") 85 | plt.ylabel("$y$") 86 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 87 | plt.savefig("gd_multiple_steps.png", dpi=300) 88 | plt.show() 89 | plt.close() 90 | 91 | -------------------------------------------------------------------------------- /chapter_11/gd_nesterov.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: gd_nesterov.py 3 | # 4 | # 2D example of gradient descent for a function 5 | # with more than one minimum and Nesterov momentum 6 | # 7 | # RTK, 14-Feb-2021 8 | # Last update: 21-Feb-2021 9 | # 10 | ################################################################ 11 | 12 | import numpy as np 13 | import matplotlib.pylab as plt 14 | 15 | # Function and partial derivatives 16 | def f(x,y): 17 | return -2*np.exp(-0.5*((x+1)**2+(y-1)**2)) + \ 18 | -np.exp(-0.5*((x-1)**2+(y+1)**2)) 19 | 20 | def dx(x,y): 21 | return 2*(x+1)*np.exp(-0.5*((x+1)**2+(y-1)**2)) + \ 22 | (x-1)*np.exp(-0.5*((x-1)**2+(y+1)**2)) 23 | 24 | def dy(x,y): 25 | return (y+1)*np.exp(-0.5*((x-1)**2+(y+1)**2)) + \ 26 | 2*(y-1)*np.exp(-0.5*((x+1)**2+(y-1)**2)) 
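# NOTE: compare with classical momentum in gd_momentum.py. The gd()
# function below evaluates each partial derivative at a lookahead
# position, dx(x + mu*vx, y) and dy(x, y + mu*vy), instead of at the
# current point (x, y). This per-coordinate lookahead is a
# simplification of the standard Nesterov step, which evaluates the
# full gradient at (x + mu*vx, y + mu*vy).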
27 | 28 | # Gradient descent steps 29 | N = 100 30 | x,y = np.meshgrid(np.linspace(-2,2,N), np.linspace(-2,2,N)) 31 | z = f(x,y) 32 | plt.contourf(x,y,z,10, cmap="Greys") 33 | plt.contour(x,y,z,10, colors='k', linewidths=1) 34 | plt.plot([0,0],[-2,2],color='k',linewidth=1) 35 | plt.plot([-2,2],[0,0],color='k',linewidth=1) 36 | 37 | def gd(x,y, eta,mu, steps, marker): 38 | xold = x 39 | yold = y 40 | vx = vy = 0.0 41 | for i in range(steps): 42 | plt.plot([xold,x],[yold,y], marker=marker, linestyle='dotted', color='k') 43 | xold = x 44 | yold = y 45 | vx = mu*vx - eta * dx(x+mu*vx,y) 46 | vy = mu*vy - eta * dy(x,y+mu*vy) 47 | x = x + vx 48 | y = y + vy 49 | 50 | return x,y 51 | 52 | #gd(-1.5, 1.2,20, 'o') 53 | #gd( 1.5,-1.8,40, 's') 54 | #gd( 0.0, 0.0,30, '<') 55 | print("(x,y) = (%0.8f, %0.8f)" % gd( 0.7,-0.2, 0.1, 0.9, 25, '>')) 56 | print("(x,y) = (%0.8f, %0.8f)" % gd( 1.5, 1.5, 0.02, 0.9, 90, '*')) 57 | 58 | plt.xlabel("$x$") 59 | plt.ylabel("$y$") 60 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 61 | plt.savefig("gd_nesterov_steps.png", dpi=300) 62 | plt.show() 63 | plt.close() 64 | 65 | -------------------------------------------------------------------------------- /dataset/cifar10_test_images.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/cifar10_test_images.npy -------------------------------------------------------------------------------- /dataset/cifar10_test_labels.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/cifar10_test_labels.npy -------------------------------------------------------------------------------- /dataset/fmnist_test_images_small.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/fmnist_test_images_small.npy -------------------------------------------------------------------------------- /dataset/fmnist_test_labels.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/fmnist_test_labels.npy -------------------------------------------------------------------------------- /dataset/fmnist_train_images_small.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/fmnist_train_images_small.npy -------------------------------------------------------------------------------- /dataset/fmnist_train_labels.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/fmnist_train_labels.npy -------------------------------------------------------------------------------- /dataset/fmnist_train_labels_vector.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/fmnist_train_labels_vector.npy -------------------------------------------------------------------------------- 
/dataset/test_images_full.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/test_images_full.npy -------------------------------------------------------------------------------- /dataset/test_images_small.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/test_images_small.npy -------------------------------------------------------------------------------- /dataset/test_labels.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/test_labels.npy -------------------------------------------------------------------------------- /dataset/train_images_full.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/train_images_full.npy -------------------------------------------------------------------------------- /dataset/train_images_small.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/train_images_small.npy -------------------------------------------------------------------------------- /dataset/train_labels.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/train_labels.npy -------------------------------------------------------------------------------- /dataset/train_labels_vector.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/train_labels_vector.npy -------------------------------------------------------------------------------- /tutorial.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/tutorial.pdf --------------------------------------------------------------------------------
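A quick end-to-end check of the chapter_11 network code: the sketch below is an illustrative addition, not a file in the repository. It trains the plain gradient-descent Network from NN.py on the XOR truth table, so it runs without the dataset/ files. It assumes it is executed from chapter_11 so that "from NN import *" resolves; the architecture and hyperparameters are illustrative guesses, and since sigmoid units trained with MSE can occasionally stall on XOR, a rerun may be needed if the outputs do not separate.

# xor_sketch.py -- minimal, dataset-free smoke test (hypothetical file name)
import numpy as np
from NN import *

# the four XOR cases, shaped (samples, 1, features) as in fmnist.py
x = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]]], dtype="float64")
y = np.array([[[0]], [[1]], [[1]], [[0]]], dtype="float64")

# a small sigmoid network: 2 inputs -> 8 hidden -> 1 output
net = Network(verbose=False)
net.add(FullyConnectedLayer(2, 8))
net.add(ActivationLayer())
net.add(FullyConnectedLayer(8, 1))
net.add(ActivationLayer())

# batch_size=4 makes every minibatch the full XOR truth table
net.fit(x, y, minibatches=4000, learning_rate=1.0, batch_size=4)

# outputs should be near 0, 1, 1, 0
out = net.predict(x)
for i in range(4):
    print("input", x[i,0], "target", y[i,0,0], "output %0.3f" % out[i][0,0])

Swapping "from NN import *" for "from NNm import *" and adding momentum=0.9 to each FullyConnectedLayer call exercises the momentum variant in the same way, mirroring fmnist_momentum.py.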