├── LICENSE
├── README.md
├── chapter_01
│   └── c01_sklearn.py
├── chapter_02
│   ├── birthday.py
│   ├── boston.py
│   └── coin_flips.py
├── chapter_03
│   ├── continuous.py
│   ├── discrete.py
│   └── ricky.py
├── chapter_04
│   ├── correlation.py
│   ├── exams.npy
│   ├── hypothesis.py
│   ├── missing.py
│   └── quantiles.py
├── chapter_05
│   ├── matrixmul.py
│   └── numpy_matmul.py
├── chapter_06
│   ├── bc_mahalanobis.py
│   ├── iris_pca.py
│   └── kl_divergence.py
├── chapter_07
│   └── c07_figure.py
├── chapter_08
│   ├── newton_1d.py
│   ├── newton_2d.py
│   └── spiral.py
├── chapter_09
│   ├── c09_nn.py
│   └── convolve_example.py
├── chapter_10
│   ├── NN.py
│   ├── build_dataset.py
│   ├── iris.py
│   ├── mnist.py
│   └── nn_by_hand.py
├── chapter_11
│   ├── NN.py
│   ├── NNm.py
│   ├── fmnist.py
│   ├── fmnist_analyze.py
│   ├── fmnist_momentum.py
│   ├── fmnist_no_momentum_runs.npy
│   ├── fmnist_repeat.py
│   ├── fmnist_w_momentum_runs.npy
│   ├── gd_1d.py
│   ├── gd_1d_momentum.py
│   ├── gd_2d.py
│   ├── gd_momentum.py
│   ├── gd_multiple.py
│   └── gd_nesterov.py
├── dataset
│   ├── cifar10_test_images.npy
│   ├── cifar10_test_labels.npy
│   ├── fmnist_test_images_small.npy
│   ├── fmnist_test_labels.npy
│   ├── fmnist_train_images_small.npy
│   ├── fmnist_train_labels.npy
│   ├── fmnist_train_labels_vector.npy
│   ├── test_images_full.npy
│   ├── test_images_small.npy
│   ├── test_labels.npy
│   ├── train_images_full.npy
│   ├── train_images_small.npy
│   ├── train_labels.npy
│   └── train_labels_vector.npy
└── tutorial.pdf

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2021 rkneusel9
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MathForDeepLearning
2 | Source code for the book "Math for Deep Learning"
3 | 
4 | Source code is organized by chapter. If you have questions
5 | or comments, please contact me:
6 | 
7 | rkneuselbooks@gmail.com
8 | 
9 | **Updates**
10 | - p 300, the last sentence of the penultimate paragraph should read "Here, t, an integer starting at *one*, is the timestep."
11 | - The file *boston.py* in Chapter 2 sometimes sampled the same person more than once (thanks to ikimmit for the catch!)
12 | - The file *tutorial.pdf* is a beginner's guide to NumPy, SciPy, Matplotlib, and Pillow.
13 | - p 29, the upper limit on randint should be 365, not 364 (code updated).
14 | - p 198, the derivative of a matrix function should be scalar $\partial x$, not $\partial\mathbf{x}$.
15 | - p 257, the line above Equation 10.10 should be $\left[\frac{\partial E}{\partial y_0}\sigma'(x_0)\ \frac{\partial E}{\partial y_1}\sigma'(x_1)\ \ldots\ \right]^\top$.
16 | - Tweaked the Ch 10 code in *build_dataset.py* to conform to newer Keras versions
17 | 
18 | 

--------------------------------------------------------------------------------
/chapter_01/c01_sklearn.py:
--------------------------------------------------------------------------------
1 | # c01_sklearn.py
2 | 
3 | import numpy as np
4 | from sklearn.datasets import load_digits
5 | from sklearn.neural_network import MLPClassifier
6 | 
7 | d = load_digits()
8 | digits = d["data"]
9 | labels = d["target"]
10 | 
11 | N = 200
12 | idx = np.argsort(np.random.random(len(labels)))
13 | x_test, y_test = digits[idx[:N]], labels[idx[:N]]
14 | x_train, y_train = digits[idx[N:]], labels[idx[N:]]
15 | 
16 | clf = MLPClassifier(hidden_layer_sizes=(128,))
17 | clf.fit(x_train, y_train)
18 | score = clf.score(x_test, y_test)
19 | pred = clf.predict(x_test)
20 | err = np.where(y_test != pred)[0]
21 | print()
22 | print("score : ", score)
23 | print("errors:")
24 | print(" actual : ", y_test[err])
25 | print(" predicted: ", pred[err])
26 | print()
27 | 
28 | 

--------------------------------------------------------------------------------
/chapter_02/birthday.py:
--------------------------------------------------------------------------------
1 | #
2 | # file: birthday.py
3 | #
4 | # Simulate the birthday paradox
5 | #
6 | # RTK, 04-Jun-2020
7 | # Last update: 04-Jun-2020
8 | #
9 | ################################################################
10 | 
11 | import numpy as np
12 | 
13 | # Simulate picking two people at random, probability of sharing a birthday
14 | N = 100000
15 | match = 0
16 | for i in range(N):
17 |     a = np.random.randint(0,365)
18 |     b = np.random.randint(0,365)
19 |     if (a == b):
20 |         match += 1
21 | print()
22 | print("Probability of a random match = %0.6f" % (match/N,))
23 | print()
24 | 
25 | # Simulate people in a room, N tests per M
26 | M = 30
27 | N = 100000
28 | for m in range(2,M+1):
29 |     matches = 0
30 |     for n in range(N):
31 |         match = 0
32 |         b = np.random.randint(0,365,m)
33 |         for i in range(m):
34 |             for j in range(m):
35 |                 if (i != j) and (b[i] == b[j]):
36 |                     match += 1
37 |         if (match != 0):
38 |             matches += 1
39 |     print("%2d people: probability of at least one match %0.6f" % (m, matches/N))
40 | 
41 | 

--------------------------------------------------------------------------------
/chapter_02/boston.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | P = 50
3 | B = 4
4 | M = 3
5 | N = 100000
6 | 
7 | nb = 0
8 | 
9 | for i in range(N):
10 |     s = np.random.choice(P,M, replace=False)
11 |     fail = False
12 |     for t in range(M):
13 |         if (s[t] < B):
14 |             fail = True
15 |     if (not fail):
16 |         nb += 1
17 | 
18 | print()
19 | print("Prob no Boston in the fall = %0.4f" % (nb/N,))
20 | print()
21 | 

--------------------------------------------------------------------------------
/chapter_02/coin_flips.py:
--------------------------------------------------------------------------------
1 | #
2 | # file: coin_flips.py
3 | #
4 | # Probability of getting 0,1,2,3, or 4 heads
5 | # in four flips of a coin.
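# For reference, the exact values this simulation should approach:
# with M fair flips, P(h heads) = C(M,h)/2**M, so M = 4 gives
# 1/16, 4/16, 6/16, 4/16, 1/16.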
6 | # 7 | # RTK, 05-Jun-2020 8 | # Last update: 05-Jun-2020 9 | # 10 | ################################################################ 11 | 12 | import numpy as np 13 | 14 | N = 1000000 15 | M = 4 16 | 17 | heads = np.zeros(M+1) 18 | 19 | for i in range(N): 20 | flips = np.random.randint(0,2,M) 21 | h, _ = np.bincount(flips, minlength=2) 22 | heads[h] += 1 23 | 24 | prob = heads / N 25 | 26 | print() 27 | print("Probabilities: %s" % np.array2string(prob)) 28 | print() 29 | 30 | -------------------------------------------------------------------------------- /chapter_03/continuous.py: -------------------------------------------------------------------------------- 1 | # plots of key continuous distributions 2 | import numpy as np 3 | import matplotlib.pylab as plt 4 | 5 | N = 10000000 6 | B = 100 7 | x = np.arange(B)/B 8 | 9 | # uniform 10 | t = np.random.random(N) 11 | u = np.histogram(t, bins=B)[0] 12 | u = u / u.sum() 13 | 14 | # normal 15 | t = np.random.normal(0, 1, size=N) 16 | n = np.histogram(t, bins=B)[0] 17 | n = n / n.sum() 18 | 19 | # gamma 20 | t = np.random.gamma(5.0, size=N) 21 | g = np.histogram(t, bins=B)[0] 22 | g = g / g.sum() 23 | 24 | # beta 25 | t = np.random.beta(5,2, size=N) 26 | b = np.histogram(t, bins=B)[0] 27 | b = b / b.sum() 28 | 29 | plt.plot(x,u,color='k',linestyle='solid') 30 | plt.plot(x,n,color='k',linestyle='dotted') 31 | plt.plot(x,g,color='k',linestyle='dashed') 32 | plt.plot(x,b,color='k',linestyle='dashdot') 33 | plt.ylabel("Probability") 34 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 35 | #plt.savefig("continuous.png", dpi=300) 36 | plt.show() 37 | plt.close() 38 | 39 | # central limit theorem 40 | M = 10000 41 | m = np.zeros(M) 42 | for i in range(M): 43 | t = np.random.beta(5,2,size=M) 44 | m[i] = t.mean() 45 | print("Mean of the means = %0.7f" % m.mean()) 46 | 47 | h,x = np.histogram(m, bins=B) 48 | h = h / h.sum() 49 | plt.bar(x[:-1]+0.5*(x[1]-x[0]), h, width=0.8*(x[1]-x[0])) 50 | plt.xlabel("Mean") 51 | plt.ylabel("Probability") 52 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 53 | #plt.savefig("central_limit.png", dpi=300) 54 | plt.show() 55 | plt.close() 56 | 57 | from fldrf import fldr_preprocess_float_c 58 | from fldr import fldr_sample 59 | 60 | z = fldr_preprocess_float_c([0.1,0.6,0.1,0.1,0.1]) 61 | m = np.zeros(M) 62 | for i in range(M): 63 | t = np.array([fldr_sample(z) for i in range(M)]) 64 | m[i] = t.mean() 65 | print("Mean of the means = %0.7f" % m.mean()) 66 | 67 | h,x = np.histogram(m, bins=B) 68 | h = h / h.sum() 69 | plt.bar(x[:-1]+0.5*(x[1]-x[0]), h, width=0.8*(x[1]-x[0])) 70 | plt.xlabel("Mean") 71 | plt.ylabel("Probability") 72 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 73 | #plt.savefig("central_limit_fldr.png", dpi=300) 74 | plt.show() 75 | plt.close() 76 | 77 | t = np.array([fldr_sample(z) for i in range(M)]) 78 | h = np.bincount(t) 79 | h = h / h.sum() 80 | plt.bar(np.arange(5),h, width=0.8) 81 | plt.xlabel("Value") 82 | plt.ylabel("Probability") 83 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 84 | #plt.savefig("pmf_fldr.png", dpi=300) 85 | plt.show() 86 | plt.close() 87 | 88 | # Law of large numbers 89 | m = [] 90 | for n in np.linspace(1,8,30): 91 | t = np.random.normal(1,1,size=int(10**n)) 92 | m.append(t.mean()) 93 | 94 | plt.plot(np.linspace(1,8,30), m) 95 | plt.plot([1,8],[1,1], linestyle="--", color='k') 96 | plt.xlabel("Exponent $10^n$") 97 | plt.ylabel("Single sample mean") 98 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 99 | #plt.savefig("large_numbers.png", dpi=300) 100 | plt.show() 101 | 102 | 103 | 
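The central limit theorem sections above print only the mean of the sample means. The CLT also fixes the spread: for samples of size n, the sample means should be approximately Normal(mu, sigma/sqrt(n)), with mu and sigma the mean and standard deviation of the underlying distribution. A minimal numerical check against the exact Beta(5,2) moments (an editorial sketch with illustrative sample sizes, not part of the book's code):

import numpy as np

# Exact moments of Beta(a,b): mean a/(a+b), variance a*b/((a+b)**2 * (a+b+1))
a, b = 5.0, 2.0
mu = a / (a + b)
sigma = np.sqrt(a * b / ((a + b)**2 * (a + b + 1)))

n, M = 1000, 5000   # n draws per sample mean, M sample means
means = np.random.beta(a, b, size=(M, n)).mean(axis=1)

# CLT prediction: means ~ Normal(mu, sigma/sqrt(n))
print("mean of means: expected %0.5f, observed %0.5f" % (mu, means.mean()))
print("std of means : expected %0.5f, observed %0.5f" %
      (sigma / np.sqrt(n), means.std(ddof=1)))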
-------------------------------------------------------------------------------- /chapter_03/discrete.py: -------------------------------------------------------------------------------- 1 | # discrete probability distributions 2 | import numpy as np 3 | import matplotlib.pylab as plt 4 | from scipy.datasets import face 5 | from fldr import * 6 | from fldrf import * 7 | 8 | # binomial 9 | q = np.random.binomial(10, 0.7, 1000) 10 | h = np.histogram(q, bins=q.max()-q.min()+1)[0] 11 | h = h / h.sum() 12 | x = np.arange(q.min(), q.max()+1) 13 | plt.bar(x,h,width=0.8) 14 | q = np.random.binomial(10, 0.3, 1000) 15 | h = np.histogram(q, bins=q.max()-q.min()+1)[0] 16 | h = h / h.sum() 17 | x = np.arange(q.min(), q.max()+1) 18 | plt.bar(x,h,width=0.8) 19 | plt.show() 20 | 21 | # FLDR 22 | im = face(True) 23 | b = np.bincount(im.ravel(), minlength=256) 24 | b = b / b.sum() 25 | x = fldr_preprocess_float_c(list(b)) 26 | t = [fldr_sample(x) for i in range(500000)] 27 | q = np.bincount(t, minlength=256) 28 | q = q / q.sum() 29 | 30 | plt.plot(b, color='k') 31 | plt.plot(q, linestyle=(0, (1,1)), color='k') 32 | plt.xlabel("Sample") 33 | plt.ylabel("Probability") 34 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 35 | #plt.savefig("fldr_samples.png", dpi=300) 36 | plt.show() 37 | 38 | -------------------------------------------------------------------------------- /chapter_03/ricky.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pylab as plt 3 | import scipy.datasets 4 | from PIL import Image 5 | 6 | im = scipy.datasets.face(True)[:512,512:] 7 | Image.fromarray(im).save("ricky.png") 8 | hr,xr = np.histogram(im, bins=256) 9 | hr = hr/hr.sum() 10 | im = scipy.datasets.ascent().astype("uint8") 11 | Image.fromarray(im).save("ascent.png") 12 | ha,xa = np.histogram(im, bins=256) 13 | ha = ha/ha.sum() 14 | plt.plot(xr[:-1],hr, color='k', label="Face") 15 | plt.plot(xa[:-1],ha, linestyle=(0,(1,1)), color='k', label="Ascent") 16 | plt.legend(loc="upper right") 17 | plt.xlabel("Gray level") 18 | plt.ylabel("Probability") 19 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 20 | plt.savefig("ricky_probability.png", dpi=300) 21 | plt.show() 22 | plt.close() 23 | 24 | 25 | -------------------------------------------------------------------------------- /chapter_04/correlation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pylab as plt 3 | 4 | np.random.seed(8675309) 5 | 6 | N = 100 7 | x = np.linspace(0,1,N) + (np.random.random(N)-0.5) 8 | y = np.random.random(N)*x 9 | z = -0.1*np.random.random(N)*x 10 | 11 | plt.plot(np.linspace(0,1,N),x,color='r') 12 | plt.plot(np.linspace(0,1,N),y,color='g') 13 | plt.plot(np.linspace(0,1,N),z,color='b') 14 | plt.plot(np.linspace(0,1,N)[::5],x[::5],color='r',marker='o',linestyle='none',label='X') 15 | plt.plot(np.linspace(0,1,N)[::5],y[::5],color='g',marker='s',linestyle='none',label='Y') 16 | plt.plot(np.linspace(0,1,N)[::5],z[::5],color='b',marker='*',linestyle='none',label='Z') 17 | plt.legend(loc="upper left") 18 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 19 | plt.savefig("correlation_xyz_plot.png", dpi=300) 20 | plt.close() 21 | 22 | plt.plot(x,y,marker='o',linestyle='none',color='r',label="X,Y") 23 | plt.plot(x,z,marker='s',linestyle='none',color='g',label="X,Z") 24 | plt.plot(y,z,marker='*',linestyle='none',color='b',label="Y,Z") 25 | plt.legend(loc="upper left") 26 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 27 | 
plt.savefig("correlation_scatter_plot.png", dpi=300) 28 | plt.close() 29 | 30 | 31 | from scipy.stats import pearsonr, spearmanr 32 | 33 | print("Pearson(x,y) :", pearsonr(x,y)[0]) 34 | print("Spearman(x,y):", spearmanr(x,y)[0]) 35 | print() 36 | print("Pearson(x,z) :", pearsonr(x,z)[0]) 37 | print("Spearman(x,z):", spearmanr(x,z)[0]) 38 | print() 39 | print("Pearson(y,z) :", pearsonr(y,z)[0]) 40 | print("Spearman(y,z):", spearmanr(y,z)[0]) 41 | print() 42 | 43 | def pearson(x,y): 44 | exy = (x*y).mean() 45 | ex = x.mean() 46 | ey = y.mean() 47 | exx = (x*x).mean() 48 | ex2 = x.mean()**2 49 | eyy = (y*y).mean() 50 | ey2 = y.mean()**2 51 | return (exy - ex*ey)/(np.sqrt(exx-ex2)*np.sqrt(eyy-ey2)) 52 | 53 | print("pearson(x,y):", pearson(x,y)) 54 | print("pearson(x,z):", pearson(x,z)) 55 | print("pearson(y,z):", pearson(y,z)) 56 | print() 57 | 58 | d = np.vstack((x,y,z)) 59 | print(np.corrcoef(d)) 60 | print() 61 | 62 | from sklearn.datasets import load_sample_image 63 | china = load_sample_image('china.jpg') 64 | a = china[230,:,1].astype("float64") 65 | b = china[231,:,1].astype("float64") 66 | c = china[400,:,1].astype("float64") 67 | d = np.random.random(640) 68 | print("china(a,b): ", pearson(a,b)) 69 | print("china(a,c): ", pearson(a,c)) 70 | print("china(a,d): ", pearson(a,d)) 71 | print() 72 | 73 | # spearman 74 | def spearman(x,y): 75 | n = len(x) 76 | t = x[np.argsort(x)] 77 | rx = [] 78 | for i in range(n): 79 | rx.append(np.where(x[i] == t)[0][0]) 80 | rx = np.array(rx, dtype="float64") 81 | t = y[np.argsort(y)] 82 | ry = [] 83 | for i in range(n): 84 | ry.append(np.where(y[i] == t)[0][0]) 85 | ry = np.array(ry, dtype="float64") 86 | d = rx - ry 87 | return 1.0 - (6.0/(n*(n*n-1)))*(d**2).sum() 88 | 89 | print(spearman(x,y), spearmanr(x,y)[0]) 90 | print(spearman(x,z), spearmanr(x,z)[0]) 91 | print(spearman(y,z), spearmanr(y,z)[0]) 92 | print() 93 | 94 | a = np.linspace(-20,20,1000) 95 | b = 1.0 / (1.0 + np.exp(-a)) 96 | print(pearson(a,b)) 97 | print(spearman(a,b)) 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /chapter_04/exams.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/chapter_04/exams.npy -------------------------------------------------------------------------------- /chapter_04/hypothesis.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pylab as plt 3 | 4 | np.random.seed(65535) 5 | a = np.random.normal(85,6,50).astype("int32") 6 | a[np.where(a > 100)] = 100 7 | b = np.random.normal(82,7,50).astype("int32") 8 | b[np.where(b > 100)] = 100 9 | 10 | print(a) 11 | print() 12 | print(b) 13 | print() 14 | 15 | print("With means of 82 & 85:") 16 | from scipy.stats import ttest_ind 17 | t,p = ttest_ind(a,b,equal_var=False) 18 | print("(t=%0.5f, p=%0.5f)" % (t,p)) 19 | 20 | from scipy.stats import mannwhitneyu 21 | u,p = mannwhitneyu(a,b) 22 | print("(U=%0.5f, p=%0.5f)" % (u,p)) 23 | 24 | plt.boxplot((a,b)) 25 | plt.xlabel("Group") 26 | plt.ylabel("Test score") 27 | plt.savefig("hypothesis_box_plot.png", dpi=300) 28 | plt.close() 29 | 30 | h,x = np.histogram(a, bins=10) 31 | plt.bar(x[:-1],h, width=0.4*(x[1]-x[0]), label='Group A') 32 | h,y = np.histogram(b, bins=10) 33 | plt.bar(y[:-1]+(x[1]-x[0])/2, h, width=0.4*(x[1]-x[0]), label='Group B') 34 | plt.legend(loc='upper left') 35 | plt.ylabel('Counts') 36 
| plt.tight_layout(pad=0, w_pad=0, h_pad=0) 37 | #plt.savefig("hypothesis_bar_plot.png", dpi=300) 38 | plt.close() 39 | 40 | # CI for Welch's t-test 41 | from scipy import stats 42 | 43 | def CI(a, b, alpha=0.05): 44 | n1, n2 = len(a), len(b) 45 | s1, s2 = np.std(a, ddof=1)**2, np.std(b, ddof=1)**2 46 | df = (s1/n1 + s2/n2)**2 / ((s1/n1)**2/(n1-1) + (s2/n2)**2/(n2-1)) 47 | tc = stats.t.ppf(1 - alpha/2, df) 48 | lo = (a.mean()-b.mean()) - tc*np.sqrt(s1/n1 + s2/n2) 49 | hi = (a.mean()-b.mean()) + tc*np.sqrt(s1/n1 + s2/n2) 50 | return lo, hi 51 | 52 | lo, hi = CI(a, b) 53 | print("CI95 = (%0.5f, %0.5f)" % (lo,hi)) 54 | 55 | # Cohen's d 56 | def Cohen_d(a,b): 57 | s1 = np.std(a, ddof=1)**2 58 | s2 = np.std(b, ddof=1)**2 59 | return (a.mean() - b.mean()) / np.sqrt(0.5*(s1+s2)) 60 | 61 | print("Cohen's d = %0.5f" % Cohen_d(a,b)) 62 | 63 | # change the means to be one step closer 64 | np.random.seed(65535) 65 | a = np.random.normal(85,6,50).astype("int32") 66 | a[np.where(a > 100)] = 100 67 | b = np.random.normal(83,7,50).astype("int32") 68 | b[np.where(b > 100)] = 100 69 | 70 | print("With means of 83 & 85:") 71 | t,p = ttest_ind(a,b,equal_var=False) 72 | print("(t=%0.5f, p=%0.5f)" % (t,p)) 73 | u,p = mannwhitneyu(a,b) 74 | print("(U=%0.5f, p=%0.5f)" % (u,p)) 75 | 76 | # means one step further apart 77 | np.random.seed(65535) 78 | a = np.random.normal(85,6,50).astype("int32") 79 | a[np.where(a > 100)] = 100 80 | b = np.random.normal(81,7,50).astype("int32") 81 | b[np.where(b > 100)] = 100 82 | 83 | print("With means of 81 & 85:") 84 | t,p = ttest_ind(a,b,equal_var=False) 85 | print("(t=%0.5f, p=%0.5f)" % (t,p)) 86 | u,p = mannwhitneyu(a,b) 87 | print("(U=%0.5f, p=%0.5f)" % (u,p)) 88 | 89 | # Effect of sample size 90 | np.random.seed(65535) 91 | pt = [] 92 | et = [] 93 | pm = [] 94 | em = [] 95 | M = 25 96 | n = [20,40,60,80,100,120,140,160,180,200,250,300,350,400,450,500,750,1000] 97 | for i in n: 98 | p = [] 99 | t = [] 100 | for j in range(M): 101 | a = np.random.normal(85,6,i).astype("int32") 102 | a[np.where(a > 100)] = 100 103 | b = np.random.normal(84,7,i).astype("int32") 104 | b[np.where(b > 100)] = 100 105 | t.append(ttest_ind(a,b,equal_var=False)[1]) 106 | p.append(mannwhitneyu(a,b)[1]) 107 | pt.append(np.array(t).mean()) 108 | et.append(np.array(t).std(ddof=1)/np.sqrt(M)) 109 | pm.append(np.array(p).mean()) 110 | em.append(np.array(p).std(ddof=1)/np.sqrt(M)) 111 | if (i==1000): 112 | print("n=1000 Cohen's d = %0.5f" % Cohen_d(a,b)) 113 | pt = np.array(pt) 114 | pm = np.array(pm) 115 | et = np.array(et) 116 | em = np.array(em) 117 | plt.errorbar(n,pt,et,marker='o',label='t-test') 118 | plt.errorbar(n,pm,em,marker='s',label='Mann-Whitney U') 119 | plt.xlabel('Sample size') 120 | plt.ylabel("$p$-value") 121 | plt.legend(loc="upper right") 122 | plt.tight_layout(pad=0,w_pad=0,h_pad=0) 123 | plt.savefig("hypothesis_pvalue_plot.png", dpi=300) 124 | 125 | -------------------------------------------------------------------------------- /chapter_04/missing.py: -------------------------------------------------------------------------------- 1 | # Missing data example 2 | import numpy as np 3 | import matplotlib.pylab as plt 4 | 5 | N = 1000 6 | np.random.seed(73939133) 7 | x = np.zeros((N,4)) 8 | x[:,0] = 5*np.random.random(N) 9 | x[:,1] = np.random.normal(10,1,size=N) 10 | x[:,2] = 3*np.random.beta(5,2,N) 11 | x[:,3] = 0.3*np.random.lognormal(size=N) 12 | 13 | plt.boxplot(x) 14 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 15 | plt.savefig("missing_box_plot.png", dpi=300) 16 | plt.close() 17 | 18 
| # Make 5% of the values NaN 19 | i = np.random.randint(0,N, size=int(0.05*N)) 20 | x[i,0] = np.nan 21 | i = np.random.randint(0,N, size=int(0.05*N)) 22 | x[i,1] = np.nan 23 | i = np.random.randint(0,N, size=int(0.05*N)) 24 | x[i,2] = np.nan 25 | i = np.random.randint(0,N, size=int(0.05*N)) 26 | x[i,3] = np.nan 27 | 28 | # Do we have NaNs in feature 2? 29 | if (np.isnan(x[:,2]).sum() != 0): 30 | print("NaNs present") 31 | i = np.where(np.isnan(x[:,2]) == False) 32 | z = x[i,2] 33 | mn,md,s = z.mean(), np.median(z), z.std(ddof=1) 34 | hh,xx = np.histogram(z, bins=40) 35 | plt.bar(xx[:-1],hh, width=0.8*(xx[1]-xx[0])) 36 | plt.xlabel("x") 37 | plt.ylabel("Count") 38 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 39 | plt.savefig("missing_feature_2_plot.png", dpi=300) 40 | plt.close() 41 | 42 | i = np.where(np.isnan(x[:,2]) == True) 43 | x[i,2] = md # replace w/median 44 | 45 | print("non-NaN mean, std = ", z.mean(), z.std(ddof=1)) 46 | print("updated mean, std = ", x[:,2].mean(), x[:,2].std(ddof=1)) 47 | 48 | hh,xx = np.histogram(x[:,2], bins=40) 49 | plt.bar(xx[:-1],hh, width=0.8*(xx[1]-xx[0])) 50 | plt.xlabel("x") 51 | plt.ylabel("Count") 52 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 53 | plt.savefig("missing_feature_2_updated_plot.png", dpi=300) 54 | plt.close() 55 | 56 | # Do the same to the others 57 | i = np.where(np.isnan(x[:,0]) == False) 58 | m = np.median(x[i,0]) 59 | i = np.where(np.isnan(x[:,0]) == True) 60 | x[i,0] = m 61 | 62 | i = np.where(np.isnan(x[:,1]) == False) 63 | m = np.median(x[i,1]) 64 | i = np.where(np.isnan(x[:,1]) == True) 65 | x[i,1] = m 66 | 67 | i = np.where(np.isnan(x[:,3]) == False) 68 | m = np.median(x[i,3]) 69 | i = np.where(np.isnan(x[:,3]) == True) 70 | x[i,3] = m 71 | 72 | plt.boxplot(x) 73 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 74 | plt.savefig("missing_updated_box_plot.png", dpi=300) 75 | plt.close() 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /chapter_04/quantiles.py: -------------------------------------------------------------------------------- 1 | # 2 | # Use a synthetic exam dataset to illustrate quantiles 3 | # 4 | # RTK, 03-Jul-2020 5 | # Last update: 03-Jul-2020 6 | # 7 | ################################################################ 8 | 9 | import numpy as np 10 | import matplotlib.pylab as plt 11 | 12 | d = np.load("exams.npy") 13 | p = d[:,0].astype("uint32") 14 | q = np.quantile(p, [0.0, 0.25, 0.5, 0.75, 1.0]) 15 | 16 | print() 17 | print("Quartiles: ", q) 18 | print() 19 | print("Counts by quartile:") 20 | print(" %d" % ((q[0] <= p) & (p < q[1])).sum()) 21 | print(" %d" % ((q[1] <= p) & (p < q[2])).sum()) 22 | print(" %d" % ((q[2] <= p) & (p < q[3])).sum()) 23 | print(" %d" % ((q[3] <= p) & (p < q[4])).sum()) 24 | print() 25 | 26 | h = np.bincount(p, minlength=100) 27 | x = np.arange(101) 28 | plt.bar(x,h, width=0.8*(x[1]-x[0])) 29 | n = 1.1*h.max() 30 | plt.plot([q[1],q[1]],[0,n], linewidth=3, color='k') 31 | plt.plot([q[2],q[2]],[0,n], linewidth=3, color='k') 32 | plt.plot([q[3],q[3]],[0,n], linewidth=3, color='k') 33 | plt.xlim((p.min()-1,p.max()+1)) 34 | plt.ylabel("Count") 35 | plt.tight_layout(pad=0,w_pad=0,h_pad=0) 36 | plt.savefig("quantiles_plot.png", dpi=300) 37 | #plt.show() 38 | plt.close() 39 | 40 | # box plot 41 | plt.boxplot(d) 42 | plt.xlabel("Test") 43 | plt.ylabel("Scores") 44 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 45 | plt.savefig("box_plot.png", dpi=300) 46 | #plt.show() 47 | plt.close() 48 | 49 | plt.boxplot(p) 50 | plt.ylabel("Scores") 51 | 
plt.tight_layout(pad=0, w_pad=0, h_pad=0) 52 | plt.savefig("box_plot_1.png", dpi=300) 53 | plt.show() 54 | plt.close() 55 | 56 | -------------------------------------------------------------------------------- /chapter_05/matrixmul.py: -------------------------------------------------------------------------------- 1 | # O(n^3) matrix multiplication 2 | 3 | import time 4 | import numpy as np 5 | 6 | def matrixmul(A,B): 7 | I,K = A.shape 8 | J = B.shape[1] 9 | C = np.zeros((I,J), dtype=A.dtype) 10 | for i in range(I): 11 | for j in range(J): 12 | for k in range(K): 13 | C[i,j] += A[i,k]*B[k,j] 14 | return C 15 | 16 | A = np.array([[1,2,3],[4,5,6],[7,8,9],[10,11,12]]) 17 | B = np.array([[1,2],[3,4],[5,6]]) 18 | N = 100000 19 | 20 | s = time.time() 21 | for i in range(N): 22 | C = np.matmul(A,B) 23 | e = time.time() 24 | print("np.matmul: %0.6f" % (e-s,)) 25 | 26 | s = time.time() 27 | for i in range(N): 28 | C = matrixmul(A,B) 29 | e = time.time() 30 | print("matrixmul: %0.6f" % (e-s,)) 31 | 32 | -------------------------------------------------------------------------------- /chapter_05/numpy_matmul.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: numpy_matmul.py 3 | # 4 | # NumPy matrix multiplication examples 5 | # 6 | # RTK, 12-Apr-2020 7 | # Last update: 12-Apr-2020 8 | # 9 | ################################################################ 10 | 11 | import numpy as np 12 | 13 | def dot(a,b): 14 | try: 15 | return np.dot(a,b) 16 | except: 17 | return "fails" 18 | 19 | def matmul(a,b): 20 | try: 21 | return np.matmul(a,b) 22 | except: 23 | return "fails" 24 | 25 | # the different vectors and matrices 26 | a1 = np.array([1,2,3]) 27 | ar = a1.reshape((1,3)) 28 | ac = a1.reshape((3,1)) 29 | b1 = np.array([1,2,3]) 30 | br = b1.reshape((1,3)) 31 | bc = b1.reshape((3,1)) 32 | A = np.array([[1,2,3],[4,5,6],[7,8,9]]) 33 | B = np.array([[9,8,7],[6,5,4],[3,2,1]]) 34 | 35 | print() 36 | print("np.dot examples:") 37 | print("dot(a1,b1):"); print(dot(a1,b1)) 38 | print("dot(a1,br):"); print(dot(a1,br)) 39 | print("dot(a1,bc):"); print(dot(a1,bc)) 40 | print("dot(ar,b1):"); print(dot(ar,b1)) 41 | print("dot(ar,br):"); print(dot(ar,br)) 42 | print("dot(ar,bc):"); print(dot(ar,bc)) 43 | print("dot(ac,b1):"); print(dot(ac,b1)) 44 | print("dot(ac,br):"); print(dot(ac,br)) 45 | print("dot(ac,bc):"); print(dot(ac,bc)) 46 | print("dot(A,a1):"); print(dot(A,a1)) 47 | print("dot(A,ar):"); print(dot(A,ar)) 48 | print("dot(A,ac):"); print(dot(A,ac)) 49 | print("dot(a1,A):"); print(dot(a1,A)) 50 | print("dot(ar,A):"); print(dot(ar,A)) 51 | print("dot(ac,A):"); print(dot(ac,A)) 52 | print("dot(A,B):"); print(dot(A,B)) 53 | print() 54 | 55 | print() 56 | print("np.matmul examples:") 57 | print("matmul(a1,b1):"); print(matmul(a1,b1)) 58 | print("matmul(a1,br):"); print(matmul(a1,br)) 59 | print("matmul(a1,bc):"); print(matmul(a1,bc)) 60 | print("matmul(ar,b1):"); print(matmul(ar,b1)) 61 | print("matmul(ar,br):"); print(matmul(ar,br)) 62 | print("matmul(ar,bc):"); print(matmul(ar,bc)) 63 | print("matmul(ac,b1):"); print(matmul(ac,b1)) 64 | print("matmul(ac,br):"); print(matmul(ac,br)) 65 | print("matmul(ac,bc):"); print(matmul(ac,bc)) 66 | print("matmul(A,a1):"); print(matmul(A,a1)) 67 | print("matmul(A,ar):"); print(matmul(A,ar)) 68 | print("matmul(A,ac):"); print(matmul(A,ac)) 69 | print("matmul(a1,A):"); print(matmul(a1,A)) 70 | print("matmul(ar,A):"); print(matmul(ar,A)) 71 | print("matmul(ac,A):"); print(matmul(ac,A)) 72 | print("matmul(A,B):"); 
print(matmul(A,B)) 73 | print() 74 | 75 | -------------------------------------------------------------------------------- /chapter_06/bc_mahalanobis.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn import datasets 3 | from scipy.spatial.distance import mahalanobis 4 | 5 | bc = datasets.load_breast_cancer() 6 | d = bc.data 7 | l = bc.target 8 | i = np.argsort(np.random.random(len(d))) 9 | d = d[i] 10 | l = l[i] 11 | xtrn, ytrn = d[:400], l[:400] 12 | xtst, ytst = d[400:], l[400:] 13 | 14 | i = np.where(ytrn == 0) 15 | m0 = xtrn[i].mean(axis=0) 16 | i = np.where(ytrn == 1) 17 | m1 = xtrn[i].mean(axis=0) 18 | S = np.cov(xtrn, rowvar=False) 19 | SI= np.linalg.inv(S) 20 | 21 | def score(xtst, ytst, m, SI): 22 | nc = 0 23 | for i in range(len(ytst)): 24 | d = np.array([mahalanobis(xtst[i],m[0],SI), 25 | mahalanobis(xtst[i],m[1],SI)]) 26 | c = np.argmin(d) 27 | if (c == ytst[i]): 28 | nc += 1 29 | return nc / len(ytst) 30 | 31 | mscore = score(xtst, ytst, [m0,m1], SI) 32 | escore = score(xtst, ytst, [m0,m1], np.identity(30)) 33 | print("Mahalanobis score = %0.4f" % mscore) 34 | print("Euclidean score = %0.4f" % escore) 35 | 36 | -------------------------------------------------------------------------------- /chapter_06/iris_pca.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pylab as plt 3 | from sklearn.decomposition import PCA, TruncatedSVD 4 | from sklearn.datasets import load_iris 5 | from scipy.linalg import svd as SVD 6 | 7 | iris = load_iris().data.copy() 8 | labels = load_iris().target.copy() 9 | m = iris.mean(axis=0) 10 | s = iris.std(axis=0) 11 | ir = iris - m 12 | cv = np.cov(ir, rowvar=False) 13 | val, vec = np.linalg.eig(cv) 14 | val = np.abs(val) 15 | idx = np.argsort(val)[::-1] 16 | ex = val[idx] / val.sum() 17 | print("fraction explained: ", ex) 18 | w = np.vstack((vec[:,idx[0]],vec[:,idx[1]])) 19 | d = np.zeros((ir.shape[0],2)) 20 | for i in range(ir.shape[0]): 21 | d[i,:] = np.dot(w,ir[i]) 22 | 23 | markers = np.array(["o","s","+"])[labels] 24 | for i in range(len(labels)): 25 | plt.plot(d[i,0], d[i,1], marker=markers[i], color='k', linestyle='none') 26 | plt.xlabel("$x_0$") 27 | plt.ylabel("$x_1$") 28 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 29 | plt.savefig("iris_pca.png", dpi=300) 30 | plt.close() 31 | 32 | pca = PCA(n_components=2) 33 | pca.fit(ir) 34 | dd = pca.fit_transform(ir) 35 | for i in range(len(labels)): 36 | plt.plot(dd[i,0], dd[i,1], marker=markers[i], color='k', linestyle='none') 37 | plt.xlabel("$x_0$") 38 | plt.ylabel("$x_1$") 39 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 40 | plt.savefig("iris_pca_sklearn.png", dpi=300) 41 | plt.close() 42 | 43 | svd = TruncatedSVD(n_components=2) 44 | svd.fit(ir) 45 | s = svd.fit_transform(ir) 46 | for i in range(len(labels)): 47 | plt.plot(s[i,0], s[i,1], marker=markers[i], color='k', linestyle='none') 48 | plt.xlabel("$x_0$") 49 | plt.ylabel("$x_1$") 50 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 51 | plt.savefig("iris_pca_sklearn_svd.png", dpi=300) 52 | plt.close() 53 | 54 | # truncate manually - exact result as sklearn PCA 55 | n_elements = 2 56 | u,s,vt = SVD(ir) 57 | S = np.zeros((ir.shape[0], ir.shape[1])) 58 | for i in range(4): 59 | S[i,i] = s[i] 60 | S = S[:, :n_elements] 61 | T = u @ S 62 | for i in range(len(labels)): 63 | plt.plot(T[i,0], T[i,1], marker=markers[i], color='k', linestyle='none') 64 | plt.xlabel("$x_0$") 65 | plt.ylabel("$x_1$") 66 | 
plt.tight_layout(pad=0, w_pad=0, h_pad=0) 67 | plt.savefig("iris_pca_truncated_svd.png", dpi=300) 68 | 69 | 70 | -------------------------------------------------------------------------------- /chapter_06/kl_divergence.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.special import rel_entr 3 | import matplotlib.pylab as plt 4 | 5 | N = 1000000 6 | p = np.random.randint(0,13,size=N) 7 | p = np.bincount(p) 8 | p = p / p.sum() 9 | q = np.random.binomial(12,0.9,size=N) 10 | q = np.bincount(q) 11 | q = q / q.sum() 12 | w = np.random.binomial(12,0.4,size=N) 13 | w = np.bincount(w) 14 | w = w / w.sum() 15 | print(rel_entr(q,p).sum()) 16 | print(rel_entr(w,p).sum()) 17 | plt.bar(np.arange(13),p,0.333,hatch="///",edgecolor='k') 18 | plt.bar(np.arange(13)+0.333,q,0.333,hatch="---",edgecolor='k') 19 | plt.bar(np.arange(13)+0.666,w,0.333,hatch="\\\\",edgecolor='k') 20 | plt.xlabel("Value") 21 | plt.ylabel("Proportion") 22 | plt.tight_layout(pad=0,h_pad=0,w_pad=0) 23 | plt.savefig("kl_divergence.png", dpi=300) 24 | plt.show() 25 | 26 | -------------------------------------------------------------------------------- /chapter_07/c07_figure.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: c07_figure.py 3 | # 4 | # Plot of x^2+xy+y^2 and gradient field. 5 | # 6 | # RTK, 25-Mar-2020 7 | # Last update: 26-Mar-2020 8 | # 9 | ################################################################ 10 | 11 | import numpy as np 12 | from mpl_toolkits.mplot3d import Axes3D 13 | import matplotlib.pylab as plt 14 | 15 | # Function plot 16 | x = np.linspace(-1.0,1.0,50) 17 | y = np.linspace(-1.0,1.0,50) 18 | xx = [] 19 | yy = [] 20 | zz = [] 21 | 22 | for i in range(50): 23 | for j in range(50): 24 | xx.append(x[i]) 25 | yy.append(y[j]) 26 | zz.append(x[i]*x[i]+x[i]*y[j]+y[j]*y[j]) 27 | x = np.array(xx) 28 | y = np.array(yy) 29 | z = np.array(zz) 30 | 31 | fig = plt.figure() 32 | ax = fig.add_subplot(111, projection='3d') 33 | ax.scatter(x, y, z, marker='.', s=2, color='b') 34 | ax.view_init(30, 50) 35 | ax.set_xlabel("$x$") 36 | ax.set_ylabel("$y$") 37 | ax.set_zlabel("$z$") 38 | plt.draw() 39 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 40 | plt.savefig("c05fig03a.png", dpi=300) 41 | ax.view_init(30,20) 42 | ax.set_xlabel("$x$") 43 | ax.set_ylabel("$y$") 44 | ax.set_zlabel("$z$") 45 | plt.draw() 46 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 47 | plt.savefig("c05fig03b.png", dpi=300) 48 | plt.close() 49 | 50 | # Quiver plot - 2D 51 | fig = plt.figure() 52 | ax = fig.add_subplot(111) 53 | x = np.linspace(-1.0,1.0,20) 54 | y = np.linspace(-1.0,1.0,20) 55 | xv, yv = np.meshgrid(x, y, indexing='ij', sparse=False) 56 | dx = 2*xv + yv 57 | dy = 2*yv + xv 58 | ax.quiver(xv, yv, dx, dy, color='b') 59 | ax.set_xlabel("$x$") 60 | ax.set_ylabel("$y$") 61 | plt.axis('equal') 62 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 63 | plt.savefig("c05fig03c.png", dpi=300) 64 | plt.close() 65 | 66 | 67 | -------------------------------------------------------------------------------- /chapter_08/newton_1d.py: -------------------------------------------------------------------------------- 1 | # Newton's method in 1D 2 | import numpy as np 3 | 4 | def f(x): 5 | return 2.0 - x*x 6 | 7 | def d(x): 8 | return -2.0*x 9 | 10 | x = 1.0 11 | 12 | for i in range(5): 13 | x = x - f(x)/d(x) 14 | print("%2d: %0.16f" % (i+1,x)) 15 | 16 | print() 17 | print("NumPy says sqrt(2) = %0.16f for a deviation of %0.16f" % (np.sqrt(2), 
np.abs(np.sqrt(2)-x)))
18 | print()
19 | 
20 | 

--------------------------------------------------------------------------------
/chapter_08/newton_2d.py:
--------------------------------------------------------------------------------
1 | # 2D Newton's method
2 | import numpy as np
3 | 
4 | def f(x):
5 |     x0,x1 = x[0,0],x[1,0]
6 |     return np.array([[4*x0-2*x0*x1],[2*x1+x0*x1-2*x1**2]])
7 | 
8 | def JI(x):
9 |     x0,x1 = x[0,0],x[1,0]
10 |     d = (4-2*x1)*(2+x0-4*x1)+2*x0*x1   # determinant of the Jacobian of f
11 |     return (1/d)*np.array([[2+x0-4*x1,2*x0],[-x1,4-2*x1]])   # inverse Jacobian
12 | 
13 | x0 = float(input("x0: "))
14 | x1 = float(input("x1: "))
15 | x = np.array([[x0],[x1]])
16 | 
17 | N = 20
18 | for i in range(N):
19 |     x = x - JI(x) @ f(x)
20 |     if (i > (N-10)):
21 |         print("%4d: (%0.8f, %0.8f)" % (i, x[0,0],x[1,0]))
22 | 

--------------------------------------------------------------------------------
/chapter_08/spiral.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from mpl_toolkits.mplot3d import Axes3D
3 | import matplotlib.pylab as plt
4 | 
5 | t = np.linspace(0,50,1000)
6 | x = t*np.cos(t)
7 | y = t*np.sin(t)
8 | z = t
9 | 
10 | fig = plt.figure()
11 | ax = fig.add_subplot(111, projection='3d')
12 | ax.plot(x, y, z, color='k')
13 | ax.set_xlabel("$x$")
14 | ax.set_ylabel("$y$")
15 | ax.set_zlabel("$z$")
16 | plt.tight_layout(pad=0, w_pad=0, h_pad=0)
17 | plt.savefig("spiral.png", dpi=300)
18 | plt.show()
19 | 
20 | 

--------------------------------------------------------------------------------
/chapter_09/c09_nn.py:
--------------------------------------------------------------------------------
1 | #
2 | # Simple matrix-vector operations example
3 | #
4 | # RTK, 11-Apr-2020 (Happy bday, Peter!)
5 | # Last update: 11-Apr-2020
6 | #
7 | ################################################################
8 | 
9 | import matplotlib.pylab as plt
10 | import numpy as np
11 | from sklearn.neural_network import MLPClassifier
12 | 
13 | # Build the dataset
14 | np.random.seed(8675309)
15 | x0 = np.random.random(50)-0.3
16 | y0 = np.random.random(50)+0.3
17 | x1 = np.random.random(50)+0.3
18 | y1 = np.random.random(50)-0.3
19 | print("x0,y0: %0.6f, %0.6f" % (x0.mean(), y0.mean()))
20 | print("x1,y1: %0.6f, %0.6f" % (x1.mean(), y1.mean()))
21 | print()
22 | x = np.zeros((100,2))
23 | x[:50,0] = x0; x[:50,1] = y0
24 | x[50:,0] = x1; x[50:,1] = y1
25 | y = np.array([0]*50+[1]*50)
26 | 
27 | # Randomize and make train/test split
28 | idx = np.argsort(np.random.random(100))
29 | x = x[idx]
30 | y = y[idx]
31 | x_train = x[:75]
32 | y_train = y[:75]
33 | x_test = x[75:]
34 | y_test = y[75:]
35 | 
36 | # Show the dataset
37 | plt.plot(x0,y0,marker='o',linestyle='none')
38 | plt.plot(x1,y1,marker='s',linestyle='none')
39 | plt.xlabel(r'$x_0$')
40 | plt.ylabel(r'$x_1$')
41 | plt.tight_layout(pad=0, w_pad=0, h_pad=0)
42 | plt.savefig("c04_nn.png", dpi=300)
43 | 
44 | # Train a simple model
45 | clf = MLPClassifier(hidden_layer_sizes=(5,))
46 | clf.fit(x_train, y_train)
47 | score = clf.score(x_test, y_test)
48 | prob = clf.predict_proba(x_test)
49 | print("Model accuracy on test set: %0.4f" % score)
50 | W0 = clf.coefs_[0].T
51 | b0 = clf.intercepts_[0].reshape((5,1))
52 | W1 = clf.coefs_[1].T
53 | b1 = clf.intercepts_[1]
54 | 
55 | print("Weights and biases:")
56 | print(W0)
57 | print(b0)
58 | print()
59 | print(W1)
60 | print(b1)
61 | print()
62 | 
63 | z = x_test[0].reshape((2,1))
64 | print("x_test:", z)
65 | print("W0 @ z + b0", W0 @ z + b0)
66 | print("a0 = relu(W0 @ z + b0)", 
np.maximum(0,W0@z+b0)) 67 | a0 = np.maximum(0,W0@z+b0) 68 | print("a1 = W1@a0 + b1", W1@a0+b1) 69 | a1 = W1@a0+b1 70 | print("sigmoid(a1)", 1.0/(1.0+np.exp(-a1))) 71 | print() 72 | print("prob: ", prob[0][1]) 73 | print("y_test: ", y_test[0]) 74 | print() 75 | 76 | -------------------------------------------------------------------------------- /chapter_09/convolve_example.py: -------------------------------------------------------------------------------- 1 | # 2 | # Illustrate how different NumPy and SciPy convolution 3 | # and correlation routines work. 4 | # 5 | import numpy as np 6 | from scipy.signal import convolve2d 7 | from scipy.datasets import face 8 | from PIL import Image 9 | 10 | # Get Ricky's face 11 | img = face(True) 12 | img = img[:512,(img.shape[1]-612):(img.shape[1]-100)] 13 | 14 | # An asymmetric kernel 15 | k = np.array([[1,0,0],[0,-8,0],[0,0,3]]) 16 | c = convolve2d(img, k, mode='same') 17 | 18 | # Results 19 | print("Original:") 20 | print(img[:8,:8]) 21 | print() 22 | print("Kernel:") 23 | print(k) 24 | print() 25 | print("convolve2d(img,k,mode='same'):") 26 | print(c[1:8,1:8]) 27 | print() 28 | 29 | if (c.min() < 0): 30 | c = c + np.abs(c.min()) 31 | c = (255*(c / c.max())).astype("uint8") 32 | 33 | Image.fromarray(c).save("ricky_convol.png") 34 | Image.fromarray(img).save("ricky_orig.png") 35 | 36 | -------------------------------------------------------------------------------- /chapter_10/NN.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: nn.py 3 | # 4 | # Generic fully connected neural network code using NumPy. 5 | # 6 | # Based on code by Omar Aflak, 7 | # 8 | # https://github.com/OmarAflak/Medium-Python-Neural-Network 9 | # 10 | # used and modified with his permission. 
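# The three classes below form a tiny sequential-network API:
# FullyConnectedLayer holds weights and biases and accumulates
# gradients over a minibatch, ActivationLayer applies the sigmoid,
# and Network chains them via add(), fit(), and predict().  A usage
# sketch mirroring iris.py later in this chapter (layer sizes are
# illustrative):
#
#   net = Network()
#   net.add(FullyConnectedLayer(2, 2))
#   net.add(ActivationLayer())
#   net.add(FullyConnectedLayer(2, 2))
#   net.fit(x_train, y_train, minibatches=4000, learning_rate=0.1)
#   predictions = net.predict(x_test)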
11 | # 12 | # RTK, 03-Feb-2021 13 | # Last update: 06-Feb-2021 14 | # 15 | ################################################################ 16 | 17 | import numpy as np 18 | 19 | # Activation function and derivative 20 | def sigmoid(x): 21 | return 1.0 / (1.0 + np.exp(-x)) 22 | 23 | def sigmoid_prime(x): 24 | return sigmoid(x)*(1.0 - sigmoid(x)) 25 | 26 | # Loss function and derivative 27 | def mse(y_true, y_pred): 28 | return (0.5*(y_true - y_pred)**2).mean() 29 | 30 | def mse_prime(y_true, y_pred): 31 | return y_pred - y_true 32 | 33 | 34 | ################################################################ 35 | # ActivationLayer 36 | # 37 | class ActivationLayer: 38 | def forward(self, input_data): 39 | self.input = input_data 40 | return sigmoid(input_data) 41 | 42 | def backward(self, output_error): 43 | return sigmoid_prime(self.input) * output_error 44 | 45 | def step(self, eta): 46 | return 47 | 48 | 49 | ################################################################ 50 | # FullyConnectedLayer 51 | # 52 | class FullyConnectedLayer: 53 | def __init__(self, input_size, output_size): 54 | # for accumulating error over a minibatch 55 | self.delta_w = np.zeros((input_size, output_size)) 56 | self.delta_b = np.zeros((1,output_size)) 57 | self.passes = 0 58 | 59 | # initialize the weights and biases w/small random values 60 | self.weights = np.random.rand(input_size, output_size) - 0.5 61 | self.bias = np.random.rand(1, output_size) - 0.5 62 | 63 | def forward(self, input_data): 64 | self.input = input_data 65 | return np.dot(self.input, self.weights) + self.bias 66 | 67 | def backward(self, output_error): 68 | input_error = np.dot(output_error, self.weights.T) 69 | weights_error = np.dot(self.input.T, output_error) 70 | 71 | # accumulate the error over the minibatch 72 | self.delta_w += weights_error 73 | self.delta_b += output_error 74 | self.passes += 1 75 | return input_error 76 | 77 | def step(self, eta): 78 | # update the weights and biases by the mean error 79 | # over the minibatch 80 | self.weights -= eta * self.delta_w / self.passes 81 | self.bias -= eta * self.delta_b / self.passes 82 | 83 | # reset for the next minibatch 84 | self.delta_w = np.zeros(self.weights.shape) 85 | self.delta_b = np.zeros(self.bias.shape) 86 | self.passes = 0 87 | 88 | 89 | ################################################################ 90 | # Network 91 | # 92 | class Network: 93 | def __init__(self, verbose=True): 94 | self.verbose = verbose 95 | self.layers = [] 96 | 97 | def add(self, layer): 98 | self.layers.append(layer) 99 | 100 | def predict(self, input_data): 101 | result = [] 102 | for i in range(input_data.shape[0]): 103 | output = input_data[i] 104 | for layer in self.layers: 105 | output = layer.forward(output) 106 | result.append(output) 107 | return result 108 | 109 | def fit(self, x_train, y_train, minibatches, learning_rate, batch_size=64): 110 | for i in range(minibatches): 111 | err = 0 112 | 113 | # select a random minibatch 114 | idx = np.argsort(np.random.random(x_train.shape[0]))[:batch_size] 115 | x_batch = x_train[idx] 116 | y_batch = y_train[idx] 117 | 118 | for j in range(batch_size): 119 | # forward propagation 120 | output = x_batch[j] 121 | for layer in self.layers: 122 | output = layer.forward(output) 123 | 124 | # accumulate loss 125 | err += mse(y_batch[j], output) 126 | 127 | # backward propagation 128 | error = mse_prime(y_batch[j], output) 129 | for layer in reversed(self.layers): 130 | error = layer.backward(error) 131 | 132 | # update weights and biases 133 | for 
layer in self.layers: 134 | layer.step(learning_rate) 135 | 136 | # report mean loss over minibatch 137 | if (self.verbose) and ((i%10) == 0): 138 | err /= batch_size 139 | print('minibatch %5d/%d error=%0.9f' % (i, minibatches, err)) 140 | 141 | # end NN.py 142 | 143 | -------------------------------------------------------------------------------- /chapter_10/build_dataset.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: build_dataset.py 3 | # 4 | # Build the small MNIST dataset. 5 | # 6 | # RTK, 03-Feb-2021 7 | # Last update: 24-Mar-2024 8 | # 9 | ################################################################ 10 | 11 | import cv2 12 | import numpy as np 13 | from keras.datasets import mnist 14 | from keras.utils import to_categorical 15 | 16 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 17 | ytrn = to_categorical(y_train) 18 | 19 | np.save("dataset/train_images_full.npy", x_train) 20 | np.save("dataset/test_images_full.npy", x_test) 21 | np.save("dataset/train_labels_vector.npy", ytrn) 22 | np.save("dataset/train_labels.npy", y_train) 23 | np.save("dataset/test_labels.npy", y_test) 24 | 25 | # Build 14x14 versions 26 | xtrn = np.zeros((60000,14,14), dtype="float32") 27 | for i in range(60000): 28 | xtrn[i,:,:] = cv2.resize(x_train[i], (14,14), interpolation=cv2.INTER_LINEAR) 29 | xtst = np.zeros((10000,14,14), dtype="float32") 30 | for i in range(10000): 31 | xtst[i,:,:] = cv2.resize(x_test[i], (14,14), interpolation=cv2.INTER_LINEAR) 32 | 33 | np.save("dataset/train_images_small.npy", xtrn) 34 | np.save("dataset/test_images_small.npy", xtst) 35 | 36 | -------------------------------------------------------------------------------- /chapter_10/iris.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: iris.py 3 | # 4 | # Train and test the 2-feature iris dataset 5 | # 6 | # RTK, 06-Feb-2021 7 | # Last update: 06-Feb-2021 8 | # 9 | ################################################################ 10 | 11 | import numpy as np 12 | from NN import * 13 | from sklearn.datasets import load_iris 14 | 15 | def BuildDataset(): 16 | """Create the dataset""" 17 | 18 | # Get the dataset keeping the first two features 19 | iris = load_iris() 20 | x = iris["data"][:,:2] 21 | y = iris["target"] 22 | 23 | # Standardize and keep only classes 0 and 1 24 | x = (x - x.mean(axis=0)) / x.std(axis=0) 25 | i0 = np.where(y == 0)[0] 26 | i1 = np.where(y == 1)[0] 27 | x = np.vstack((x[i0],x[i1])) 28 | 29 | # Train and test data 30 | xtrn = np.vstack((x[:35],x[50:85])) 31 | ytrn = np.array([0]*35 + [1]*35) 32 | xtst = np.vstack((x[35:50],x[85:])) 33 | ytst = np.array([0]*15+[1]*15) 34 | 35 | idx = np.argsort(np.random.random(70)) 36 | xtrn = xtrn[idx] 37 | ytrn = ytrn[idx] 38 | idx = np.argsort(np.random.random(30)) 39 | xtst = xtst[idx] 40 | ytst = ytst[idx] 41 | 42 | y_train = np.zeros((len(ytrn),2)) 43 | for i in range(len(ytrn)): 44 | if (ytrn[i] == 1): 45 | y_train[i,:] = [0,1] 46 | else: 47 | y_train[i,:] = [1,0] 48 | 49 | y_test = np.zeros((len(ytst),2)) 50 | for i in range(len(ytst)): 51 | if (ytst[i] == 1): 52 | y_test[i,:] = [0,1] 53 | else: 54 | y_test[i,:] = [1,0] 55 | 56 | return (xtrn.reshape((xtrn.shape[0],1,2)), y_train, 57 | xtst.reshape((xtst.shape[0],1,2)), y_test) 58 | 59 | 60 | def main(): 61 | """Train a model""" 62 | 63 | x_train, y_train, x_test, y_test = BuildDataset() 64 | 65 | # Build the network using sigmoid activations 66 | net = Network() 67 | 
net.add(FullyConnectedLayer(2,2)) 68 | net.add(ActivationLayer()) 69 | net.add(FullyConnectedLayer(2,2)) 70 | 71 | # Loss and train 72 | net.fit(x_train, y_train, minibatches=4000, learning_rate=0.1, batch_size=len(y_train)) 73 | 74 | # Build the confusion matrix using the test set predictions 75 | out = net.predict(x_test) 76 | cm = np.zeros((2,2), dtype="uint32") 77 | for i in range(len(y_test)): 78 | cm[np.argmax(y_test[i]),np.argmax(out[i])] += 1 79 | 80 | # Show the results 81 | print() 82 | print(np.array2string(cm)) 83 | print() 84 | print("accuracy = %0.7f" % (np.diag(cm).sum() / cm.sum(),)) 85 | print() 86 | 87 | 88 | if (__name__ == "__main__"): 89 | main() 90 | 91 | -------------------------------------------------------------------------------- /chapter_10/mnist.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: mnist.py 3 | # 4 | # Train and test the small 14x14 MNIST dataset. 5 | # 6 | # RTK, 03-Feb-2021 7 | # Last update: 06-Feb-2021 8 | # 9 | ################################################################ 10 | 11 | import numpy as np 12 | from NN import * 13 | 14 | # Load, reshape, and scale the data 15 | x_train = np.load("../dataset/train_images_small.npy") 16 | x_test = np.load("../dataset/test_images_small.npy") 17 | y_train = np.load("../dataset/train_labels_vector.npy") 18 | y_test = np.load("../dataset/test_labels.npy") 19 | 20 | x_train = x_train.reshape(x_train.shape[0], 1, 14*14) 21 | x_train /= 255 22 | x_test = x_test.reshape(x_test.shape[0], 1, 14*14) 23 | x_test /= 255 24 | 25 | # Build the network using sigmoid activations 26 | net = Network() 27 | net.add(FullyConnectedLayer(14*14, 100)) 28 | net.add(ActivationLayer()) 29 | net.add(FullyConnectedLayer(100, 50)) 30 | net.add(ActivationLayer()) 31 | net.add(FullyConnectedLayer(50, 10)) 32 | net.add(ActivationLayer()) 33 | 34 | # Loss and train 35 | net.fit(x_train, y_train, minibatches=40000, learning_rate=1.0) 36 | 37 | # Build the confusion matrix using the test set predictions 38 | out = net.predict(x_test) 39 | cm = np.zeros((10,10), dtype="uint32") 40 | for i in range(len(y_test)): 41 | cm[y_test[i],np.argmax(out[i])] += 1 42 | 43 | # Show the results 44 | print() 45 | print(np.array2string(cm)) 46 | print() 47 | print("accuracy = %0.7f" % (np.diag(cm).sum() / cm.sum(),)) 48 | print() 49 | 50 | -------------------------------------------------------------------------------- /chapter_10/nn_by_hand.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: nn_by_hand.py 3 | # 4 | # Implement a simple feedforward neural network with 5 | # backprop and gradient descent. 
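# The network is fixed at two inputs, two sigmoid hidden nodes, and
# one linear output:
#
#   a0  = sigmoid(w0*x[0] + w2*x[1] + b0)
#   a1  = sigmoid(w1*x[0] + w3*x[1] + b1)
#   out = w4*a0 + w5*a1 + b2
#
# GradientDescent() below updates each parameter with the average of
# the per-sample gradients, e.g. w4 <- w4 - eta * sum_k((a2 - y_k)*a0) / m.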
6 | # 7 | # RTK, 02-Feb-2021 8 | # Last update: 02-Feb-2021 9 | # 10 | ################################################################ 11 | 12 | import numpy as np 13 | from sklearn.datasets import load_iris 14 | 15 | def BuildDataset(): 16 | """Create the dataset""" 17 | 18 | # Get the dataset keeping the first two features 19 | iris = load_iris() 20 | x = iris["data"][:,:2] 21 | y = iris["target"] 22 | 23 | # Standardize and keep only classes 0 and 1 24 | x = (x - x.mean(axis=0)) / x.std(axis=0) 25 | i0 = np.where(y == 0)[0] 26 | i1 = np.where(y == 1)[0] 27 | x = np.vstack((x[i0],x[i1])) 28 | 29 | # Train and test data 30 | xtrn = np.vstack((x[:35],x[50:85])) 31 | ytrn = np.array([0]*35 + [1]*35) 32 | xtst = np.vstack((x[35:50],x[85:])) 33 | ytst = np.array([0]*15+[1]*15) 34 | 35 | idx = np.argsort(np.random.random(70)) 36 | xtrn = xtrn[idx] 37 | ytrn = ytrn[idx] 38 | idx = np.argsort(np.random.random(30)) 39 | xtst = xtst[idx] 40 | ytst = ytst[idx] 41 | 42 | return xtrn, ytrn, xtst, ytst 43 | 44 | 45 | ################################################################ 46 | # sigmoid 47 | # 48 | def sigmoid(x): 49 | return 1.0 / (1.0 + np.exp(-x)) 50 | 51 | 52 | ################################################################ 53 | # Forward 54 | # 55 | def Forward(net, x): 56 | """Pass the data through the network""" 57 | 58 | out = np.zeros(x.shape[0]) 59 | 60 | for k in range(x.shape[0]): 61 | z0 = net["w0"]*x[k,0] + net["w2"]*x[k,1] + net["b0"] 62 | a0 = sigmoid(z0) 63 | z1 = net["w1"]*x[k,0] + net["w3"]*x[k,1] + net["b1"] 64 | a1 = sigmoid(z1) 65 | out[k] = net["w4"]*a0 + net["w5"]*a1 + net["b2"] 66 | 67 | return out 68 | 69 | 70 | ################################################################ 71 | # Evaluate 72 | # 73 | def Evaluate(net, x, y): 74 | """Evaluate the network""" 75 | 76 | out = Forward(net, x) 77 | tn = fp = fn = tp = 0 78 | pred = [] 79 | 80 | for i in range(len(y)): 81 | c = 0 if (out[i] < 0.5) else 1 82 | pred.append(c) 83 | if (c == 0) and (y[i] == 0): 84 | tn += 1 85 | elif (c == 0) and (y[i] == 1): 86 | fn += 1 87 | elif (c == 1) and (y[i] == 0): 88 | fp += 1 89 | else: 90 | tp += 1 91 | 92 | return tn,fp,fn,tp,pred 93 | 94 | 95 | 96 | ################################################################ 97 | # GradientDescent 98 | # 99 | def GradientDescent(net, x, y, epochs, eta): 100 | """Perform gradient descent""" 101 | 102 | for e in range(epochs): 103 | # Pass over training set accumulating deltas 104 | dw0 = dw1 = dw2 = dw3 = dw4 = dw5 = db0 = db1 = db2 = 0.0 105 | 106 | for k in range(len(y)): 107 | # Forward pass 108 | z0 = net["w0"]*x[k,0] + net["w2"]*x[k,1] + net["b0"] 109 | a0 = sigmoid(z0) 110 | z1 = net["w1"]*x[k,0] + net["w3"]*x[k,1] + net["b1"] 111 | a1 = sigmoid(z1) 112 | a2 = net["w4"]*a0 + net["w5"]*a1 + net["b2"] 113 | 114 | # Backward pass 115 | db2 += a2 - y[k] 116 | dw4 += (a2 - y[k]) * a0 117 | dw5 += (a2 - y[k]) * a1 118 | db1 += (a2 - y[k]) * net["w5"] * a1 * (1 - a1) 119 | dw1 += (a2 - y[k]) * net["w5"] * a1 * (1 - a1) * x[k,0] 120 | dw3 += (a2 - y[k]) * net["w5"] * a1 * (1 - a1) * x[k,1] 121 | db0 += (a2 - y[k]) * net["w4"] * a0 * (1 - a0) 122 | dw0 += (a2 - y[k]) * net["w4"] * a0 * (1 - a0) * x[k,0] 123 | dw2 += (a2 - y[k]) * net["w4"] * a0 * (1 - a0) * x[k,1] 124 | 125 | # Use average deltas to update the network 126 | m = len(y) 127 | net["b2"] = net["b2"] - eta * db2 / m 128 | net["w4"] = net["w4"] - eta * dw4 / m 129 | net["w5"] = net["w5"] - eta * dw5 / m 130 | net["b1"] = net["b1"] - eta * db1 / m 131 | net["w1"] = net["w1"] - eta 
* dw1 / m 132 | net["w3"] = net["w3"] - eta * dw3 / m 133 | net["b0"] = net["b0"] - eta * db0 / m 134 | net["w0"] = net["w0"] - eta * dw0 / m 135 | net["w2"] = net["w2"] - eta * dw2 / m 136 | 137 | # Training done, return the updated network 138 | return net 139 | 140 | 141 | ################################################################ 142 | # main 143 | # 144 | def main(): 145 | """Build and train a simple neural network""" 146 | 147 | epochs = 1000 # training epochs 148 | eta = 0.1 # learning rate 149 | 150 | # Get the train/test data 151 | xtrn, ytrn, xtst, ytst = BuildDataset() 152 | 153 | # Initialize the network 154 | net = {} 155 | net["b2"] = 0.0 156 | net["b1"] = 0.0 157 | net["b0"] = 0.0 158 | net["w5"] = 0.0001*(np.random.random() - 0.5) 159 | net["w4"] = 0.0001*(np.random.random() - 0.5) 160 | net["w3"] = 0.0001*(np.random.random() - 0.5) 161 | net["w2"] = 0.0001*(np.random.random() - 0.5) 162 | net["w1"] = 0.0001*(np.random.random() - 0.5) 163 | net["w0"] = 0.0001*(np.random.random() - 0.5) 164 | 165 | # Do a forward pass to get initial performance 166 | tn0,fp0,fn0,tp0,pred0 = Evaluate(net, xtst, ytst) 167 | 168 | # Gradient descent 169 | net = GradientDescent(net, xtrn, ytrn, epochs, eta) 170 | 171 | # Final model performance 172 | tn,fp,fn,tp,pred = Evaluate(net, xtst, ytst) 173 | 174 | # Summarize performance 175 | print() 176 | print("Training for %d epochs, learning rate %0.5f" % (epochs, eta)) 177 | print() 178 | print("Before training:") 179 | print(" TN:%3d FP:%3d" % (tn0, fp0)) 180 | print(" FN:%3d TP:%3d" % (fn0, tp0)) 181 | print() 182 | print("After training:") 183 | print(" TN:%3d FP:%3d" % (tn, fp)) 184 | print(" FN:%3d TP:%3d" % (fn, tp)) 185 | print() 186 | 187 | 188 | if (__name__ == "__main__"): 189 | main() 190 | 191 | 192 | -------------------------------------------------------------------------------- /chapter_11/NN.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: nn.py 3 | # 4 | # Generic fully connected neural network code using NumPy. 5 | # 6 | # Based on code by Omar Aflak, 7 | # 8 | # https://github.com/OmarAflak/Medium-Python-Neural-Network 9 | # 10 | # used and modified with his permission. 
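# This file repeats the chapter 10 network; its step() applies the
# plain averaged-minibatch update
#
#   w <- w - eta * delta_w / passes
#
# NNm.py, next in this chapter, replaces it with the momentum update
#
#   v <- momentum * v - eta * delta_w / passes
#   w <- w + v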
11 | # 12 | # RTK, 03-Feb-2021 13 | # Last update: 06-Feb-2021 14 | # 15 | ################################################################ 16 | 17 | import numpy as np 18 | 19 | # Activation function and derivative 20 | def sigmoid(x): 21 | return 1.0 / (1.0 + np.exp(-x)) 22 | 23 | def sigmoid_prime(x): 24 | return sigmoid(x)*(1.0 - sigmoid(x)) 25 | 26 | # Loss function and derivative 27 | def mse(y_true, y_pred): 28 | return (0.5*(y_true - y_pred)**2).mean() 29 | 30 | def mse_prime(y_true, y_pred): 31 | return y_pred - y_true 32 | 33 | 34 | ################################################################ 35 | # ActivationLayer 36 | # 37 | class ActivationLayer: 38 | def forward(self, input_data): 39 | self.input = input_data 40 | return sigmoid(input_data) 41 | 42 | def backward(self, output_error): 43 | return sigmoid_prime(self.input) * output_error 44 | 45 | def step(self, eta): 46 | return 47 | 48 | 49 | ################################################################ 50 | # FullyConnectedLayer 51 | # 52 | class FullyConnectedLayer: 53 | def __init__(self, input_size, output_size): 54 | # for accumulating error over a minibatch 55 | self.delta_w = np.zeros((input_size, output_size)) 56 | self.delta_b = np.zeros((1,output_size)) 57 | self.passes = 0 58 | 59 | # initialize the weights and biases w/small random values 60 | self.weights = np.random.rand(input_size, output_size) - 0.5 61 | self.bias = np.random.rand(1, output_size) - 0.5 62 | 63 | def forward(self, input_data): 64 | self.input = input_data 65 | return np.dot(self.input, self.weights) + self.bias 66 | 67 | def backward(self, output_error): 68 | input_error = np.dot(output_error, self.weights.T) 69 | weights_error = np.dot(self.input.T, output_error) 70 | 71 | # accumulate the error over the minibatch 72 | self.delta_w += weights_error 73 | self.delta_b += output_error 74 | self.passes += 1 75 | return input_error 76 | 77 | def step(self, eta): 78 | # update the weights and biases by the mean error 79 | # over the minibatch 80 | self.weights -= eta * self.delta_w / self.passes 81 | self.bias -= eta * self.delta_b / self.passes 82 | 83 | # reset for the next minibatch 84 | self.delta_w = np.zeros(self.weights.shape) 85 | self.delta_b = np.zeros(self.bias.shape) 86 | self.passes = 0 87 | 88 | 89 | ################################################################ 90 | # Network 91 | # 92 | class Network: 93 | def __init__(self, verbose=True): 94 | self.verbose = verbose 95 | self.layers = [] 96 | 97 | def add(self, layer): 98 | self.layers.append(layer) 99 | 100 | def predict(self, input_data): 101 | result = [] 102 | for i in range(input_data.shape[0]): 103 | output = input_data[i] 104 | for layer in self.layers: 105 | output = layer.forward(output) 106 | result.append(output) 107 | return result 108 | 109 | def fit(self, x_train, y_train, minibatches, learning_rate, batch_size=64): 110 | for i in range(minibatches): 111 | err = 0 112 | 113 | # select a random minibatch 114 | idx = np.argsort(np.random.random(x_train.shape[0]))[:batch_size] 115 | x_batch = x_train[idx] 116 | y_batch = y_train[idx] 117 | 118 | for j in range(batch_size): 119 | # forward propagation 120 | output = x_batch[j] 121 | for layer in self.layers: 122 | output = layer.forward(output) 123 | 124 | # accumulate loss 125 | err += mse(y_batch[j], output) 126 | 127 | # backward propagation 128 | error = mse_prime(y_batch[j], output) 129 | for layer in reversed(self.layers): 130 | error = layer.backward(error) 131 | 132 | # update weights and biases
133 | for layer in self.layers: 134 | layer.step(learning_rate) 135 | 136 | # report mean loss over minibatch 137 | if (self.verbose) and ((i%10) == 0): 138 | err /= batch_size 139 | print('minibatch %5d/%d error=%0.9f' % (i, minibatches, err)) 140 | 141 | # end NN.py 142 | 143 | -------------------------------------------------------------------------------- /chapter_11/NNm.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: NNm.py (w/momentum) 3 | # 4 | # Generic fully connected neural network code using NumPy. 5 | # 6 | # Based on code by Omar Aflak, 7 | # 8 | # https://github.com/OmarAflak/Medium-Python-Neural-Network 9 | # 10 | # used and modified with his permission. 11 | # 12 | # RTK, 03-Feb-2021 13 | # Last update: 18-Feb-2021 14 | # 15 | ################################################################ 16 | 17 | import numpy as np 18 | 19 | # Activation function and derivative 20 | def sigmoid(x): 21 | return 1.0 / (1.0 + np.exp(-x)) 22 | 23 | def sigmoid_prime(x): 24 | return sigmoid(x)*(1.0 - sigmoid(x)) 25 | 26 | # Loss function and derivative 27 | def mse(y_true, y_pred): 28 | return (0.5*(y_true - y_pred)**2).mean() 29 | 30 | def mse_prime(y_true, y_pred): 31 | return y_pred - y_true 32 | 33 | 34 | ################################################################ 35 | # ActivationLayer 36 | # 37 | class ActivationLayer: 38 | def forward(self, input_data): 39 | self.input = input_data 40 | return sigmoid(input_data) 41 | 42 | def backward(self, output_error): 43 | return sigmoid_prime(self.input) * output_error 44 | 45 | def step(self, eta): 46 | return 47 | 48 | 49 | ################################################################ 50 | # FullyConnectedLayer 51 | # 52 | class FullyConnectedLayer: 53 | def __init__(self, input_size, output_size, momentum=0.0): 54 | # for accumulating error over a minibatch 55 | self.delta_w = np.zeros((input_size, output_size)) 56 | self.delta_b = np.zeros((1,output_size)) 57 | self.passes = 0 58 | 59 | # initialize the weights and biases w/small random values 60 | self.weights = np.random.rand(input_size, output_size) - 0.5 61 | self.bias = np.random.rand(1, output_size) - 0.5 62 | 63 | # initial velocities 64 | self.vw = np.zeros((input_size, output_size)) 65 | self.vb = np.zeros((1, output_size)) 66 | self.momentum = momentum 67 | 68 | def forward(self, input_data): 69 | self.input = input_data 70 | return np.dot(self.input, self.weights) + self.bias 71 | 72 | def backward(self, output_error): 73 | input_error = np.dot(output_error, self.weights.T) 74 | weights_error = np.dot(self.input.T, output_error) 75 | 76 | # accumulate the error over the minibatch 77 | self.delta_w += weights_error 78 | self.delta_b += output_error 79 | self.passes += 1 80 | return input_error 81 | 82 | def step(self, eta): 83 | # update the weights and biases by the mean error 84 | # over the minibatch 85 | self.vw = self.momentum * self.vw - eta * self.delta_w / self.passes 86 | self.vb = self.momentum * self.vb - eta * self.delta_b / self.passes 87 | self.weights = self.weights + self.vw 88 | self.bias = self.bias + self.vb 89 | 90 | # reset for the next minibatch 91 | self.delta_w = np.zeros(self.weights.shape) 92 | self.delta_b = np.zeros(self.bias.shape) 93 | self.passes = 0 94 | 95 | 96 | ################################################################ 97 | # Network 98 | # 99 | class Network: 100 | def __init__(self, verbose=True): 101 | self.verbose = verbose 102 | self.layers =
[] 103 | 104 | def add(self, layer): 105 | self.layers.append(layer) 106 | 107 | def predict(self, input_data): 108 | result = [] 109 | for i in range(input_data.shape[0]): 110 | output = input_data[i] 111 | for layer in self.layers: 112 | output = layer.forward(output) 113 | result.append(output) 114 | return result 115 | 116 | def fit(self, x_train, y_train, minibatches, learning_rate, batch_size=64): 117 | for i in range(minibatches): 118 | err = 0 119 | 120 | # select a random minibatch 121 | idx = np.argsort(np.random.random(x_train.shape[0]))[:batch_size] 122 | x_batch = x_train[idx] 123 | y_batch = y_train[idx] 124 | 125 | for j in range(batch_size): 126 | # forward propagation 127 | output = x_batch[j] 128 | for layer in self.layers: 129 | output = layer.forward(output) 130 | 131 | # accumulate loss 132 | err += mse(y_batch[j], output) 133 | 134 | # backward propagation 135 | error = mse_prime(y_batch[j], output) 136 | for layer in reversed(self.layers): 137 | error = layer.backward(error) 138 | 139 | # update weights and biases 140 | for layer in self.layers: 141 | layer.step(learning_rate) 142 | 143 | # report mean loss over minibatch 144 | if (self.verbose) and ((i%10) == 0): 145 | err /= batch_size 146 | print('minibatch %5d/%d error=%0.9f' % (i, minibatches, err)) 147 | 148 | # end NNm.py 149 | 150 | -------------------------------------------------------------------------------- /chapter_11/fmnist.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: fmnist.py 3 | # 4 | # Train and test the small 14x14 FMNIST dataset. 5 | # 6 | # RTK, 03-Feb-2021 7 | # Last update: 19-Feb-2021 8 | # 9 | ################################################################ 10 | 11 | from sklearn.metrics import matthews_corrcoef 12 | import numpy as np 13 | from NN import * 14 | 15 | # Load, reshape, and scale the data 16 | x_train = np.load("../dataset/fmnist_train_images_small.npy")/255 17 | x_test = np.load("../dataset/fmnist_test_images_small.npy")/255 18 | y_train = np.load("../dataset/fmnist_train_labels_vector.npy") 19 | y_test = np.load("../dataset/fmnist_test_labels.npy") 20 | 21 | x_train = x_train.reshape(x_train.shape[0], 1, 14*14) 22 | x_test = x_test.reshape(x_test.shape[0], 1, 14*14) 23 | 24 | # Build the network using sigmoid activations 25 | net = Network(verbose=True) 26 | net.add(FullyConnectedLayer(14*14, 100)) 27 | net.add(ActivationLayer()) 28 | net.add(FullyConnectedLayer(100, 50)) 29 | net.add(ActivationLayer()) 30 | net.add(FullyConnectedLayer(50, 10)) 31 | net.add(ActivationLayer()) 32 | 33 | # Loss and train 34 | net.fit(x_train, y_train, minibatches=40000, learning_rate=1.0) 35 | 36 | # Build the confusion matrix using the test set predictions 37 | out = net.predict(x_test) 38 | pred = np.array(out)[:,0,:] 39 | cm = np.zeros((10,10), dtype="uint32") 40 | for i in range(len(y_test)): 41 | cm[y_test[i],np.argmax(out[i])] += 1 42 | 43 | # Show the results 44 | print() 45 | print(np.array2string(cm)) 46 | print() 47 | print("accuracy = %0.7f" % (np.diag(cm).sum() / cm.sum(),)) 48 | print("MCC = %0.7f" % matthews_corrcoef(y_test, np.argmax(pred, axis=1))) 49 | print() 50 | 51 | -------------------------------------------------------------------------------- /chapter_11/fmnist_analyze.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pylab as plt 3 | from scipy.stats import ttest_ind, mannwhitneyu 4 | 5 | def Cohen_d(a,b): 6 | s1 = np.std(a, 
ddof=1)**2 7 | s2 = np.std(b, ddof=1)**2 8 | return (a.mean() - b.mean()) / np.sqrt(0.5*(s1+s2)) 9 | 10 | # Load the MCC for repeated trainings 11 | m_no = np.load("fmnist_no_momentum_runs.npy") 12 | m_w = np.load("fmnist_w_momentum_runs.npy") 13 | 14 | hn,xn = np.histogram(m_no, bins=5) 15 | hw,xw = np.histogram(m_w, bins=5) 16 | b = plt.bar(xn[:-1], hn, width=0.8*(xn[1]-xn[0]), hatch="/", color="#5f5f5f") 17 | b = plt.bar(xw[:-1], hw, width=0.8*(xn[1]-xn[0]), hatch="\\", color="#7f7f7f") 18 | plt.xlabel("MCC") 19 | plt.ylabel("Count") 20 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 21 | plt.savefig("fmnist_mcc_plot.png", dpi=300) 22 | plt.show() 23 | 24 | print() 25 | print("no momentum: %0.5f +/- %0.5f" % (m_no.mean(), m_no.std(ddof=1)/np.sqrt(len(m_no)))) 26 | print("momentum : %0.5f +/- %0.5f" % (m_w.mean(), m_w.std(ddof=1)/np.sqrt(len(m_w)))) 27 | print() 28 | t,p = ttest_ind(m_w, m_no) 29 | print("t-test momentum vs no (t,p): (%0.8f, %0.8f)" % (t,p)) 30 | U,p = mannwhitneyu(m_w, m_no) 31 | print("Mann-Whitney U : (%0.8f, %0.8f)" % (U,p)) 32 | print("Cohen's d : %0.5f" % Cohen_d(m_w, m_no)) 33 | print() 34 | 35 | -------------------------------------------------------------------------------- /chapter_11/fmnist_momentum.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: fmnist_momentum.py 3 | # 4 | # Train and test the small 14x14 FMNIST dataset. 5 | # 6 | # RTK, 03-Feb-2021 7 | # Last update: 19-Feb-2021 8 | # 9 | ################################################################ 10 | 11 | from sklearn.metrics import matthews_corrcoef 12 | import numpy as np 13 | from NNm import * 14 | 15 | # Load, reshape, and scale the data 16 | x_train = np.load("../dataset/fmnist_train_images_small.npy")/255 17 | x_test = np.load("../dataset/fmnist_test_images_small.npy")/255 18 | y_train = np.load("../dataset/fmnist_train_labels_vector.npy") 19 | y_test = np.load("../dataset/fmnist_test_labels.npy") 20 | 21 | x_train = x_train.reshape(x_train.shape[0], 1, 14*14) 22 | x_test = x_test.reshape(x_test.shape[0], 1, 14*14) 23 | 24 | # Build the network using sigmoid activations 25 | net = Network(verbose=True) 26 | net.add(FullyConnectedLayer(14*14, 100, momentum=0.9)) 27 | net.add(ActivationLayer()) 28 | net.add(FullyConnectedLayer(100, 50, momentum=0.9)) 29 | net.add(ActivationLayer()) 30 | net.add(FullyConnectedLayer(50, 10, momentum=0.9)) 31 | net.add(ActivationLayer()) 32 | 33 | # Loss and train 34 | net.fit(x_train, y_train, minibatches=40000, learning_rate=0.2) 35 | 36 | # Build the confusion matrix using the test set predictions 37 | out = net.predict(x_test) 38 | pred = np.array(out)[:,0,:] 39 | cm = np.zeros((10,10), dtype="uint32") 40 | for i in range(len(y_test)): 41 | cm[y_test[i],np.argmax(out[i])] += 1 42 | 43 | # Show the results 44 | print() 45 | print(np.array2string(cm)) 46 | print() 47 | print("accuracy = %0.7f" % (np.diag(cm).sum() / cm.sum(),)) 48 | print("MCC = %0.7f" % matthews_corrcoef(y_test, np.argmax(pred, axis=1))) 49 | print() 50 | 51 | -------------------------------------------------------------------------------- /chapter_11/fmnist_no_momentum_runs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/chapter_11/fmnist_no_momentum_runs.npy -------------------------------------------------------------------------------- /chapter_11/fmnist_repeat.py: 
-------------------------------------------------------------------------------- 1 | # 2 | # file: fmnist_repeat.py 3 | # 4 | # Train and test the small 14x14 FMNIST dataset. 5 | # 6 | # RTK, 03-Feb-2021 7 | # Last update: 21-Feb-2021 8 | # 9 | ################################################################ 10 | 11 | from sklearn.metrics import matthews_corrcoef 12 | import numpy as np 13 | from NNm import * 14 | 15 | # Load, reshape, and scale the data 16 | x_train = np.load("../dataset/fmnist_train_images_small.npy")/255 17 | x_test = np.load("../dataset/fmnist_test_images_small.npy")/255 18 | y_train = np.load("../dataset/fmnist_train_labels_vector.npy") 19 | y_test = np.load("../dataset/fmnist_test_labels.npy") 20 | 21 | x_train = x_train.reshape(x_train.shape[0], 1, 14*14) 22 | x_test = x_test.reshape(x_test.shape[0], 1, 14*14) 23 | 24 | def train_test(x_train, x_test, y_train, y_test): 25 | # Build the network using sigmoid activations 26 | net = Network(verbose=False) 27 | net.add(FullyConnectedLayer(14*14, 100, momentum=0.9)) 28 | net.add(ActivationLayer()) 29 | net.add(FullyConnectedLayer(100, 50, momentum=0.9)) 30 | net.add(ActivationLayer()) 31 | net.add(FullyConnectedLayer(50, 10, momentum=0.9)) 32 | net.add(ActivationLayer()) 33 | 34 | # Loss and train 35 | net.fit(x_train, y_train, minibatches=10000, learning_rate=0.2) 36 | 37 | out = net.predict(x_test) 38 | pred = np.array(out)[:,0,:] 39 | return matthews_corrcoef(y_test, np.argmax(pred, axis=1)) 40 | 41 | 42 | M = 100 43 | mcc = np.zeros(M) 44 | 45 | for i in range(M): 46 | mcc[i] = train_test(x_train, x_test, y_train, y_test) 47 | print("%03d: MCC = %0.8f" % (i, mcc[i]), flush=True) 48 | 49 | np.save("fmnist_repeat_mcc.npy", mcc) 50 | 51 | print() 52 | print("Overall MCC %0.6f +/- %0.6f" % (mcc.mean(), mcc.std(ddof=1)/np.sqrt(M))) 53 | print() 54 | 55 | -------------------------------------------------------------------------------- /chapter_11/fmnist_w_momentum_runs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/chapter_11/fmnist_w_momentum_runs.npy -------------------------------------------------------------------------------- /chapter_11/gd_1d.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: gd_1d.py 3 | # 4 | # 1D example of GD 5 | # 6 | # RTK, 14-Feb-2021 7 | # Last update: 14-Feb-2021 8 | # 9 | ################################################################ 10 | 11 | import sys 12 | import os 13 | import numpy as np 14 | import matplotlib.pylab as plt 15 | 16 | # The function and its derivative 17 | def f(x): 18 | return 6*x**2 - 12*x + 3 19 | 20 | def d(x): 21 | return 12*x - 12 22 | 23 | 24 | # Show the function, derivative, and minimum 25 | x = np.linspace(-1,3,1000) 26 | y = f(x) 27 | plt.plot(x,y,color='#1f77b4') 28 | x = np.linspace(0,3,10) 29 | z = d(x) 30 | plt.plot(x,z,color='#ff7f0e') 31 | plt.plot([-1,3],[0,0],linestyle=(0,(1,1)),color='k') 32 | plt.plot([1,1],[-10,25],linestyle=(0,(1,1)),color='k') 33 | plt.plot([1,1],[f(1),f(1)],marker='o',color='#1f77b4') 34 | plt.xlabel("$x$") 35 | plt.ylabel("$y$") 36 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 37 | plt.savefig("gd_1d_plot.png", dpi=300) 38 | #plt.show() 39 | plt.close() 40 | 41 | # Show a series of gradient descent steps 42 | x = np.linspace(-1,3,1000) 43 | plt.plot(x,f(x)) 44 | 45 | x = -0.9 46 | eta = 0.03 47 | for i in range(15): 48 | plt.plot(x, 
f(x), marker='o', color='r') 49 | x = x - eta * d(x) 50 | 51 | plt.xlabel("$x$") 52 | plt.ylabel("$y$") 53 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 54 | plt.savefig("gd_1d_steps.png", dpi=300) 55 | #plt.show() 56 | plt.close() 57 | print("Minimum at (%0.6f, %0.6f)" % (x, f(x))) 58 | 59 | # Show oscillation if step size too large 60 | x = np.linspace(0.75,1.25,1000) 61 | plt.plot(x,f(x)) 62 | x = xold = 0.75 63 | for i in range(14): 64 | plt.plot([xold,x], [f(xold),f(x)], marker='o', linestyle='dotted', color='r') 65 | xold = x 66 | x = x - 0.15 * d(x) 67 | 68 | plt.xlabel("$x$") 69 | plt.ylabel("$y$") 70 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 71 | plt.savefig("gd_1d_oscillating.png", dpi=300) 72 | #plt.show() 73 | 74 | -------------------------------------------------------------------------------- /chapter_11/gd_1d_momentum.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: gd_1d_momentum.py 3 | # 4 | # 1D example of GD 5 | # 6 | # RTK, 14-Feb-2021 7 | # Last update: 14-Feb-2021 8 | # 9 | ################################################################ 10 | 11 | import sys 12 | import os 13 | import numpy as np 14 | import matplotlib.pylab as plt 15 | 16 | # The function and its derivative 17 | def f(x): 18 | return 6*x**2 - 12*x + 3 19 | 20 | def d(x): 21 | return 12*x - 12 22 | 23 | m = ['o','s','>','<','*','+','p','h','P','D'] 24 | x = np.linspace(0.75,1.25,1000) 25 | plt.plot(x,f(x)) 26 | x = xold = 0.75 27 | eta = 0.09 28 | mu = 0.8 29 | v = 0.0 30 | for i in range(10): 31 | plt.plot([xold,x], [f(xold),f(x)], marker=m[i], linestyle='dotted', color='r') 32 | xold = x 33 | v = mu*v - eta * d(x) 34 | x = x + v 35 | for i in range(40): 36 | v = mu*v - eta * d(x) 37 | x = x + v 38 | plt.plot(x,f(x),marker='X', color='k') 39 | 40 | plt.xlabel("$x$") 41 | plt.ylabel("$y$") 42 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 43 | plt.savefig("gd_1d_momentum.png", dpi=300) 44 | plt.show() 45 | 46 | -------------------------------------------------------------------------------- /chapter_11/gd_2d.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: gd_2d.py 3 | # 4 | # 2D example of gradient descent 5 | # 6 | # RTK, 14-Feb-2021 7 | # Last update: 14-Feb-2021 8 | # 9 | ################################################################ 10 | 11 | import numpy as np 12 | import matplotlib.pylab as plt 13 | 14 | # Function and partial derivatives 15 | def f(x,y): 16 | return 6*x**2 + 9*y**2 - 12*x - 14*y + 3 17 | 18 | def dx(x): 19 | return 12*x - 12 20 | 21 | def dy(y): 22 | return 18*y - 14 23 | 24 | # Gradient descent steps 25 | N = 100 26 | x,y = np.meshgrid(np.linspace(-1,3,N), np.linspace(-1,3,N)) 27 | z = f(x,y) 28 | plt.contourf(x,y,z,10, cmap="Greys") 29 | plt.contour(x,y,z,10, colors='k', linewidths=1) 30 | plt.plot([0,0],[-1,3],color='k',linewidth=1) 31 | plt.plot([-1,3],[0,0],color='k',linewidth=1) 32 | plt.plot(1,0.7777778,color='k',marker='+') 33 | 34 | x = xold = -0.5 35 | y = yold = 2.9 36 | for i in range(12): 37 | plt.plot([xold,x],[yold,y], marker='o', linestyle='dotted', color='k') 38 | xold = x 39 | yold = y 40 | x = x - 0.02 * dx(x) 41 | y = y - 0.02 * dy(y) 42 | 43 | x = xold = 1.5 44 | y = yold = -0.8 45 | for i in range(12): 46 | plt.plot([xold,x],[yold,y], marker='s', linestyle='dotted', color='k') 47 | xold = x 48 | yold = y 49 | x = x - 0.02 * dx(x) 50 | y = y - 0.02 * dy(y) 51 | 52 | x = xold = 2.7 53 | y = yold = 2.3 54 | for i in range(12): 55 | 
plt.plot([xold,x],[yold,y], marker='<', linestyle='dotted', color='k') 56 | xold = x 57 | yold = y 58 | x = x - 0.02 * dx(x) 59 | y = y - 0.02 * dy(y) 60 | 61 | plt.xlabel("$x$") 62 | plt.ylabel("$y$") 63 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 64 | plt.savefig("gd_2d_steps.png", dpi=300) 65 | plt.show() 66 | plt.close() 67 | 68 | # New function and partial derivatives 69 | def f(x,y): 70 | return 6*x**2 + 40*y**2 - 12*x - 30*y + 3 71 | 72 | def dx(x): 73 | return 12*x - 12 74 | 75 | def dy(y): 76 | return 80*y - 30 77 | 78 | # Large stepsize 79 | N = 100 80 | x,y = np.meshgrid(np.linspace(-1,3,N), np.linspace(-1,3,N)) 81 | z = f(x,y) 82 | plt.contourf(x,y,z,10, cmap="Greys") 83 | plt.contour(x,y,z,10, colors='k', linewidths=1) 84 | plt.plot([0,0],[-1,3],color='k',linewidth=1) 85 | plt.plot([-1,3],[0,0],color='k',linewidth=1) 86 | plt.plot(1,0.375,color='k',marker='+') 87 | 88 | x = xold = -0.5 89 | y = yold = 2.3 90 | for i in range(14): 91 | plt.plot([xold,x],[yold,y], marker='o', linestyle='dotted', color='k') 92 | xold = x 93 | yold = y 94 | x = x - 0.02 * dx(x) 95 | y = y - 0.02 * dy(y) 96 | 97 | x = xold = 2.3 98 | y = yold = 2.3 99 | for i in range(14): 100 | plt.plot([xold,x],[yold,y], marker='s', linestyle='dotted', color='k') 101 | xold = x 102 | yold = y 103 | x = x - 0.01 * dx(x) 104 | y = y - 0.01 * dy(y) 105 | 106 | plt.xlabel("$x$") 107 | plt.ylabel("$y$") 108 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 109 | plt.savefig("gd_2d_oscillating.png", dpi=300) 110 | plt.show() 111 | plt.close() 112 | 113 | -------------------------------------------------------------------------------- /chapter_11/gd_momentum.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: gd_momentum.py 3 | # 4 | # 2D example of gradient descent for a function 5 | # with more than one minimum with momentum 6 | # 7 | # RTK, 14-Feb-2021 8 | # Last update: 21-Feb-2021 9 | # 10 | ################################################################ 11 | 12 | import numpy as np 13 | import matplotlib.pylab as plt 14 | 15 | # Function and partial derivatives 16 | def f(x,y): 17 | return -2*np.exp(-0.5*((x+1)**2+(y-1)**2)) + \ 18 | -np.exp(-0.5*((x-1)**2+(y+1)**2)) 19 | 20 | def dx(x,y): 21 | return 2*(x+1)*np.exp(-0.5*((x+1)**2+(y-1)**2)) + \ 22 | (x-1)*np.exp(-0.5*((x-1)**2+(y+1)**2)) 23 | 24 | def dy(x,y): 25 | return (y+1)*np.exp(-0.5*((x-1)**2+(y+1)**2)) + \ 26 | 2*(y-1)*np.exp(-0.5*((x+1)**2+(y-1)**2)) 27 | 28 | # Gradient descent steps 29 | N = 100 30 | x,y = np.meshgrid(np.linspace(-2,2,N), np.linspace(-2,2,N)) 31 | z = f(x,y) 32 | plt.contourf(x,y,z,10, cmap="Greys") 33 | plt.contour(x,y,z,10, colors='k', linewidths=1) 34 | plt.plot([0,0],[-2,2],color='k',linewidth=1) 35 | plt.plot([-2,2],[0,0],color='k',linewidth=1) 36 | 37 | def gd(x,y, eta,mu, steps, marker): 38 | xold = x 39 | yold = y 40 | vx = vy = 0.0 41 | for i in range(steps): 42 | plt.plot([xold,x],[yold,y], marker=marker, linestyle='dotted', color='k') 43 | xold = x 44 | yold = y 45 | vx = mu*vx - eta * dx(x,y) 46 | vy = mu*vy - eta * dy(x,y) 47 | x = x + vx 48 | y = y + vy 49 | 50 | return x,y 51 | 52 | 53 | #gd(-1.5, 1.2,20, 'o') 54 | #gd( 1.5,-1.8,40, 's') 55 | #gd( 0.0, 0.0,30, '<') 56 | print("(x,y) = (%0.8f, %0.8f)" % gd( 0.7,-0.2, 0.1, 0.9, 25, '>')) 57 | print("(x,y) = (%0.8f, %0.8f)" % gd( 1.5, 1.5, 0.02, 0.9, 90, '*')) 58 | 59 | plt.xlabel("$x$") 60 | plt.ylabel("$y$") 61 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 62 | plt.savefig("gd_momentum_steps.png", dpi=300) 63 | plt.show() 64 | 
plt.close() 65 | 66 | -------------------------------------------------------------------------------- /chapter_11/gd_multiple.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: gd_multiple.py 3 | # 4 | # 2D example of gradient descent for a function 5 | # with more than one minimum 6 | # 7 | # RTK, 14-Feb-2021 8 | # Last update: 14-Feb-2021 9 | # 10 | ################################################################ 11 | 12 | import numpy as np 13 | import matplotlib.pylab as plt 14 | 15 | # Function and partial derivatives 16 | def f(x,y): 17 | return -2*np.exp(-0.5*((x+1)**2+(y-1)**2)) + \ 18 | -np.exp(-0.5*((x-1)**2+(y+1)**2)) 19 | 20 | def dx(x,y): 21 | return 2*(x+1)*np.exp(-0.5*((x+1)**2+(y-1)**2)) + \ 22 | (x-1)*np.exp(-0.5*((x-1)**2+(y+1)**2)) 23 | 24 | def dy(x,y): 25 | return (y+1)*np.exp(-0.5*((x-1)**2+(y+1)**2)) + \ 26 | 2*(y-1)*np.exp(-0.5*((x+1)**2+(y-1)**2)) 27 | 28 | # Gradient descent steps 29 | N = 100 30 | x,y = np.meshgrid(np.linspace(-2,2,N), np.linspace(-2,2,N)) 31 | z = f(x,y) 32 | plt.contourf(x,y,z,10, cmap="Greys") 33 | plt.contour(x,y,z,10, colors='k', linewidths=1) 34 | plt.plot([0,0],[-2,2],color='k',linewidth=1) 35 | plt.plot([-2,2],[0,0],color='k',linewidth=1) 36 | 37 | eta = 0.4 38 | 39 | x = xold = -1.5 40 | y = yold = 1.2 41 | for i in range(9): 42 | plt.plot([xold,x],[yold,y], marker='o', linestyle='dotted', color='k') 43 | xold = x 44 | yold = y 45 | x = x - eta * dx(x,y) 46 | y = y - eta * dy(x,y) 47 | 48 | x = xold = 1.5 49 | y = yold = -1.8 50 | for i in range(9): 51 | plt.plot([xold,x],[yold,y], marker='s', linestyle='dotted', color='k') 52 | xold = x 53 | yold = y 54 | x = x - eta * dx(x,y) 55 | y = y - eta * dy(x,y) 56 | 57 | x = xold = 0.0 58 | y = yold = 0.0 59 | for i in range(20): 60 | plt.plot([xold,x],[yold,y], marker='+', linestyle='dotted', color='k') 61 | xold = x 62 | yold = y 63 | x = x - eta * dx(x,y) 64 | y = y - eta * dy(x,y) 65 | 66 | x = xold = 0.7 67 | y = yold = -0.2 68 | for i in range(20): 69 | plt.plot([xold,x],[yold,y], marker='>', linestyle='dotted', color='k') 70 | xold = x 71 | yold = y 72 | x = x - eta * dx(x,y) 73 | y = y - eta * dy(x,y) 74 | 75 | x = xold = 1.5 76 | y = yold = 1.5 77 | for i in range(30): 78 | plt.plot([xold,x],[yold,y], marker='*', linestyle='dotted', color='k') 79 | xold = x 80 | yold = y 81 | x = x - eta * dx(x,y) 82 | y = y - eta * dy(x,y) 83 | 84 | plt.xlabel("$x$") 85 | plt.ylabel("$y$") 86 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 87 | plt.savefig("gd_multiple_steps.png", dpi=300) 88 | plt.show() 89 | plt.close() 90 | 91 | -------------------------------------------------------------------------------- /chapter_11/gd_nesterov.py: -------------------------------------------------------------------------------- 1 | # 2 | # file: gd_nesterov.py 3 | # 4 | # 2D example of gradient descent for a function 5 | # with more than one minimum and Nesterov momentum 6 | # 7 | # RTK, 14-Feb-2021 8 | # Last update: 21-Feb-2021 9 | # 10 | ################################################################ 11 | 12 | import numpy as np 13 | import matplotlib.pylab as plt 14 | 15 | # Function and partial derivatives 16 | def f(x,y): 17 | return -2*np.exp(-0.5*((x+1)**2+(y-1)**2)) + \ 18 | -np.exp(-0.5*((x-1)**2+(y+1)**2)) 19 | 20 | def dx(x,y): 21 | return 2*(x+1)*np.exp(-0.5*((x+1)**2+(y-1)**2)) + \ 22 | (x-1)*np.exp(-0.5*((x-1)**2+(y+1)**2)) 23 | 24 | def dy(x,y): 25 | return (y+1)*np.exp(-0.5*((x-1)**2+(y+1)**2)) + \ 26 | 2*(y-1)*np.exp(-0.5*((x+1)**2+(y-1)**2)) 
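# NOTE: compare with classical momentum in gd_momentum.py. The gd()
# function below evaluates each partial derivative at a lookahead
# position, dx(x + mu*vx, y) and dy(x, y + mu*vy), instead of at the
# current point (x, y). This per-coordinate lookahead is a
# simplification of the standard Nesterov step, which evaluates the
# full gradient at (x + mu*vx, y + mu*vy).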
27 | 28 | # Gradient descent steps 29 | N = 100 30 | x,y = np.meshgrid(np.linspace(-2,2,N), np.linspace(-2,2,N)) 31 | z = f(x,y) 32 | plt.contourf(x,y,z,10, cmap="Greys") 33 | plt.contour(x,y,z,10, colors='k', linewidths=1) 34 | plt.plot([0,0],[-2,2],color='k',linewidth=1) 35 | plt.plot([-2,2],[0,0],color='k',linewidth=1) 36 | 37 | def gd(x,y, eta,mu, steps, marker): 38 | xold = x 39 | yold = y 40 | vx = vy = 0.0 41 | for i in range(steps): 42 | plt.plot([xold,x],[yold,y], marker=marker, linestyle='dotted', color='k') 43 | xold = x 44 | yold = y 45 | vx = mu*vx - eta * dx(x+mu*vx,y) 46 | vy = mu*vy - eta * dy(x,y+mu*vy) 47 | x = x + vx 48 | y = y + vy 49 | 50 | return x,y 51 | 52 | #gd(-1.5, 1.2,20, 'o') 53 | #gd( 1.5,-1.8,40, 's') 54 | #gd( 0.0, 0.0,30, '<') 55 | print("(x,y) = (%0.8f, %0.8f)" % gd( 0.7,-0.2, 0.1, 0.9, 25, '>')) 56 | print("(x,y) = (%0.8f, %0.8f)" % gd( 1.5, 1.5, 0.02, 0.9, 90, '*')) 57 | 58 | plt.xlabel("$x$") 59 | plt.ylabel("$y$") 60 | plt.tight_layout(pad=0, w_pad=0, h_pad=0) 61 | plt.savefig("gd_nesterov_steps.png", dpi=300) 62 | plt.show() 63 | plt.close() 64 | 65 | -------------------------------------------------------------------------------- /dataset/cifar10_test_images.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/cifar10_test_images.npy -------------------------------------------------------------------------------- /dataset/cifar10_test_labels.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/cifar10_test_labels.npy -------------------------------------------------------------------------------- /dataset/fmnist_test_images_small.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/fmnist_test_images_small.npy -------------------------------------------------------------------------------- /dataset/fmnist_test_labels.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/fmnist_test_labels.npy -------------------------------------------------------------------------------- /dataset/fmnist_train_images_small.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/fmnist_train_images_small.npy -------------------------------------------------------------------------------- /dataset/fmnist_train_labels.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/fmnist_train_labels.npy -------------------------------------------------------------------------------- /dataset/fmnist_train_labels_vector.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/fmnist_train_labels_vector.npy -------------------------------------------------------------------------------- 
/dataset/test_images_full.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/test_images_full.npy -------------------------------------------------------------------------------- /dataset/test_images_small.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/test_images_small.npy -------------------------------------------------------------------------------- /dataset/test_labels.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/test_labels.npy -------------------------------------------------------------------------------- /dataset/train_images_full.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/train_images_full.npy -------------------------------------------------------------------------------- /dataset/train_images_small.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/train_images_small.npy -------------------------------------------------------------------------------- /dataset/train_labels.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/train_labels.npy -------------------------------------------------------------------------------- /dataset/train_labels_vector.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/dataset/train_labels_vector.npy -------------------------------------------------------------------------------- /tutorial.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rkneusel9/MathForDeepLearning/baa5d36cad7ebc84da109098707bc8653518e3ad/tutorial.pdf --------------------------------------------------------------------------------
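A quick end-to-end check of the chapter_11 network code: the sketch below is an illustrative addition, not a file in the repository. It trains the plain gradient-descent Network from NN.py on the XOR truth table, so it runs without the dataset/ files. It assumes it is executed from chapter_11 so that "from NN import *" resolves; the architecture and hyperparameters are illustrative guesses, and since sigmoid units trained with MSE can occasionally stall on XOR, a rerun may be needed if the outputs do not separate.

# xor_sketch.py -- minimal, dataset-free smoke test (hypothetical file name)
import numpy as np
from NN import *

# the four XOR cases, shaped (samples, 1, features) as in fmnist.py
x = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]]], dtype="float64")
y = np.array([[[0]], [[1]], [[1]], [[0]]], dtype="float64")

# a small sigmoid network: 2 inputs -> 8 hidden -> 1 output
net = Network(verbose=False)
net.add(FullyConnectedLayer(2, 8))
net.add(ActivationLayer())
net.add(FullyConnectedLayer(8, 1))
net.add(ActivationLayer())

# batch_size=4 makes every minibatch the full XOR truth table
net.fit(x, y, minibatches=4000, learning_rate=1.0, batch_size=4)

# outputs should be near 0, 1, 1, 0
out = net.predict(x)
for i in range(4):
    print("input", x[i,0], "target", y[i,0,0], "output %0.3f" % out[i][0,0])

Swapping "from NN import *" for "from NNm import *" and adding momentum=0.9 to each FullyConnectedLayer call exercises the momentum variant in the same way, mirroring fmnist_momentum.py.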