├── .gitattributes
├── bad_xor.py
├── l1_regularization.py
├── logistic1.py
├── logistic2.py
├── logistic3.py
├── logistic4.py
├── logistic_donut.py
├── logistic_visualize.py
└── logistic_xor.py

/.gitattributes:
--------------------------------------------------------------------------------
# Auto detect text files and perform LF normalization
* text=auto
--------------------------------------------------------------------------------
/bad_xor.py:
--------------------------------------------------------------------------------
# logistic regression classifier for the XOR problem, using only the raw
# inputs (no x*y feature) to show that a linear model cannot solve XOR.


from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future


import numpy as np
import matplotlib.pyplot as plt

N = 4
D = 2

# XOR
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
T = np.array([0, 1, 1, 0])

# add a column of ones
ones = np.ones((N, 1))

# note: unlike logistic_xor.py, no x*y column is added here
Xb = np.concatenate((ones, X), axis=1)

# randomly initialize the weights
w = np.random.randn(D + 1)

# calculate the model output
z = Xb.dot(w)

def sigmoid(z):
    return 1/(1 + np.exp(-z))


Y = sigmoid(z)

# calculate the cross-entropy error
def cross_entropy(T, Y):
    return -(T*np.log(Y) + (1-T)*np.log(1-Y)).sum()


# let's do gradient descent 100,000 times
learning_rate = 0.001
error = []
w_mags = []
for i in range(100000):
    e = cross_entropy(T, Y)
    error.append(e)
    if i % 1000 == 0:
        print(e)

    # gradient descent weight update (no regularization)
    w += learning_rate * Xb.T.dot(T - Y)

    w_mags.append(w.dot(w))

    # recalculate Y
    Y = sigmoid(Xb.dot(w))

plt.plot(error)
plt.title("Cross-entropy per iteration")
plt.show()

plt.plot(w_mags)
plt.title("w^2 magnitudes")
plt.show()

print("Final w:", w)
print("Final classification rate:", 1 - np.abs(T - np.round(Y)).sum() / N)
--------------------------------------------------------------------------------
/l1_regularization.py:
--------------------------------------------------------------------------------
# demonstrates L1 regularization for logistic regression: only the first few
# input dimensions are predictive, and the L1 penalty pushes the remaining
# weights toward zero.

from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future


import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt

def sigmoid(z):
    return 1/(1 + np.exp(-z))

N = 50
D = 50

# uniformly distributed numbers between -5, +5
X = (np.random.random((N, D)) - 0.5)*10
# X = (np.random.randn(N, D) - 0.5)*10

# true weights - only the first 3 dimensions of X affect Y
true_w = np.array([1, 0.5, -0.5] + [0]*(D - 3))

# generate Y - add Gaussian noise with standard deviation 0.5
Y = np.round(sigmoid(X.dot(true_w) + np.random.randn(N)*0.5))




# let's plot the data to see what it looks like
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X[:,0], X[:,1], X[:,2], c=Y)
plt.show()

# perform gradient descent to find w
costs = [] # keep track of the cost (cross-entropy plus L1 penalty)
w = np.random.randn(D) / np.sqrt(D) # randomly initialize w
learning_rate = 0.001
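# the L1 penalty added to the cross-entropy is l1 * sum_j |w_j|, whose
# (sub)gradient is l1 * sign(w) -- that is the extra term in the weight
# update inside the loop below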
l1 = 3.0 # try different values - what effect does it have on w?
for t in range(5000):
    # update w
    Yhat = sigmoid(X.dot(w))
    delta = Yhat - Y
    w = w - learning_rate*(X.T.dot(delta) + l1*np.sign(w))

    # find and store the cost
    cost = -(Y*np.log(Yhat) + (1-Y)*np.log(1 - Yhat)).mean() + l1*np.abs(w).mean()
    costs.append(cost)

# plot the costs
plt.plot(costs)
plt.show()

print("final w:", w)

# plot our w vs true w
plt.plot(true_w, label='true w')
plt.plot(w, label='w_map')
plt.legend()
plt.show()
--------------------------------------------------------------------------------
/logistic1.py:
--------------------------------------------------------------------------------
# demonstrates how to calculate the output of a logistic unit using numpy.
# the data X and weight vector w are randomly generated from a
# standard normal distribution.


from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future


import numpy as np

N = 100
D = 2


X = np.random.randn(N,D)
# ones = np.array([[1]*N]).T # old
ones = np.ones((N, 1))
Xb = np.concatenate((ones, X), axis=1)

w = np.random.randn(D + 1)

z = Xb.dot(w)

def sigmoid(z):
    return 1/(1 + np.exp(-z))

print(sigmoid(z))
--------------------------------------------------------------------------------
/logistic2.py:
--------------------------------------------------------------------------------
# demonstrates how to calculate the cross-entropy error function
# in numpy.


from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future



import numpy as np

N = 100
D = 2


X = np.random.randn(N,D)

# center the first 50 points at (-2,-2)
X[:50,:] = X[:50,:] - 2*np.ones((50,D))

# center the last 50 points at (2, 2)
X[50:,:] = X[50:,:] + 2*np.ones((50,D))

# labels: first 50 are 0, last 50 are 1
T = np.array([0]*50 + [1]*50)

# add a column of ones
# ones = np.array([[1]*N]).T # old
ones = np.ones((N, 1))
Xb = np.concatenate((ones, X), axis=1)

# randomly initialize the weights
w = np.random.randn(D + 1)

# calculate the model output
z = Xb.dot(w)

def sigmoid(z):
    return 1/(1 + np.exp(-z))

Y = sigmoid(z)

# calculate the cross-entropy error
def cross_entropy(T, Y):
    E = 0
    for i in range(len(T)):
        if T[i] == 1:
            E -= np.log(Y[i])
        else:
            E -= np.log(1 - Y[i])
    return E

print(cross_entropy(T, Y))

# try it with the closed-form (Bayes classifier) solution
w = np.array([0, 4, 4])

# calculate the model output
z = Xb.dot(w)
Y = sigmoid(z)

# calculate the cross-entropy error
print(cross_entropy(T, Y))

--------------------------------------------------------------------------------
/logistic3.py:
--------------------------------------------------------------------------------
# demonstrates how to do gradient descent with numpy matrices.
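# for a sigmoid output Y = sigmoid(Xb.dot(w)) with targets T, the gradient of
# the cross-entropy error J = -sum( T*log(Y) + (1-T)*log(1-Y) ) works out to
#   dJ/dw = Xb.T.dot(Y - T),
# so the descent step below is w += learning_rate * Xb.T.dot(T - Y)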


from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future


import numpy as np
import matplotlib.pyplot as plt

N = 100
D = 2

N_per_class = N//2


X = np.random.randn(N,D)

# center the first 50 points at (-2,-2)
X[:N_per_class,:] = X[:N_per_class,:] - 2*np.ones((N_per_class,D))

# center the last 50 points at (2, 2)
X[N_per_class:,:] = X[N_per_class:,:] + 2*np.ones((N_per_class,D))

# labels: first N_per_class are 0, last N_per_class are 1
T = np.array([0]*N_per_class + [1]*N_per_class)

# add a column of ones
# ones = np.array([[1]*N]).T # old
ones = np.ones((N, 1))
Xb = np.concatenate((ones, X), axis=1)

# randomly initialize the weights
w = np.random.randn(D + 1)

# calculate the model output
z = Xb.dot(w)

def sigmoid(z):
    return 1/(1 + np.exp(-z))


Y = sigmoid(z)

# calculate the cross-entropy error
def cross_entropy(T, Y):
    E = 0
    for i in range(len(T)):
        if T[i] == 1:
            E -= np.log(Y[i])
        else:
            E -= np.log(1 - Y[i])
    return E


# let's do gradient descent 100 times
learning_rate = 0.1
for i in range(100):
    if i % 10 == 0:
        print(cross_entropy(T, Y))

    # gradient descent weight update
    w += learning_rate * Xb.T.dot(T - Y)

    # recalculate Y
    Y = sigmoid(Xb.dot(w))


print("Final w:", w)

# plot the data and separating line
plt.scatter(X[:,0], X[:,1], c=T, s=100, alpha=0.5)
x_axis = np.linspace(-6, 6, 100)
y_axis = -(w[0] + x_axis*w[1]) / w[2]
plt.plot(x_axis, y_axis)
plt.show()

--------------------------------------------------------------------------------
/logistic4.py:
--------------------------------------------------------------------------------
# demonstrates how to do gradient descent with numpy matrices,
# with regularization.
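# adding an L2 penalty (lambda/2) * w.dot(w) to the cross-entropy contributes
# lambda * w to the gradient, so the descent step becomes
#   w += learning_rate * (Xb.T.dot(T - Y) - lambda*w)
# with lambda = 0.1 in the update below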


from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future



import numpy as np

N = 100
D = 2


X = np.random.randn(N,D)

# center the first 50 points at (-2,-2)
X[:50,:] = X[:50,:] - 2*np.ones((50,D))

# center the last 50 points at (2, 2)
X[50:,:] = X[50:,:] + 2*np.ones((50,D))

# labels: first 50 are 0, last 50 are 1
T = np.array([0]*50 + [1]*50)

# add a column of ones
# ones = np.array([[1]*N]).T
ones = np.ones((N, 1))
Xb = np.concatenate((ones, X), axis=1)

# randomly initialize the weights
w = np.random.randn(D + 1)

# calculate the model output
z = Xb.dot(w)

def sigmoid(z):
    return 1/(1 + np.exp(-z))


Y = sigmoid(z)

# calculate the cross-entropy error
def cross_entropy(T, Y):
    E = 0
    for i in range(len(T)):
        if T[i] == 1:
            E -= np.log(Y[i])
        else:
            E -= np.log(1 - Y[i])
    return E


# let's do gradient descent 100 times
learning_rate = 0.1
for i in range(100):
    if i % 10 == 0:
        print(cross_entropy(T, Y))

    # gradient descent weight update with regularization
    w += learning_rate * ( Xb.T.dot(T - Y) - 0.1*w )

    # recalculate Y
    Y = sigmoid(Xb.dot(w))


print("Final w:", w)


--------------------------------------------------------------------------------
/logistic_donut.py:
--------------------------------------------------------------------------------
# logistic regression classifier for the donut problem.


from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future



import numpy as np
import matplotlib.pyplot as plt

N = 1000
D = 2

R_inner = 5
R_outer = 10

# distance from origin is radius + random normal
# angle theta is uniformly distributed between (0, 2pi)
R1 = np.random.randn(N//2) + R_inner
theta = 2*np.pi*np.random.random(N//2)
X_inner = np.concatenate([[R1 * np.cos(theta)], [R1 * np.sin(theta)]]).T

R2 = np.random.randn(N//2) + R_outer
theta = 2*np.pi*np.random.random(N//2)
X_outer = np.concatenate([[R2 * np.cos(theta)], [R2 * np.sin(theta)]]).T

X = np.concatenate([ X_inner, X_outer ])
T = np.array([0]*(N//2) + [1]*(N//2)) # labels: first half are 0, last half are 1

plt.scatter(X[:,0], X[:,1], c=T)
plt.show()



# add a column of ones
# ones = np.array([[1]*N]).T # old
ones = np.ones((N, 1))

# add a column of r = sqrt(x^2 + y^2)
r = np.sqrt( (X * X).sum(axis=1) ).reshape(-1, 1)
Xb = np.concatenate((ones, r, X), axis=1)

# randomly initialize the weights
w = np.random.randn(D + 2)

# calculate the model output
z = Xb.dot(w)

def sigmoid(z):
    return 1/(1 + np.exp(-z))


Y = sigmoid(z)

# calculate the cross-entropy error
def cross_entropy(T, Y):
    return -(T*np.log(Y) + (1-T)*np.log(1-Y)).sum()


# let's do gradient descent 5000 times
learning_rate = 0.0001
error = []
for i in range(5000):
    e = cross_entropy(T, Y)
    error.append(e)
    if i % 500 == 0:
        print(e)

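    # the r = sqrt(x^2 + y^2) column makes the two rings roughly linearly
    # separable, so without the small L2 penalty in the update below the
    # weights would keep growing as the sigmoid tries to saturate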
    # gradient descent weight update with regularization
    w += learning_rate * ( Xb.T.dot(T - Y) - 0.1*w )

    # recalculate Y
    Y = sigmoid(Xb.dot(w))

plt.plot(error)
plt.title("Cross-entropy per iteration")
plt.show()

print("Final w:", w)
print("Final classification rate:", 1 - np.abs(T - np.round(Y)).sum() / N)
--------------------------------------------------------------------------------
/logistic_visualize.py:
--------------------------------------------------------------------------------
# visualizes the Bayes solution


from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future



import numpy as np
import matplotlib.pyplot as plt

N = 100
D = 2


X = np.random.randn(N,D)

# center the first 50 points at (-2,-2)
X[:50,:] = X[:50,:] - 2*np.ones((50,D))

# center the last 50 points at (2, 2)
X[50:,:] = X[50:,:] + 2*np.ones((50,D))

# labels: first 50 are 0, last 50 are 1
T = np.array([0]*50 + [1]*50)

# add a column of ones
# ones = np.array([[1]*N]).T
ones = np.ones((N, 1))
Xb = np.concatenate((ones, X), axis=1)

def sigmoid(z):
    return 1/(1 + np.exp(-z))

# get the closed-form solution
w = np.array([0, 4, 4])

# calculate the model output
z = Xb.dot(w)
Y = sigmoid(z)

plt.scatter(X[:,0], X[:,1], c=T, s=100, alpha=0.5)

x_axis = np.linspace(-6, 6, 100)
y_axis = -x_axis
plt.plot(x_axis, y_axis)
plt.show()
--------------------------------------------------------------------------------
/logistic_xor.py:
--------------------------------------------------------------------------------
# logistic regression classifier for the XOR problem.
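# XOR is not linearly separable in (x1, x2) alone, but adding the product
# feature x1*x2 makes it separable in the augmented space; for example,
# z = -0.5 - 2*x1*x2 + x1 + x2 is positive exactly for the two inputs labeled 1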


from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future


import numpy as np
import matplotlib.pyplot as plt

N = 4
D = 2

# XOR
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
T = np.array([0, 1, 1, 0])

# add a column of ones
# ones = np.array([[1]*N]).T
ones = np.ones((N, 1))

# add a column of xy = x*y
xy = (X[:,0] * X[:,1]).reshape(N, 1)
Xb = np.concatenate((ones, xy, X), axis=1)

# randomly initialize the weights
w = np.random.randn(D + 2)

# calculate the model output
z = Xb.dot(w)

def sigmoid(z):
    return 1/(1 + np.exp(-z))


Y = sigmoid(z)

# calculate the cross-entropy error
def cross_entropy(T, Y):
    E = 0
    for i in range(len(T)):
        if T[i] == 1:
            E -= np.log(Y[i])
        else:
            E -= np.log(1 - Y[i])
    return E


# let's do gradient descent 10,000 times
learning_rate = 0.01
error = []
for i in range(10000):
    e = cross_entropy(T, Y)
    error.append(e)
    if i % 1000 == 0:
        print(e)

    # gradient descent weight update with regularization
    w += learning_rate * ( Xb.T.dot(T - Y) - 0.01*w )

    # recalculate Y
    Y = sigmoid(Xb.dot(w))

plt.plot(error)
plt.title("Cross-entropy per iteration")
plt.show()

print("Final w:", w)
print("Final classification rate:", 1 - np.abs(T - np.round(Y)).sum() / N)
--------------------------------------------------------------------------------