├── PCA ├── PCA.ipynb ├── PCA.py └── images │ ├── PCA.png │ ├── dataset.png │ └── eigen_vectors.png ├── README.md ├── decision_tree ├── decision_tree.ipynb ├── decision_tree.py └── images │ ├── data.png │ └── decision_boundary.png ├── k-means ├── images │ ├── dataset.png │ ├── k-means.png │ └── total_dist_vs_k.png ├── k-means.ipynb └── k-means.py ├── linear_regression ├── images │ ├── Error.png │ ├── cost.png │ └── data.png ├── linear_regression.ipynb ├── linear_regression.py ├── linear_regression_data.csv └── linear_regression_data_multi.csv ├── logistic_regression ├── images │ ├── cost.png │ ├── data.png │ ├── data_multi-class.png │ ├── decision_boundary.png │ ├── decision_boundary_multi-class.png │ ├── decision_boundary_overfitting.png │ ├── decision_boundary_regularization.png │ └── decision_boundary_underfitting.png ├── logistic_regression.ipynb ├── logistic_regression.py ├── logistic_regression_data.csv ├── logistic_regression_multi-class.ipynb ├── logistic_regression_multi-class.py └── logistic_regression_reg.py ├── neural_network ├── images │ └── decision_boundary_nnet.png ├── neural_network.ipynb ├── neural_network.py └── neural_network_mnist.py └── title.png /PCA/PCA.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import matplotlib.pyplot as plt\n", 11 | "import seaborn as sns\n", 12 | "%matplotlib inline" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 3, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "x = np.array(range(25))\n", 22 | "y = x ** 1.3 + np.random.normal(10, 10, x.shape[0])\n", 23 | "X = np.stack((x, y), axis = 1)" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 4, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "image/png": "\n", 34 | "text/plain": [ 35 | "
" 36 | ] 37 | }, 38 | "metadata": { 39 | "needs_background": "light" 40 | }, 41 | "output_type": "display_data" 42 | } 43 | ], 44 | "source": [ 45 | "sns.scatterplot(x = X[:, 0], y = X[:, 1], edgecolor = \"none\")\n", 46 | "plt.title('Dataset')\n", 47 | "plt.ylabel('y')\n", 48 | "plt.xlabel('x')\n", 49 | "plt.show()" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 14, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "def feature_normaliza(X):\n", 59 | " mu = np.mean(X, 0) \n", 60 | " sigma = np.std(X, 0) \n", 61 | " def get_norm(col):\n", 62 | " mu = np.mean(col) \n", 63 | " sigma = np.std(col)\n", 64 | " return (col - mu)/sigma\n", 65 | " return np.apply_along_axis(get_norm, 0, X), mu, sigma\n", 66 | "\n", 67 | "def drawline(p1, p2, color = 'r'):\n", 68 | " sns.lineplot([p1[0],p2[0]], [p1[1],p2[1]], color = color) \n", 69 | " \n", 70 | "def project_data(X_norm, U, K):\n", 71 | " Z = np.zeros((X_norm.shape[0], K))\n", 72 | " U_reduce = U[:, 0:K] \n", 73 | " Z = np.dot(X_norm, U_reduce) \n", 74 | " return Z\n", 75 | "\n", 76 | "def recover_data(Z, U, K):\n", 77 | " X_rec = np.zeros((Z.shape[0], U.shape[0]))\n", 78 | " U_recude = U[:, 0:K]\n", 79 | " X_rec = np.dot(Z, U_recude.T)\n", 80 | " return X_rec" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 6, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "X_norm, mu, sigma = feature_normaliza(X) " 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 16, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "data": { 99 | "image/png": "\n", 100 | "text/plain": [ 101 | "
" 102 | ] 103 | }, 104 | "metadata": { 105 | "needs_background": "light" 106 | }, 107 | "output_type": "display_data" 108 | } 109 | ], 110 | "source": [ 111 | "m = X.shape[0]\n", 112 | "Sig = np.dot(X_norm.T,X_norm)/m \n", 113 | "U,S,V = np.linalg.svd(Sig) \n", 114 | "\n", 115 | "sns.scatterplot(x = X_norm[:, 0], y = X_norm[:, 1], edgecolor = \"none\")\n", 116 | "drawline((0, 0), S[0]*U[:,0], color = 'g')\n", 117 | "drawline((0, 0), S[1]*U[:,1])\n", 118 | "plt.ylabel('y_norm')\n", 119 | "plt.xlabel('x_norm')\n", 120 | "plt.axis('square')\n", 121 | "plt.show()" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 10, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "data": { 131 | "image/png": "\n", 132 | "text/plain": [ 133 | "
" 134 | ] 135 | }, 136 | "metadata": { 137 | "needs_background": "light" 138 | }, 139 | "output_type": "display_data" 140 | } 141 | ], 142 | "source": [ 143 | "K = 1 \n", 144 | "Z = project_data(X_norm, U, K) \n", 145 | "X_rec = recover_data(Z, U, K) \n", 146 | "\n", 147 | "sns.scatterplot(x = X_norm[:, 0], y = X_norm[:, 1], edgecolor = \"none\")\n", 148 | "sns.scatterplot(x = X_rec[:, 0], y = X_rec[:, 1], edgecolor = \"none\")\n", 149 | "plt.legend(['Raw data', 'Post-PCA'])\n", 150 | "\n", 151 | "for i in range(X_norm.shape[0]):\n", 152 | " drawline(X_norm[i,:], X_rec[i,:])\n", 153 | "\n", 154 | "plt.ylabel('y_norm')\n", 155 | "plt.xlabel('x_norm')\n", 156 | "plt.axis('square')\n", 157 | "plt.show()" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [] 166 | } 167 | ], 168 | "metadata": { 169 | "kernelspec": { 170 | "display_name": "Python 3", 171 | "language": "python", 172 | "name": "python3" 173 | }, 174 | "language_info": { 175 | "codemirror_mode": { 176 | "name": "ipython", 177 | "version": 3 178 | }, 179 | "file_extension": ".py", 180 | "mimetype": "text/x-python", 181 | "name": "python", 182 | "nbconvert_exporter": "python", 183 | "pygments_lexer": "ipython3", 184 | "version": "3.7.4" 185 | } 186 | }, 187 | "nbformat": 4, 188 | "nbformat_minor": 2 189 | } 190 | -------------------------------------------------------------------------------- /PCA/PCA.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | import os, sys 5 | 6 | def feature_normaliza(X): 7 | mu = np.mean(X, 0) 8 | sigma = np.std(X, 0) 9 | def get_norm(col): 10 | mu = np.mean(col) 11 | sigma = np.std(col) 12 | return (col - mu)/sigma 13 | return np.apply_along_axis(get_norm, 0, X), mu, sigma 14 | 15 | def drawline(p1, p2, color = 'r'): 16 | sns.lineplot([p1[0],p2[0]], [p1[1],p2[1]], 
color = color) 17 | 18 | def project_data(X_norm, U, K): 19 | Z = np.zeros((X_norm.shape[0], K)) 20 | U_reduce = U[:, 0:K] 21 | Z = np.dot(X_norm, U_reduce) 22 | return Z 23 | 24 | def recover_data(Z, U, K): 25 | X_rec = np.zeros((Z.shape[0], U.shape[0])) 26 | U_recude = U[:, 0:K] 27 | X_rec = np.dot(Z, U_recude.T) 28 | return X_rec 29 | 30 | def PCA(X, K): 31 | X_norm, mu, sigma = feature_normaliza(X) 32 | 33 | Sig = np.dot(X_norm.T,X_norm)/X_norm.shape[0] 34 | U,S,V = np.linalg.svd(Sig) 35 | 36 | Z = project_data(X_norm, U, K) 37 | X_rec = recover_data(Z, U, K) 38 | return X_rec 39 | 40 | if __name__ == '__main__': 41 | images_dir = os.path.join(sys.path[0], 'images') 42 | if not os.path.exists(images_dir): 43 | os.makedirs(images_dir) 44 | 45 | x = np.array(range(25)) 46 | y = x ** 1.3 + np.random.normal(10, 10, x.shape[0]) 47 | X = np.stack((x, y), axis = 1) 48 | 49 | sns.scatterplot(x = X[:, 0], y = X[:, 1], edgecolor = "none") 50 | plt.title('Dataset') 51 | plt.ylabel('y') 52 | plt.xlabel('x') 53 | plt.savefig(os.path.join(images_dir, 'dataset.png')) 54 | plt.clf() 55 | 56 | X_norm, mu, sigma = feature_normaliza(X) 57 | Sig = np.dot(X_norm.T,X_norm)/X_norm.shape[0] 58 | U,S,V = np.linalg.svd(Sig) 59 | 60 | sns.scatterplot(x = X_norm[:, 0], y = X_norm[:, 1], edgecolor = "none") 61 | drawline((0, 0), S[0]*U[:,0], color = 'g') 62 | drawline((0, 0), S[1]*U[:,1]) 63 | plt.ylabel('y_norm') 64 | plt.xlabel('x_norm') 65 | plt.axis('square') 66 | plt.savefig(os.path.join(images_dir, 'eigen_vectors.png')) 67 | plt.clf() 68 | 69 | K = 1 70 | X_rec = PCA(X, K) 71 | sns.scatterplot(x = X_norm[:, 0], y = X_norm[:, 1], edgecolor = "none") 72 | sns.scatterplot(x = X_rec[:, 0], y = X_rec[:, 1], edgecolor = "none") 73 | plt.legend(['Raw data', 'Post-PCA']) 74 | 75 | for i in range(X_norm.shape[0]): 76 | drawline(X_norm[i,:], X_rec[i,:]) 77 | 78 | plt.ylabel('y_norm') 79 | plt.xlabel('x_norm') 80 | plt.axis('square') 81 | plt.savefig(os.path.join(images_dir, 'PCA.png')) 82 | 
plt.clf() 83 | -------------------------------------------------------------------------------- /PCA/images/PCA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szjunma/ML-Algorithm-with-Python/25f9f14372b32be342c4e36b9e0c7817e9391c5c/PCA/images/PCA.png -------------------------------------------------------------------------------- /PCA/images/dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szjunma/ML-Algorithm-with-Python/25f9f14372b32be342c4e36b9e0c7817e9391c5c/PCA/images/dataset.png -------------------------------------------------------------------------------- /PCA/images/eigen_vectors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szjunma/ML-Algorithm-with-Python/25f9f14372b32be342c4e36b9e0c7817e9391c5c/PCA/images/eigen_vectors.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ML Algorithm with Python 2 | 3 |

4 | 5 |

6 | 7 | ## Table of Contents 8 | 1. [Linear Regression](#linear_regression) 9 | 2. [Logistic Regression](#logistic_regression) 10 | 3. [Neural Network](#neural_network) 11 | 4. [Decision Tree](#decision_tree) 12 | 5. [K-Means](#k-means) 13 | 6. [Principal Component Analysis](#principal_component_analysis) 14 | 15 | ## Being constantly updated .. 16 | 17 | Machine learning algorithms written in Python 18 | - Requires only basic linear algebra 19 | - Uses NumPy for matrix operations to avoid costly looping in Python 20 | - Usually includes a notebook for easy visualization 21 | - Simple examples provided 22 | 23 | To run an algorithm, `cd` to the corresponding directory and run the following (replace `***` with the name of the algorithm) in a terminal: 24 | 25 | ``` 26 | python ***.py 27 | ``` 28 | 29 | (I also plan to package each algorithm into a class.) 30 | 31 | ## Credit 32 | Most equations are from Stanford's [CS 229](http://cs229.stanford.edu/syllabus-autumn2018.html) class. 33 | 34 | 35 | ## 1. 
[Linear Regression](/linear_regression/linear_regression.ipynb) 36 | 37 | ### Cost function 38 | 39 | ``` 40 | def cost(X, y, theta): 41 | h = np.dot(X, theta) 42 | cos = np.sum((h - y) * ( h - y))/(2 * len(y)) 43 | return cos 44 | ``` 45 | 46 | ### Gradient descent 47 | ``` 48 | def gradient_descent(X, y, theta, alpha, num_iters): 49 | m = len(y) 50 | costs = [] 51 | for _ in range(num_iters): 52 | h = np.dot(X,theta) 53 | theta -= alpha * np.dot(X.T, (h - y))/m 54 | costs.append(cost(X, y, theta)) 55 | return theta, costs 56 | ``` 57 | ### Feature normalization 58 | ``` 59 | def feature_normaliza(X): 60 | mu = np.mean(X, 0) 61 | sigma = np.std(X, 0) 62 | def get_norm(col): 63 | mu = np.mean(col) 64 | sigma = np.std(col) 65 | return (col - mu)/sigma 66 | return np.apply_along_axis(get_norm, 0, X), mu, sigma 67 | ``` 68 | 69 | ### Main function 70 | 71 | ``` 72 | def linear_regression(X, y, alpha = 0.01,num_iters = 100): 73 | X = np.append(np.ones((X.shape[0], 1)), X, axis = 1) 74 | theta = np.zeros((X.shape[1], 1), dtype = np.float64) 75 | theta, costs = gradient_descent(X, y, theta, alpha, num_iters) 76 | predicted = np.dot(X, theta) 77 | return predicted, theta, costs 78 | ``` 79 | ### Plot an example 80 | ``` 81 | predicted, theta, costs = linear_regression(X, y) 82 | 83 | plt.plot(X, predicted, 'b') 84 | plt.plot(X, y, 'rx', 10) 85 | for i, x in enumerate(X): 86 | plt.vlines(x, min(predicted[i], y[i]), max(predicted[i], y[i])) 87 | plt.ylabel('Y') 88 | plt.xlabel('X') 89 | plt.legend(('linear fit', 'data')) 90 | plt.show() 91 | ``` 92 |

93 | 94 |

95 | 96 | 97 | ## 2. [Logistic Regression (with Regularization)](/logistic_regression/logistic_regression.ipynb) 98 | ### Sigmoid function 99 | ``` 100 | def sigmoid(z): 101 | return 1/(1 + np.exp(-z)) 102 | ``` 103 | 104 | ### Cost function 105 | 106 | ``` 107 | def cost_reg(theta, X, y, lam = 0): 108 | h = sigmoid(np.dot(X, theta)) 109 | theta1 = theta.copy() 110 | theta1[0] = 0 111 | cos = -(np.sum(y * np.log(h)) + np.sum((1 - y) * np.log(1 - h)))/len(y) + lam * np.sum(theta1 * theta1)/len(y) 112 | return cos 113 | ``` 114 | 115 | ### Expand features 116 | ``` 117 | def expand_feature(x1, x2, power = 2): 118 | #expand a 2D feature matrix to polynimial features up to the power 119 | new_x = np.ones((x1.shape[0], 1)) 120 | for i in range(1, power + 1): 121 | for j in range(i + 1): 122 | new_x = np.append(new_x, (x1**(i-j)*(x2**j)).reshape(-1, 1), axis = 1) 123 | return new_x 124 | ``` 125 | 126 | ### Gradient descent 127 | ``` 128 | def gradient_descent_reg(X, y, theta, alpha, lam = 0, num_iters = 100): 129 | m = len(y) 130 | costs = [] 131 | 132 | for _ in range(num_iters): 133 | h = sigmoid(np.dot(X, theta)) 134 | theta1 = theta.copy() 135 | theta1[0] = 0 136 | theta -= alpha * (np.dot(X.T, (h - y)) + 2 * lam * theta1)/m 137 | costs.append(cost_reg(theta, X, y)) 138 | return theta, costs 139 | ``` 140 | 141 | ### Prediction 142 | ``` 143 | def predict(theta, X): 144 | return (sigmoid(np.dot(X, theta)) > 0.5).flatten() 145 | ``` 146 | ### Main function 147 | ``` 148 | def logistic_regression_reg(X, y, power = 2, alpha = 0.01, lam = 0, num_iters = 100): 149 | X = expand_feature(X[:, 0], X[:, 1], power = power) 150 | theta = np.zeros((X.shape[1], 1), dtype = np.float64) 151 | theta, costs = gradient_descent_reg(X, y, theta, alpha, lam, num_iters) 152 | predicted = predict(theta, X) 153 | return predicted, theta, costs 154 | ``` 155 | ### Examples 156 |

157 | 158 | 159 | 160 |

161 | 162 | 163 | ## 3. [Neural Network](/neural_network/neural_network.ipynb) 164 | ### Initialize parameters 165 | ``` 166 | def init_para(D, K, h): 167 | W = np.random.normal(0, 0.01, (D, h)) 168 | b = np.zeros((1, h), dtype = float) 169 | W2 = np.random.normal(0, 0.01, (h, K)) 170 | b2 = np.zeros((1, K), dtype = float) 171 | return W, b, W2, b2 172 | ``` 173 | ### Softmax 174 | ``` 175 | def softmax(scores): 176 | exp_scores = np.exp(scores) 177 | return exp_scores / np.sum(exp_scores, axis = 1).reshape(-1, 1) 178 | ``` 179 | 180 | ### Main function 181 | ``` 182 | def nnet(X, y, step_size = 0.4, lam = 0.0001, h = 10, num_iters = 1000): 183 | # get dim of input 184 | N, D = X.shape 185 | K = y.shape[1] 186 | 187 | W, b, W2, b2 = init_para(D, K, h) 188 | 189 | # gradient descent loop to update weight and bias 190 | for i in range(num_iters): 191 | # hidden layer, ReLU activation 192 | hidden_layer = np.maximum(0, np.dot(X, W) + np.repeat(b, N, axis = 0)) 193 | 194 | # class score 195 | scores = np.dot(hidden_layer, W2) + np.repeat(b2, N, axis = 0) 196 | 197 | # compute and normalize class probabilities 198 | probs = softmax(scores) 199 | 200 | # compute the loss with regularization 201 | data_loss = np.sum(-np.log(probs) * y) / N 202 | reg_loss = 0.5 * lam * np.sum(W * W) + 0.5 * lam * np.sum(W2 * W2) 203 | loss = data_loss + reg_loss 204 | 205 | # check progress 206 | if i%1000 == 0 or i == num_iters: 207 | print("iteration {}: loss {}".format(i, loss)) 208 | 209 | # compute the gradient on scores 210 | dscores = (probs - y) / N 211 | 212 | # backpropate the gradient to the parameters 213 | dW2 = np.dot(hidden_layer.T, dscores) 214 | db2 = np.sum(dscores, axis = 0) 215 | # next backprop into hidden layer 216 | dhidden = np.dot(dscores, W2.T) 217 | # backprop the ReLU non-linearity 218 | dhidden[hidden_layer <= 0] = 0 219 | # finally into W,b 220 | dW = np.dot(X.T, dhidden) 221 | db = np.sum(dhidden, axis = 0) 222 | 223 | # add regularization gradient 
contribution 224 | dW2 = dW2 + lam * W2 225 | dW = dW + lam * W 226 | 227 | # update parameter 228 | W = W - step_size * dW 229 | b = b - step_size * db 230 | W2 = W2 - step_size * dW2 231 | b2 = b2 - step_size * db2 232 | return W, b, W2, b2 233 | ``` 234 | 235 | ### Example 236 |

237 | 238 |

239 | 240 | 241 | ## 4. [Decision Tree](/decision_tree/decision_tree.ipynb) 242 | ### Gini impurity/Entropy 243 | ``` 244 | def gini_impurity(y): 245 | # calculate gini_impurity given labels/classes of each example 246 | m = y.shape[0] 247 | cnts = dict(zip(*np.unique(y, return_counts = True))) 248 | impurity = 1 - sum((cnt/m)**2 for cnt in cnts.values()) 249 | return impurity 250 | 251 | def entropy(y): 252 | # calculate entropy given labels/classes of each example 253 | m = y.shape[0] 254 | cnts = dict(zip(*np.unique(y, return_counts = True))) 255 | disorder = - sum((cnt/m)*log(cnt/m) for cnt in cnts.values()) 256 | return disorder 257 | ``` 258 | 259 | ### Information gain 260 | ``` 261 | def info_gain(l_y, r_y, cur_gini): 262 | # calculate the information gain for a certain split 263 | m, n = l_y.shape[0], r_y.shape[0] 264 | p = m / (m + n) 265 | return cur_gini - p * gini_impurity(l_y) - (1 - p) * gini_impurity(r_y) 266 | ``` 267 | 268 | ### Find best split 269 | ``` 270 | def get_split(X, y): 271 | # loop through features and values to find best combination with the most information gain 272 | best_gain, best_index, best_value = 0, None, None 273 | 274 | cur_gini = gini_impurity(y) 275 | n_features = X.shape[1] 276 | 277 | for index in range(n_features): 278 | 279 | values = np.unique(X[:, index], return_counts = False) 280 | 281 | for value in values: 282 | 283 | left, right = test_split(index, value, X, y) 284 | 285 | if left['y'].shape[0] == 0 or right['y'].shape[0] == 0: 286 | continue 287 | 288 | gain = info_gain(left['y'], right['y'], cur_gini) 289 | 290 | if gain > best_gain: 291 | best_gain, best_index, best_value = gain, index, value 292 | best_split = {'gain': best_gain, 'index': best_index, 'value': best_value} 293 | return best_split 294 | ``` 295 | ### Create leaf and decision node 296 | ``` 297 | class Leaf: 298 | # define a leaf node 299 | def __init__(self, y): 300 | self.counts = dict(zip(*np.unique(y, return_counts = True))) 301 | 
self.prediction = max(self.counts.keys(), key = lambda x: self.counts[x]) 302 | 303 | class Decision_Node: 304 | # define a decision node 305 | def __init__(self, index, value, left, right): 306 | self.index, self.value = index, value 307 | self.left, self.right = left, right 308 | ``` 309 | ### Training (build decision tree) 310 | ``` 311 | def decision_tree(X, y, max_dep = 5, min_size = 10): 312 | # train the decision tree model with a dataset 313 | correct_prediction = 0 314 | 315 | def build_tree(X, y, dep, max_dep = max_dep, min_size = min_size): 316 | # recursively build the tree 317 | split = get_split(X, y) 318 | 319 | if split['gain'] == 0 or dep >= max_dep or y.shape[0] <= min_size: 320 | nonlocal correct_prediction 321 | leaf = Leaf(y) 322 | correct_prediction += leaf.counts[leaf.prediction] 323 | return leaf 324 | 325 | left, right = test_split(split['index'], split['value'], X, y) 326 | 327 | left_node = build_tree(left['X'], left['y'], dep + 1) 328 | right_node = build_tree(right['X'], right['y'], dep + 1) 329 | 330 | return Decision_Node(split['index'], split['value'], left_node, right_node) 331 | 332 | root = build_tree(X, y, 0) 333 | 334 | return correct_prediction/y.shape[0], root 335 | ``` 336 | ### Prediction 337 | ``` 338 | def predict(x, node): 339 | if isinstance(node, Leaf): 340 | return node.prediction 341 | 342 | if x[node.index] < node.value: 343 | return predict(x, node.left) 344 | else: 345 | return predict(x, node.right) 346 | ``` 347 | 348 | 349 | ### Example 350 |

351 | 352 |

353 | 354 | 355 | ## 5. [K-Means](/k-means/k-means.ipynb) 356 | ### Initialize centroids 357 | ``` 358 | def init_centroid(X, K): 359 | m = X.shape[0] 360 | idx = np.random.choice(m, K, replace = False) 361 | return X[idx, :] 362 | ``` 363 | ### Update labels 364 | ``` 365 | def update_label(X, centroid): 366 | m, K = X.shape[0], centroid.shape[0] 367 | dist = np.zeros((m, K)) 368 | label = np.zeros((m, 1)) 369 | 370 | for i in range(m): 371 | for j in range(K): 372 | dist[i,j] = np.dot((X[i, :] - centroid[j, :]).T, (X[i, :] - centroid[j, :])) 373 | 374 | label = np.argmin(dist, axis = 1) 375 | total_dist = np.sum(np.choose(label, dist.T)) 376 | return label, total_dist 377 | ``` 378 | ### Update centroids 379 | ``` 380 | def update_centroid(X, label, K): 381 | D = X.shape[1] 382 | centroid = np.zeros((K, D)) 383 | for i in range(K): 384 | centroid[i, :] = np.mean(X[label.flatten() == i, :], axis=0).reshape(1,-1) 385 | return centroid 386 | ``` 387 | ### K-Means function 388 | ``` 389 | def k_means(X, K, num_iters = 100): 390 | m = X.shape[0] 391 | centroid = init_centroid(X, K) 392 | 393 | for _ in range(num_iters): 394 | label, total_dist = update_label(X, centroid) 395 | centroid = update_centroid(X, label, K) 396 | 397 | return centroid, label, total_dist 398 | ``` 399 | ### Example 400 |

401 | 402 |

403 | 404 | ### Determine K 405 |

406 | 407 |

408 | 409 | 410 | ## 6. [Principal Component Analysis](/PCA/PCA.ipynb) 411 | 412 | ### SVD (Singular Value Decomposition) 413 | ``` 414 | Sig = np.dot(X_norm.T,X_norm)/X_norm.shape[0] 415 | U,S,V = np.linalg.svd(Sig) 416 | ``` 417 | 418 | ### Data projection 419 | ``` 420 | def project_data(X_norm, U, K): 421 | Z = np.zeros((X_norm.shape[0], K)) 422 | U_reduce = U[:, 0:K] 423 | Z = np.dot(X_norm, U_reduce) 424 | return Z 425 | ``` 426 | 427 | ### Data recovery 428 | ``` 429 | def recover_data(Z, U, K): 430 | X_rec = np.zeros((Z.shape[0], U.shape[0])) 431 | U_recude = U[:, 0:K] 432 | X_rec = np.dot(Z, U_recude.T) 433 | return X_rec 434 | ``` 435 | 436 | ### PCA function 437 | ``` 438 | def PCA(X, K): 439 | X_norm, mu, sigma = feature_normaliza(X) 440 | 441 | Sig = np.dot(X_norm.T,X_norm)/X_norm.shape[0] 442 | U,S,V = np.linalg.svd(Sig) 443 | 444 | Z = project_data(X_norm, U, K) 445 | X_rec = recover_data(Z, U, K) 446 | return X_rec 447 | ``` 448 | ### Example (2D -> 1D) 449 |

450 | 451 |

452 | 453 | ... 454 | -------------------------------------------------------------------------------- /decision_tree/decision_tree.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from math import log 3 | import matplotlib.pyplot as plt 4 | import seaborn as sns 5 | import os, sys 6 | 7 | def gini_impurity(y): 8 | # calculate gini_impurity given labels/classes of each example 9 | m = y.shape[0] 10 | cnts = dict(zip(*np.unique(y, return_counts = True))) 11 | impurity = 1 - sum((cnt/m)**2 for cnt in cnts.values()) 12 | return impurity 13 | 14 | def entropy(y): 15 | # calculate entropy given labels/classes of each example 16 | m = y.shape[0] 17 | cnts = dict(zip(*np.unique(y, return_counts = True))) 18 | disorder = - sum((cnt/m)*log(cnt/m) for cnt in cnts.values()) 19 | return disorder 20 | 21 | def test_split(index, value, X, y): 22 | # split a group of examples based on given index (feature) and value 23 | mask = X[:, index] < value 24 | left = {'X': X[mask, :], 'y': y[mask]} 25 | right = {'X': X[~mask, :], 'y': y[~mask]} 26 | return left, right 27 | 28 | def info_gain(l_y, r_y, cur_gini): 29 | # calculate the information gain for a certain split 30 | m, n = l_y.shape[0], r_y.shape[0] 31 | p = m / (m + n) 32 | #return cur_gini - p * entropy(l_y) - (1 - p) * entropy(r_y) 33 | return cur_gini - p * gini_impurity(l_y) - (1 - p) * gini_impurity(r_y) 34 | 35 | def get_split(X, y): 36 | # loop through features and values to find best combination with the most information gain 37 | best_gain, best_index, best_value = 0, None, None 38 | 39 | #cur_gini = entropy(y) 40 | cur_gini = gini_impurity(y) 41 | n_features = X.shape[1] 42 | 43 | for index in range(n_features): 44 | values = np.unique(X[:, index], return_counts = False) 45 | for value in values: 46 | left, right = test_split(index, value, X, y) 47 | if left['y'].shape[0] == 0 or right['y'].shape[0] == 0: 48 | continue 49 | gain = info_gain(left['y'], 
right['y'], cur_gini) 50 | if gain > best_gain: 51 | best_gain, best_index, best_value = gain, index, value 52 | 53 | best_split = {'gain': best_gain, 'index': best_index, 'value': best_value} 54 | return best_split 55 | 56 | class Leaf: 57 | # define a leaf node 58 | def __init__(self, y): 59 | self.counts = dict(zip(*np.unique(y, return_counts = True))) 60 | self.prediction = max(self.counts.keys(), key = lambda x: self.counts[x]) 61 | 62 | class Decision_Node: 63 | # define a decision node 64 | def __init__(self, index, value, left, right): 65 | self.index, self.value = index, value 66 | self.left, self.right = left, right 67 | 68 | def decision_tree(X, y, max_dep = 5, min_size = 10): 69 | # train the decision tree model with a dataset 70 | correct_prediction = 0 71 | 72 | def build_tree(X, y, dep, max_dep = max_dep, min_size = min_size): 73 | # recursively build the tree 74 | split = get_split(X, y) 75 | 76 | if split['gain'] == 0 or dep >= max_dep or y.shape[0] <= min_size: 77 | nonlocal correct_prediction 78 | leaf = Leaf(y) 79 | correct_prediction += leaf.counts[leaf.prediction] 80 | return leaf 81 | 82 | left, right = test_split(split['index'], split['value'], X, y) 83 | 84 | left_node = build_tree(left['X'], left['y'], dep + 1) 85 | right_node = build_tree(right['X'], right['y'], dep + 1) 86 | 87 | return Decision_Node(split['index'], split['value'], left_node, right_node) 88 | 89 | root = build_tree(X, y, 0) 90 | 91 | return correct_prediction/y.shape[0], root 92 | 93 | def predict(x, node): 94 | if isinstance(node, Leaf): 95 | return node.prediction 96 | 97 | if x[node.index] < node.value: 98 | return predict(x, node.left) 99 | else: 100 | return predict(x, node.right) 101 | 102 | if __name__ == '__main__': 103 | images_dir = os.path.join(sys.path[0], 'images') 104 | if not os.path.exists(images_dir): 105 | os.makedirs(images_dir) 106 | 107 | N = 100 # number of points per class 108 | D = 2 # dimensionality, we use 2D data for easy visulization 109 | K = 
3 # number of classes 110 | X = np.zeros((N * K, D), dtype = float) # data matrix (each row = single example, can view as xy coordinates) 111 | y = np.zeros(N * K, dtype = int) # class labels 112 | 113 | for i in range(K): 114 | r = np.random.normal(i + 0.5, 0.3, (N, 1)) # radius 115 | t = np.linspace(0, np.pi * 2, N).reshape(N, 1) # theta 116 | X[i * N:(i + 1) * N] = np.append(r * np.sin(t), r * np.cos(t), axis = 1) 117 | y[i * N:(i + 1) * N] = i 118 | 119 | sns.scatterplot(x = X[:, 0], y = X[:, 1], 120 | hue = y, palette = sns.color_palette('deep', K), edgecolor = "none") 121 | plt.title('Dataset') 122 | plt.xlabel('X') 123 | plt.ylabel('Y') 124 | plt.savefig(os.path.join(images_dir, 'data.png')) 125 | plt.clf() 126 | 127 | overfit_accuracy, overfit_model = decision_tree(X, y, float('inf'), 1) 128 | accuracy, model = decision_tree(X, y, 6) 129 | 130 | print('The accuracy of a model with unlimited split is {:.2f} %'.format(overfit_accuracy*100)) 131 | print('With limited split, the accuracy becomes {:.2f} %'.format(accuracy*100)) 132 | 133 | u = np.linspace(min(X[:, 0]),max(X[:, 0]), 400) 134 | v = np.linspace(min(X[:, 1]),max(X[:, 1]), 400) 135 | 136 | models = [overfit_model, model] 137 | titles = ['Overfit DB', 'DB with limited split'] 138 | 139 | fig, axs = plt.subplots(ncols = 2, figsize = (12, 5)) 140 | for k, ax in enumerate(axs): 141 | z = np.zeros((len(u),len(v))) 142 | for i in range(len(u)): 143 | for j in range(len(v)): 144 | z[i,j] = predict([u[i], v[j]], models[k]) 145 | z = np.transpose(z) 146 | 147 | ax.contourf(u,v,z, alpha = 0.2, levels = K - 1, antialiased = True) 148 | sns.scatterplot(x = X[:, 0], y = X[:, 1], hue = y, 149 | palette = sns.color_palette('deep', K), edgecolor = "none", ax = ax) 150 | ax.set_title(titles[k]) 151 | ax.set_xlabel('X') 152 | ax.set_ylabel('Y') 153 | plt.savefig(os.path.join(images_dir, 'decision_boundary.png')) 154 | plt.clf() 155 | -------------------------------------------------------------------------------- 
/decision_tree/images/data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szjunma/ML-Algorithm-with-Python/25f9f14372b32be342c4e36b9e0c7817e9391c5c/decision_tree/images/data.png -------------------------------------------------------------------------------- /decision_tree/images/decision_boundary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szjunma/ML-Algorithm-with-Python/25f9f14372b32be342c4e36b9e0c7817e9391c5c/decision_tree/images/decision_boundary.png -------------------------------------------------------------------------------- /k-means/images/dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szjunma/ML-Algorithm-with-Python/25f9f14372b32be342c4e36b9e0c7817e9391c5c/k-means/images/dataset.png -------------------------------------------------------------------------------- /k-means/images/k-means.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szjunma/ML-Algorithm-with-Python/25f9f14372b32be342c4e36b9e0c7817e9391c5c/k-means/images/k-means.png -------------------------------------------------------------------------------- /k-means/images/total_dist_vs_k.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szjunma/ML-Algorithm-with-Python/25f9f14372b32be342c4e36b9e0c7817e9391c5c/k-means/images/total_dist_vs_k.png -------------------------------------------------------------------------------- /k-means/k-means.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import matplotlib.pyplot as plt\n", 11 | 
"import seaborn as sns\n", 12 | "%matplotlib inline" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "K-means is a type of unsupervised learning method, therefore there are no classes per say. However I generate the data with distinctive classes for easy visulization. Later we will find that the no. of centroids we choose will be similar to the no. of classes for generated data." 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 13, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "N = 80 # number of points per class\n", 29 | "D = 2 # dimensionality, we use 2D data for easy visulization\n", 30 | "K = 3 # number of classes \n", 31 | "X = np.zeros((N * K, D), dtype = float) # data matrix (each row = single example, can view as xy coordinates)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 22, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "for i in range(K):\n", 41 | " r = np.linspace(0.05, 1, N).reshape(-1, 1) # radius\n", 42 | " t = np.random.normal(0, 0.4, (N, 1)) # theta\n", 43 | " X[i * N:(i + 1) * N] = np.append(r * np.sin(t) + i/2, r * np.cos(t) - 2*i/2, axis = 1)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 23, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "image/png": "\n", 54 | "text/plain": [ 55 | "
" 56 | ] 57 | }, 58 | "metadata": { 59 | "needs_background": "light" 60 | }, 61 | "output_type": "display_data" 62 | } 63 | ], 64 | "source": [ 65 | "sns.scatterplot(x = X[:, 0], y = X[:, 1], palette = sns.color_palette('deep', K), edgecolor = \"none\")\n", 66 | "plt.title('Dataset')\n", 67 | "plt.ylabel('Y')\n", 68 | "plt.xlabel('X')\n", 69 | "plt.show()" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 68, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "def update_label(X, centroid):\n", 79 | " m, K = X.shape[0], centroid.shape[0] \n", 80 | " dist = np.zeros((m, K)) \n", 81 | " label = np.zeros((m, 1)) \n", 82 | " \n", 83 | " for i in range(m):\n", 84 | " for j in range(K):\n", 85 | " dist[i,j] = np.dot((X[i, :] - centroid[j, :]).T, (X[i, :] - centroid[j, :]))\n", 86 | " \n", 87 | " label = np.argmin(dist, axis = 1) \n", 88 | " total_dist = np.sum(np.choose(label, dist.T))\n", 89 | " return label, total_dist\n", 90 | " \n", 91 | "\n", 92 | "def update_centroid(X, label, K):\n", 93 | " D = X.shape[1]\n", 94 | " centroid = np.zeros((K, D))\n", 95 | " for i in range(K):\n", 96 | " centroid[i, :] = np.mean(X[label.flatten() == i, :], axis=0).reshape(1,-1)\n", 97 | " return centroid\n", 98 | "\n", 99 | "\n", 100 | "def init_centroid(X, K):\n", 101 | " m = X.shape[0]\n", 102 | " idx = np.random.choice(m, K, replace = False)\n", 103 | " return X[idx, :]" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 69, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "def k_means(X, K, num_iters = 100):\n", 113 | " m = X.shape[0]\n", 114 | " centroid = init_centroid(X, K)\n", 115 | " label = np.zeros((m, 1))\n", 116 | " \n", 117 | " for _ in range(num_iters):\n", 118 | " label, total_dist = update_label(X, centroid)\n", 119 | " centroid = update_centroid(X, label, K)\n", 120 | "\n", 121 | " return centroid, label, total_dist" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | 
"execution_count": 51, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "centroid, label, _ = k_means(X, K, num_iters = 100)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 48, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "data": { 140 | "image/png": "\n", 141 | "text/plain": [ 142 | "
" 143 | ] 144 | }, 145 | "metadata": { 146 | "needs_background": "light" 147 | }, 148 | "output_type": "display_data" 149 | } 150 | ], 151 | "source": [ 152 | "sns.scatterplot(x = X[:, 0], y = X[:, 1], hue = label, palette = sns.color_palette('deep', K), edgecolor = \"none\")\n", 153 | "sns.scatterplot(x = centroid[:, 0], y = centroid[:, 1], marker = \"x\", facecolor='red', s = 10**2, linewidth = 5)\n", 154 | "plt.title('Dataset')\n", 155 | "plt.ylabel('Y')\n", 156 | "plt.xlabel('X')\n", 157 | "plt.show()" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 70, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "dist = [0]*10\n", 167 | "for i in range(10):\n", 168 | " _, _, dist[i] = k_means(X, i + 1, num_iters = 100)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 71, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "data": { 178 | "image/png": "\n", 179 | "text/plain": [ 180 | "
def init_centroid(X, K):
    """Pick K distinct samples of X to serve as the starting centroids.

    Args:
        X: (m, D) data matrix, one sample per row.
        K: number of centroids to draw; must not exceed m.

    Returns:
        (K, D) array holding copies of the selected rows.

    Raises:
        ValueError: raised by ``np.random.choice`` when K is larger than m.
    """
    m = X.shape[0]
    idx = np.random.choice(m, K, replace=False)
    return X[idx, :]


def update_label(X, centroid):
    """Assign every sample to its nearest centroid.

    Distances are squared Euclidean distances; ties go to the centroid with
    the lowest index (``np.argmin`` semantics).

    Args:
        X: (m, D) data matrix.
        centroid: (K, D) centroid matrix.

    Returns:
        Tuple ``(label, total_dist)`` where ``label`` is an (m,) integer array
        of centroid indices and ``total_dist`` is the sum over all samples of
        the squared distance to the assigned centroid.
    """
    m, K = X.shape[0], centroid.shape[0]
    dist = np.zeros((m, K))

    # One vectorised pass per centroid instead of a Python loop per sample.
    for j in range(K):
        diff = X - centroid[j, :]
        dist[:, j] = np.sum(diff * diff, axis=1)

    label = np.argmin(dist, axis=1)
    # The row minimum is exactly the distance to the assigned centroid; unlike
    # np.choose this has no upper bound on the number of centroids.
    total_dist = np.sum(np.min(dist, axis=1))
    return label, total_dist


def update_centroid(X, label, K, prev_centroid=None):
    """Recompute each centroid as the mean of the samples assigned to it.

    Args:
        X: (m, D) data matrix.
        label: array of m centroid indices (any shape, it is flattened).
        K: number of centroids.
        prev_centroid: optional (K, D) array with the previous centroids. It
            is used as the position of any cluster that received no samples.

    Returns:
        (K, D) array of updated centroids. A cluster without samples keeps
        its previous position when ``prev_centroid`` is given; otherwise it is
        re-seeded on a randomly chosen sample, so the result never contains
        NaN from averaging an empty selection.
    """
    D = X.shape[1]
    centroid = np.zeros((K, D))
    flat_label = np.asarray(label).flatten()

    for i in range(K):
        members = X[flat_label == i, :]
        if members.shape[0] > 0:
            centroid[i, :] = np.mean(members, axis=0)
        elif prev_centroid is not None:
            centroid[i, :] = prev_centroid[i, :]
        else:
            centroid[i, :] = X[np.random.randint(X.shape[0]), :]
    return centroid


def k_means(X, K, num_iters=100):
    """Cluster X into K groups with Lloyd's algorithm.

    Args:
        X: (m, D) data matrix.
        K: number of clusters.
        num_iters: maximum number of assignment/update rounds.

    Returns:
        Tuple ``(centroid, label, total_dist)``: the (K, D) centroids, the
        (m,) cluster index of every sample and the summed squared distance of
        the samples to their assigned centroids (as measured in the last
        assignment step).
    """
    centroid = init_centroid(X, K)
    # Computing the first assignment up front keeps the return values defined
    # even when num_iters is 0.
    label, total_dist = update_label(X, centroid)
    prev_label = None

    for _ in range(num_iters):
        if prev_label is not None:
            label, total_dist = update_label(X, centroid)
            # Unchanged labels mean the centroids would not move either, so
            # all remaining rounds would reproduce the current state.
            if np.array_equal(label, prev_label):
                break
        centroid = update_centroid(X, label, K, prev_centroid=centroid)
        prev_label = label

    return centroid, label, total_dist
def cost(X, y, theta):
    """Return the halved mean squared error of the linear model ``X . theta``.

    Args:
        X: (m, n) design matrix.
        y: targets with a shape compatible with ``np.dot(X, theta)``.
        theta: model parameters.

    Returns:
        ``sum((X . theta - y) ** 2) / (2 * len(y))``.
    """
    residual = np.dot(X, theta) - y
    return np.sum(residual * residual) / (2 * len(y))


def gradient_descent(X, y, theta, alpha, num_iters):
    """Minimise the squared-error cost with batch gradient descent.

    Args:
        X: (m, n) design matrix.
        y: targets with a shape compatible with ``np.dot(X, theta)``.
        theta: initial parameters. The argument is not modified; the
            optimisation runs on a float64 copy.
        alpha: learning rate.
        num_iters: number of update steps.

    Returns:
        Tuple ``(theta, costs)`` with the fitted parameters and the cost
        recorded after each of the ``num_iters`` steps.
    """
    m = len(y)
    # Work on a float copy so the caller's array stays untouched and an
    # integer-typed start vector does not break the update.
    theta = np.array(theta, dtype=np.float64)
    costs = []
    for _ in range(num_iters):
        residual = np.dot(X, theta) - y
        theta = theta - alpha * np.dot(X.T, residual) / m
        costs.append(cost(X, y, theta))
    return theta, costs


def linear_regression(X, y, alpha=0.01, num_iters=100):
    """Fit a linear model with intercept by gradient descent.

    Args:
        X: (m, n) feature matrix; a 1-D array is treated as a single feature.
        y: (m, 1) targets; a 1-D array of length m is accepted and reshaped
            to a column so it cannot broadcast against the predictions.
        alpha: learning rate.
        num_iters: number of gradient steps.

    Returns:
        Tuple ``(predicted, theta, costs)``: the (m, 1) fitted values, the
        (n + 1, 1) parameters (intercept first) and the cost after each step.
    """
    X = np.asarray(X)
    if X.ndim == 1:
        X = X.reshape(-1, 1)
    y = np.asarray(y)
    if y.ndim == 1:
        y = y.reshape(-1, 1)

    # Prepend the constant column that carries the intercept.
    X = np.append(np.ones((X.shape[0], 1)), X, axis=1)
    theta = np.zeros((X.shape[1], 1), dtype=np.float64)
    theta, costs = gradient_descent(X, y, theta, alpha, num_iters)
    predicted = np.dot(X, theta)
    return predicted, theta, costs
19 | 6.4296,3.6518 20 | 7.0708,5.3893 21 | 6.1891,3.1386 22 | 20.27,21.767 23 | 5.4901,4.263 24 | 6.3261,5.1875 25 | 5.5649,3.0825 26 | 18.945,22.638 27 | 12.828,13.501 28 | 10.957,7.0467 29 | 13.176,14.692 30 | 22.203,24.147 31 | 5.2524,-1.22 32 | 6.5894,5.9966 33 | 9.2482,12.134 34 | 5.8918,1.8495 35 | 8.2111,6.5426 36 | 7.9334,4.5623 37 | 8.0959,4.1164 38 | 5.6063,3.3928 39 | 12.836,10.117 40 | 6.3534,5.4974 41 | 5.4069,0.55657 42 | 6.8825,3.9115 43 | 11.708,5.3854 44 | 5.7737,2.4406 45 | 7.8247,6.7318 46 | 7.0931,1.0463 47 | 5.0702,5.1337 48 | 5.8014,1.844 49 | 11.7,8.0043 50 | 5.5416,1.0179 51 | 7.5402,6.7504 52 | 5.3077,1.8396 53 | 7.4239,4.2885 54 | 7.6031,4.9981 55 | 6.3328,1.4233 56 | 6.3589,-1.4211 57 | 6.2742,2.4756 58 | 5.6397,4.6042 59 | 9.3102,3.9624 60 | 9.4536,5.4141 61 | 8.8254,5.1694 62 | 5.1793,-0.74279 63 | 21.279,17.929 64 | 14.908,12.054 65 | 18.959,17.054 66 | 7.2182,4.8852 67 | 8.2951,5.7442 68 | 10.236,7.7754 69 | 5.4994,1.0173 70 | 20.341,20.992 71 | 10.136,6.6799 72 | 7.3345,4.0259 73 | 6.0062,1.2784 74 | 7.2259,3.3411 75 | 5.0269,-2.6807 76 | 6.5479,0.29678 77 | 7.5386,3.8845 78 | 5.0365,5.7014 79 | 10.274,6.7526 80 | 5.1077,2.0576 81 | 5.7292,0.47953 82 | 5.1884,0.20421 83 | 6.3557,0.67861 84 | 9.7687,7.5435 85 | 6.5159,5.3436 86 | 8.5172,4.2415 87 | 9.1802,6.7981 88 | 6.002,0.92695 89 | 5.5204,0.152 90 | 5.0594,2.8214 91 | 5.7077,1.8451 92 | 7.6366,4.2959 93 | 5.8707,7.2029 94 | 5.3054,1.9869 95 | 8.2934,0.14454 96 | 13.394,9.0551 97 | 5.4369,0.61705 98 | -------------------------------------------------------------------------------- /linear_regression/linear_regression_data_multi.csv: -------------------------------------------------------------------------------- 1 | 2104,3,399900 2 | 1600,3,329900 3 | 2400,3,369000 4 | 1416,2,232000 5 | 3000,4,539900 6 | 1985,4,299900 7 | 1534,3,314900 8 | 1427,3,198999 9 | 1380,3,212000 10 | 1494,3,242500 11 | 1940,4,239999 12 | 2000,3,347000 13 | 1890,3,329999 14 | 4478,5,699900 15 | 
1268,3,259900 16 | 2300,4,449900 17 | 1320,2,299900 18 | 1236,3,199900 19 | 2609,4,499998 20 | 3031,4,599000 21 | 1767,3,252900 22 | 1888,2,255000 23 | 1604,3,242900 24 | 1962,4,259900 25 | 3890,3,573900 26 | 1100,3,249900 27 | 1458,3,464500 28 | 2526,3,469000 29 | 2200,3,475000 30 | 2637,3,299900 31 | 1839,2,349900 32 | 1000,1,169900 33 | 2040,4,314900 34 | 3137,3,579900 35 | 1811,4,285900 36 | 1437,3,249900 37 | 1239,3,229900 38 | 2132,4,345000 39 | 4215,4,549000 40 | 2162,4,287000 41 | 1664,2,368500 42 | 2238,3,329900 43 | 2567,4,314000 44 | 1200,3,299000 45 | 852,2,179900 46 | 1852,4,299900 47 | 1203,3,239500 48 | -------------------------------------------------------------------------------- /logistic_regression/images/cost.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szjunma/ML-Algorithm-with-Python/25f9f14372b32be342c4e36b9e0c7817e9391c5c/logistic_regression/images/cost.png -------------------------------------------------------------------------------- /logistic_regression/images/data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szjunma/ML-Algorithm-with-Python/25f9f14372b32be342c4e36b9e0c7817e9391c5c/logistic_regression/images/data.png -------------------------------------------------------------------------------- /logistic_regression/images/data_multi-class.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szjunma/ML-Algorithm-with-Python/25f9f14372b32be342c4e36b9e0c7817e9391c5c/logistic_regression/images/data_multi-class.png -------------------------------------------------------------------------------- /logistic_regression/images/decision_boundary.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/szjunma/ML-Algorithm-with-Python/25f9f14372b32be342c4e36b9e0c7817e9391c5c/logistic_regression/images/decision_boundary.png -------------------------------------------------------------------------------- /logistic_regression/images/decision_boundary_multi-class.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szjunma/ML-Algorithm-with-Python/25f9f14372b32be342c4e36b9e0c7817e9391c5c/logistic_regression/images/decision_boundary_multi-class.png -------------------------------------------------------------------------------- /logistic_regression/images/decision_boundary_overfitting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szjunma/ML-Algorithm-with-Python/25f9f14372b32be342c4e36b9e0c7817e9391c5c/logistic_regression/images/decision_boundary_overfitting.png -------------------------------------------------------------------------------- /logistic_regression/images/decision_boundary_regularization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szjunma/ML-Algorithm-with-Python/25f9f14372b32be342c4e36b9e0c7817e9391c5c/logistic_regression/images/decision_boundary_regularization.png -------------------------------------------------------------------------------- /logistic_regression/images/decision_boundary_underfitting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szjunma/ML-Algorithm-with-Python/25f9f14372b32be342c4e36b9e0c7817e9391c5c/logistic_regression/images/decision_boundary_underfitting.png -------------------------------------------------------------------------------- /logistic_regression/logistic_regression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot 
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    Only ``exp(-|z|)`` is evaluated, so very large positive or negative
    inputs neither overflow nor emit warnings.

    Args:
        z: scalar or array.

    Returns:
        NumPy float for scalar input, otherwise an array shaped like ``z``.
    """
    z = np.asarray(z, dtype=np.float64)
    e = np.exp(-np.abs(z))
    out = np.where(z >= 0, 1 / (1 + e), e / (1 + e))
    # Indexing with an empty tuple turns a 0-d result into a scalar and
    # leaves n-d arrays as they are.
    return out[()]


def cost(theta, X, y):
    """Return the mean cross-entropy of the logistic model.

    Uses ``-log(sigmoid(z)) == logaddexp(0, -z)`` so saturated predictions
    give a finite value instead of NaN from ``0 * log(0)``.

    Args:
        theta: model parameters.
        X: (m, n) design matrix.
        y: 0/1 targets with a shape compatible with ``np.dot(X, theta)``.

    Returns:
        Cross-entropy summed over all entries and divided by ``len(y)``.
    """
    z = np.dot(X, theta)
    loss = y * np.logaddexp(0, -z) + (1 - y) * np.logaddexp(0, z)
    return np.sum(loss) / len(y)


def expand_feature(x1, x2, power=2):
    """Expand two features into all polynomial terms up to ``power``.

    Columns are ordered ``1, x1, x2, x1^2, x1*x2, x2^2, ...``: for every
    degree ``i`` the terms ``x1^(i-j) * x2^j`` with ``j = 0..i``.

    Args:
        x1: first feature, any shape; it is flattened to m values.
        x2: second feature with the same number of values as ``x1``.
        power: highest total degree to generate.

    Returns:
        (m, (power + 1) * (power + 2) / 2) float array.
    """
    x1 = np.asarray(x1).reshape(-1)
    x2 = np.asarray(x2).reshape(-1)
    # Collect the columns first and stack once instead of re-copying the
    # whole matrix on every appended column.
    columns = [np.ones(x1.shape[0])]
    for i in range(1, power + 1):
        for j in range(i + 1):
            columns.append(x1 ** (i - j) * (x2 ** j))
    return np.column_stack(columns)


def gradient_descent(X, y, theta, alpha, num_iters):
    """Minimise the logistic cross-entropy with batch gradient descent.

    Args:
        X: (m, n) design matrix.
        y: 0/1 targets with a shape compatible with ``np.dot(X, theta)``.
        theta: initial parameters. The argument is not modified; the
            optimisation runs on a float64 copy.
        alpha: learning rate.
        num_iters: number of update steps.

    Returns:
        Tuple ``(theta, costs)`` with the fitted parameters and the cost
        recorded after each step.
    """
    m = len(y)
    theta = np.array(theta, dtype=np.float64)
    costs = []
    for _ in range(num_iters):
        h = sigmoid(np.dot(X, theta))
        theta = theta - alpha * np.dot(X.T, (h - y)) / m
        costs.append(cost(theta, X, y))
    return theta, costs


def predict(theta, X):
    """Return a flat boolean array, True where the predicted probability exceeds 0.5."""
    return (sigmoid(np.dot(X, theta)) > 0.5).flatten()


def logistic_regression(X, y, power=2, alpha=0.01, num_iters=100):
    """Fit a binary logistic regression on polynomial features.

    Args:
        X: (m, 2+) feature matrix; only the first two columns are used.
        y: (m, 1) array of 0/1 targets; a 1-D array is reshaped to a column
            so it cannot broadcast against the predictions.
        power: highest polynomial degree passed to ``expand_feature``.
        alpha: learning rate.
        num_iters: number of gradient steps.

    Returns:
        Tuple ``(predicted, theta, costs)``: flat boolean predictions for the
        training samples, the fitted column of parameters and the cost after
        each step.
    """
    y = np.asarray(y)
    if y.ndim == 1:
        y = y.reshape(-1, 1)
    X = expand_feature(X[:, 0], X[:, 1], power=power)
    theta = np.zeros((X.shape[1], 1), dtype=np.float64)
    theta, costs = gradient_descent(X, y, theta, alpha, num_iters)
    predicted = predict(theta, X)
    return predicted, theta, costs
y.flatten())/len(y)*100)) 58 | 59 | sns.lineplot(range(4000), costs) 60 | plt.title('Cost vs Number of Interations') 61 | plt.ylabel('Cost') 62 | plt.xlabel('No. of Interations') 63 | plt.savefig(os.path.join(images_dir, 'cost.png')) 64 | plt.clf() 65 | 66 | u = np.linspace(min(X[:, 0]),max(X[:, 0]), 50) 67 | v = np.linspace(min(X[:, 1]),max(X[:, 1]), 50) 68 | 69 | z = np.zeros((len(u),len(v))) 70 | 71 | for i in range(len(u)): 72 | for j in range(len(v)): 73 | z[i,j] = np.dot(expand_feature(u[i].reshape(1,-1),v[j].reshape(1,-1)),theta) 74 | z = np.transpose(z) 75 | 76 | plt.contour(u,v,z,[0,0.01], cmap = "Reds") 77 | sns.scatterplot(x = X[:, 0], y = X[:, 1], hue = y.flatten()) 78 | plt.title('Decision Boundary') 79 | plt.xlabel('X') 80 | plt.ylabel('Y') 81 | plt.savefig(os.path.join(images_dir, 'decision_boundary.png')) 82 | plt.clf() 83 | -------------------------------------------------------------------------------- /logistic_regression/logistic_regression_data.csv: -------------------------------------------------------------------------------- 1 | 0.051267,0.69956,1 2 | -0.092742,0.68494,1 3 | -0.21371,0.69225,1 4 | -0.375,0.50219,1 5 | -0.51325,0.46564,1 6 | -0.52477,0.2098,1 7 | -0.39804,0.034357,1 8 | -0.30588,-0.19225,1 9 | 0.016705,-0.40424,1 10 | 0.13191,-0.51389,1 11 | 0.38537,-0.56506,1 12 | 0.52938,-0.5212,1 13 | 0.63882,-0.24342,1 14 | 0.73675,-0.18494,1 15 | 0.54666,0.48757,1 16 | 0.322,0.5826,1 17 | 0.16647,0.53874,1 18 | -0.046659,0.81652,1 19 | -0.17339,0.69956,1 20 | -0.47869,0.63377,1 21 | -0.60541,0.59722,1 22 | -0.62846,0.33406,1 23 | -0.59389,0.005117,1 24 | -0.42108,-0.27266,1 25 | -0.11578,-0.39693,1 26 | 0.20104,-0.60161,1 27 | 0.46601,-0.53582,1 28 | 0.67339,-0.53582,1 29 | -0.13882,0.54605,1 30 | -0.29435,0.77997,1 31 | -0.26555,0.96272,1 32 | -0.16187,0.8019,1 33 | -0.17339,0.64839,1 34 | -0.28283,0.47295,1 35 | -0.36348,0.31213,1 36 | -0.30012,0.027047,1 37 | -0.23675,-0.21418,1 38 | -0.06394,-0.18494,1 39 | 0.062788,-0.16301,1 40 | 
0.22984,-0.41155,1 41 | 0.2932,-0.2288,1 42 | 0.48329,-0.18494,1 43 | 0.64459,-0.14108,1 44 | 0.46025,0.012427,1 45 | 0.6273,0.15863,1 46 | 0.57546,0.26827,1 47 | 0.72523,0.44371,1 48 | 0.22408,0.52412,1 49 | 0.44297,0.67032,1 50 | 0.322,0.69225,1 51 | 0.13767,0.57529,1 52 | -0.0063364,0.39985,1 53 | -0.092742,0.55336,1 54 | -0.20795,0.35599,1 55 | -0.20795,0.17325,1 56 | -0.43836,0.21711,1 57 | -0.21947,-0.016813,1 58 | -0.13882,-0.27266,1 59 | 0.18376,0.93348,0 60 | 0.22408,0.77997,0 61 | 0.29896,0.61915,0 62 | 0.50634,0.75804,0 63 | 0.61578,0.7288,0 64 | 0.60426,0.59722,0 65 | 0.76555,0.50219,0 66 | 0.92684,0.3633,0 67 | 0.82316,0.27558,0 68 | 0.96141,0.085526,0 69 | 0.93836,0.012427,0 70 | 0.86348,-0.082602,0 71 | 0.89804,-0.20687,0 72 | 0.85196,-0.36769,0 73 | 0.82892,-0.5212,0 74 | 0.79435,-0.55775,0 75 | 0.59274,-0.7405,0 76 | 0.51786,-0.5943,0 77 | 0.46601,-0.41886,0 78 | 0.35081,-0.57968,0 79 | 0.28744,-0.76974,0 80 | 0.085829,-0.75512,0 81 | 0.14919,-0.57968,0 82 | -0.13306,-0.4481,0 83 | -0.40956,-0.41155,0 84 | -0.39228,-0.25804,0 85 | -0.74366,-0.25804,0 86 | -0.69758,0.041667,0 87 | -0.75518,0.2902,0 88 | -0.69758,0.68494,0 89 | -0.4038,0.70687,0 90 | -0.38076,0.91886,0 91 | -0.50749,0.90424,0 92 | -0.54781,0.70687,0 93 | 0.10311,0.77997,0 94 | 0.057028,0.91886,0 95 | -0.10426,0.99196,0 96 | -0.081221,1.1089,0 97 | 0.28744,1.087,0 98 | 0.39689,0.82383,0 99 | 0.63882,0.88962,0 100 | 0.82316,0.66301,0 101 | 0.67339,0.64108,0 102 | 1.0709,0.10015,0 103 | -0.046659,-0.57968,0 104 | -0.23675,-0.63816,0 105 | -0.15035,-0.36769,0 106 | -0.49021,-0.3019,0 107 | -0.46717,-0.13377,0 108 | -0.28859,-0.060673,0 109 | -0.61118,-0.067982,0 110 | -0.66302,-0.21418,0 111 | -0.59965,-0.41886,0 112 | -0.72638,-0.082602,0 113 | -0.83007,0.31213,0 114 | -0.72062,0.53874,0 115 | -0.59389,0.49488,0 116 | -0.48445,0.99927,0 117 | -0.0063364,0.99927,0 118 | 0.63265,-0.030612,0 119 | -------------------------------------------------------------------------------- 
# prediction function is different from binary classification
def predict_multi_class(theta, X):
    """Return, for every sample, the index of the class with the highest score.

    The sigmoid is strictly increasing, so the class with the largest linear
    score is also the class with the largest probability. Comparing the raw
    scores avoids false ties when several probabilities round to 1.0.

    Args:
        theta: (n, K) parameter matrix, one column per class.
        X: (m, n) design matrix.

    Returns:
        (m,) integer array of class indices.
    """
    return np.argmax(np.dot(X, theta), axis=1)


def logistic_regression_reg_multi_class(X, y, power=2, alpha=0.01, lam=0, num_iters=100):
    """Fit one regularised logistic model per class on polynomial features.

    Relies on ``expand_feature`` and ``gradient_descent_reg`` made available
    by the module's star import.

    Args:
        X: (m, 2+) feature matrix; only the first two columns are used.
        y: (m, K) one-hot target matrix.
        power: highest polynomial degree passed to ``expand_feature``.
        alpha: learning rate.
        lam: regularisation strength.
        num_iters: number of gradient steps.

    Returns:
        Tuple ``(predicted, theta, costs)``: predicted class index for each
        training sample, the (n, K) parameters and the cost history returned
        by ``gradient_descent_reg``.
    """
    X = expand_feature(X[:, 0], X[:, 1], power=power)
    theta = np.zeros((X.shape[1], y.shape[1]), dtype=np.float64)
    theta, costs = gradient_descent_reg(X, y, theta, alpha, lam, num_iters)
    predicted = predict_multi_class(theta, X)
    return predicted, theta, costs
def cost_reg(theta, X, y, lam=0):
    """Return the L2-regularised mean cross-entropy of the logistic model.

    The first row of ``theta`` (the intercept) is excluded from the penalty.
    The data term uses ``-log(sigmoid(z)) == logaddexp(0, -z)`` so saturated
    predictions give a finite value instead of NaN.

    Args:
        theta: (n, K) or (n, 1) parameters; not modified.
        X: (m, n) design matrix.
        y: 0/1 targets with the shape of ``np.dot(X, theta)``.
        lam: regularisation strength.

    Returns:
        ``cross_entropy / len(y) + lam * sum(theta[1:] ** 2) / len(y)``.
    """
    z = np.dot(X, theta)
    data_term = np.sum(y * np.logaddexp(0, -z) + (1 - y) * np.logaddexp(0, z)) / len(y)
    penalised = theta.copy()
    penalised[0] = 0  # the intercept row is not regularised
    return data_term + lam * np.sum(penalised * penalised) / len(y)


def gradient_descent_reg(X, y, theta, alpha, lam=0, num_iters=100):
    """Minimise the regularised cross-entropy with batch gradient descent.

    Relies on ``sigmoid`` made available by the module's star import.

    Args:
        X: (m, n) design matrix.
        y: 0/1 targets with the shape of ``np.dot(X, theta)``.
        theta: initial parameters. The argument is not modified; the
            optimisation runs on a float64 copy.
        alpha: learning rate.
        lam: regularisation strength.
        num_iters: number of update steps.

    Returns:
        Tuple ``(theta, costs)`` with the fitted parameters and the
        regularised cost (same ``lam`` as the updates) after each step.
    """
    m = len(y)
    theta = np.array(theta, dtype=np.float64)
    costs = []

    for _ in range(num_iters):
        h = sigmoid(np.dot(X, theta))
        penalised = theta.copy()
        penalised[0] = 0  # the intercept row is not regularised
        theta = theta - alpha * (np.dot(X.T, (h - y)) + 2 * lam * penalised) / m
        # Report the objective that is actually being minimised, i.e.
        # including the penalty term.
        costs.append(cost_reg(theta, X, y, lam))
    return theta, costs


def logistic_regression_reg(X, y, power=2, alpha=0.01, lam=0, num_iters=100):
    """Fit a regularised binary logistic regression on polynomial features.

    Relies on ``expand_feature`` and ``predict`` made available by the
    module's star import.

    Args:
        X: (m, 2+) feature matrix; only the first two columns are used.
        y: 2-D target array; ``y.shape[1]`` sets the number of parameter
            columns.
        power: highest polynomial degree passed to ``expand_feature``.
        alpha: learning rate.
        lam: regularisation strength.
        num_iters: number of gradient steps.

    Returns:
        Tuple ``(predicted, theta, costs)``: the output of ``predict`` on the
        training samples, the fitted parameters and the cost history.
    """
    X = expand_feature(X[:, 0], X[:, 1], power=power)
    theta = np.zeros((X.shape[1], y.shape[1]), dtype=np.float64)
    theta, costs = gradient_descent_reg(X, y, theta, alpha, lam, num_iters)
    predicted = predict(theta, X)
    return predicted, theta, costs
def init_para(D, K, h):
    """Create the initial parameters of a one-hidden-layer network.

    Args:
        D: number of input features.
        K: number of classes.
        h: number of hidden units.

    Returns:
        Tuple ``(W, b, W2, b2)``: (D, h) and (h, K) weight matrices drawn from
        a normal distribution with standard deviation 0.01, and (1, h) and
        (1, K) zero biases.
    """
    # initialize parameters randomly
    W = np.random.normal(0, 0.01, (D, h))
    b = np.zeros((1, h), dtype=float)
    W2 = np.random.normal(0, 0.01, (h, K))
    b2 = np.zeros((1, K), dtype=float)
    return W, b, W2, b2


def softmax(scores):
    """Return row-wise softmax probabilities of an (N, K) score matrix.

    The row maximum is subtracted before exponentiation; this leaves the
    result unchanged mathematically but keeps ``exp`` from overflowing.
    """
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)


def nnet(X, y, step_size=0.4, lam=0.0001, h=10, num_iters=1000):
    """Train a one-hidden-layer ReLU network with softmax output.

    Uses full-batch gradient descent on the cross-entropy loss plus an L2
    penalty on both weight matrices. The loss is printed at iteration 0,
    every 1000th iteration and the final iteration.

    Args:
        X: (N, D) feature matrix.
        y: (N, K) one-hot target matrix.
        step_size: learning rate.
        lam: L2 regularisation strength.
        h: number of hidden units.
        num_iters: number of gradient steps.

    Returns:
        Tuple ``(W, b, W2, b2)`` of trained parameters, in the format accepted
        by ``predict``.
    """
    # get dim of input
    N, D = X.shape
    K = y.shape[1]

    W, b, W2, b2 = init_para(D, K, h)

    # gradient descent loop to update weight and bias
    for i in range(num_iters):
        # hidden layer, ReLU activation (biases broadcast over the rows)
        hidden_layer = np.maximum(0, np.dot(X, W) + b)

        # class score
        scores = np.dot(hidden_layer, W2) + b2

        # compute and normalize class probabilities
        probs = softmax(scores)

        # check progress; the loss is only needed for reporting
        if i % 1000 == 0 or i == num_iters - 1:
            # log-softmax computed directly so a probability that underflows
            # to 0 cannot turn the reported loss into NaN
            shifted = scores - np.max(scores, axis=1, keepdims=True)
            log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
            data_loss = -np.sum(log_probs * y) / N
            reg_loss = 0.5 * lam * np.sum(W * W) + 0.5 * lam * np.sum(W2 * W2)
            loss = data_loss + reg_loss
            print("iteration {}: loss {}".format(i, loss))

        # compute the gradient on scores
        dscores = (probs - y) / N

        # backpropagate the gradient to the parameters
        dW2 = np.dot(hidden_layer.T, dscores)
        db2 = np.sum(dscores, axis=0)
        # next backprop into hidden layer
        dhidden = np.dot(dscores, W2.T)
        # backprop the ReLU non-linearity
        dhidden[hidden_layer <= 0] = 0
        # finally into W, b
        dW = np.dot(X.T, dhidden)
        db = np.sum(dhidden, axis=0)

        # add regularization gradient contribution
        dW2 = dW2 + lam * W2
        dW = dW + lam * W

        # update parameter
        W = W - step_size * dW
        b = b - step_size * db
        W2 = W2 - step_size * dW2
        b2 = b2 - step_size * db2
    return W, b, W2, b2


def predict(X, para):
    """Return the predicted class index for every row of X.

    Args:
        X: (N, D) feature matrix.
        para: tuple ``(W, b, W2, b2)`` as returned by ``nnet``.

    Returns:
        (N,) integer array with the index of the highest class score.
    """
    W, b, W2, b2 = para
    hidden_layer = np.maximum(0, np.dot(X, W) + b)
    scores = np.dot(hidden_layer, W2) + b2
    return np.argmax(scores, axis=1)
if __name__ == '__main__':
    # Create the images folder next to the script if it is missing.
    images_dir = os.path.join(sys.path[0], 'images')
    if not os.path.exists(images_dir):
        os.makedirs(images_dir)

    # Load train and test data; the first row of each CSV is skipped.
    script_dir = sys.path[0]
    train = np.loadtxt(os.path.join(script_dir, 'data/train.csv'), delimiter=',', skiprows=1)
    test = np.loadtxt(os.path.join(script_dir, 'data/test.csv'), delimiter=',', skiprows=1)

    # Column 0 is used as the label; the remaining columns are divided by 255.
    trainy_ = train[:, 0].flatten()
    trainx = train[:, 1:]
    trainx_norm = trainx / 255

    # One-hot encode the labels into a 10-column matrix.
    n_samples = trainx.shape[0]
    trainy = np.zeros((n_samples, 10), dtype=int)
    trainy[np.arange(n_samples), trainy_.astype(int)] = 1

    # Train the network and report accuracy on the training set.
    num_iters = 2000
    mnist_para = nnet(trainx_norm, trainy, step_size=0.4, lam=0.001, h=10, num_iters=num_iters)
    predicted = predict(trainx_norm, mnist_para)
    n_correct = np.count_nonzero(predicted == trainy_)
    print('The accuracy is {:.2f} %'.format(n_correct / len(trainy_) * 100))