├── .gitignore ├── D1-linear-reg └── D1-linear-reg-ols.ipynb ├── D11-bubble-sort └── bubble-sort.ipynb ├── D13 └── D13-sql.txt ├── D14 └── D14-sql.txt ├── D15-sql-intermediate └── problems.txt ├── D16-cnn-basics └── D16_cnn_basics.ipynb ├── D2-logistic-reg └── D2-logistic-regression.ipynb ├── D3-rmsprop ├── D3-1-gradient-descent.ipynb └── D3-RMSPROP-logistic-reg.ipynb ├── D4-knn └── D4-knn.ipynb ├── D5-kmeans └── D5-Kmeans.ipynb ├── D6-naivebayes ├── D6-naive-bayes.ipynb └── iris.csv ├── D7-GMM └── D7-Gaussian-mixture-model.ipynb ├── D8-SLP └── D8-SLP.ipynb ├── D9-10 └── Multilayer-perceptron.ipynb ├── Output-images ├── D1.png ├── D11.png ├── D12.png ├── D13.png ├── D14.png ├── D16-cnn-basics.png ├── D2.png ├── D3-rmsprop.png ├── D4.png ├── D5.png ├── D6.png ├── D7.png ├── D8.png ├── D9.png └── d15.png └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | /.ipynb_checkpoints/ 2 | *.ipynb_checkpoints/ 3 | -------------------------------------------------------------------------------- /D1-linear-reg/D1-linear-reg-ols.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 38, 6 | "id": "herbal-stroke", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# LINEAR REGRESSION USING ORDINARY LEAST SQUARES\n", 11 | "\n", 12 | "\n", 13 | "# OLS = Ordinaryleastsquares\n", 14 | "\n", 15 | "class Ordinaryleastsquares():\n", 16 | " \n", 17 | " def __init__(self):\n", 18 | " \n", 19 | " # list to store coef's we'll get after OLS\n", 20 | " self.coefs = [] \n", 21 | " \n", 22 | " \n", 23 | " # Function -- > if we are passing only 1D array of fetaures we would like to convert that \n", 24 | " # list to Two Dimensional Input for processing\n", 25 | " \n", 26 | " def reshape_input(self,X):\n", 27 | " \n", 28 | " return X.reshape(-1,1) \n", 29 | " \n", 30 | " def concat_ones(self,X):\n", 31 | " \n", 32 | " # if 
we have n rows in dataset we will create a Numpy Ones array of shape (n,1)\n", 33 | " \n", 34 | " ones = np.ones(shape = X.shape[0]).reshape(-1,1)\n", 35 | " \n", 36 | " return np.concatenate((ones,X),axis=1)\n", 37 | " \n", 38 | " def forward(self,X,y):\n", 39 | " \n", 40 | " if len(X.shape) == 1: \n", 41 | " X = self.reshape_input(X)\n", 42 | " \n", 43 | " X = self.concat_ones(X)\n", 44 | " \n", 45 | " \n", 46 | " # formula \n", 47 | " \n", 48 | " # link https://towardsdatascience.com/multiple-linear-regression-from-scratch-in-numpy-36a3e8ac8014\n", 49 | " self.coefs = np.linalg.inv(X.transpose().dot(X)).dot(X.transpose()).dot(y)\n", 50 | " \n", 51 | " def predict(self,data):\n", 52 | " \n", 53 | " b0 = self.coefs[0]\n", 54 | " \n", 55 | " other_coefs = self.coefs[1:]\n", 56 | " \n", 57 | " prediction = b0\n", 58 | " \n", 59 | " for i,j in zip(data,other_coefs):\n", 60 | " \n", 61 | " prediction += i*j\n", 62 | " \n", 63 | " return prediction" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 39, 69 | "id": "nasty-heaven", 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "import pandas as pd\n", 74 | "import numpy as np\n", 75 | "df = pd.read_csv('PYTORCH_NOTEBOOKS/Data/iris.csv')\n", 76 | "\n", 77 | "input_features = [col for col in list(df.columns) if col!='target']\n", 78 | "\n", 79 | "X = df[input_features].values\n", 80 | "y =df['target'].values\n" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 40, 86 | "id": "infinite-blackjack", 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "model = Ordinaryleastsquares()\n", 91 | "\n", 92 | "model.forward(X,y)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 41, 98 | "id": "rapid-ethics", 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "data": { 103 | "text/plain": [ 104 | "array([ 0.19208399, -0.10974146, -0.04424045, 0.22700138, 0.60989412])" 105 | ] 106 | }, 107 | "execution_count": 41, 108 | "metadata": {}, 109 
| "output_type": "execute_result" 110 | } 111 | ], 112 | "source": [ 113 | "model.coefs" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 42, 119 | "id": "human-transportation", 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "name": "stdout", 124 | "output_type": "stream", 125 | "text": [ 126 | "2.0\n" 127 | ] 128 | }, 129 | { 130 | "data": { 131 | "text/plain": [ 132 | "1.591379758075804" 133 | ] 134 | }, 135 | "execution_count": 42, 136 | "metadata": {}, 137 | "output_type": "execute_result" 138 | } 139 | ], 140 | "source": [ 141 | "print(y[123])\n", 142 | "model.predict(X[123])" 143 | ] 144 | } 145 | ], 146 | "metadata": { 147 | "kernelspec": { 148 | "display_name": "Python 3.8.2 64-bit", 149 | "language": "python", 150 | "name": "python38264bita03373cad2404f55bdc5db0285b9fbe0" 151 | }, 152 | "language_info": { 153 | "codemirror_mode": { 154 | "name": "ipython", 155 | "version": 3 156 | }, 157 | "file_extension": ".py", 158 | "mimetype": "text/x-python", 159 | "name": "python", 160 | "nbconvert_exporter": "python", 161 | "pygments_lexer": "ipython3", 162 | "version": "3.8.5" 163 | } 164 | }, 165 | "nbformat": 4, 166 | "nbformat_minor": 5 167 | } 168 | -------------------------------------------------------------------------------- /D11-bubble-sort/bubble-sort.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "affiliated-success", 6 | "metadata": {}, 7 | "source": [ 8 | "Bubble Sort makes up to n-1 passes over the list, comparing up to n-1 pairs of adjacent elements on each pass. This gives it a time complexity of O(n^2) in both the worst-case and average-case situations; the best case (an already sorted list) needs only a single O(n) pass when the sort stops as soon as a pass makes no swaps. O(n^2) is considered pretty horrible for a sorting algorithm.\n", 9 | "\n", 10 | "It does have an O(1) space complexity, but that isn't enough to compensate for its shortcomings in other fields."
11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 10, 16 | "id": "first-payment", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "def bubble_sort(ls):\n", 21 | " _swapped = True\n", 22 | "\n", 23 | " num_of_iterations = 0\n", 24 | "\n", 25 | " while(_swapped):\n", 26 | " _swapped = False\n", 27 | " for i in range(len(ls) - num_of_iterations - 1):\n", 28 | " if ls[i] > ls[i+1]:\n", 29 | " # Swap\n", 30 | " ls[i], ls[i+1] = ls[i+1], ls[i]\n", 31 | " _swapped = True\n", 32 | " num_of_iterations += 1\n", 33 | "\n", 34 | " return ls\n", 35 | "import random\n", 36 | "ls= random.sample(range(1, 10000),1000)\n", 37 | "bubble_sort()" 38 | ] 39 | } 40 | ], 41 | "metadata": { 42 | "kernelspec": { 43 | "display_name": "Python 3.8.2 64-bit", 44 | "language": "python", 45 | "name": "python38264bita03373cad2404f55bdc5db0285b9fbe0" 46 | }, 47 | "language_info": { 48 | "codemirror_mode": { 49 | "name": "ipython", 50 | "version": 3 51 | }, 52 | "file_extension": ".py", 53 | "mimetype": "text/x-python", 54 | "name": "python", 55 | "nbconvert_exporter": "python", 56 | "pygments_lexer": "ipython3", 57 | "version": "3.8.5" 58 | } 59 | }, 60 | "nbformat": 4, 61 | "nbformat_minor": 5 62 | } 63 | -------------------------------------------------------------------------------- /D13/D13-sql.txt: -------------------------------------------------------------------------------- 1 | /* problem 1 -weather station 19 problem Hacker rank */ 2 | SELECT 3 | ROUND(SQRT( 4 | POWER(MAX(LAT_N) - MIN(LAT_N), 2) 5 | + POWER(MAX(LONG_W) - MIN(LONG_W), 2) 6 | ), 4) 7 | FROM 8 | STATION; -------------------------------------------------------------------------------- /D14/D14-sql.txt: -------------------------------------------------------------------------------- 1 | SELECT ROUND(MEDIAN(Lat_N), 4) 2 | FROM Station; 3 | #solution for oracle only 4 | -------------------------------------------------------------------------------- /D15-sql-intermediate/problems.txt: 
-------------------------------------------------------------------------------- 1 | Q1. Write a query to calculate the average daily price change in Apple stock, grouped by year. 2 | 3 | answer :- select year,avg(close-open) as open_close_price from tutorial.aapl_historical_stock_price group by year order by year 4 | 5 | Q2. Write a query that calculates the lowest and highest prices that Apple stock achieved each month. 6 | 7 | answer :- select year,month, max(high) as MAXIMUM ,min(low) as MINIMUM from tutorial.aapl_historical_stock_price group by year,month ORDER by year,month 8 | 9 | 10 | Q3. Basic example of using the HAVING keyword 11 | answer:- SELECT year, 12 | month, 13 | MAX(high) AS month_high 14 | FROM tutorial.aapl_historical_stock_price 15 | GROUP BY year, month 16 | HAVING MAX(high) > 400 17 | ORDER BY year, month 18 | -------------------------------------------------------------------------------- /D16-cnn-basics/D16_cnn_basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "D16-cnn-basics.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "code", 21 | "metadata": { 22 | "colab": { 23 | "base_uri": "https://localhost:8080/" 24 | }, 25 | "id": "yEsg7V5bvGF5", 26 | "outputId": "d0ef687c-5e34-4243-dd38-04864e85326e" 27 | }, 28 | "source": [ 29 | "#resource = https://jhui.github.io/2017/03/16/CNN-Convolutional-neural-network/\n", 30 | "# and https://victorzhou.com/blog/intro-to-cnns-part-1/\n", 31 | "\n", 32 | "# run this notebook in colab if not having tensorflow in local\n", 33 | "import numpy as np\n", 34 | "from tensorflow import keras\n", 35 | "(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()\n", 
36 | "x_train.shape" 37 | ], 38 | "execution_count": 3, 39 | "outputs": [ 40 | { 41 | "output_type": "stream", 42 | "text": [ 43 | "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz\n", 44 | "11493376/11490434 [==============================] - 0s 0us/step\n" 45 | ], 46 | "name": "stdout" 47 | }, 48 | { 49 | "output_type": "execute_result", 50 | "data": { 51 | "text/plain": [ 52 | "(60000, 28, 28)" 53 | ] 54 | }, 55 | "metadata": { 56 | "tags": [] 57 | }, 58 | "execution_count": 3 59 | } 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "metadata": { 65 | "id": "tW3t1Zi0vtol" 66 | }, 67 | "source": [ 68 | "\n", 69 | "class Conv3x3:\n", 70 | " # A Convolution layer using 3x3 filters.\n", 71 | "\n", 72 | " def __init__(self, num_filters):\n", 73 | " self.num_filters = num_filters\n", 74 | "\n", 75 | " # filters is a 3d array with dimensions (num_filters, 3, 3)\n", 76 | " # We divide by 9 to reduce the variance of our initial values---> This is Xavier Initialisation\n", 77 | " self.filters = np.random.randn(num_filters, 3, 3) / 9\n", 78 | "\n", 79 | " def iterate_regions(self, image):\n", 80 | " '''\n", 81 | " Generates all possible 3x3 image regions using valid padding.\n", 82 | " - image is a 2d numpy array\n", 83 | " '''\n", 84 | " h, w = image.shape\n", 85 | "\n", 86 | " for i in range(h - 2):\n", 87 | " for j in range(w - 2):\n", 88 | " im_region = image[i:(i + 3), j:(j + 3)]\n", 89 | " # print(\"Regions\",'\\n',im_region,i,j)\n", 90 | " # region of image where our convolution will take place (a 3x3 grid of image)\n", 91 | " yield im_region, i, j\n", 92 | "\n", 93 | " def forward(self, input):\n", 94 | " '''\n", 95 | " Performs a forward pass of the conv layer using the given input.\n", 96 | " Returns a 3d numpy array with dimensions (h, w, num_filters).\n", 97 | " - input is a 2d numpy array\n", 98 | " '''\n", 99 | " h, w = input.shape\n", 100 | " #initialising all Feature maps with zeros (8 feature maps of shape 
26,26)\n", 101 | " output = np.zeros((h - 2, w - 2, self.num_filters))\n", 102 | "\n", 103 | " # for i in range(output.shape[2]):\n", 104 | " # print('Initialyy :-',output[i],output[i].shape)\n", 105 | "\n", 106 | " for im_region, i, j in self.iterate_regions(input):\n", 107 | " \n", 108 | " output[i, j] = np.sum(im_region * self.filters, axis=(1, 2))\n", 109 | "\n", 110 | " return output\n", 111 | "\n", 112 | "\n", 113 | "\n", 114 | "class MaxPool2:\n", 115 | " # A Max Pooling layer using a pool size of 2.\n", 116 | "\n", 117 | " def iterate_regions(self, image):\n", 118 | " '''\n", 119 | " Generates non-overlapping 2x2 image regions to pool over.\n", 120 | " - image is a 2d numpy array\n", 121 | " '''\n", 122 | " h, w, _ = image.shape\n", 123 | " new_h = h // 2\n", 124 | " new_w = w // 2\n", 125 | "\n", 126 | " for i in range(new_h):\n", 127 | " for j in range(new_w):\n", 128 | " im_region = image[(i * 2):(i * 2 + 2), (j * 2):(j * 2 + 2)]\n", 129 | " yield im_region, i, j\n", 130 | "\n", 131 | " def forward(self, input):\n", 132 | " '''\n", 133 | " Performs a forward pass of the maxpool layer using the given input.\n", 134 | " Returns a 3d numpy array with dimensions (h / 2, w / 2, num_filters).\n", 135 | " - input is a 3d numpy array with dimensions (h, w, num_filters)\n", 136 | " '''\n", 137 | " h, w, num_filters = input.shape\n", 138 | " output = np.zeros((h // 2, w // 2, num_filters))\n", 139 | "\n", 140 | " for im_region, i, j in self.iterate_regions(input):\n", 141 | " output[i, j] = np.amax(im_region, axis=(0, 1))\n", 142 | "\n", 143 | " return output\n", 144 | "\n" 145 | ], 146 | "execution_count": 28, 147 | "outputs": [] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "metadata": { 152 | "colab": { 153 | "base_uri": "https://localhost:8080/" 154 | }, 155 | "id": "aMHn2Lzovtsn", 156 | "outputId": "ec77aeed-c715-43fc-fb5b-108044d05bd2" 157 | }, 158 | "source": [ 159 | "conv = Conv3x3(8)\n", 160 | "pool = MaxPool2()\n", 161 | "\n", 162 | "output = 
conv.forward(x_train[0])\n", 163 | "output = pool.forward(output)\n", 164 | "print(output.shape) # (13, 13, 8)" 165 | ], 166 | "execution_count": 30, 167 | "outputs": [ 168 | { 169 | "output_type": "stream", 170 | "text": [ 171 | "(13, 13, 8)\n" 172 | ], 173 | "name": "stdout" 174 | } 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "metadata": { 180 | "id": "2jGQTIABvtwD" 181 | }, 182 | "source": [ 183 | "" 184 | ], 185 | "execution_count": null, 186 | "outputs": [] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "metadata": { 191 | "id": "w9fOp-kTvty8" 192 | }, 193 | "source": [ 194 | "" 195 | ], 196 | "execution_count": null, 197 | "outputs": [] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "metadata": { 202 | "id": "tcbwUaTivt1s" 203 | }, 204 | "source": [ 205 | "" 206 | ], 207 | "execution_count": null, 208 | "outputs": [] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "metadata": { 213 | "id": "D-cP_Zquvt4X" 214 | }, 215 | "source": [ 216 | "" 217 | ], 218 | "execution_count": null, 219 | "outputs": [] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "metadata": { 224 | "id": "ZH9Wm-Jivt67" 225 | }, 226 | "source": [ 227 | "" 228 | ], 229 | "execution_count": null, 230 | "outputs": [] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "metadata": { 235 | "id": "WoHG47zivt9q" 236 | }, 237 | "source": [ 238 | "" 239 | ], 240 | "execution_count": null, 241 | "outputs": [] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "metadata": { 246 | "id": "H8jWT3kOvuAD" 247 | }, 248 | "source": [ 249 | "" 250 | ], 251 | "execution_count": null, 252 | "outputs": [] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "metadata": { 257 | "id": "xfYjVvkUvuCJ" 258 | }, 259 | "source": [ 260 | "" 261 | ], 262 | "execution_count": null, 263 | "outputs": [] 264 | } 265 | ] 266 | } -------------------------------------------------------------------------------- /D2-logistic-reg/D2-logistic-regression.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 38, 6 | "id": "precious-pharmaceutical", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "#import required modules\n", 11 | "\n", 12 | "\n", 13 | "import warnings\n", 14 | "warnings.filterwarnings('ignore' )\n", 15 | "\n", 16 | "\n", 17 | "import numpy as np\n", 18 | " \n", 19 | "class LogisticRegression:\n", 20 | " def __init__(self,x,y): \n", 21 | " self.intercept = np.ones((x.shape[0], 1)) \n", 22 | " self.x = np.concatenate((self.intercept, x), axis=1)\n", 23 | " self.weight = np.zeros(self.x.shape[1])\n", 24 | " self.y = y\n", 25 | " \n", 26 | " #Sigmoid method\n", 27 | " def sigmoid(self, x, weight):\n", 28 | " z = np.dot(x, weight)\n", 29 | " return 1 / (1 + np.exp(-z))\n", 30 | " \n", 31 | " #method to calculate the Loss\n", 32 | " # h- predicted value\n", 33 | " # y- true value\n", 34 | " def loss(self, h, y):\n", 35 | " return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()\n", 36 | " \n", 37 | " #Method for calculating the gradients\n", 38 | " def gradient_descent(self, X, h, y):\n", 39 | " return np.dot(X.T, (h - y)) / y.shape[0]\n", 40 | " \n", 41 | " \n", 42 | " def fit(self, lr , iterations):\n", 43 | " \n", 44 | " # this is batch gradient descent\n", 45 | " for i in range(iterations):\n", 46 | " if i==0:\n", 47 | " print(\"For first epochs weights are \",self.weight,\"\\n\")\n", 48 | " z = self.sigmoid(self.x, self.weight)\n", 49 | " \n", 50 | " loss = self.loss(z,self.y)\n", 51 | " \n", 52 | " delta_w = self.gradient_descent(self.x , z, self.y)\n", 53 | " \n", 54 | " #Updating the weights\n", 55 | " self.weight -= lr * delta_w\n", 56 | " \n", 57 | " return print('fitted successfully to data',\"\\n\")\n", 58 | " \n", 59 | " #Method to predict the class label.\n", 60 | " def predict(self, x_new , treshold):\n", 61 | " x_new = np.concatenate((self.intercept, x_new), axis=1)\n", 62 | " 
result = self.sigmoid(x_new, self.weight)\n", 63 | " result = result >= treshold\n", 64 | " y_pred = np.zeros(result.shape[0])\n", 65 | " for i in range(len(y_pred)):\n", 66 | " if result[i] == True: \n", 67 | " y_pred[i] = 1\n", 68 | " else:\n", 69 | " continue\n", 70 | " \n", 71 | " return y_pred" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 40, 77 | "id": "determined-things", 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "name": "stdout", 82 | "output_type": "stream", 83 | "text": [ 84 | "For first epochs weights are [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 85 | " 0. 0. 0. 0. 0. 0. 0.] \n", 86 | "\n", 87 | "fitted successfully to data \n", 88 | "\n", 89 | "accuracy -> 0.9103690685413005\n", 90 | "CPU times: user 5.77 s, sys: 0 ns, total: 5.77 s\n", 91 | "Wall time: 5.77 s\n" 92 | ] 93 | } 94 | ], 95 | "source": [ 96 | "%%time\n", 97 | "from sklearn.datasets import load_breast_cancer\n", 98 | " \n", 99 | "#Loading the data\n", 100 | "data = load_breast_cancer()\n", 101 | " \n", 102 | "#Preparing the data\n", 103 | "x = data.data\n", 104 | "y = data.target\n", 105 | " \n", 106 | "#creating the class Object\n", 107 | "regressor = LogisticRegression(x,y)\n", 108 | " \n", 109 | "#\n", 110 | "regressor.fit(0.001 , 50000)\n", 111 | " \n", 112 | "y_pred = regressor.predict(x,0.5)\n", 113 | " \n", 114 | "print('accuracy -> {}'.format(sum(y_pred == y) / y.shape[0]))" 115 | ] 116 | } 117 | ], 118 | "metadata": { 119 | "kernelspec": { 120 | "display_name": "Python 3.8.5 64-bit", 121 | "language": "python", 122 | "name": "python385jvsc74a57bd0916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" 123 | }, 124 | "language_info": { 125 | "codemirror_mode": { 126 | "name": "ipython", 127 | "version": 3 128 | }, 129 | "file_extension": ".py", 130 | "mimetype": "text/x-python", 131 | "name": "python", 132 | "nbconvert_exporter": "python", 133 | "pygments_lexer": "ipython3", 134 | "version": "3.8.5" 135 
| } 136 | }, 137 | "nbformat": 4, 138 | "nbformat_minor": 5 139 | } 140 | -------------------------------------------------------------------------------- /D3-rmsprop/D3-RMSPROP-logistic-reg.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 91, 6 | "id": "designed-reader", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "#import required modules\n", 11 | "\n", 12 | "# link to article = https://ruder.io/optimizing-gradient-descent/\n", 13 | "import warnings\n", 14 | "warnings.filterwarnings('ignore' )\n", 15 | "\n", 16 | "\n", 17 | "import numpy as np\n", 18 | " \n", 19 | "class LogisticRegression:\n", 20 | " def __init__(self,x,y): \n", 21 | " self.intercept = np.ones((x.shape[0], 1)) \n", 22 | " self.x = np.concatenate((self.intercept, x), axis=1)\n", 23 | " self.weight = np.zeros(self.x.shape[1])\n", 24 | " self.y = y\n", 25 | " \n", 26 | " #Sigmoid method\n", 27 | " def sigmoid(self, x, weight):\n", 28 | " z = np.dot(x, weight)\n", 29 | " return 1 / (1 + np.exp(-z))\n", 30 | " \n", 31 | " #method to calculate the Loss\n", 32 | " # h- predicted value\n", 33 | " # y- true value\n", 34 | " def loss(self, h, y):\n", 35 | " return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()\n", 36 | " \n", 37 | " #Method for calculating the gradients\n", 38 | " def gradient_descent(self, X, h, y):\n", 39 | " return np.dot(X.T, (h - y)) / y.shape[0]\n", 40 | " \n", 41 | " \n", 42 | " def fit(self, lr , iterations,decay_factor = 0.9,eps=0.0000001):\n", 43 | " \n", 44 | " grad_sq =0\n", 45 | " \n", 46 | " # index will be used to randomly select rows\n", 47 | " index_ = [i for i in range(len(self.x)-1)]\n", 48 | " \n", 49 | " for i in range(iterations):\n", 50 | " \n", 51 | " \n", 52 | " random_index = random.choice(index_)\n", 53 | " \n", 54 | " # STOCHASTIC GRADIENT DESCENT selcting only one row for X and Y\n", 55 | " row_x = 
self.x[random_index:random_index+1,:]\n", 56 | " \n", 57 | " row_y = self.y[random_index:random_index+1]\n", 58 | " \n", 59 | " if i==0:\n", 60 | " print(\"For first epochs weights are \",self.weight,\"\\n\")\n", 61 | " z = self.sigmoid(row_x, self.weight)\n", 62 | " \n", 63 | " loss = self.loss(z,row_y)\n", 64 | " \n", 65 | " delta_w = self.gradient_descent(row_x , z, row_y)\n", 66 | " \n", 67 | " # moving averages\n", 68 | " grad_sq = decay_factor * grad_sq + (1-decay_factor)*(delta_w**2)\n", 69 | " \n", 70 | " #Updating the weights accoring to RMS prop \n", 71 | " # eps is added because at times grad_sq will be close to zero so to prevent shooting sqrt to infinite\n", 72 | " self.weight -= (lr/np.sqrt(grad_sq+eps))*delta_w\n", 73 | " \n", 74 | " return print('fitted successfully to data',\"\\n\")\n", 75 | " \n", 76 | " #Method to predict the class label.\n", 77 | " def predict(self, x_new , treshold):\n", 78 | " x_new = np.concatenate((self.intercept, x_new), axis=1)\n", 79 | " result = self.sigmoid(x_new, self.weight)\n", 80 | " result = result >= treshold\n", 81 | " y_pred = np.zeros(result.shape[0])\n", 82 | " for i in range(len(y_pred)):\n", 83 | " if result[i] == True: \n", 84 | " y_pred[i] = 1\n", 85 | " else:\n", 86 | " continue\n", 87 | " \n", 88 | " return y_pred" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 92, 94 | "id": "brutal-situation", 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "name": "stdout", 99 | "output_type": "stream", 100 | "text": [ 101 | "For first epochs weights are [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 102 | " 0. 0. 0. 0. 0. 0. 0.] 
\n", 103 | "\n", 104 | "fitted successfully to data \n", 105 | "\n", 106 | "accuracy -> 0.929701230228471\n", 107 | "CPU times: user 1.33 s, sys: 0 ns, total: 1.33 s\n", 108 | "Wall time: 1.33 s\n" 109 | ] 110 | } 111 | ], 112 | "source": [ 113 | "%%time\n", 114 | "from sklearn.datasets import load_breast_cancer\n", 115 | " \n", 116 | "#Loading the data\n", 117 | "data = load_breast_cancer()\n", 118 | " \n", 119 | "#Preparing the data\n", 120 | "x = data.data\n", 121 | "y = data.target\n", 122 | " \n", 123 | "#creating the class Object\n", 124 | "regressor = LogisticRegression(x,y)\n", 125 | " \n", 126 | "#\n", 127 | "regressor.fit(lr= 0.0001 , iterations=50000)\n", 128 | " \n", 129 | "y_pred = regressor.predict(x,0.5)\n", 130 | " \n", 131 | "print('accuracy -> {}'.format(sum(y_pred == y) / y.shape[0]))" 132 | ] 133 | } 134 | ], 135 | "metadata": { 136 | "kernelspec": { 137 | "display_name": "Python 3.8.5 64-bit", 138 | "language": "python", 139 | "name": "python385jvsc74a57bd0916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" 140 | }, 141 | "language_info": { 142 | "codemirror_mode": { 143 | "name": "ipython", 144 | "version": 3 145 | }, 146 | "file_extension": ".py", 147 | "mimetype": "text/x-python", 148 | "name": "python", 149 | "nbconvert_exporter": "python", 150 | "pygments_lexer": "ipython3", 151 | "version": "3.8.5" 152 | } 153 | }, 154 | "nbformat": 4, 155 | "nbformat_minor": 5 156 | } 157 | -------------------------------------------------------------------------------- /D4-knn/D4-knn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 104, 6 | "id": "annual-lotus", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import numpy as np\n", 11 | "import pandas as pd\n", 12 | "\n", 13 | "# loading data\n", 14 | "from sklearn.metrics import accuracy_score\n", 15 | "from sklearn.datasets import load_wine\n", 16 | "from 
sklearn.metrics import accuracy_score\n", 17 | "from sklearn.model_selection import train_test_split\n", 18 | "\n", 19 | "from numpy.random import randint\n", 20 | "#Loading the Data\n", 21 | "data= load_wine()\n", 22 | " \n", 23 | "# Store features matrix in X\n", 24 | "X= data.data\n", 25 | "#Store target vector in \n", 26 | "y= data.target\n", 27 | " \n", 28 | "X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.33, random_state=42)" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 114, 34 | "id": "hundred-ministry", 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "name": "stdout", 39 | "output_type": "stream", 40 | "text": [ 41 | "0.7457627118644068\n" 42 | ] 43 | } 44 | ], 45 | "source": [ 46 | "class KNN:\n", 47 | " def __init__(self,n_neighbours=3):\n", 48 | " self.n_neighbours = n_neighbours\n", 49 | " self.top_k = []\n", 50 | " self.x = []\n", 51 | " self.y = []\n", 52 | " self.distances = []\n", 53 | " \n", 54 | " \n", 55 | " def load(self,x,y):\n", 56 | " \n", 57 | " if type(x) == np.ndarray and len(x.shape)>1 and type(y) == np.ndarray :\n", 58 | " if x.shape[0]==y.shape[0]:\n", 59 | " self.x = x\n", 60 | " self.y = y \n", 61 | " else:\n", 62 | " raise ValueError('Number of rows do not match for X and Y' )\n", 63 | " else:\n", 64 | " raise ValueError('Input Data not in Valid Format. 
Type should be N dimensional Numpy array')\n", 65 | " \n", 66 | " def predict(self,test_data):\n", 67 | " \n", 68 | " if type(test_data)==np.ndarray and test_data.shape[1]==self.x.shape[1]:\n", 69 | " \n", 70 | " prediction=[]\n", 71 | " \n", 72 | " for t in range(test_data.shape[0]):\n", 73 | " \n", 74 | " # Euclidean distance of test data point wrt to all Training rows\n", 75 | " ls = [ np.linalg.norm(self.x[i]-test_data[t]) for i in range(self.x.shape[0]) ] \n", 76 | " \n", 77 | " # indexes of sorted euclidean distances \n", 78 | " top_k_index = np.argsort(ls)\n", 79 | "\n", 80 | " # slicing top k predictions\n", 81 | " self.top_k = self.y[top_k_index[:self.n_neighbours]]\n", 82 | "\n", 83 | " # converting np array of shape (n,1) to (n,)\n", 84 | " self.top_k = self.top_k.reshape(self.top_k.shape[0])\n", 85 | " \n", 86 | " # storing predictions( actual labels ) we require\n", 87 | " prediction.append(np.bincount(self.top_k).argmax())\n", 88 | " \n", 89 | " return prediction\n", 90 | " else:\n", 91 | " raise ValueError('Test Data dimensions don\\'t match with dimensions of Training data')\n", 92 | " \n", 93 | "f = KNN(n_neighbours=50)\n", 94 | "f.load(X_train,y_train)\n", 95 | "prediction=f.predict(X_test)\n", 96 | "print(accuracy_score(y_test,prediction))\n" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "id": "lesser-poetry", 102 | "metadata": {}, 103 | "source": [ 104 | "### Sklearn's KNN" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 115, 110 | "id": "insured-reminder", 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "name": "stdout", 115 | "output_type": "stream", 116 | "text": [ 117 | "0.7457627118644068\n" 118 | ] 119 | } 120 | ], 121 | "source": [ 122 | "from sklearn.neighbors import KNeighborsClassifier\n", 123 | "neigh = KNeighborsClassifier(n_neighbors=50)\n", 124 | "neigh.fit(X_train,y_train)\n", 125 | "p=neigh.predict(X_test)\n", 126 | "\n", 127 | "print(accuracy_score(y_test,p))" 128 | ] 129 | } 130 
| ], 131 | "metadata": { 132 | "kernelspec": { 133 | "display_name": "Python 3.8.2 64-bit", 134 | "language": "python", 135 | "name": "python38264bita03373cad2404f55bdc5db0285b9fbe0" 136 | }, 137 | "language_info": { 138 | "codemirror_mode": { 139 | "name": "ipython", 140 | "version": 3 141 | }, 142 | "file_extension": ".py", 143 | "mimetype": "text/x-python", 144 | "name": "python", 145 | "nbconvert_exporter": "python", 146 | "pygments_lexer": "ipython3", 147 | "version": "3.8.5" 148 | } 149 | }, 150 | "nbformat": 4, 151 | "nbformat_minor": 5 152 | } 153 | -------------------------------------------------------------------------------- /D5-kmeans/D5-Kmeans.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "latter-elements", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import numpy as np\n", 11 | "import random\n", 12 | "from scipy.spatial.distance import cdist \n", 13 | "\n", 14 | "\n", 15 | "# make sure number of centroids are not more than Number of data points in dataset\n", 16 | "class Kmeans: \n", 17 | " def __init__(self,centroids=3,iterations=1000):\n", 18 | " # Hyperparameter - number of centroids \n", 19 | " self.centroids = centroids\n", 20 | " \n", 21 | " # data to be used for clustering\n", 22 | " self.data = []\n", 23 | " \n", 24 | " # list containing centroids selected at each iteration\n", 25 | " self.centroids_ls =[]\n", 26 | " #number of iterations\n", 27 | " self.iterations = iterations \n", 28 | " \n", 29 | " def load(self,data): \n", 30 | " \n", 31 | " if type(data) == np.ndarray and len(data.shape)>1:\n", 32 | " \n", 33 | " self.data = data\n", 34 | " else:\n", 35 | " raise ValueError('Input Data not in Valid Format. 
Type should be N dimensional Numpy array') \n", 36 | " \n", 37 | " def fit(self):\n", 38 | " # if number of centroids more than number of rows in dataset then run this\n", 39 | " if self.centroids < len(self.data):\n", 40 | " \n", 41 | " # selecting random rows indexes\n", 42 | " n_random_rows = np.random.randint(self.data.shape[0], size=self.centroids)\n", 43 | " centroids_ = self.data[n_random_rows, :]\n", 44 | "\n", 45 | " # storing centroids we get at each stage --- ADDITIONAL STEP\n", 46 | " self.centroids_ls.append(centroids_)\n", 47 | "\n", 48 | " # distances from centroids to rest of other points\n", 49 | " distances = cdist(self.data,centroids_,'euclidean')\n", 50 | "\n", 51 | " #Centroid with the minimum Distance on first Iteration\n", 52 | " # points will always vary from 0 to centroids\n", 53 | " points = np.array([np.argmin(i) for i in distances]) \n", 54 | "\n", 55 | "\n", 56 | " for _ in range(self.iterations):\n", 57 | "\n", 58 | " centroids_ =[]\n", 59 | " # For each centoid we will see which point belongs to that class and then take mean to \n", 60 | " # update centroid of that class\n", 61 | " for c in range(self.centroids):\n", 62 | "\n", 63 | " #Updating Centroids by taking mean of Cluster it belongs to\n", 64 | " temp_cent = self.data[points==c].mean(axis=0)\n", 65 | "\n", 66 | " centroids_.append(temp_cent)\n", 67 | "\n", 68 | "\n", 69 | " # on every iteration storing new centoids formed\n", 70 | " self.centroids_ls.append(centroids_)\n", 71 | "\n", 72 | " centroids_ = np.vstack(centroids_)\n", 73 | " distances = cdist(self.data, centroids_ ,'euclidean')\n", 74 | " points = np.array([np.argmin(i) for i in distances])\n", 75 | " return points\n", 76 | " else:\n", 77 | " raise ValueError(\"Number of Centroids more than Number of rows of Dataset\")\n" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 2, 83 | "id": "distributed-benefit", 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "from sklearn.datasets 
import load_iris\n", 88 | "#Loading the data\n", 89 | "data = load_iris()\n", 90 | " \n", 91 | "#Preparing the data\n", 92 | "x = data.data\n", 93 | "y = data.target\n", 94 | " \n", 95 | "k = Kmeans(centroids=3,iterations=100)\n", 96 | "\n", 97 | "k.load(x)\n", 98 | "\n", 99 | "preds = k.fit()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "id": "known-start", 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [] 109 | } 110 | ], 111 | "metadata": { 112 | "kernelspec": { 113 | "display_name": "Python 3.8.5 64-bit", 114 | "language": "python", 115 | "name": "python385jvsc74a57bd0916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" 116 | }, 117 | "language_info": { 118 | "codemirror_mode": { 119 | "name": "ipython", 120 | "version": 3 121 | }, 122 | "file_extension": ".py", 123 | "mimetype": "text/x-python", 124 | "name": "python", 125 | "nbconvert_exporter": "python", 126 | "pygments_lexer": "ipython3", 127 | "version": "3.8.5" 128 | } 129 | }, 130 | "nbformat": 4, 131 | "nbformat_minor": 5 132 | } 133 | -------------------------------------------------------------------------------- /D6-naivebayes/D6-naive-bayes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "id": "liked-vertex", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# why we are ignoring Marginal prob term ( denominator) of Naive bayes\n", 11 | "# https://chrisalbon.com/machine_learning/naive_bayes/naive_bayes_classifier_from_scratch/\n", 12 | "\n", 13 | "import pandas as pd\n", 14 | "import numpy as np" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 12, 20 | "id": "junior-envelope", 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "class NaiveBayesClassifier():\n", 25 | " '''\n", 26 | " Bayes Theorem form\n", 27 | " P(y|X) = P(X|y) * P(y) / P(X)\n", 28 | " '''\n", 29 | 
" def calc_prior(self, features, target):\n", 30 | " '''\n", 31 | " prior probability P(y)\n", 32 | " calculate prior probabilities\n", 33 | " '''\n", 34 | " self.prior = (features.groupby(target).apply(lambda x: len(x)) / self.rows).to_numpy()\n", 35 | "\n", 36 | " return self.prior\n", 37 | " \n", 38 | " def calc_statistics(self, features, target):\n", 39 | " '''\n", 40 | " calculate mean, variance for each column and convert to numpy array\n", 41 | " ''' \n", 42 | " self.mean = features.groupby(target).apply(np.mean).to_numpy()\n", 43 | " self.var = features.groupby(target).apply(np.var).to_numpy()\n", 44 | " \n", 45 | " return self.mean, self.var\n", 46 | " \n", 47 | " def gaussian_density(self, class_idx, x): \n", 48 | " '''\n", 49 | " calculate probability from gaussian density function (normally distributed)\n", 50 | " we will assume that probability of specific target value given specific class is normally distributed \n", 51 | " \n", 52 | " probability density function for gaussain dist. 
is:\n", 53 | " (1/√2pi*σ) * exp((-1/2)*((x-μ)^2)/(2*σ²)), where μ is mean, σ² is variance, σ is quare root of variance (standard deviation)\n", 54 | " '''\n", 55 | " mean = self.mean[class_idx]\n", 56 | " var = self.var[class_idx]\n", 57 | " numerator = np.exp((-1/2)*((x-mean)**2) / (2 * var))\n", 58 | " denominator = np.sqrt(2 * np.pi * var)\n", 59 | " prob = numerator / denominator\n", 60 | " return prob\n", 61 | " \n", 62 | " def calc_posterior(self, x):\n", 63 | " posteriors = []\n", 64 | "\n", 65 | " # calculate posterior probability for each class\n", 66 | " for i in range(self.count):\n", 67 | " prior = self.prior[i] ## use the log to make it more numerically stable\n", 68 | " conditional = np.sum(self.gaussian_density(i, x)) # use the log to make it more numerically stable\n", 69 | " posterior = prior * conditional\n", 70 | " posteriors.append(posterior)\n", 71 | " # return class with highest posterior probability\n", 72 | " return self.classes[np.argmax(posteriors)]\n", 73 | " def fit(self, features, target):\n", 74 | " self.classes = np.unique(target)\n", 75 | " self.count = len(self.classes)\n", 76 | " self.feature_nums = features.shape[1]\n", 77 | " self.rows = features.shape[0]\n", 78 | " \n", 79 | " self.calc_statistics(features, target)\n", 80 | " self.calc_prior(features, target)\n", 81 | " \n", 82 | " def predict(self, features):\n", 83 | " preds = [self.calc_posterior(f) for f in features.to_numpy()]\n", 84 | " return preds\n", 85 | "\n", 86 | " def accuracy(self, y_test, y_pred):\n", 87 | " accuracy = np.sum(y_test == y_pred) / len(y_test)\n", 88 | " return accuracy\n" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 13, 94 | "id": "alternative-police", 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "name": "stdout", 99 | "output_type": "stream", 100 | "text": [ 101 | "(150, 5)\n", 102 | "(100, 4) (100,)\n", 103 | "(50, 4) (50,)\n" 104 | ] 105 | } 106 | ], 107 | "source": [ 108 | "# upload Iris dataset - shape is (150, 
5)\n", 109 | "df = pd.read_csv(\"/home/sahib/100days/D6-naivebayes/iris.csv\")\n", 110 | "# shuffle dataset with sample\n", 111 | "df = df.sample(frac=1, random_state=1).reset_index(drop=True)\n", 112 | "# df shape\n", 113 | "print(df.shape)\n", 114 | "# set features and target\n", 115 | "X, y = df.iloc[:, :-1], df.iloc[:, -1]\n", 116 | "\n", 117 | "\n", 118 | "# # split on train and test 0.7/0.3\n", 119 | "X_train, X_test, y_train, y_test = X[:100], X[100:], y[:100], y[100:]\n", 120 | "\n", 121 | "print(X_train.shape, y_train.shape)\n", 122 | "print(X_test.shape, y_test.shape)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 14, 128 | "id": "champion-closure", 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "# train the model\n", 133 | "x = NaiveBayesClassifier()\n", 134 | "\n", 135 | "\n", 136 | "x.fit(X_train, y_train)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 15, 142 | "id": "sixth-allah", 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "0.92" 149 | ] 150 | }, 151 | "execution_count": 15, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "predictions = x.predict(X_test)\n", 158 | "x.accuracy(y_test, predictions)" 159 | ] 160 | } 161 | ], 162 | "metadata": { 163 | "kernelspec": { 164 | "display_name": "Python 3.8.5 64-bit", 165 | "language": "python", 166 | "name": "python385jvsc74a57bd0916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" 167 | }, 168 | "language_info": { 169 | "codemirror_mode": { 170 | "name": "ipython", 171 | "version": 3 172 | }, 173 | "file_extension": ".py", 174 | "mimetype": "text/x-python", 175 | "name": "python", 176 | "nbconvert_exporter": "python", 177 | "pygments_lexer": "ipython3", 178 | "version": "3.8.5" 179 | } 180 | }, 181 | "nbformat": 4, 182 | "nbformat_minor": 5 183 | } 184 | 
-------------------------------------------------------------------------------- /D6-naivebayes/iris.csv: -------------------------------------------------------------------------------- 1 | "sepal.length","sepal.width","petal.length","petal.width","variety" 2 | 5.1,3.5,1.4,.2,"Setosa" 3 | 4.9,3,1.4,.2,"Setosa" 4 | 4.7,3.2,1.3,.2,"Setosa" 5 | 4.6,3.1,1.5,.2,"Setosa" 6 | 5,3.6,1.4,.2,"Setosa" 7 | 5.4,3.9,1.7,.4,"Setosa" 8 | 4.6,3.4,1.4,.3,"Setosa" 9 | 5,3.4,1.5,.2,"Setosa" 10 | 4.4,2.9,1.4,.2,"Setosa" 11 | 4.9,3.1,1.5,.1,"Setosa" 12 | 5.4,3.7,1.5,.2,"Setosa" 13 | 4.8,3.4,1.6,.2,"Setosa" 14 | 4.8,3,1.4,.1,"Setosa" 15 | 4.3,3,1.1,.1,"Setosa" 16 | 5.8,4,1.2,.2,"Setosa" 17 | 5.7,4.4,1.5,.4,"Setosa" 18 | 5.4,3.9,1.3,.4,"Setosa" 19 | 5.1,3.5,1.4,.3,"Setosa" 20 | 5.7,3.8,1.7,.3,"Setosa" 21 | 5.1,3.8,1.5,.3,"Setosa" 22 | 5.4,3.4,1.7,.2,"Setosa" 23 | 5.1,3.7,1.5,.4,"Setosa" 24 | 4.6,3.6,1,.2,"Setosa" 25 | 5.1,3.3,1.7,.5,"Setosa" 26 | 4.8,3.4,1.9,.2,"Setosa" 27 | 5,3,1.6,.2,"Setosa" 28 | 5,3.4,1.6,.4,"Setosa" 29 | 5.2,3.5,1.5,.2,"Setosa" 30 | 5.2,3.4,1.4,.2,"Setosa" 31 | 4.7,3.2,1.6,.2,"Setosa" 32 | 4.8,3.1,1.6,.2,"Setosa" 33 | 5.4,3.4,1.5,.4,"Setosa" 34 | 5.2,4.1,1.5,.1,"Setosa" 35 | 5.5,4.2,1.4,.2,"Setosa" 36 | 4.9,3.1,1.5,.2,"Setosa" 37 | 5,3.2,1.2,.2,"Setosa" 38 | 5.5,3.5,1.3,.2,"Setosa" 39 | 4.9,3.6,1.4,.1,"Setosa" 40 | 4.4,3,1.3,.2,"Setosa" 41 | 5.1,3.4,1.5,.2,"Setosa" 42 | 5,3.5,1.3,.3,"Setosa" 43 | 4.5,2.3,1.3,.3,"Setosa" 44 | 4.4,3.2,1.3,.2,"Setosa" 45 | 5,3.5,1.6,.6,"Setosa" 46 | 5.1,3.8,1.9,.4,"Setosa" 47 | 4.8,3,1.4,.3,"Setosa" 48 | 5.1,3.8,1.6,.2,"Setosa" 49 | 4.6,3.2,1.4,.2,"Setosa" 50 | 5.3,3.7,1.5,.2,"Setosa" 51 | 5,3.3,1.4,.2,"Setosa" 52 | 7,3.2,4.7,1.4,"Versicolor" 53 | 6.4,3.2,4.5,1.5,"Versicolor" 54 | 6.9,3.1,4.9,1.5,"Versicolor" 55 | 5.5,2.3,4,1.3,"Versicolor" 56 | 6.5,2.8,4.6,1.5,"Versicolor" 57 | 5.7,2.8,4.5,1.3,"Versicolor" 58 | 6.3,3.3,4.7,1.6,"Versicolor" 59 | 4.9,2.4,3.3,1,"Versicolor" 60 | 6.6,2.9,4.6,1.3,"Versicolor" 61 | 
5.2,2.7,3.9,1.4,"Versicolor" 62 | 5,2,3.5,1,"Versicolor" 63 | 5.9,3,4.2,1.5,"Versicolor" 64 | 6,2.2,4,1,"Versicolor" 65 | 6.1,2.9,4.7,1.4,"Versicolor" 66 | 5.6,2.9,3.6,1.3,"Versicolor" 67 | 6.7,3.1,4.4,1.4,"Versicolor" 68 | 5.6,3,4.5,1.5,"Versicolor" 69 | 5.8,2.7,4.1,1,"Versicolor" 70 | 6.2,2.2,4.5,1.5,"Versicolor" 71 | 5.6,2.5,3.9,1.1,"Versicolor" 72 | 5.9,3.2,4.8,1.8,"Versicolor" 73 | 6.1,2.8,4,1.3,"Versicolor" 74 | 6.3,2.5,4.9,1.5,"Versicolor" 75 | 6.1,2.8,4.7,1.2,"Versicolor" 76 | 6.4,2.9,4.3,1.3,"Versicolor" 77 | 6.6,3,4.4,1.4,"Versicolor" 78 | 6.8,2.8,4.8,1.4,"Versicolor" 79 | 6.7,3,5,1.7,"Versicolor" 80 | 6,2.9,4.5,1.5,"Versicolor" 81 | 5.7,2.6,3.5,1,"Versicolor" 82 | 5.5,2.4,3.8,1.1,"Versicolor" 83 | 5.5,2.4,3.7,1,"Versicolor" 84 | 5.8,2.7,3.9,1.2,"Versicolor" 85 | 6,2.7,5.1,1.6,"Versicolor" 86 | 5.4,3,4.5,1.5,"Versicolor" 87 | 6,3.4,4.5,1.6,"Versicolor" 88 | 6.7,3.1,4.7,1.5,"Versicolor" 89 | 6.3,2.3,4.4,1.3,"Versicolor" 90 | 5.6,3,4.1,1.3,"Versicolor" 91 | 5.5,2.5,4,1.3,"Versicolor" 92 | 5.5,2.6,4.4,1.2,"Versicolor" 93 | 6.1,3,4.6,1.4,"Versicolor" 94 | 5.8,2.6,4,1.2,"Versicolor" 95 | 5,2.3,3.3,1,"Versicolor" 96 | 5.6,2.7,4.2,1.3,"Versicolor" 97 | 5.7,3,4.2,1.2,"Versicolor" 98 | 5.7,2.9,4.2,1.3,"Versicolor" 99 | 6.2,2.9,4.3,1.3,"Versicolor" 100 | 5.1,2.5,3,1.1,"Versicolor" 101 | 5.7,2.8,4.1,1.3,"Versicolor" 102 | 6.3,3.3,6,2.5,"Virginica" 103 | 5.8,2.7,5.1,1.9,"Virginica" 104 | 7.1,3,5.9,2.1,"Virginica" 105 | 6.3,2.9,5.6,1.8,"Virginica" 106 | 6.5,3,5.8,2.2,"Virginica" 107 | 7.6,3,6.6,2.1,"Virginica" 108 | 4.9,2.5,4.5,1.7,"Virginica" 109 | 7.3,2.9,6.3,1.8,"Virginica" 110 | 6.7,2.5,5.8,1.8,"Virginica" 111 | 7.2,3.6,6.1,2.5,"Virginica" 112 | 6.5,3.2,5.1,2,"Virginica" 113 | 6.4,2.7,5.3,1.9,"Virginica" 114 | 6.8,3,5.5,2.1,"Virginica" 115 | 5.7,2.5,5,2,"Virginica" 116 | 5.8,2.8,5.1,2.4,"Virginica" 117 | 6.4,3.2,5.3,2.3,"Virginica" 118 | 6.5,3,5.5,1.8,"Virginica" 119 | 7.7,3.8,6.7,2.2,"Virginica" 120 | 7.7,2.6,6.9,2.3,"Virginica" 121 | 6,2.2,5,1.5,"Virginica" 122 
| 6.9,3.2,5.7,2.3,"Virginica" 123 | 5.6,2.8,4.9,2,"Virginica" 124 | 7.7,2.8,6.7,2,"Virginica" 125 | 6.3,2.7,4.9,1.8,"Virginica" 126 | 6.7,3.3,5.7,2.1,"Virginica" 127 | 7.2,3.2,6,1.8,"Virginica" 128 | 6.2,2.8,4.8,1.8,"Virginica" 129 | 6.1,3,4.9,1.8,"Virginica" 130 | 6.4,2.8,5.6,2.1,"Virginica" 131 | 7.2,3,5.8,1.6,"Virginica" 132 | 7.4,2.8,6.1,1.9,"Virginica" 133 | 7.9,3.8,6.4,2,"Virginica" 134 | 6.4,2.8,5.6,2.2,"Virginica" 135 | 6.3,2.8,5.1,1.5,"Virginica" 136 | 6.1,2.6,5.6,1.4,"Virginica" 137 | 7.7,3,6.1,2.3,"Virginica" 138 | 6.3,3.4,5.6,2.4,"Virginica" 139 | 6.4,3.1,5.5,1.8,"Virginica" 140 | 6,3,4.8,1.8,"Virginica" 141 | 6.9,3.1,5.4,2.1,"Virginica" 142 | 6.7,3.1,5.6,2.4,"Virginica" 143 | 6.9,3.1,5.1,2.3,"Virginica" 144 | 5.8,2.7,5.1,1.9,"Virginica" 145 | 6.8,3.2,5.9,2.3,"Virginica" 146 | 6.7,3.3,5.7,2.5,"Virginica" 147 | 6.7,3,5.2,2.3,"Virginica" 148 | 6.3,2.5,5,1.9,"Virginica" 149 | 6.5,3,5.2,2,"Virginica" 150 | 6.2,3.4,5.4,2.3,"Virginica" 151 | 5.9,3,5.1,1.8,"Virginica" -------------------------------------------------------------------------------- /D7-GMM/D7-Gaussian-mixture-model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": { 5 | "b11da248-8866-48db-85c7-d6216be795f7.png": { 6 | "image/png": "" 7 | }, 8 | "f7a898b0-b393-418b-a4e9-f05a773fb32a.png": { 9 | "image/png": "" 10 | } 11 | }, 12 | "cell_type": "markdown", 13 | "id": "obvious-store", 14 | "metadata": {}, 15 | "source": [ 16 | "## GAUSSIAN MIXTURE MODELS are basically composed of Expectation and Maximization step\n", 17 | "### 1. Expectation Step in Multivariate Case\n", 18 | "![image.png](attachment:b11da248-8866-48db-85c7-d6216be795f7.png)\n", 19 | "\n", 20 | "### 2. 
Expectation Step in Single Variable Case\n", 21 | "![image.png](attachment:f7a898b0-b393-418b-a4e9-f05a773fb32a.png)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "id": "stuffed-documentary", 27 | "metadata": {}, 28 | "source": [ 29 | "The only difference between one-dimensional and multi-dimensional data for GMM is that, instead of the variance,\n", 30 | "\n", 31 | "we capture the covariance.\n", 32 | "\n", 33 | "To learn what covariance is, see [here](https://www.youtube.com/watch?v=WBlnwvjfMtQ)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 161, 39 | "id": "impossible-recipient", 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "Dataset shape: (300,)\n", 47 | "initially [-5.29908696 -4.88724168 -2.25448289] [0.78700799 0.3781514 0.54445358] \n", 48 | "\n" 49 | ] 50 | } 51 | ], 52 | "source": [ 53 | "class GMM():\n", 54 | " \n", 55 | " def __init__(self,data,k,epochs,eps=1e-8):\n", 56 | " # data\n", 57 | " self.X=data\n", 58 | " # number of clusters \n", 59 | " self.k=k\n", 60 | " # list of likelihood\n", 61 | " self.likelihood =[]\n", 62 | " # expectation\n", 63 | " self.b =[]\n", 64 | " # epsilon\n", 65 | " self.eps=eps\n", 66 | " #epochs\n", 67 | " self.epochs=epochs\n", 68 | " \n", 69 | " \n", 70 | " def prob_density_function(self,data, mean, variance):\n", 71 | " # A normal continuous random variable.\n", 72 | " s1 = 1/(np.sqrt(2*np.pi*variance))\n", 73 | " s2 = np.exp(-(np.square(data - mean)/(2*variance)))\n", 74 | " return s1 * s2\n", 75 | " \n", 76 | " def fit(self): \n", 77 | " \n", 78 | " weights = np.ones((self.k)) / self.k # ones array of each weight 1/k (here 1/3)\n", 79 | " means = np.random.choice(self.X, self.k)\n", 80 | " variances = np.random.random_sample(size=self.k)\n", 81 | " self.X=np.array(self.X)\n", 82 | " print(\"initially \",means,variances,\"\\n\")\n", 83 | " \n", 84 | " ls_mean=[]\n", 85 | " ls_variance=[]\n", 86 | " # iterating over the epochs\n", 87 | " for 
i in range(self.epochs):\n", 88 | " \n", 89 | " # calculate the maximum likelihood of each observation xi\n", 90 | " self.likelihood = []\n", 91 | " # Expectation step\n", 92 | " for j in range(self.k):\n", 93 | " self.likelihood.append(self.prob_density_function(self.X, means[j], np.sqrt(variances[j])))\n", 94 | " self.likelihood = np.array(self.likelihood)\n", 95 | " \n", 96 | " self.b = []\n", 97 | " \n", 98 | " \n", 99 | " # Maximization step \n", 100 | " for j in range(self.k):\n", 101 | " \n", 102 | " # use the current values for the parameters to evaluate the posterior\n", 103 | " # probabilities of the data to have been generanted by each gaussian \n", 104 | " self.b.append((self.likelihood[j] * weights[j]) / (np.sum([self.likelihood[i] * weights[i] for i in range(self.k)], axis=0)+self.eps))\n", 105 | "\n", 106 | " # updage mean and variance\n", 107 | " means[j] = np.sum(self.b[j] * self.X) / (np.sum(self.b[j]+self.eps))\n", 108 | " variances[j] = np.sum(self.b[j] * np.square(self.X - means[j])) / (np.sum(self.b[j]+self.eps))\n", 109 | "\n", 110 | " # update the weights\n", 111 | " weights[j] = np.mean(self.b[j])\n", 112 | " # storing means for each iteration \n", 113 | " ls_mean.append(means.tolist())\n", 114 | " # storing variance for each iteration \n", 115 | " ls_variance.append(variances.tolist())\n", 116 | "# print(f'after {i} epochs means is {means} and variance is {variances}')\n", 117 | " return ls_mean,ls_variance\n", 118 | "# print(f'after {i} epochs means is {means} and variance is {variances}')\n", 119 | "\n", 120 | "\n", 121 | " \n", 122 | " \n", 123 | "## Generating my own 1d data\n", 124 | "n_samples =100\n", 125 | "\n", 126 | "# mu --> mean and sigma ---> standard deviation\n", 127 | "\n", 128 | "# define the number of points\n", 129 | "n_samples = 100\n", 130 | "mu1, sigma1 = -5, 1.2 # mean and variance\n", 131 | "mu2, sigma2 = 5, 1.8 # mean and variance\n", 132 | "mu3, sigma3 = 0, 1.6 # mean and variance\n", 133 | "\n", 134 | "x1 = 
np.random.normal(mu1, np.sqrt(sigma1), n_samples)\n", 135 | "x2 = np.random.normal(mu2, np.sqrt(sigma2), n_samples)\n", 136 | "x3 = np.random.normal(mu3, np.sqrt(sigma3), n_samples)\n", 137 | "\n", 138 | "X = np.array(list(x1) + list(x2) + list(x3))\n", 139 | "np.random.shuffle(X)\n", 140 | "print(\"Dataset shape:\", X.shape)\n", 141 | "\n", 142 | "gaussain = GMM(data=X,k=3,epochs=15,eps=1e-7)\n", 143 | "mean,variance = gaussain.fit()\n", 144 | " \n", 145 | " " 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 162, 151 | "id": "known-withdrawal", 152 | "metadata": {}, 153 | "outputs": [ 154 | { 155 | "data": { 156 | "text/plain": [ 157 | "[[-5.447911687806895, -4.951896946876751, -0.5550162097775722],\n", 158 | " [-5.391387238156239, -4.852843508515019, 1.5730825573084812],\n", 159 | " [-5.303079891428191, -4.648157774798244, 2.5189544997806244],\n", 160 | " [-5.265546765946115, -4.390236896785014, 2.697618361752606],\n", 161 | " [-5.322051025897019, -4.119717090722412, 2.835451824440251],\n", 162 | " [-5.428595347859782, -3.8018603055540767, 2.9787518465034166],\n", 163 | " [-5.519339495708541, -3.419554700391987, 3.1583608286991893],\n", 164 | " [-5.547176671221818, -2.973874323953654, 3.3937667105372413],\n", 165 | " [-5.504520343889167, -2.4791721712158856, 3.67729030856226],\n", 166 | " [-5.409892953408194, -1.9426131815331689, 3.984337580974629],\n", 167 | " [-5.293001175054609, -1.3931250711594054, 4.314744514749055],\n", 168 | " [-5.1851869896880185, -0.9321227155425464, 4.635336386158243],\n", 169 | " [-5.102586930070997, -0.6256510057762783, 4.876354196388706],\n", 170 | " [-5.043216069762877, -0.434073681206713, 5.033373234897915],\n", 171 | " [-5.002003982042112, -0.31250361535637, 5.132054434612007]]" 172 | ] 173 | }, 174 | "execution_count": 162, 175 | "metadata": {}, 176 | "output_type": "execute_result" 177 | } 178 | ], 179 | "source": [ 180 | "mean" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "id": 
"protective-scheduling", 186 | "metadata": {}, 187 | "source": [ 188 | "### Choose the iteration number \n", 189 | "\n", 190 | "1. Select an iteration to see the Gaussian mixture at work\n", 191 | " 1. First select iteration number = 0\n", 192 | " \n", 193 | " and then select iteration number = 14 (the last epoch); this will be the fitted graph" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 163, 199 | "id": "ceramic-headline", 200 | "metadata": {}, 201 | "outputs": [ 202 | { 203 | "data": { 204 | "text/plain": [ 205 | "[]" 206 | ] 207 | }, 208 | "execution_count": 163, 209 | "metadata": {}, 210 | "output_type": "execute_result" 211 | }, 212 | { 213 | "data": { 214 | "image/png": "\n", 215 | "text/plain": [ 216 | "
" 217 | ] 218 | }, 219 | "metadata": { 220 | "needs_background": "light" 221 | }, 222 | "output_type": "display_data" 223 | } 224 | ], 225 | "source": [ 226 | "\n", 227 | "iteration_no=0\n", 228 | "\n", 229 | "# Graph at begining\n", 230 | "bins = np.linspace(np.min(X),np.max(X),100)\n", 231 | "\n", 232 | "plt.figure(figsize=(10,7))\n", 233 | "plt.xlabel(\"$x$\")\n", 234 | "plt.ylabel(\"pdf\")\n", 235 | "plt.scatter(X, [0.005] * len(X), color='navy', s=30, marker=2, label=\"Train data\")\n", 236 | "\n", 237 | "plt.plot(bins, gaussain.prob_density_function(bins, mu1,sigma1), color='red', label=\"True pdf\")\n", 238 | "plt.plot(bins, gaussain.prob_density_function(bins, mu2, sigma2), color='red')\n", 239 | "plt.plot(bins, gaussain.prob_density_function(bins,mu3 , sigma3), color='red')\n", 240 | "\n", 241 | "\n", 242 | "plt.plot(bins, gaussain.prob_density_function(bins, mean[iteration_no][0],variance[iteration_no][0] ), color='magenta', label=\"Cluster 1\")\n", 243 | "plt.plot(bins, gaussain.prob_density_function(bins, mean[iteration_no][1], variance[iteration_no][1]), color='green',label='Cluster 2')\n", 244 | "plt.plot(bins, gaussain.prob_density_function(bins, mean[iteration_no][2] , variance[iteration_no][2]), color='blue',label='Cluster 3')\n", 245 | "\n", 246 | "\n", 247 | "plt.legend()\n", 248 | "plt.plot()" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "id": "major-register", 254 | "metadata": {}, 255 | "source": [ 256 | "##### Below is code which does not follow oops" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 164, 262 | "id": "determined-lawsuit", 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "\n", 267 | "# def prob_density_function(data, mean, variance):\n", 268 | "# # A normal continuous random variable.\n", 269 | "# s1 = 1/(np.sqrt(2*np.pi*variance))\n", 270 | "# s2 = np.exp(-(np.square(data - mean)/(2*variance)))\n", 271 | "# return s1 * s2\n" 272 | ] 273 | }, 274 | { 275 | 
"cell_type": "code", 276 | "execution_count": 170, 277 | "id": "turned-charm", 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": [ 281 | "# import numpy as np\n", 282 | "# import pandas as pd\n", 283 | "# import math\n", 284 | "# import matplotlib.pyplot as plt\n", 285 | "\n", 286 | "# ## Generating my own 1d data\n", 287 | "# n_samples =100\n", 288 | "\n", 289 | "# # mu --> mean and sigma ---> standard deviation\n", 290 | "\n", 291 | "# # define the number of points\n", 292 | "# n_samples = 100\n", 293 | "# mu1, sigma1 = -4, 1.2 # mean and variance\n", 294 | "# mu2, sigma2 = 4, 1.8 # mean and variance\n", 295 | "# mu3, sigma3 = 0, 1.6 # mean and variance\n", 296 | "\n", 297 | "# x1 = np.random.normal(mu1, np.sqrt(sigma1), n_samples)\n", 298 | "# x2 = np.random.normal(mu2, np.sqrt(sigma2), n_samples)\n", 299 | "# x3 = np.random.normal(mu3, np.sqrt(sigma3), n_samples)\n", 300 | "\n", 301 | "# X = np.array(list(x1) + list(x2) + list(x3))\n", 302 | "# np.random.shuffle(X)\n", 303 | "# print(\"Dataset shape:\", X.shape)" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 171, 309 | "id": "large-midnight", 310 | "metadata": {}, 311 | "outputs": [], 312 | "source": [ 313 | "# # visualize the training data\n", 314 | "\n", 315 | "\n", 316 | "# bins = np.linspace(np.min(X),np.max(X),100)\n", 317 | "\n", 318 | "# plt.figure(figsize=(10,7))\n", 319 | "# plt.xlabel(\"$x$\")\n", 320 | "# plt.ylabel(\"pdf\")\n", 321 | "# plt.scatter(X, [0.005] * len(X), color='navy', s=30, marker=2, label=\"Train data\")\n", 322 | "\n", 323 | "# plt.plot(bins, prob_density_function(bins, mu1, sigma1), color='red', label=\"True pdf\")\n", 324 | "# plt.plot(bins, prob_density_function(bins, mu2, sigma2), color='red')\n", 325 | "# plt.plot(bins, prob_density_function(bins, mu3, sigma3), color='red')\n", 326 | "\n", 327 | "# plt.legend()\n", 328 | "# plt.plot()" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 172, 334 | "id": 
"furnished-database", 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "# # define the number of clusters to be learned\n", 339 | "# k = 3\n", 340 | "# weights = np.ones((k)) / k\n", 341 | "# means = np.random.choice(X, k)\n", 342 | "# variances = np.random.random_sample(size=k)\n", 343 | "# print(means, variances)" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 173, 349 | "id": "computational-dynamics", 350 | "metadata": {}, 351 | "outputs": [], 352 | "source": [ 353 | "# X = np.array(X)" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 174, 359 | "id": "built-italian", 360 | "metadata": {}, 361 | "outputs": [], 362 | "source": [ 363 | "# eps=1e-8\n", 364 | "# for step in range(20):\n", 365 | " \n", 366 | "# if step % 5 == 0:\n", 367 | " \n", 368 | "# plt.figure(figsize=(10,6))\n", 369 | "# axes = plt.gca()\n", 370 | "# plt.xlabel(\"$x$\")\n", 371 | "# plt.ylabel(\"pdf\")\n", 372 | "# plt.title(\"Iteration {}\".format(step))\n", 373 | "# plt.scatter(X, [0.005] * len(X), color='navy', s=30, marker=2, label=\"Train data\")\n", 374 | "\n", 375 | "# plt.plot(bins, prob_density_function(bins, mu1, sigma1), color='grey', label=\"True pdf\")\n", 376 | "# plt.plot(bins, prob_density_function(bins, mu2, sigma2), color='grey')\n", 377 | "# plt.plot(bins, prob_density_function(bins, mu3, sigma3), color='grey')\n", 378 | "\n", 379 | "# plt.plot(bins, prob_density_function(bins, means[0], variances[0]), color='blue', label=\"Cluster 1\")\n", 380 | "# plt.plot(bins, prob_density_function(bins, means[1], variances[1]), color='green', label=\"Cluster 2\")\n", 381 | "# plt.plot(bins, prob_density_function(bins, means[2], variances[2]), color='magenta', label=\"Cluster 3\")\n", 382 | "\n", 383 | "# plt.legend(loc='upper left')\n", 384 | "\n", 385 | "\n", 386 | "# plt.show()\n", 387 | " \n", 388 | "# # calculate the maximum likelihood of each observation xi\n", 389 | "# likelihood = []\n", 390 | "# # 
Expectation step\n", 391 | "# for j in range(k):\n", 392 | "# likelihood.append(prob_density_function(X, means[j], np.sqrt(variances[j])))\n", 393 | "# likelihood = np.array(likelihood)\n", 394 | " \n", 395 | "# b = []\n", 396 | "# # Maximization step \n", 397 | "# for j in range(k):\n", 398 | " \n", 399 | "# # use the current values for the parameters to evaluate the posterior\n", 400 | "# # probabilities of the data to have been generanted by each gaussian \n", 401 | "# b.append((likelihood[j] * weights[j]) / (np.sum([likelihood[i] * weights[i] for i in range(k)], axis=0)+eps))\n", 402 | " \n", 403 | "# # updage mean and variance\n", 404 | "# means[j] = np.sum(b[j] * X) / (np.sum(b[j]+eps))\n", 405 | "# variances[j] = np.sum(b[j] * np.square(X - means[j])) / (np.sum(b[j]+eps))\n", 406 | "\n", 407 | "# # update the weights\n", 408 | "# weights[j] = np.mean(b[j])" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 140, 414 | "id": "tracked-syntax", 415 | "metadata": {}, 416 | "outputs": [], 417 | "source": [] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": null, 422 | "id": "ethical-applicant", 423 | "metadata": {}, 424 | "outputs": [], 425 | "source": [] 426 | }, 427 | { 428 | "cell_type": "markdown", 429 | "id": "multiple-conditions", 430 | "metadata": {}, 431 | "source": [] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": null, 436 | "id": "female-reform", 437 | "metadata": {}, 438 | "outputs": [], 439 | "source": [] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": null, 444 | "id": "comic-navigator", 445 | "metadata": {}, 446 | "outputs": [], 447 | "source": [] 448 | } 449 | ], 450 | "metadata": { 451 | "kernelspec": { 452 | "display_name": "Python 3.8.5 64-bit", 453 | "language": "python", 454 | "name": "python385jvsc74a57bd0916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" 455 | }, 456 | "language_info": { 457 | "codemirror_mode": { 458 | "name": "ipython", 
459 | "version": 3 460 | }, 461 | "file_extension": ".py", 462 | "mimetype": "text/x-python", 463 | "name": "python", 464 | "nbconvert_exporter": "python", 465 | "pygments_lexer": "ipython3", 466 | "version": "3.8.5" 467 | } 468 | }, 469 | "nbformat": 4, 470 | "nbformat_minor": 5 471 | } 472 | -------------------------------------------------------------------------------- /D9-10/Multilayer-perceptron.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 185, 6 | "id": "fitted-currency", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "## Loading data and importing libraries\n", 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.datasets import load_iris\n", 14 | "from sklearn import datasets" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 193, 20 | "id": "organic-washington", 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "class MLP(): # Multilayer Perceptron\n", 25 | " def __init__(self,epochs,lr):\n", 26 | " self.learning_rate = lr\n", 27 | " self.epochs=epochs\n", 28 | " def sigmoid(self,x):\n", 29 | " return 1/(1+np.exp(-x)) \n", 30 | " def der_sigmoid(self,x):\n", 31 | " return self.sigmoid(x)*(1-self.sigmoid(x))\n", 32 | " def fit(self,features,labels):\n", 33 | " weights_layer1 = np.random.rand(features.shape[1],features.shape[1]*2) # weight's shape (2,4) \n", 34 | " weights_layer2 = np.random.rand(features.shape[1]*2,6) # weights for hidden layer1 to hidden layer 2\n", 35 | " weights_layer3 = np.random.rand(6,1) # hidden layer having 6 neurons and going to 1 neuron for output\n", 36 | " for i in range(self.epochs):\n", 37 | " ls=[]\n", 38 | " # feedforward\n", 39 | " \n", 40 | " # layer 1 --> layer 2 ( 2 input neurons to 4 neurons in hidden layer1)\n", 41 | " z1 = np.dot(features, weights_layer1)\n", 42 | " a1 = self.sigmoid(z1)\n", 43 | " \n", 44 | " # layer 2 ---> 
layer 3 ( 4 neurons in hidden layer 1 to 6 neurons in hidden layer 2 )\n", 45 | " z2 = np.dot(a1, weights_layer2)\n", 46 | " a2 = self.sigmoid(z2)\n", 47 | " \n", 48 | " # layer 3 --> output layer ( 6 neurons in hidden layer 2 to 1 neuron in output layer)\n", 49 | " z3 = np.dot(a2,weights_layer3)\n", 50 | " a3 = self.sigmoid(z3)\n", 51 | " ls.append(a3)\n", 52 | " # backpropagation\n", 53 | " labels = labels.reshape(-1,1)\n", 54 | " # Phase 1 # Updating weights of hidden layer 2 (hidden layer2 to output layer)\n", 55 | " error_out = ((1 / 2) * (np.power((a3 - labels), 2)))\n", 56 | " dcost_dao = 2*(a3 - labels) # derivative of cost wrt ao\n", 57 | " dao_dzo = self.der_sigmoid(z3) # derivative of ao wrt to zo\n", 58 | " dzo_dwo = a2 # derivative of zo wrt w_3\n", 59 | " \n", 60 | " dcost_weight_output_layer = np.dot(dzo_dwo.T, dcost_dao * dao_dzo)\n", 61 | " weights_layer3 -= self.learning_rate * dcost_weight_output_layer\n", 62 | " \n", 63 | " # Phase 2 # Updating weights of hidden layer 1 ( hidden layer1 to hidden layer 2 )\n", 64 | " #dcost_dw2 = dcost_da2 * da2_dz2 * dz2_dw2\n", 65 | " # Breaking -------> dcost_da2\n", 66 | " # dcost_da2 = dcost_dao *dao_dzo *dzo_da2 \n", 67 | " \n", 68 | " #Final quation\n", 69 | " # dcost_dw2 = (dcost_dao *dao_dzo) * (dzo_da2 .T * da2_dz2 .T ) * (dz2_dw2) === > (part1) * (part2) *(dz2_dw2)\n", 70 | " \n", 71 | " dzo_da2 = weights_layer3.T # 1,6\n", 72 | " \n", 73 | " da2_dz2 = self.der_sigmoid(z2).T # 6,100\n", 74 | " dz2_dw2 = a1 #100,4\n", 75 | " part1 = np.dot(dcost_dao.T,dao_dzo) #1 , 1 \n", 76 | " \n", 77 | " part2 = np.dot(dzo_da2,da2_dz2) # 1,100\n", 78 | "\n", 79 | " temp = np.dot(part1,part2) # 1,100\n", 80 | " dcost_dw2=np.dot(temp,dz2_dw2).T \n", 81 | " weights_layer2 -= self.learning_rate * dcost_dw2\n", 82 | " \n", 83 | " # Phase 3 # Updating weights of Input layer ( input layer to hidden layer1 )\n", 84 | " \n", 85 | " # dcost_dw1 = dcost_a1 * da1_dz1 * dz1_dw1\n", 86 | "\n", 87 | " # Breaking ---> dcost_da1 = 
dcost_dzo * dzo_da1 ( means dcost_da1 consists of 2 parts )= part1 * part2\n", 88 | " # Breaking part1 ---> dcost_dzo = dcost_dao * dao_dzo \n", 89 | " # Breaking part2 ----> dzo_da1 = dzo_da2 * da2_dz2 * dz2_da1 \n", 90 | " # part1 is already calculated above ( dcost_dzo = dcost_dao * dao_dzo ) # 1,1\n", 91 | " dzo_da2 = weights_layer3.T # 1,6\n", 92 | " \n", 93 | "# # da2_dz2 already calulate above # 6,100\n", 94 | "# print(\"da2_dz2\",da2_dz2.shape)\n", 95 | " dz2_da1 = weights_layer2.T # 6,4\n", 96 | " part2 = np.dot(dzo_da2,dz2_da1)\n", 97 | " \n", 98 | " temp= np.dot(part1,part2)\n", 99 | " \n", 100 | " da1_dz1 = self.der_sigmoid(z1).T # 4,100\n", 101 | " \n", 102 | " dz1_dw1 = features # 100,2\n", 103 | " \n", 104 | " part3 = np.dot(da1_dz1,dz1_dw1)\n", 105 | " \n", 106 | " dcost_dw1 = np.dot(temp,part3).T # (2,1)\n", 107 | " # final eq\n", 108 | " weights_layer1 -= self.learning_rate * dcost_dw1\n", 109 | " return ls # predictions\n", 110 | " \n", 111 | " " 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 194, 117 | "id": "annual-extension", 118 | "metadata": {}, 119 | "outputs": [ 120 | { 121 | "name": "stderr", 122 | "output_type": "stream", 123 | "text": [ 124 | ":6: RuntimeWarning: overflow encountered in exp\n", 125 | " return 1/(1+np.exp(-x))\n" 126 | ] 127 | }, 128 | { 129 | "data": { 130 | "text/plain": [ 131 | "[array([[4.81937919e-13],\n", 132 | " [4.81937919e-13],\n", 133 | " [4.81937919e-13],\n", 134 | " [4.81937919e-13],\n", 135 | " [4.81937919e-13],\n", 136 | " [4.81937919e-13],\n", 137 | " [4.81937919e-13],\n", 138 | " [6.94217005e-07],\n", 139 | " [4.81937919e-13],\n", 140 | " [4.81937919e-13],\n", 141 | " [4.81937919e-13],\n", 142 | " [4.81937919e-13],\n", 143 | " [4.81937919e-13],\n", 144 | " [4.81937919e-13],\n", 145 | " [4.81937919e-13],\n", 146 | " [6.94217005e-07],\n", 147 | " [4.81937919e-13],\n", 148 | " [4.81937919e-13],\n", 149 | " [4.81937919e-13],\n", 150 | " [4.81937919e-13],\n", 151 | " 
[4.81937919e-13],\n", 152 | " [6.94217005e-07],\n", 153 | " [4.81937919e-13],\n", 154 | " [4.81937919e-13],\n", 155 | " [4.81937919e-13],\n", 156 | " [4.81937919e-13],\n", 157 | " [4.81937919e-13],\n", 158 | " [6.94217005e-07],\n", 159 | " [4.81937919e-13],\n", 160 | " [6.94217005e-07],\n", 161 | " [4.81937919e-13],\n", 162 | " [4.81937919e-13],\n", 163 | " [4.81937919e-13],\n", 164 | " [4.81937919e-13],\n", 165 | " [4.81937919e-13],\n", 166 | " [4.81937919e-13],\n", 167 | " [4.81937919e-13],\n", 168 | " [4.81937919e-13],\n", 169 | " [6.94217005e-07],\n", 170 | " [4.81937919e-13],\n", 171 | " [4.81937919e-13],\n", 172 | " [4.81937919e-13],\n", 173 | " [4.81937919e-13],\n", 174 | " [4.81937919e-13],\n", 175 | " [4.81937919e-13],\n", 176 | " [4.81937919e-13],\n", 177 | " [6.94217005e-07],\n", 178 | " [4.81937919e-13],\n", 179 | " [4.81937919e-13],\n", 180 | " [4.81937919e-13],\n", 181 | " [4.81937919e-13],\n", 182 | " [4.81937919e-13],\n", 183 | " [4.81937919e-13],\n", 184 | " [4.81937919e-13],\n", 185 | " [4.81937919e-13],\n", 186 | " [4.81937919e-13],\n", 187 | " [6.94217005e-07],\n", 188 | " [4.81937919e-13],\n", 189 | " [4.81937919e-13],\n", 190 | " [4.81937919e-13],\n", 191 | " [4.81937919e-13],\n", 192 | " [4.81937919e-13],\n", 193 | " [4.81937919e-13],\n", 194 | " [4.81937919e-13],\n", 195 | " [4.81937919e-13],\n", 196 | " [6.94217005e-07],\n", 197 | " [4.81937919e-13],\n", 198 | " [4.81937919e-13],\n", 199 | " [4.81937919e-13],\n", 200 | " [4.81937919e-13],\n", 201 | " [4.81937919e-13],\n", 202 | " [4.81937919e-13],\n", 203 | " [6.94217005e-07],\n", 204 | " [6.94217005e-07],\n", 205 | " [4.81937919e-13],\n", 206 | " [4.81937919e-13],\n", 207 | " [4.81937919e-13],\n", 208 | " [4.81937919e-13],\n", 209 | " [4.81937919e-13],\n", 210 | " [6.94217005e-07],\n", 211 | " [4.81937919e-13],\n", 212 | " [4.81937919e-13],\n", 213 | " [4.81937919e-13],\n", 214 | " [6.94217005e-07],\n", 215 | " [6.94217005e-07],\n", 216 | " [4.81937919e-13],\n", 217 | " 
[4.81937919e-13],\n", 218 | " [4.81937919e-13],\n", 219 | " [4.81937919e-13],\n", 220 | " [4.81937919e-13],\n", 221 | " [6.94217005e-07],\n", 222 | " [4.81937919e-13],\n", 223 | " [4.81937919e-13],\n", 224 | " [6.94217005e-07],\n", 225 | " [4.81937919e-13],\n", 226 | " [4.81937919e-13],\n", 227 | " [4.81937919e-13],\n", 228 | " [6.94217005e-07],\n", 229 | " [6.94217005e-07],\n", 230 | " [4.81937919e-13]])]" 231 | ] 232 | }, 233 | "execution_count": 194, 234 | "metadata": {}, 235 | "output_type": "execute_result" 236 | } 237 | ], 238 | "source": [ 239 | "s =MLP(2000,0.9)\n", 240 | "features,target = datasets.make_moons(100, noise=0.25)\n", 241 | "prediction=s.fit(features,target)\n", 242 | "prediction" 243 | ] 244 | } 245 | ], 246 | "metadata": { 247 | "kernelspec": { 248 | "display_name": "Python 3.8.5 64-bit", 249 | "language": "python", 250 | "name": "python385jvsc74a57bd0916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" 251 | }, 252 | "language_info": { 253 | "codemirror_mode": { 254 | "name": "ipython", 255 | "version": 3 256 | }, 257 | "file_extension": ".py", 258 | "mimetype": "text/x-python", 259 | "name": "python", 260 | "nbconvert_exporter": "python", 261 | "pygments_lexer": "ipython3", 262 | "version": "3.8.5" 263 | } 264 | }, 265 | "nbformat": 4, 266 | "nbformat_minor": 5 267 | } 268 | -------------------------------------------------------------------------------- /Output-images/D1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sahibpreetsingh12/100daysofmlcode/940c4b039ac1657cb2b48e0cd7ab351c67b0b198/Output-images/D1.png -------------------------------------------------------------------------------- /Output-images/D11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sahibpreetsingh12/100daysofmlcode/940c4b039ac1657cb2b48e0cd7ab351c67b0b198/Output-images/D11.png 
-------------------------------------------------------------------------------- /Output-images/D12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sahibpreetsingh12/100daysofmlcode/940c4b039ac1657cb2b48e0cd7ab351c67b0b198/Output-images/D12.png -------------------------------------------------------------------------------- /Output-images/D13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sahibpreetsingh12/100daysofmlcode/940c4b039ac1657cb2b48e0cd7ab351c67b0b198/Output-images/D13.png -------------------------------------------------------------------------------- /Output-images/D14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sahibpreetsingh12/100daysofmlcode/940c4b039ac1657cb2b48e0cd7ab351c67b0b198/Output-images/D14.png -------------------------------------------------------------------------------- /Output-images/D16-cnn-basics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sahibpreetsingh12/100daysofmlcode/940c4b039ac1657cb2b48e0cd7ab351c67b0b198/Output-images/D16-cnn-basics.png -------------------------------------------------------------------------------- /Output-images/D2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sahibpreetsingh12/100daysofmlcode/940c4b039ac1657cb2b48e0cd7ab351c67b0b198/Output-images/D2.png -------------------------------------------------------------------------------- /Output-images/D3-rmsprop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sahibpreetsingh12/100daysofmlcode/940c4b039ac1657cb2b48e0cd7ab351c67b0b198/Output-images/D3-rmsprop.png 
-------------------------------------------------------------------------------- /Output-images/D4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sahibpreetsingh12/100daysofmlcode/940c4b039ac1657cb2b48e0cd7ab351c67b0b198/Output-images/D4.png -------------------------------------------------------------------------------- /Output-images/D5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sahibpreetsingh12/100daysofmlcode/940c4b039ac1657cb2b48e0cd7ab351c67b0b198/Output-images/D5.png -------------------------------------------------------------------------------- /Output-images/D6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sahibpreetsingh12/100daysofmlcode/940c4b039ac1657cb2b48e0cd7ab351c67b0b198/Output-images/D6.png -------------------------------------------------------------------------------- /Output-images/D7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sahibpreetsingh12/100daysofmlcode/940c4b039ac1657cb2b48e0cd7ab351c67b0b198/Output-images/D7.png -------------------------------------------------------------------------------- /Output-images/D8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sahibpreetsingh12/100daysofmlcode/940c4b039ac1657cb2b48e0cd7ab351c67b0b198/Output-images/D8.png -------------------------------------------------------------------------------- /Output-images/D9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sahibpreetsingh12/100daysofmlcode/940c4b039ac1657cb2b48e0cd7ab351c67b0b198/Output-images/D9.png -------------------------------------------------------------------------------- /Output-images/d15.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sahibpreetsingh12/100daysofmlcode/940c4b039ac1657cb2b48e0cd7ab351c67b0b198/Output-images/d15.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 100daysofmlcode 2 | 3 | #### Day 11 4 | 5 | > Started learning SQL under #100daysofcode; it's Day 1 of #8weeksqlchallenge 6 | --------------------------------------------------------------------------------