# Repository layout:
# ├── README.md
# ├── regression.py
# ├── ridge_regression.py
# ├── lasso_regression.py
# ├── lin_reg_data.csv
# ├── linear regression from scratch-final.ipynb
# ├── linear regression l1 regularized from scratch-final.ipynb
# └── linear regression l2 regularized from scratch-final.ipynb
#
# /README.md:
# --------------------------------------------------------------------------------
# 1 | # linear-regression-using-only-numpy
# 2 | Implementation of unregularized, l1 regularized and l2 regularized linear regression using numpy and without sklearn
# 3 |
#
# --------------------------------------------------------------------------------
# /regression.py:
# --------------------------------------------------------------------------------

import statistics as st

import numpy as np
import pandas as pd

try:
    import matplotlib.pyplot as plt
except ImportError:  # plotting is optional: fit()/cost()/MSE() only need numpy
    plt = None
else:
    plt.rcParams['figure.figsize'] = (5.0, 5.0)


class LinearRegression:
    """Single-feature linear regression ``y = m*x + c`` fitted by batch gradient descent.

    The constructor stores ``X`` and ``Y`` (flattened float arrays), the slope
    ``m`` and intercept ``c`` (both start at 0), the learning rate ``L``, the
    iteration count ``iter`` and the sample count ``n`` (as a float).
    ``fit()`` fills ``history`` (cost at the start of every iteration) and
    ``mse`` (mean squared error of the final parameters).

    NOTE: the inputs are used unscaled; with large-magnitude ``X`` a small
    learning rate is needed for the updates to converge.
    """

    def __init__(self, X_data, Y_data, l, iterations):
        """Store the training data and hyper-parameters.

        Args:
            X_data: 1-D sequence of feature values.
            Y_data: 1-D sequence of targets, same length as ``X_data``.
            l: learning rate (gradient-descent step size).
            iterations: number of gradient-descent steps performed by ``fit()``.

        Raises:
            ValueError: if the inputs are empty or differ in length.
        """
        self.X = np.asarray(X_data, dtype=float).ravel()
        self.Y = np.asarray(Y_data, dtype=float).ravel()
        if self.X.shape != self.Y.shape:
            raise ValueError("X_data and Y_data must have the same length")
        if self.X.size == 0:
            raise ValueError("X_data and Y_data must not be empty")
        # weight and bias
        self.m = 0.0
        self.c = 0.0
        self.L = l  # learning rate
        self.iter = iterations  # num of iterations
        # size of data, taken from the data itself (was hard-coded to 99)
        self.n = float(self.X.size)
        # populated by fit(); initialised so results() before fit() does not
        # fail with AttributeError
        self.history = np.zeros(0)
        self.mse = None

    def cost(self, pred_y):
        """Return the cost ``sum((Y - pred_y)**2) / (2*n)`` for predictions ``pred_y``."""
        return np.sum(np.square(self.Y - pred_y)) / (2 * self.n)

    def fit(self):
        """Run ``iter`` steps of batch gradient descent on ``m`` and ``c``.

        Sets ``history[i]`` to the cost at the start of step ``i`` and ``mse``
        to the mean squared error of the final parameters.
        """
        self.history = np.zeros(self.iter)
        for i in range(self.iter):
            pred_y = self.m * self.X + self.c
            self.history[i] = self.cost(pred_y)
            residual = self.Y - pred_y
            # Gradients of cost() w.r.t. m and c, summed over all samples so
            # that m and c stay scalars shared by every data point.
            Dm = -np.sum(self.X * residual) / self.n
            Dc = -np.sum(residual) / self.n
            self.m = self.m - self.L * Dm
            self.c = self.c - self.L * Dc
        # Report the error of the parameters that results() will plot.
        self.mse = self.MSE(self.m * self.X + self.c, self.Y)

    def MSE(self, pred_y, Y):
        """Return the mean of the squared differences between ``Y`` and ``pred_y``."""
        errors = np.asarray(Y, dtype=float) - np.asarray(pred_y, dtype=float)
        return np.mean(np.square(errors))

    def results(self):
        """Plot the cost history and the fitted line, then print the MSE.

        Raises:
            ImportError: if matplotlib is not installed.
        """
        if plt is None:
            raise ImportError("matplotlib is required to plot the results")
        fig = plt.figure(figsize=(14, 14))
        a1 = fig.add_subplot(211)
        a1.set_title('minimisation of errors across the iterations')
        a1.plot(self.history)

        a2 = fig.add_subplot(212)
        a2.scatter(self.X, self.Y)
        a2.set_title('regrssion line')
        # Evaluate the line at the extreme x values; pairing min(x) with
        # min(y) would draw the wrong line for a negative slope.
        x_ends = np.array([self.X.min(), self.X.max()])
        a2.plot(x_ends, self.m * x_ends + self.c, color='red')

        plt.show()

        print('Mean Squared Error=', self.mse)


# --------------------------------------------------------------------------------
# /ridge_regression.py:
# --------------------------------------------------------------------------------

import statistics as st

import numpy as np
import pandas as pd

try:
    import matplotlib.pyplot as plt
except ImportError:  # plotting is optional: fit()/cost()/MSE() only need numpy
    plt = None
else:
    plt.rcParams['figure.figsize'] = (5.0, 5.0)


class RidgeLinearRegression:
    """Single-feature linear regression with an L2 (ridge) penalty on the slope.

    Minimises ``sum((Y - (m*X + c))**2) / (2*n) + l2 * m**2`` by batch
    gradient descent. The intercept ``c`` is not penalised.

    NOTE: the inputs are used unscaled; with large-magnitude ``X`` a small
    learning rate is needed for the updates to converge.
    """

    def __init__(self, X_data, Y_data, l, l2, iterations):
        """Store the training data and hyper-parameters.

        Args:
            X_data: 1-D sequence of feature values.
            Y_data: 1-D sequence of targets, same length as ``X_data``.
            l: learning rate (gradient-descent step size).
            l2: regularization parameter multiplying ``m**2`` in the cost.
            iterations: number of gradient-descent steps performed by ``fit()``.

        Raises:
            ValueError: if the inputs are empty or differ in length.
        """
        self.X = np.asarray(X_data, dtype=float).ravel()
        self.Y = np.asarray(Y_data, dtype=float).ravel()
        if self.X.shape != self.Y.shape:
            raise ValueError("X_data and Y_data must have the same length")
        if self.X.size == 0:
            raise ValueError("X_data and Y_data must not be empty")
        # weight and bias
        self.m = 0.0
        self.c = 0.0
        self.L = l  # learning rate
        self.l2 = l2  # regularization parameter
        self.iter = iterations  # num of iterations
        self.n = float(self.X.size)  # size of data
        # populated by fit()
        self.history = np.zeros(0)
        self.mse = None

    def cost(self, pred_y):
        """Return the ridge cost for ``pred_y`` using the current slope ``m``."""
        data_term = np.sum(np.square(self.Y - pred_y)) / (2 * self.n)
        return data_term + self.l2 * np.sum(np.square(self.m))

    def fit(self):
        """Run ``iter`` steps of batch gradient descent on ``m`` and ``c``.

        Sets ``history[i]`` to the cost at the start of step ``i`` and ``mse``
        to the mean squared error of the final parameters.
        """
        self.history = np.zeros(self.iter)
        for i in range(self.iter):
            pred_y = self.m * self.X + self.c
            # Record the cost before updating so that the predictions and the
            # penalty term refer to the same slope.
            self.history[i] = self.cost(pred_y)
            residual = self.Y - pred_y
            # Gradients of cost(): data term summed over the samples plus the
            # derivative of l2 * m**2.
            Dm = -np.sum(self.X * residual) / self.n + 2 * self.l2 * self.m
            Dc = -np.sum(residual) / self.n
            self.m = self.m - self.L * Dm
            self.c = self.c - self.L * Dc
        self.mse = self.MSE(self.m * self.X + self.c, self.Y)

    def MSE(self, pred_y, Y):
        """Return the mean of the squared differences between ``Y`` and ``pred_y``."""
        errors = np.asarray(Y, dtype=float) - np.asarray(pred_y, dtype=float)
        return np.mean(np.square(errors))

    def results(self):
        """Plot the cost history and the fitted line, then print the MSE.

        Raises:
            ImportError: if matplotlib is not installed.
        """
        if plt is None:
            raise ImportError("matplotlib is required to plot the results")
        fig = plt.figure(figsize=(14, 14))
        a1 = fig.add_subplot(211)
        a1.set_title('minimisation of errors across the iterations')
        a1.plot(self.history)

        a2 = fig.add_subplot(212)
        a2.scatter(self.X, self.Y)
        a2.set_title('regrssion line')
        # Evaluate the line at the extreme x values; pairing min(x) with
        # min(y) would draw the wrong line for a negative slope.
        x_ends = np.array([self.X.min(), self.X.max()])
        a2.plot(x_ends, self.m * x_ends + self.c, color='red')

        plt.show()

        print('Mean Squared Error=', self.mse)


# --------------------------------------------------------------------------------
# /lasso_regression.py:
# --------------------------------------------------------------------------------

import statistics as st

import numpy as np
import pandas as pd

try:
    import matplotlib.pyplot as plt
except ImportError:  # plotting is optional: fit()/cost()/MSE() only need numpy
    plt = None
else:
    plt.rcParams['figure.figsize'] = (5.0, 5.0)


class LassoLinearRegression:
    """Single-feature linear regression with an L1 (lasso) penalty on the slope.

    Minimises ``sum((Y - (m*X + c))**2) / (2*n) + l1 * |m|`` by batch
    sub-gradient descent. The intercept ``c`` is not penalised. The slope
    starts at 0, like the unregularized and ridge models.

    NOTE: the inputs are used unscaled; with large-magnitude ``X`` a small
    learning rate is needed for the updates to converge.
    """

    def __init__(self, X_data, Y_data, l, l1, iterations):
        """Store the training data and hyper-parameters.

        Args:
            X_data: 1-D sequence of feature values.
            Y_data: 1-D sequence of targets, same length as ``X_data``.
            l: learning rate (gradient-descent step size).
            l1: regularization parameter multiplying ``|m|`` in the cost.
            iterations: number of gradient-descent steps performed by ``fit()``.

        Raises:
            ValueError: if the inputs are empty or differ in length.
        """
        self.X = np.asarray(X_data, dtype=float).ravel()
        self.Y = np.asarray(Y_data, dtype=float).ravel()
        if self.X.shape != self.Y.shape:
            raise ValueError("X_data and Y_data must have the same length")
        if self.X.size == 0:
            raise ValueError("X_data and Y_data must not be empty")
        # weight and bias: one shared slope, not one random weight per sample
        self.m = 0.0
        self.c = 0.0
        self.L = l  # learning rate
        self.l1 = l1  # regularization parameter
        self.iter = iterations  # num of iterations
        self.n = float(self.X.size)  # size of data
        # populated by fit()
        self.history = np.zeros(0)
        self.mse = None

    def cost(self, pred_y):
        """Return the lasso cost for ``pred_y`` using the current slope ``m``."""
        data_term = np.sum(np.square(self.Y - pred_y)) / (2 * self.n)
        # The L1 penalty is the absolute value of the weight, so a negative
        # slope must increase the cost rather than reduce it.
        return data_term + self.l1 * np.sum(np.abs(self.m))

    def fit(self):
        """Run ``iter`` steps of batch sub-gradient descent on ``m`` and ``c``.

        Sets ``history[i]`` to the cost at the start of step ``i`` and ``mse``
        to the mean squared error of the final parameters.
        """
        self.history = np.zeros(self.iter)
        for j in range(self.iter):
            pred_y = self.m * self.X + self.c
            self.history[j] = self.cost(pred_y)
            residual = self.Y - pred_y
            # Sub-gradient of l1 * |m| is l1 * sign(m): +l1 for a positive
            # slope, -l1 for a negative one, 0 at exactly zero.
            Dm = -np.sum(self.X * residual) / self.n + self.l1 * np.sign(self.m)
            Dc = -np.sum(residual) / self.n
            self.m = self.m - self.L * Dm
            self.c = self.c - self.L * Dc
        self.mse = self.MSE(self.m * self.X + self.c, self.Y)

    def MSE(self, pred_y, Y):
        """Return the mean of the squared differences between ``Y`` and ``pred_y``."""
        errors = np.asarray(Y, dtype=float) - np.asarray(pred_y, dtype=float)
        return np.mean(np.square(errors))

    def results(self):
        """Plot the cost history and the fitted line, then print the MSE.

        Raises:
            ImportError: if matplotlib is not installed.
        """
        if plt is None:
            raise ImportError("matplotlib is required to plot the results")
        fig = plt.figure(figsize=(14, 14))
        a1 = fig.add_subplot(211)
        a1.set_title('minimisation of errors across the iterations')
        a1.plot(self.history)

        a2 = fig.add_subplot(212)
        a2.scatter(self.X, self.Y)
        a2.set_title('regrssion line')
        # Evaluate the line at the extreme x values; pairing min(x) with
        # min(y) would draw the wrong line for a negative slope.
        x_ends = np.array([self.X.min(), self.X.max()])
        a2.plot(x_ends, self.m * x_ends + self.c, color='red')

        plt.show()

        print('Mean Squared Error=', self.mse)


# --------------------------------------------------------------------------------
# /lin_reg_data.csv:
# --------------------------------------------------------------------------------
# 1 | 32.50234527,31.70700585
# 2 | 53.42680403,68.77759598
# 3 | 61.53035803,62.5623823
# 4 | 47.47563963,71.54663223
# 5 | 59.81320787,87.23092513
# 6 | 55.14218841,78.21151827
# 7 | 52.21179669,79.64197305
# 8 | 39.29956669,59.17148932
# 9 | 48.10504169,75.3312423
# 10 | 52.55001444,71.30087989
# 11 | 45.41973014,55.16567715
# 12 | 54.35163488,82.47884676
# 13 | 44.1640495,62.00892325
# 14 | 58.16847072,75.39287043
# 15 | 56.72720806,81.43619216
# 16 | 48.95588857,60.72360244
# 17 | 44.68719623,82.89250373
# 18 | 60.29732685,97.37989686
# 19 | 45.61864377,48.84715332
# 20 | 38.81681754,56.87721319
# 21 | 66.18981661,83.87856466
# 22 | 65.41605175,118.5912173
# 23 | 47.48120861,57.25181946
24 | 41.57564262,51.39174408 25 | 51.84518691,75.38065167 26 | 59.37082201,74.76556403 27 | 57.31000344,95.45505292 28 | 63.61556125,95.22936602 29 | 46.73761941,79.05240617 30 | 50.55676015,83.43207142 31 | 52.22399609,63.35879032 32 | 35.56783005,41.4128853 33 | 42.43647694,76.61734128 34 | 58.16454011,96.76956643 35 | 57.50444762,74.08413012 36 | 45.44053073,66.58814441 37 | 61.89622268,77.76848242 38 | 33.09383174,50.71958891 39 | 36.43600951,62.12457082 40 | 37.67565486,60.81024665 41 | 44.55560838,52.68298337 42 | 43.31828263,58.56982472 43 | 50.07314563,82.90598149 44 | 43.87061265,61.4247098 45 | 62.99748075,115.2441528 46 | 32.66904376,45.57058882 47 | 40.16689901,54.0840548 48 | 53.57507753,87.99445276 49 | 33.86421497,52.72549438 50 | 64.70713867,93.57611869 51 | 38.11982403,80.16627545 52 | 44.50253806,65.10171157 53 | 40.59953838,65.56230126 54 | 41.72067636,65.28088692 55 | 51.08863468,73.43464155 56 | 55.0780959,71.13972786 57 | 41.37772653,79.10282968 58 | 62.49469743,86.52053844 59 | 49.20388754,84.74269781 60 | 41.10268519,59.35885025 61 | 41.18201611,61.68403752 62 | 50.18638949,69.84760416 63 | 52.37844622,86.09829121 64 | 50.13548549,59.10883927 65 | 33.64470601,69.89968164 66 | 39.55790122,44.86249071 67 | 56.13038882,85.49806778 68 | 57.36205213,95.53668685 69 | 60.26921439,70.25193442 70 | 35.67809389,52.72173496 71 | 31.588117,50.39267014 72 | 53.66093226,63.64239878 73 | 46.68222865,72.24725107 74 | 43.10782022,57.81251298 75 | 70.34607562,104.2571016 76 | 44.49285588,86.64202032 77 | 57.5045333,91.486778 78 | 36.93007661,55.23166089 79 | 55.80573336,79.55043668 80 | 38.95476907,44.84712424 81 | 56.9012147,80.20752314 82 | 56.86890066,83.14274979 83 | 34.3331247,55.72348926 84 | 59.04974121,77.63418251 85 | 57.78822399,99.05141484 86 | 54.28232871,79.12064627 87 | 51.0887199,69.58889785 88 | 50.28283635,69.51050331 89 | 44.21174175,73.68756432 90 | 38.00548801,61.36690454 91 | 32.94047994,67.17065577 92 | 53.69163957,85.66820315 93 | 
68.76573427,114.8538712 94 | 46.2309665,90.12357207 95 | 68.31936082,97.91982104 96 | 50.03017434,81.53699078 97 | 49.23976534,72.11183247 98 | 50.03957594,85.23200734 99 | 48.14985889,66.22495789 100 | 25.12848465,53.45439421 101 | -------------------------------------------------------------------------------- /linear regression from scratch-final.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "scrolled": true 8 | }, 9 | "outputs": [ 10 | { 11 | "data": { 12 | "text/html": [ 13 | "
\n", 14 | "\n", 27 | "\n", 28 | " \n", 29 | " \n", 30 | " \n", 31 | " \n", 32 | " \n", 33 | " \n", 34 | " \n", 35 | " \n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | "
32.5023452731.70700585
053.42680468.777596
161.53035862.562382
247.47564071.546632
359.81320887.230925
455.14218878.211518
\n", 63 | "
" 64 | ], 65 | "text/plain": [ 66 | " 32.50234527 31.70700585\n", 67 | "0 53.426804 68.777596\n", 68 | "1 61.530358 62.562382\n", 69 | "2 47.475640 71.546632\n", 70 | "3 59.813208 87.230925\n", 71 | "4 55.142188 78.211518" 72 | ] 73 | }, 74 | "execution_count": 1, 75 | "metadata": {}, 76 | "output_type": "execute_result" 77 | } 78 | ], 79 | "source": [ 80 | "import numpy as np\n", 81 | "import pandas as pd\n", 82 | "import matplotlib.pyplot as plt\n", 83 | "import statistics as st\n", 84 | "plt.rcParams['figure.figsize'] = (5.0, 5.0)\n", 85 | "\n", 86 | "data=pd.read_csv('lin_reg_data.csv')\n", 87 | "data.head()" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 2, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | " class LinearRegression:\n", 97 | " def __init__(self,X_data,Y_data,l,iterations): #initialize all parameters\n", 98 | " self.X=X_data\n", 99 | " self.Y=Y_data\n", 100 | " #weight and bias\n", 101 | " self.m=0\n", 102 | " self.c=0\n", 103 | " self.L=l #learning rate\n", 104 | " self.iter=iterations #num of iterations\n", 105 | " self.n=float(len(self.X)) #size of data\n", 106 | " \n", 107 | " def cost(self,pred_y): #cost function\n", 108 | " cost=np.sum(np.square(self.Y-pred_y))/(2*self.n)\n", 109 | " return(cost)\n", 110 | " \n", 111 | " def fit(self):\n", 112 | " self.history=np.zeros(self.iter)\n", 113 | " #updating values of m and c\n", 114 | " for i in range(self.iter):\n", 115 | " pred_y=self.m*X + self.c\n", 116 | " #print(pred_y)\n", 117 | " Dm= (-2/self.n)*(self.X*(self.Y-pred_y))\n", 118 | " Dc= (-2/self.n)*(self.Y-pred_y)\n", 119 | " #update\n", 120 | " self.m=self.m-Dm*self.L\n", 121 | " self.c=self.c-Dc*self.L\n", 122 | " #cost is calculated for every iteration\n", 123 | " self.history[i]=self.cost(pred_y)\n", 124 | " self.mse=self.MSE(self.Y,pred_y)\n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " def MSE(self,pred_y,Y):\n", 130 | " errors=Y-pred_y #error is the difference between actual and 
predicted value\n", 131 | " mse=np.sum(np.square(errors))/self.n #mean of sum of square of erros\n", 132 | " return mse\n", 133 | " \n", 134 | " def results(self):\n", 135 | " fig=plt.figure(figsize=(14,14))\n", 136 | " a1=fig.add_subplot(211)\n", 137 | "\n", 138 | " plt.title('minimisation of errors across the iterations')\n", 139 | " a1.plot(self.history)\n", 140 | "\n", 141 | "\n", 142 | " #making predictions\n", 143 | " a2=fig.add_subplot(212)\n", 144 | " final_y=self.m*self.X +self.c \n", 145 | " plt.scatter(self.X,self.Y)\n", 146 | " plt.title('regrssion line')\n", 147 | " a2.plot([min(self.X),max(self.X)],[min(final_y),max(final_y)],color='red') #plotting the red line \n", 148 | " \n", 149 | " plt.show()\n", 150 | " \n", 151 | " print ('Mean Squared Error=',self.mse)\n", 152 | "\n", 153 | "\n" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "Plotting the dataset using a scatter plot" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 3, 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "data": { 170 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAHZRJREFUeJzt3X2MXfWd3/H3BzPAmG4yPBgEQ2YhEnI2WRYII0RqFRHYxCQbgUvCBrqrultUt1K03WRbF9NKTXa1FK/cKptW2lRWko2rzQIOD4Yu2xDLQFNFgnQcQ3h0IQ+Ax17sBIZs4lkYm2//mHM945n7fM49T/fzkqw798y59/7mjOd7fr/v70kRgZmZ1dcJRRfAzMwGy4HezKzmHOjNzGrOgd7MrOYc6M3Mas6B3sys5hzozcxqzoHezKzmHOjNzGruxKILAHDmmWfG+eefX3QxzMwqZffu3T+NiFWdzitFoD///POZmpoquhhmZpUi6eVuznPqxsys5joGeklfk3RQ0jOLjm2R9IKkH0i6X9LYou/dJuklSXslrR1Uwc3MrDvd1Oi/Dly75NhO4Ncj4jeA/wfcBiDp/cBNwAeS1/y5pBWZldbMzHrWMdBHxHeA15cc+3ZEHEmePg6cl3x9PXBXRLwVET8GXgIuz7C8ZmbWoyxy9P8c+F/J1+PAq4u+ty85ZmZmBUk16kbSfwCOAN9oHGpyWtOdTSRtADYATExMpCmGmVlPduyZZsvDe9k/M8u5Y6NsXLuadZfWt07ad41e0nrgE8DvxMI2VfuA9yw67Txgf7PXR8TWiJiMiMlVqzoOAzUzy8SOPdPcdt/TTM/MEsD0zCy33fc0O/ZMF120gekr0Eu6FrgVuC4iDi/61oPATZJOlnQBcCHwvfTFNDPLxpaH9zI7d/S4Y7NzR9ny8N6CSjR4HVM3ku4ErgLOlLQP+Dzzo2xOBnZKAng8Iv5VRDwraTvwHPMpnc9ExNHm72xmlr/9M7M9Ha+DjoE+Im5ucvirbc6/Hbg9TaHMzAbl3LFRppsE9XPHRgsoTT48M9bMhsrGtasZHTl+es/oyAo2rl1dUIkGrxRr3ZiZ5aUxumaYRt040JvZ0Fl36XitA/tSTt2YmdWca/RmVinDNtkpCw70ZlYZjclOjXHwjclOQCmCfVlvQk7dmFlllHmyU5ln3DrQm1lllHmyU5lvQg70ZlYZrSY1lWGyU5lvQg70ZlYZZZ7sVOabkAO9mVXGukvHueOGixgfG0XA+Ngod9xwUSk6PMt8E/KoGzOrlLJOdirzjFsHejOzjJT1JuTUjZlZzTnQm5nVnAO9mVnNOdCbmdVcx0Av6WuSDkp6ZtGxGyU9K+kdSZNLzr9N0kuS9kpaO4hCm5lZ97qp0X8duHbJsWeAG4DvLD4o6f3ATcAHktf8uaQVmJlZYToG+oj4DvD6kmPPR0SzBRyuB+6KiLci4sfAS8DlmZTUzMz6knWOfhx4ddHzfcmxZSRtkDQlaerQoUMZF8PMzBqyDvRqciyanRgRWyNiMiImV61alXExzMysIetAvw94z6Ln5wH7M/4MMzPrQdaB/kHgJkknS7oAuBD4XsafYWZmPei41o2kO4GrgDMl7QM+z3zn7H8DVgEPSXoyItZGxLOStgPPAUeAz0TE0RZvbWZmOegY6CPi5hbfur/F+bcDt6cplJmZZcczY83Mas6B3sys5hzozcxqzoHezKzmHOjNzGrOgd7MrOa8Z6yZWQF27JnObSNxB3ozs5zt2DPNbfc9zezc/HzS6ZlZbrvvaYCBBHunbszMcrbl4b3HgnzD7NxRtjzcbPX39Bzozcxytn9mtqfjaTnQm5nl7Nyx0Z6Op+VAb2aWs41rVzM6cvwuq6MjK9i4dvVAPs+dsWZmOWt0uHrUjZlZja27dHxggX0pp27MzGrOgd7MrOY6BnpJX5N0UNIzi46dLmmnpBeTx9OS45L0XyW9JOkHkj44yMK
bmVln3dTovw5cu+TYJmBXRFwI7EqeA3yM+X1iLwQ2AF/OpphmZtavjoE+Ir7D/B6xi10PbEu+3gasW3T8f8S8x4ExSedkVVgzM+tdv6Nuzo6IAwARcUDSWcnxceDVReftS44d6L+IZla0PBfgsuxlPbxSTY5F0xOlDcynd5iYmMi4GGaWlbwX4LLs9Tvq5rVGSiZ5PJgc3we8Z9F55wH7m71BRGyNiMmImFy1alWfxTCzQct7AS7LXr+B/kFgffL1euCBRcf/aTL65grgzUaKx8yqKe8FuKpmx55p1mx+hAs2PcSazY+wY8900UVapmPqRtKdwFXAmZL2AZ8HNgPbJd0CvALcmJz+N8DHgZeAw8DvDaDMZpajc8dGmW4S1Ae1AFeVVCWt1THQR8TNLb51TZNzA/hM2kKZWXlsXLv6uGAG/S3AVccO3XZprTL9bJ4Za2Ztrbt0nDtuuIjxsVEEnLZyhJNPPIHP3f1k16mKRs13emaWYKHmW8Y0Ry+qktZyoDezjtZdOs53N13NFz99CX8/9w4zs3M9Bey6dujmva58vxzozaxr/QbsqtR8e5X3uvL98jLFZtZUs5x6vwG7rh26ea8r3y8HejNbZseeaTbe8xRzR+fnO07PzLLxnqd49+gIM7Nzy87vFLCz6tBtVdYiA22e68r3y4HezJb5o//57LEg3zB3NJg7+g6jIyt6DtiDqvlWZXhj0RzozWyZNw4vr7UD/PLto/zZpy/pK2APouZbleGNRXOgN7OelClVUddO3qw50JvZMmMtcvFjoyMFlKa1bjp5i87hl4GHV5rZMl+47gOMnHD8YrQjJ4gvXPeBgkrUXKfhjXWdqNUrB3ozW2bdpeNsufHiY7Nhx8dG2XLjxaWrCS+dtTs+NsodN1x0XOdvHSdq9cqpGzNrqky5+HbaldM5/Hmu0ZtZbVVliYJBc6A3s9qqyhIFg+bUjZm1VPURK1VZomDQHOjNrKm6zDqtSl/DIDl1Y2ZNecRKfaQK9JL+QNIzkp6V9Nnk2OmSdkp6MXk8LZuimqVXhf09y8IjVuqj70Av6deBfwFcDlwMfELShcAmYFdEXAjsSp6bFc6TZ3rjESv1kaZG/2vA4xFxOCKOAP8b+MfA9cC25JxtwLp0RTTLhlMRvfGIlfpIE+ifAa6UdIaklcDHgfcAZ0fEAYDk8axmL5a0QdKUpKlDhw6lKIZZd5yK6E2nWadWHX2PuomI5yX9KbAT+AXwFHCkh9dvBbYCTE5ORofTzVKr6y5Hg+QRK/WQqjM2Ir4aER+MiCuB14EXgdcknQOQPB5MX0yz9JyKsGGVahy9pLMi4qCkCeAG4EPABcB6YHPy+EDqUpploE6TZ6o+kcnypYj+syaS/g9wBjAH/GFE7JJ0BrAdmABeAW6MiNfbvc/k5GRMTU31XQ6zYbJ0IhPMt0ycPx8+knZHxGSn81LV6CPiHzU59jPgmjTva2atVWn7PLc8ysFLIFjlDVswqcrooSyXUBi233HWHOit0uqyHstS7QJbVUYPZdXyqOvvOE9e68YqrY6ToDrN4K3K6KGsWh51/B3nzYHeKq0qaYxedApsjYlMizfqPmWk9z/lQa/7k9USCnX8HefNgd4qrazrsaQJot0GtreOvHPs6zcOz/W0bk8e6/5k1fIo6++4ShzordKKTmM0C+hpg2g3gS1tOiOPdEhWSygU/TuuA3fGWqUVOQmqVSfhKSMnpOqE3Lh2ddNx8osDW9p0Rl7pkCyWUKjTRLeiONBb5RW1HkurWvHSYw3dBtFuAlvakTdVGbnTkMfvuM5DOB3ozfrUa+33BIkLNj3UVRDpFNi6qfW3k/b1dVP3IZwO9GZ9alUrHhsd4a0j7yyr2R9NlhvJIoikTWe0ej3Ams2P1LJW206VZhv3I9VaN1nxWjeWVhHN7nZrzky9/Dp3PvHqseDezPjYKN/ddPVAy9iLYV5D54JND9HsNyXgx5t/K+/idC2XtW7MyqCoZne7WvG
9u6fbBnnIvuMz7c2u7rXadqrWZ9ErB3qrvCIDVLNc+prNj7TskF0syyCSxc1umCcm1b3PwuPorfLKFqC6+dysg0gW4+LHVo40PV6XWm07dd820TV6q7yyNbvfPTrCzOzcsuNKHgfRh5D2ZrdjzzS/+PvlO4GOrFBtarWd1HnbRAd6q7yyNbul5sfHVo6w5z9+dCCfmfZmt+Xhvcy9s7xP4dSTTqxt8BsmqVI3kj4n6VlJz0i6U9Ipki6Q9ISkFyXdLemkrApr1kzZmt0zh5fX5tsdz0LaZQJa1fzfbNIyserpu0YvaRz418D7I2JW0nbgJuDjwBcj4i5J/x24BfhyJqW13FRtlmCZmt1FpJLSjqtvVeZ3jzbP21u1pE3dnAiMSpoDVgIHgKuBf5J8fxvwBRzoK6XuswQHLW0qqd+bbLc3u2bvv3HtajZ+86ll6Ztfvn2EHXum/XuvuL5TNxExDfxn5jcAPwC8CewGZiKi0auzD/D/kIoZ5MqGg14DvQzSpJLSrHzZzbVt9f4A/+CU5fW+uaPhDT5qIE3q5jTgeuACYAb4JvCxJqc2nTUiaQOwAWBiYqLfYtgADGq4YhlbCoNKUfWbSup3TkC317bd+7fqQxiGcfR1l6Yz9jeBH0fEoYiYA+4D/iEwJqlxAzkP2N/sxRGxNSImI2Jy1apVKYphWRvURg9l2xIuj803etXvTbbba9vu/b3BR32lCfSvAFdIWilJwDXAc8CjwKeSc9YDD6QrouVtUBs9lG1iU9luPND/TbbVNZyemT0undNuUpQ3+KivvlM3EfGEpHuA7wNHgD3AVuAh4C5Jf5Ic+2oWBbX8DGqjh7JNbCrbjQf678htdW0Fx45Pz8wycoIYWSHmji5kVBvvn/b3nmUarGqjvsrOq1dabsq2OuKazY80DY5FryrZT5Brdm1F8w6ysdERTj35xEyDaJa/27L9Pykzr15ppVO2LeHKNqO2oZ+O3GbXttlNDOYnQT35+Wxn6Ga5sNwwr6I5KA70lqt2QSzv5nrZbjxpLb22rVosg0iVZZkGK2NKreoc6K0UilxTvqqBvZM8WyxZ9r+UrS+nDrxMsZVCGUfAVF2eawBlOWLHo3+y5xq9lUKZm+t5pJTKNnGrnXZlzeJnqFtKrQw86sZKocwjYAY9AqRKo0y6KauHRuan21E3Tt1YKZS1uZ5HSqlKaatOZS3jbGNz6sZKoqzN9TxSSlVKW7Uastkoq4dGlpMDvZVGr/nkPFIEeYwAKesok2YjoVpNwmqUtcw3rWHm1I1VUl4pgkGklJYuJ/zh962qTNoqWNj7tmFxWb0wWjk50Fsl5ZXXznqIYrMb1L27p/nkZeOl2QqxoVUtPKBlWcva1zLsnLqxSsozRZDlEMVWN6hHXzhU6OiiZlqllNqNhCprX8uwc6C3Ssoqr51Fnr+X96hSDrvfmbV1nm1cVU7dWCVlkSLIIs/f63tUKYed58xaGyxPmLLKSlsbbzVJ67SVI6w8qbtlfHud6NXL5ChPPLJOvEyx1V7aFEGrdMkbh+d4I9k/tdPiar2mYrrNYWe1yJtvFgYO9DbE2k0AWqzdhJ9++gq6uUFlMfGojJuxWzH6ztFLWi3pyUX/fi7ps5JOl7RT0ovJ42lZFtgsK83y/K20qqGXeX/dsi2tsHT+gJdFyE+aPWP3ApcASFoBTAP3A5uAXRGxWdKm5PmtGZR1KLnpPTjN0ii/fOsIM7Nzy85tVUNvl4pJ87vLYlRRmUb4uHVRrKxSN9cAP4yIlyVdD1yVHN8GPIYDfV/8xzF4S9MorTpL29XQm6Vimv3uPnf3k0y9/DqTv3p6xxtAFpuGlGlpBa+BU6yshlfeBNyZfH12RBwASB7Pyugzhk7Zmt7DIKshha2WD/jLx19h4zef6jgcM4tylGmWaplaF8ModY1e0knAdcBtPb5uA7ABYGJiIm0xasl/HMXIYsJPu9/R3DvHD2luVbNtVY5uU0JlmqVaptbFMMoidfM
x4PsR8Vry/DVJ50TEAUnnAAebvSgitgJbYX4cfQblqB3/cVRXtyN6Grq9efeazivLLNU896+15bJI3dzMQtoG4EFgffL1euCBDD5jKJWp6W292bh29bJVHtvp9uZd1XSeZ9kWK1WNXtJK4CPAv1x0eDOwXdItwCvAjWk+Y5iVqeltvVl36ThTL7/ONx5/5bj120dWCOL49E0vN+8qp/PK0roYRqkCfUQcBs5YcuxnzI/CsR61yr36j6Oa/mTdRU1H2ED/N2+n86wfnhlbEh5Kmb0yzEFodaPutxzOdVs/HOhLwuOMs5XljbMMN4wGp/OsHw70JVHl3GsZZXXjLGNLy+k865XXoy+JKq1T3q0i1zbJ6sZZ1VEuZou5Rl8Sdcu9dqoJZ5kOafZeWXVauqVldeBAXxJ1y712qglnmT9v9l6fvGyce3dPp75xepSL1YEDfYnUKffariacZcdzu82277jhotQ3zrq1tGw4OdDbQLSrCWeZDmn3XlncOOvW0rLh5EBvA9GuJrzl4b2ZpUPySK2UsaVVpiGfVn4edWMD0W5tkyzX8KnjekCdRis1+iU6LXVs1uAavQ1Mp1mhWdRI65Za6WbcvifXWa8c6K0QWaZDypha6Vc3QbyuQz6djhocB/ohUrc/pLr9PNBdEK/jkM8yzkCuE+foh0TRed2sZ8kW/fMMSjczpOvYL+EZyIPlQD8kevlDKntQ3rFnmn+z/alaBoZugngdN/GoazqqLJy6GRLd/CHt2DPNFx58lpnZuWPHsmhCZ9l52LhpHI3mu0/mGRgGkTrqtnO5Tv0SUM90VJk40A+JTn9IS3Oki6Ud0ZFlba3ZTWOxvALDIHPKdQvi3fAM5MFKlbqRNCbpHkkvSHpe0ocknS5pp6QXk8fTsiqs9a9TSqBTAE1TU85yZc525cgzMDinnK06pqPKJG2N/kvAtyLiU5JOAlYC/x7YFRGbJW0CNgG3pvwcS6lTSqBTIE9TU86yttaqZbJCyjUwOKecvWFsyeSl70Av6V3AlcA/A4iIt4G3JV0PXJWctg14DAf6Umj3h9QqgELzoNxLfrqxUfadT7zK0QhWSHzysv7+qFvdNPKu/TmnbFWSJnXzXuAQ8BeS9kj6iqRTgbMj4gBA8nhWsxdL2iBpStLUoUOHUhTDstAstQNw2sqRZUG011E0O/ZMc+/u6WMdqEcjuHf3dF+jbsrSxO9mdEyRG6+YLaZoMXqh4wulSeBxYE1EPCHpS8DPgd+PiLFF570REW3z9JOTkzE1NdVXOSw73dbS12x+pGltdnxslO9uujr1+f2WK2/tytWsc7uIlofVm6TdETHZ6bw0Ofp9wL6IeCJ5fg/z+fjXJJ0TEQcknQMcTPEZlqNuc6S95qfT5LPLPGOy3fXyejRWJn2nbiLib4FXJTXaqtcAzwEPAuuTY+uBB1KV0Eqn11E0/Y66qfLEKHfWWpmknRn7+8A3JP0AuAT4T8Bm4COSXgQ+kjy3Gul1Cn4/U/aLnBiVRW69jpu9W3WlGl4ZEU8CzfJD16R5XyunxTnpsZUjnHziCbw5O9fVqBvobSnhoiZGZZUq8gQgKxPPjLWuLA2AbxyeY3RkBV/89CVdBcBex0gXNTEqq9x63dbJt2pzoLeu5N25WNTEqCxz654AZGXh1Ssrpqix2Xl3LrbK6/+X3754oMHTuXWrIwf6CilyDfa8A2BRE6PquNa7mVM3FZL1cr+95I+L6FxMm/roZ6KVc+tWRw70FZJV+qSfkSVVC4BpRs84t25140BfIVktpNVvy6CbAFiW5Qo8M9VsgXP0FZJV/nhQHatl2sfVM1PNFlQ20A/jyoBZdVAOqmO1TJtxePSM2YJKpm7KvNDVoGWRPx5Ux2qZatGemWq2oJKBftjyr73mvTudP6iO1aI242j385ahv8CsaJUM9GWqOQ5ar62Xbs8fxMiSImrRnX5eB3aziubohyn/2mveu8g8eRGTnMrUL2BWVpWs0Q9T/jXPTT6
ykPcQzKJ/XrMqqGSNviz7huYhr00+8pL1EMyy/7xmZVDJGj3UN/+6tLb74fet4t7d0123Xsre2sm6I73sP69ZGaQK9JJ+AvwdcBQ4EhGTkk4H7gbOB34C/HZEvJGumNXXTbqiWcfivbun+eRl4zz6wqGuUh1lH22Sdaql7D+vWRlkUaP/cET8dNHzTcCuiNgsaVPy/NYMPqeyuh0J06q2++gLh/jupqu7/rwyt3YGMQSzzD+vWRkMIkd/PbAt+XobsG4An1Ep3Y4MGYaORS8DbJa/tIE+gG9L2i1pQ3Ls7Ig4AJA8npXyMyqv2wA+DB2Lw9SRblYWaVM3ayJiv6SzgJ2SXuj2hcmNYQPAxMREymKUW7fpimHpWMwz1VKW1TTNipSqRh8R+5PHg8D9wOXAa5LOAUgeD7Z47daImIyIyVWrVqUpRul1m65wbTdbZVpN06xIfdfoJZ0KnBARf5d8/VHgj4EHgfXA5uTxgSwKWmW9jAxxx2J2hm1NJLNW0qRuzgbul9R4n7+KiG9J+r/Adkm3AK8AN6YvZvU5gOdvGDq3zbrRd6CPiB8BFzc5/jPgmjSFMstCUatpmpVNJZdAMOuGh3KazavsEghmnXjWrNk8B3qrNfeNmDnQl4LHepvZIDnQF2yY9781s3w40BesamO9my2j3O3KmmZWDAf6glVprHez1sdfPv7Kse+7NWJWTh5eWbAqLWTWrPWxlPdrNSsfB/qCVWmsd7etjDK2RsyGmQN9waq0kFm3rYwytkbMhplz9CVQlbHezZZRXqqsrRGzYeYavXWtWevjd6+YqERrxGyYuUZvPalK68PMFrhGb2ZWc67R15CXVDCzxRzoa8ZLKpjZUk7d1Ey7JRXMbDilDvSSVkjaI+mvk+cXSHpC0ouS7pZ0UvpiWreqtKSCmeUjixr9HwDPL3r+p8AXI+JC4A3glgw+w7pUpSUVzCwfqQK9pPOA3wK+kjwXcDVwT3LKNmBdms+w3lRpSQUzy0faztg/A/4d8CvJ8zOAmYg4kjzfBzTtAZS0AdgAMDExkbIY1uDt88xsqb4DvaRPAAcjYrekqxqHm5wazV4fEVuBrQCTk5NNz7H+eFKTmS2Wpka/BrhO0seBU4B3MV/DH5N0YlKrPw/Yn76YZmbWr75z9BFxW0ScFxHnAzcBj0TE7wCPAp9KTlsPPJC6lGZm1rdBjKO/FfhDSS8xn7P/6gA+w8zMupTJzNiIeAx4LPn6R8DlWbyvmZml55mxZmY1p4jiB7xIOgS8XHQ5UjgT+GnRhSgRX48FvhYLfC0WZHUtfjUiVnU6qRSBvuokTUXEZNHlKAtfjwW+Fgt8LRbkfS2cujEzqzkHejOzmnOgz8bWogtQMr4eC3wtFvhaLMj1WjhHb2ZWc67Rm5nVnAN9jySdIul7kp6S9KykP0qOD+2GK958Zp6kn0h6WtKTkqaSY6dL2plci52STiu6nHmRNCbpHkkvSHpe0oeG8XpIWp38n2j8+7mkz+Z5LRzoe/cWcHVEXAxcAlwr6QqGe8MVbz6z4MMRccmioXObgF3JtdiVPB8WXwK+FRHvAy5m/v/I0F2PiNib/J+4BLgMOAzcT47XwoG+RzHvF8nTkeRfMKQbrnjzmY6uZ/4awBBdC0nvAq4kWesqIt6OiBmG9Hoscg3ww4h4mRyvhQN9H5JUxZPAQWAn8EO63HClhhqbz7yTPO9685kaCuDbknYnG+sAnB0RBwCSx7MKK12+3gscAv4iSet9RdKpDO/1aLgJuDP5Ordr4UDfh4g4mjTDzmN+Abdfa3ZavqXK3+LNZxYfbnJq7a9FYk1EfBD4GPAZSVcWXaACnQh8EPhyRFwK/JIhSNO0k/RVXQd8M+/PdqBPIWmKPgZcQbLhSvKtYdlwpbH5zE+Au5hP2RzbfCY5Z1iuBRGxP3k8yHwO9nLgNUnnACSPB4srYa72Afsi4onk+T3MB/5hvR4wXwH4fkS8ljzP7Vo40PdI0ipJY8nXo8B
vMt/JNHQbrnjzmQWSTpX0K42vgY8CzwAPMn8NYEiuBUBE/C3wqqTGrvTXAM8xpNcjcTMLaRvI8Vp4wlSPJP0G8x0nK5i/UW6PiD+W9F7ma7WnA3uA342It4orab6SfYP/bUR8YhivRfIz3588PRH4q4i4XdIZwHZgAngFuDEiXi+omLmSdAnznfQnAT8Cfo/kb4Yhux6SVgKvAu+NiDeTY7n933CgNzOrOaduzMxqzoHezKzmHOjNzGrOgd7MrOYc6M3Mas6B3sys5hzozcxqzoHezKzm/j+n1HXMT02E5gAAAABJRU5ErkJggg==\n", 171 | "text/plain": [ 172 | "" 173 | ] 174 | }, 175 | "metadata": {}, 176 | "output_type": "display_data" 177 | } 178 | ], 179 | "source": [ 180 | "data=pd.read_csv('lin_reg_data.csv')\n", 181 | "data.head()\n", 182 | "X=data.iloc[:,0]\n", 183 | "Y=data.iloc[:,1]\n", 184 | "plt.scatter(X,Y)\n", 185 | "plt.show()\n" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 4, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "l=LinearRegression(X,Y,0.005,100)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 5, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "l.fit()" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 6, 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "data": { 213 | "image/png": "\n", 214 | "text/plain": [ 215 | "" 216 | ] 217 | }, 218 | "metadata": {}, 219 | "output_type": "display_data" 220 | }, 221 | { 222 | "name": "stdout", 223 | "output_type": "stream", 224 | "text": [ 225 | "Mean Squared Error= 6.0803681265392814e-05\n" 226 | ] 227 | } 228 | ], 229 | "source": [ 230 | "l.results()" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [] 239 | } 240 | ], 241 | "metadata": { 242 | "kernelspec": { 243 | "display_name": "Python 3", 244 | "language": "python", 245 | "name": "python3" 246 | }, 247 | "language_info": { 248 | "codemirror_mode": { 249 | "name": "ipython", 250 | "version": 3 251 | }, 252 | "file_extension": ".py", 253 | "mimetype": "text/x-python", 254 | "name": "python", 255 | "nbconvert_exporter": "python", 256 | 
"pygments_lexer": "ipython3", 257 | "version": "3.6.4" 258 | }, 259 | "widgets": { 260 | "application/vnd.jupyter.widget-state+json": { 261 | "state": {}, 262 | "version_major": 2, 263 | "version_minor": 0 264 | } 265 | } 266 | }, 267 | "nbformat": 4, 268 | "nbformat_minor": 2 269 | } 270 | -------------------------------------------------------------------------------- /linear regression l1 regularized from scratch-final.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "scrolled": true 8 | }, 9 | "outputs": [ 10 | { 11 | "data": { 12 | "text/html": [ 13 | "
\n", 14 | "\n", 27 | "\n", 28 | " \n", 29 | " \n", 30 | " \n", 31 | " \n", 32 | " \n", 33 | " \n", 34 | " \n", 35 | " \n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | "
32.5023452731.70700585
053.42680468.777596
161.53035862.562382
247.47564071.546632
359.81320887.230925
455.14218878.211518
\n", 63 | "
" 64 | ], 65 | "text/plain": [ 66 | " 32.50234527 31.70700585\n", 67 | "0 53.426804 68.777596\n", 68 | "1 61.530358 62.562382\n", 69 | "2 47.475640 71.546632\n", 70 | "3 59.813208 87.230925\n", 71 | "4 55.142188 78.211518" 72 | ] 73 | }, 74 | "execution_count": 1, 75 | "metadata": {}, 76 | "output_type": "execute_result" 77 | } 78 | ], 79 | "source": [ 80 | "import numpy as np\n", 81 | "import pandas as pd\n", 82 | "import matplotlib.pyplot as plt\n", 83 | "import statistics as st\n", 84 | "plt.rcParams['figure.figsize'] = (5.0, 5.0)\n", 85 | "\n", 86 | "data=pd.read_csv('lin_reg_data.csv')\n", 87 | "data.head()" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 2, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | " class LinearRegressionL1:\n", 97 | " def __init__(self,X_data,Y_data,l,l1,iterations): #initialize all parameters\n", 98 | " self.X=X_data\n", 99 | " self.Y=Y_data\n", 100 | " #weight and bias\n", 101 | " self.m=np.random.randn(self.X.shape[0])\n", 102 | " self.c=0\n", 103 | " self.L=l #learning rate\n", 104 | " self.l1=l1 #regularization parameter\n", 105 | " self.iter=iterations #num of iterations\n", 106 | " self.n=float(len(X)) #size of data\n", 107 | " \n", 108 | " def cost(self,pred_y): #cost function\n", 109 | " cost=np.sum(np.square(self.Y-pred_y))/(2*self.n) + self.l1*np.sum(self.m)\n", 110 | " return(cost)\n", 111 | " \n", 112 | " def fit(self):\n", 113 | " self.history=np.zeros(self.iter)\n", 114 | " pred_y=np.zeros((self.Y.shape))\n", 115 | " for j in range(self.iter):\n", 116 | " for i in range(0,len(self.X)):\n", 117 | " pred_y[i]=self.m[i]*X[i]+self.c\n", 118 | " if self.m[i]>0:\n", 119 | " Dm= (-2/self.n)*(self.X[i]*(self.Y[i]-pred_y[i]))-self.l1 \n", 120 | " else:\n", 121 | " Dm= (-2/self.n)*(self.X[i]*(self.Y[i]-pred_y[i]))+self.l1\n", 122 | "\n", 123 | "\n", 124 | " Dc= (-2/self.n)*(self.Y[i]-pred_y[i])\n", 125 | " #update\n", 126 | " self.m[i]=self.m[i]-Dm*self.L\n", 127 | " 
self.c=self.c-Dc*self.L\n", 128 | " self.history[j]=self.cost(pred_y)\n", 129 | " self.mse=self.MSE(self.Y,pred_y)\n", 130 | "\n", 131 | " \n", 132 | " \n", 133 | " def MSE(self,pred_y,Y):\n", 134 | " errors=Y-pred_y #error is the difference between actual and predicted value\n", 135 | " mse=np.sum(np.square(errors))/self.n #mean of sum of square of erros\n", 136 | " return mse\n", 137 | " \n", 138 | " def results(self):\n", 139 | " fig=plt.figure(figsize=(14,14))\n", 140 | " a1=fig.add_subplot(211)\n", 141 | "\n", 142 | " plt.title('minimisation of errors across the iterations')\n", 143 | " a1.plot(self.history)\n", 144 | "\n", 145 | "\n", 146 | " #making predictions\n", 147 | " a2=fig.add_subplot(212)\n", 148 | " final_y=self.m*self.X +self.c \n", 149 | " plt.scatter(self.X,self.Y)\n", 150 | " plt.title('regrssion line')\n", 151 | " a2.plot([min(self.X),max(self.X)],[min(final_y),max(final_y)],color='red') #plotting the red line \n", 152 | " \n", 153 | " plt.show()\n", 154 | " \n", 155 | " print ('Mean Squared Error=',self.mse)\n", 156 | "\n", 157 | "\n" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "Plotting the dataset using a scatter plot" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 3, 170 | "metadata": {}, 171 | "outputs": [ 172 | { 173 | "data": { 174 | "image/png": "\n", 175 | "text/plain": [ 176 | "" 177 | ] 178 | }, 179 | "metadata": {}, 180 | "output_type": "display_data" 181 | } 182 | ], 183 | "source": [ 184 | "data=pd.read_csv('lin_reg_data.csv')\n", 185 | "data.head()\n", 186 | "X=data.iloc[:,0]\n", 187 | "Y=data.iloc[:,1]\n", 188 | "plt.scatter(X,Y)\n", 189 | "plt.show()\n" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 4, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "l=LinearRegressionL1(X,Y,0.005,0.001,100)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 5, 204 | "metadata": {}, 
205 | "outputs": [], 206 | "source": [ 207 | "l.fit()" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 6, 213 | "metadata": {}, 214 | "outputs": [ 215 | { 216 | "data": { 217 | "image/png": "\n", 218 | "text/plain": [ 219 | "" 220 | ] 221 | }, 222 | "metadata": {}, 223 | "output_type": "display_data" 224 | }, 225 | { 226 | "name": "stdout", 227 | "output_type": "stream", 228 | "text": [ 229 | "Mean Squared Error= 5.6577784983058404e-05\n" 230 | ] 231 | } 232 | ], 233 | "source": [ 234 | "l.results()" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [] 250 | } 251 | ], 252 | "metadata": { 253 | "kernelspec": { 254 | "display_name": "Python 3", 255 | "language": "python", 256 | "name": "python3" 257 | }, 258 | "language_info": { 259 | "codemirror_mode": { 260 | "name": "ipython", 261 | "version": 3 262 | }, 263 | "file_extension": ".py", 264 | "mimetype": "text/x-python", 265 | "name": "python", 266 | "nbconvert_exporter": "python", 267 | "pygments_lexer": "ipython3", 268 | "version": "3.6.4" 269 | }, 270 | "widgets": { 271 | "application/vnd.jupyter.widget-state+json": { 272 | "state": {}, 273 | "version_major": 2, 274 | "version_minor": 0 275 | } 276 | } 277 | }, 278 | "nbformat": 4, 279 | "nbformat_minor": 2 280 | } 281 | -------------------------------------------------------------------------------- /linear regression l2 regularized from scratch-final.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "scrolled": true 8 | }, 9 | "outputs": [ 10 | { 11 | "data": { 12 | "text/html": [ 13 | "
\n", 14 | "\n", 27 | "\n", 28 | " \n", 29 | " \n", 30 | " \n", 31 | " \n", 32 | " \n", 33 | " \n", 34 | " \n", 35 | " \n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | "
32.5023452731.70700585
053.42680468.777596
161.53035862.562382
247.47564071.546632
359.81320887.230925
455.14218878.211518
\n", 63 | "
" 64 | ], 65 | "text/plain": [ 66 | " 32.50234527 31.70700585\n", 67 | "0 53.426804 68.777596\n", 68 | "1 61.530358 62.562382\n", 69 | "2 47.475640 71.546632\n", 70 | "3 59.813208 87.230925\n", 71 | "4 55.142188 78.211518" 72 | ] 73 | }, 74 | "execution_count": 1, 75 | "metadata": {}, 76 | "output_type": "execute_result" 77 | } 78 | ], 79 | "source": [ 80 | "import numpy as np\n", 81 | "import pandas as pd\n", 82 | "import matplotlib.pyplot as plt\n", 83 | "import statistics as st\n", 84 | "plt.rcParams['figure.figsize'] = (5.0, 5.0)\n", 85 | "\n", 86 | "data=pd.read_csv('lin_reg_data.csv')\n", 87 | "data.head()" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 2, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | " class LinearRegressionL2:\n", 97 | " def __init__(self,X_data,Y_data,l,l2,iterations): #initialize all parameters\n", 98 | " self.X=X_data\n", 99 | " self.Y=Y_data\n", 100 | " #weight and bias\n", 101 | " self.m=0\n", 102 | " self.c=0\n", 103 | " self.L=l #learning rate\n", 104 | " self.l2=l2 #regularization parameter\n", 105 | " self.iter=iterations #num of iterations\n", 106 | " self.n=float(len(self.X)) #size of data\n", 107 | " \n", 108 | " def cost(self,pred_y): #cost function\n", 109 | " cost=np.sum(np.square(self.Y-pred_y))/(2*self.n) + self.l2*np.sum(np.square(self.m))\n", 110 | " return(cost)\n", 111 | " \n", 112 | " def fit(self):\n", 113 | " self.history=np.zeros(self.iter)\n", 114 | " #updating values of m and c\n", 115 | " for i in range(self.iter):\n", 116 | " pred_y=self.m*self.X + self.c\n", 117 | " #print(pred_y)\n", 118 | " Dm= (-2/self.n)*(self.X*(self.Y-pred_y))+2*self.l2*self.m\n", 119 | " Dc= (-2/self.n)*(self.Y-pred_y)\n", 120 | " #update\n", 121 | " self.m=self.m-Dm*self.L\n", 122 | " self.c=self.c-Dc*self.L\n", 123 | " #cost is calculated for every iteration\n", 124 | " self.history[i]=self.cost(pred_y)\n", 125 | " self.mse=self.MSE(self.Y,pred_y)\n", 126 | " \n", 127 | " \n", 128 | " \n", 129 
| " \n", 130 | " def MSE(self,pred_y,Y):\n", 131 | " errors=Y-pred_y #error is the difference between actual and predicted value\n", 132 | " mse=np.sum(np.square(errors))/self.n #mean of sum of square of erros\n", 133 | " return mse\n", 134 | " \n", 135 | " def results(self):\n", 136 | " fig=plt.figure(figsize=(14,14))\n", 137 | " a1=fig.add_subplot(211)\n", 138 | "\n", 139 | " plt.title('minimisation of errors across the iterations')\n", 140 | " a1.plot(self.history)\n", 141 | "\n", 142 | "\n", 143 | " #making predictions\n", 144 | " a2=fig.add_subplot(212)\n", 145 | " final_y=self.m*self.X +self.c \n", 146 | " plt.scatter(self.X,self.Y)\n", 147 | " plt.title('regrssion line')\n", 148 | " a2.plot([min(self.X),max(self.X)],[min(final_y),max(final_y)],color='red') #plotting the red line \n", 149 | " \n", 150 | " plt.show()\n", 151 | " \n", 152 | " print ('Mean Squared Error=',self.mse)\n", 153 | "\n", 154 | "\n" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "Plotting the dataset using a scatter plot" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 3, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "data": { 171 | "image/png": "\n", 172 | "text/plain": [ 173 | "" 174 | ] 175 | }, 176 | "metadata": {}, 177 | "output_type": "display_data" 178 | } 179 | ], 180 | "source": [ 181 | "data=pd.read_csv('lin_reg_data.csv')\n", 182 | "data.head()\n", 183 | "X=data.iloc[:,0]\n", 184 | "Y=data.iloc[:,1]\n", 185 | "plt.scatter(X,Y)\n", 186 | "plt.show()\n" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 5, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "l=LinearRegressionL2(X,Y,0.005,0.001,100)" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 6, 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "l.fit()" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": 7, 210 | 
"metadata": {}, 211 | "outputs": [ 212 | { 213 | "data": { 214 | "image/png": "\n", 215 | "text/plain": [ 216 | "" 217 | ] 218 | }, 219 | "metadata": {}, 220 | "output_type": "display_data" 221 | }, 222 | { 223 | "name": "stdout", 224 | "output_type": "stream", 225 | "text": [ 226 | "Mean Squared Error= 8.5291379739768e-05\n" 227 | ] 228 | } 229 | ], 230 | "source": [ 231 | "l.results()" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": {}, 238 | "outputs": [], 239 | "source": [] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [] 247 | } 248 | ], 249 | "metadata": { 250 | "kernelspec": { 251 | "display_name": "Python 3", 252 | "language": "python", 253 | "name": "python3" 254 | }, 255 | "language_info": { 256 | "codemirror_mode": { 257 | "name": "ipython", 258 | "version": 3 259 | }, 260 | "file_extension": ".py", 261 | "mimetype": "text/x-python", 262 | "name": "python", 263 | "nbconvert_exporter": "python", 264 | "pygments_lexer": "ipython3", 265 | "version": "3.6.4" 266 | }, 267 | "widgets": { 268 | "application/vnd.jupyter.widget-state+json": { 269 | "state": {}, 270 | "version_major": 2, 271 | "version_minor": 0 272 | } 273 | } 274 | }, 275 | "nbformat": 4, 276 | "nbformat_minor": 2 277 | } 278 | --------------------------------------------------------------------------------