├── LinearRegression
│   ├── __init__.py
│   ├── Plots.py
│   ├── Linear Regression Scikit-learn.py
│   ├── Metrics.py
│   ├── LinearModel.py
│   └── main.py
├── LogisticRegression
│   ├── __init__.py
│   ├── main.py
│   ├── Model.py
│   └── data
│       └── marks.txt
└── PolynomialRegression
    ├── __init__.py
    ├── Models.py
    └── main.py

--------------------------------------------------------------------------------
/LinearRegression/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/LogisticRegression/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/PolynomialRegression/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/LinearRegression/Plots.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt


def scatter_plot(x, y, size=10, x_label='x', y_label='y', color='b'):
    plt.scatter(x, y, s=size, color=color)
    set_labels(x_label, y_label)


def plot(x, y, x_label='x', y_label='y', color='r'):
    plt.plot(x, y, color=color)
    set_labels(x_label, y_label)


def ploty(y, x_label='x', y_label='y'):
    plt.plot(y)
    set_labels(x_label, y_label)


def set_labels(x_label, y_label):
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.show()

--------------------------------------------------------------------------------
/LinearRegression/Linear Regression Scikit-learn.py:
--------------------------------------------------------------------------------
# imports
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# generate a random data set
np.random.seed(0)
x = np.random.rand(100, 1)
y = 2 + 3 * x + np.random.rand(100, 1)

# model initialization
regression_model = LinearRegression()
# fit the data (train the model)
regression_model.fit(x, y)
# predict
y_predicted = regression_model.predict(x)

# model evaluation: take the square root of the MSE to get the RMSE
rmse = np.sqrt(mean_squared_error(y, y_predicted))
r2 = r2_score(y, y_predicted)

# printing values
print('The coefficient is {}'.format(regression_model.coef_))
print('The intercept is {}'.format(regression_model.intercept_))
print('Root mean squared error of the model is {}.'.format(rmse))
print('R-squared score is {}.'.format(r2))

# plotting
plt.scatter(x, y, s=10)
plt.xlabel('x')
plt.ylabel('y')
plt.plot(x, y_predicted, color='r')
plt.show()
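The script above reports the fitted intercept and coefficient. As an optional cross-check (not part of the repository), the same least-squares solution can be obtained in closed form from the normal equation; this is a minimal sketch assuming the same synthetic data:

import numpy as np

np.random.seed(0)
x = np.random.rand(100, 1)
y = 2 + 3 * x + np.random.rand(100, 1)

# prepend a bias column and solve (X^T X) w = X^T y
X = np.c_[np.ones((100, 1)), x]
w = np.linalg.solve(X.T @ X, X.T @ y)

# w[0] should match the intercept and w[1] the coefficient printed by the script
print('intercept ~ {}, coefficient ~ {}'.format(w[0, 0], w[1, 0]))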
--------------------------------------------------------------------------------
/LinearRegression/Metrics.py:
--------------------------------------------------------------------------------
import numpy as np


class PerformanceMetrics:
    """Defines methods to evaluate the model

    Parameters
    ----------
    y_actual : array-like, shape = [n_samples]
        Observed values from the training samples
    y_predicted : array-like, shape = [n_samples]
        Predicted values from the model

    """

    def __init__(self, y_actual, y_predicted):
        self.y_actual = y_actual
        self.y_predicted = y_predicted

    def compute_rmse(self):
        """Compute the root mean squared error

        Returns
        -------
        rmse : root mean squared error

        """
        # RMSE is the square root of the *mean* of the squared residuals
        return np.sqrt(self.sum_of_square_of_residuals() / len(self.y_actual))

    def compute_r2_score(self):
        """Compute the r-squared score

        Returns
        -------
        r2_score : r-squared score

        """
        # sum of squares of residuals
        ssr = self.sum_of_square_of_residuals()

        # total sum of squares
        sst = np.sum((self.y_actual - np.mean(self.y_actual)) ** 2)

        return 1 - (ssr / sst)

    def sum_of_square_of_residuals(self):
        return np.sum((self.y_actual - self.y_predicted) ** 2)
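A minimal usage sketch for PerformanceMetrics on hand-made arrays (the values below are illustrative, not repository data; the import assumes the package layout shown in the tree above):

import numpy as np
from LinearRegression.Metrics import PerformanceMetrics

y_actual = np.array([1.0, 2.0, 3.0, 4.0])
y_predicted = np.array([1.1, 1.9, 3.2, 3.8])

metrics = PerformanceMetrics(y_actual, y_predicted)
# RMSE = sqrt(SSR / n) and R^2 = 1 - SSR / SST
print('RMSE: {}'.format(metrics.compute_rmse()))
print('R2: {}'.format(metrics.compute_r2_score()))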
--------------------------------------------------------------------------------
/PolynomialRegression/Models.py:
--------------------------------------------------------------------------------
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score


class LinearModel:
    """Trains a linear model and computes its metrics

    Parameters
    ----------
    model : linear model on which the data will be trained

    Attributes
    ----------
    intercept_ : intercept of the fitted line
    coeffs_ : coefficients of the model
    rmse_ : root mean squared error of the model
    r2_ : r-squared score of the model

    """

    def __init__(self, model):
        self.model = model

    def compute_metrics(self, x, y):
        """Trains the model and computes the metrics

        Parameters
        ----------
        x : array-like, shape = [n_samples, n_features]
            Training samples
        y : array-like, shape = [n_samples, n_target_values]
            Target values

        Returns
        -------
        self : instance of self

        """
        self.model.fit(x, y)
        y_predicted = self.model.predict(x)
        self.intercept_ = self.model.intercept_
        self.coeffs_ = self.model.coef_
        self.rmse_ = np.sqrt(mean_squared_error(y, y_predicted))
        self.r2_ = r2_score(y, y_predicted)
        return self

--------------------------------------------------------------------------------
/PolynomialRegression/main.py:
--------------------------------------------------------------------------------
# imports
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from PolynomialRegression.Models import LinearModel


def generate_data_set():
    np.random.seed(0)
    x = 2 - 3 * np.random.normal(0, 1, 20)
    y = x - 2 * (x ** 2) + 0.5 * (x ** 3) + np.random.normal(-3, 3, 20)

    # transforming the data to include another axis
    x = x[:, np.newaxis]
    y = y[:, np.newaxis]
    return x, y


if __name__ == "__main__":
    x_train, y_train = generate_data_set()

    # create a linear regression model and fit the data
    model = LinearRegression()
    linear_regression = LinearModel(model)
    linear_regression.compute_metrics(x_train, y_train)

    # printing metrics of the linear model
    print('The RMSE of the linear regression model is {}'.format(linear_regression.rmse_))
    print('The R2 score of the linear regression model is {}'.format(linear_regression.r2_))

    # transform the features to a higher degree
    polynomial_features = PolynomialFeatures(degree=2, include_bias=False)
    x_poly_train = polynomial_features.fit_transform(x_train)

    # train a plain linear regression model on the expanded features
    # (the polynomial transform is already applied above, so no pipeline is needed)
    model = LinearRegression()
    polynomial_regression = LinearModel(model)
    polynomial_regression.compute_metrics(x_poly_train, y_train)

    print('The RMSE of the polynomial regression model is {}'.format(polynomial_regression.rmse_))
    print('The R2 score of the polynomial regression model is {}'.format(polynomial_regression.r2_))

--------------------------------------------------------------------------------
/LinearRegression/LinearModel.py:
--------------------------------------------------------------------------------
# imports
import numpy as np


class LinearRegressionUsingGD:
    """Linear Regression Using Gradient Descent.

    Parameters
    ----------
    eta : float
        Learning rate
    n_iterations : int
        No of passes over the training set

    Attributes
    ----------
    w_ : weights after fitting the model
    cost_ : total error of the model after each iteration

    """

    def __init__(self, eta=0.05, n_iterations=1000):
        self.eta = eta
        self.n_iterations = n_iterations

    def fit(self, x, y):
        """Fit the training data

        Parameters
        ----------
        x : array-like, shape = [n_samples, n_features]
            Training samples
        y : array-like, shape = [n_samples, n_target_values]
            Target values

        Returns
        -------
        self : object

        """
        self.cost_ = []
        self.w_ = np.zeros((x.shape[1], 1))
        m = x.shape[0]

        for _ in range(self.n_iterations):
            y_pred = np.dot(x, self.w_)
            residuals = y_pred - y
            gradient_vector = np.dot(x.T, residuals)
            self.w_ -= (self.eta / m) * gradient_vector
            cost = np.sum((residuals ** 2)) / (2 * m)
            self.cost_.append(cost)
        return self

    def predict(self, x):
        """Predicts the value after the model has been trained.

        Parameters
        ----------
        x : array-like, shape = [n_samples, n_features]
            Test samples

        Returns
        -------
        Predicted value

        """
        return np.dot(x, self.w_)
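Before the driver script below, a quick sanity check (hypothetical, not part of the repository) shows that LinearRegressionUsingGD recovers known parameters on noise-free data, since each iteration applies the update w := w - (eta / m) * X^T (Xw - y):

import numpy as np
from LinearRegression.LinearModel import LinearRegressionUsingGD

x = np.linspace(0, 1, 50)[:, np.newaxis]
y = 1 + 2 * x                        # true intercept 1, true slope 2
x_b = np.c_[np.ones((50, 1)), x]     # prepend the bias column, as main.py does

model = LinearRegressionUsingGD(eta=0.1, n_iterations=5000)
model.fit(x_b, y)
print(model.w_.ravel())              # should approach [1. 2.]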
--------------------------------------------------------------------------------
/LinearRegression/main.py:
--------------------------------------------------------------------------------
import numpy as np
from LinearRegression.LinearModel import LinearRegressionUsingGD
from LinearRegression.Metrics import PerformanceMetrics
from LinearRegression.Plots import scatter_plot, plot, ploty


def generate_data_set():
    """Generates random data

    Returns
    -------
    x : array-like, shape = [n_samples, n_features]
        Training samples
    y : array-like, shape = [n_samples, n_target_values]
        Target values

    """
    np.random.seed(0)
    x = np.random.rand(100, 1)
    y = 2 + 3 * x + np.random.rand(100, 1)
    return x, y


if __name__ == "__main__":
    # initializing the model
    linear_regression_model = LinearRegressionUsingGD()

    # generate the data set
    x, y = generate_data_set()

    # transform the feature vectors to include the bias term by
    # adding 1 to all the instances of the training set
    m = x.shape[0]
    x_train = np.c_[np.ones((m, 1)), x]

    # fit/train the model
    linear_regression_model.fit(x_train, y)

    # predict values
    predicted_values = linear_regression_model.predict(x_train)

    # model parameters
    print(linear_regression_model.w_)
    intercept, coeffs = linear_regression_model.w_

    # cost function
    cost_function = linear_regression_model.cost_

    # plotting
    scatter_plot(x, y)
    plot(x, predicted_values)
    ploty(cost_function, 'no of iterations', 'cost function')

    # computing metrics
    metrics = PerformanceMetrics(y, predicted_values)
    rmse = metrics.compute_rmse()
    r2_score = metrics.compute_r2_score()

    print('The coefficient is {}'.format(coeffs))
    print('The intercept is {}'.format(intercept))
    print('Root mean squared error of the model is {}.'.format(rmse))
    print('R-squared score is {}.'.format(r2_score))
--------------------------------------------------------------------------------
/LogisticRegression/main.py:
--------------------------------------------------------------------------------
# imports
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LogisticRegression
from LogisticRegression.Model import LogisticRegressionUsingGD
from sklearn.metrics import accuracy_score


def load_data(path, header):
    marks_df = pd.read_csv(path, header=header)
    return marks_df


if __name__ == "__main__":
    # load the data from the file
    data = load_data("data/marks.txt", None)

    # X = feature values, all the columns except the last column
    X = data.iloc[:, :-1]

    # y = target values, last column of the data frame
    y = data.iloc[:, -1]

    # filter out the applicants that got admitted
    admitted = data.loc[y == 1]

    # filter out the applicants that didn't get admission
    not_admitted = data.loc[y == 0]

    # plots
    plt.scatter(admitted.iloc[:, 0], admitted.iloc[:, 1], s=10, label='Admitted')
    plt.scatter(not_admitted.iloc[:, 0], not_admitted.iloc[:, 1], s=10,
                label='Not Admitted')

    # preparing the data for building the model
    X = np.c_[np.ones((X.shape[0], 1)), X]
    y = y.values[:, np.newaxis]   # convert the Series to a column vector
    theta = np.zeros((X.shape[1], 1))

    # Logistic Regression from scratch using Gradient Descent
    model = LogisticRegressionUsingGD()
    model.fit(X, y, theta)
    accuracy = model.accuracy(X, y.flatten())
    parameters = model.w_
    print("The accuracy of the model is {}".format(accuracy))
    print("The model parameters using Gradient descent")
    print("\n")
    print(parameters)

    # plotting the decision boundary
    # As there are two features:
    # w0 + w1*x1 + w2*x2 = 0
    # x2 = -(w0 + w1*x1) / w2
    x_values = [np.min(X[:, 1]) - 2, np.max(X[:, 1]) + 2]
    y_values = - (parameters[0] + np.dot(parameters[1], x_values)) / parameters[2]

    plt.plot(x_values, y_values, label='Decision Boundary')
    plt.xlabel('Marks in 1st Exam')
    plt.ylabel('Marks in 2nd Exam')
    plt.legend()
    plt.show()

    # Using scikit-learn
    model = LogisticRegression()
    model.fit(X, y.ravel())
    parameters = model.coef_
    predicted_classes = model.predict(X)
    accuracy = accuracy_score(y.flatten(), predicted_classes)
    print('The accuracy score using scikit-learn is {}'.format(accuracy))
    print("The model parameters using scikit-learn")
    print(parameters)

--------------------------------------------------------------------------------
/LogisticRegression/Model.py:
--------------------------------------------------------------------------------
import numpy as np
from scipy.optimize import fmin_tnc


class LogisticRegressionUsingGD:

    @staticmethod
    def sigmoid(x):
        # Activation function used to map any real value between 0 and 1
        return 1 / (1 + np.exp(-x))

    @staticmethod
    def net_input(theta, x):
        # Computes the weighted sum of inputs, similar to Linear Regression
        return np.dot(x, theta)

    def probability(self, theta, x):
        # Calculates the probability that an instance belongs to a particular class
        return self.sigmoid(self.net_input(theta, x))

    def cost_function(self, theta, x, y):
        # Computes the cost function for all the training samples
        m = x.shape[0]
        total_cost = -(1 / m) * np.sum(
            y * np.log(self.probability(theta, x)) + (1 - y) * np.log(
                1 - self.probability(theta, x)))
        return total_cost

    def gradient(self, theta, x, y):
        # Computes the gradient of the cost function at the point theta
        m = x.shape[0]
        return (1 / m) * np.dot(x.T, self.sigmoid(self.net_input(theta, x)) - y)

    def fit(self, x, y, theta):
        """Trains the model from the training data

        Uses the fmin_tnc function, which finds the minimum of any function.
        It takes the following arguments:
            1) func : function to minimize
            2) x0 : initial values for the parameters
            3) fprime : gradient of the function defined by 'func'
            4) args : arguments passed to the function

        Parameters
        ----------
        x : array-like, shape = [n_samples, n_features]
            Training samples

        y : array-like, shape = [n_samples, n_target_values]
            Target classes

        theta : initial weights

        Returns
        -------
        self : An instance of self

        """
        opt_weights = fmin_tnc(func=self.cost_function, x0=theta, fprime=self.gradient,
                               args=(x, y.flatten()))
        self.w_ = opt_weights[0]
        return self

    def predict(self, x):
        """Predicts the class membership probability for each sample

        Parameters
        ----------
        x : array-like, shape = [n_samples, n_features]
            Test samples

        Returns
        -------
        Predicted probabilities

        """
        theta = self.w_[:, np.newaxis]
        return self.probability(theta, x)

    def accuracy(self, x, actual_classes, probab_threshold=0.5):
        """Computes the accuracy of the classifier

        Parameters
        ----------
        x : array-like, shape = [n_samples, n_features]
            Training samples

        actual_classes : class labels from the training data set

        probab_threshold : threshold/cutoff to categorize the samples into different classes

        Returns
        -------
        accuracy : accuracy of the model in percent

        """
        predicted_classes = (self.predict(x) >= probab_threshold).astype(int)
        predicted_classes = predicted_classes.flatten()
        accuracy = np.mean(predicted_classes == actual_classes)
        return accuracy * 100

--------------------------------------------------------------------------------
/LogisticRegression/data/marks.txt:
--------------------------------------------------------------------------------
1 | 34.62365962451697,78.0246928153624,0 2 | 30.28671076822607,43.89499752400101,0 3 | 
35.84740876993872,72.90219802708364,0 4 | 60.18259938620976,86.30855209546826,1 5 | 79.0327360507101,75.3443764369103,1 6 | 45.08327747668339,56.3163717815305,0 7 | 61.10666453684766,96.51142588489624,1 8 | 75.02474556738889,46.55401354116538,1 9 | 76.09878670226257,87.42056971926803,1 10 | 84.43281996120035,43.53339331072109,1 11 | 95.86155507093572,38.22527805795094,0 12 | 75.01365838958247,30.60326323428011,0 13 | 82.30705337399482,76.48196330235604,1 14 | 69.36458875970939,97.71869196188608,1 15 | 39.53833914367223,76.03681085115882,0 16 | 53.9710521485623,89.20735013750205,1 17 | 69.07014406283025,52.74046973016765,1 18 | 67.94685547711617,46.67857410673128,0 19 | 70.66150955499435,92.92713789364831,1 20 | 76.97878372747498,47.57596364975532,1 21 | 67.37202754570876,42.83843832029179,0 22 | 89.67677575072079,65.79936592745237,1 23 | 50.534788289883,48.85581152764205,0 24 | 34.21206097786789,44.20952859866288,0 25 | 77.9240914545704,68.9723599933059,1 26 | 62.27101367004632,69.95445795447587,1 27 | 80.1901807509566,44.82162893218353,1 28 | 93.114388797442,38.80067033713209,0 29 | 61.83020602312595,50.25610789244621,0 30 | 38.78580379679423,64.99568095539578,0 31 | 61.379289447425,72.80788731317097,1 32 | 85.40451939411645,57.05198397627122,1 33 | 52.10797973193984,63.12762376881715,0 34 | 52.04540476831827,69.43286012045222,1 35 | 40.23689373545111,71.16774802184875,0 36 | 54.63510555424817,52.21388588061123,0 37 | 33.91550010906887,98.86943574220611,0 38 | 64.17698887494485,80.90806058670817,1 39 | 74.78925295941542,41.57341522824434,0 40 | 34.1836400264419,75.2377203360134,0 41 | 83.90239366249155,56.30804621605327,1 42 | 51.54772026906181,46.85629026349976,0 43 | 94.44336776917852,65.56892160559052,1 44 | 82.36875375713919,40.61825515970618,0 45 | 51.04775177128865,45.82270145776001,0 46 | 62.22267576120188,52.06099194836679,0 47 | 77.19303492601364,70.45820000180959,1 48 | 97.77159928000232,86.7278223300282,1 49 | 62.07306379667647,96.76882412413983,1 50 | 91.56497449807442,88.69629254546599,1 51 | 79.94481794066932,74.16311935043758,1 52 | 99.2725269292572,60.99903099844988,1 53 | 90.54671411399852,43.39060180650027,1 54 | 34.52451385320009,60.39634245837173,0 55 | 50.2864961189907,49.80453881323059,0 56 | 49.58667721632031,59.80895099453265,0 57 | 97.64563396007767,68.86157272420604,1 58 | 32.57720016809309,95.59854761387875,0 59 | 74.24869136721598,69.82457122657193,1 60 | 71.79646205863379,78.45356224515052,1 61 | 75.3956114656803,85.75993667331619,1 62 | 35.28611281526193,47.02051394723416,0 63 | 56.25381749711624,39.26147251058019,0 64 | 30.05882244669796,49.59297386723685,0 65 | 44.66826172480893,66.45008614558913,0 66 | 66.56089447242954,41.09209807936973,0 67 | 40.45755098375164,97.53518548909936,1 68 | 49.07256321908844,51.88321182073966,0 69 | 80.27957401466998,92.11606081344084,1 70 | 66.74671856944039,60.99139402740988,1 71 | 32.72283304060323,43.30717306430063,0 72 | 64.0393204150601,78.03168802018232,1 73 | 72.34649422579923,96.22759296761404,1 74 | 60.45788573918959,73.09499809758037,1 75 | 58.84095621726802,75.85844831279042,1 76 | 99.82785779692128,72.36925193383885,1 77 | 47.26426910848174,88.47586499559782,1 78 | 50.45815980285988,75.80985952982456,1 79 | 60.45555629271532,42.50840943572217,0 80 | 82.22666157785568,42.71987853716458,0 81 | 88.9138964166533,69.80378889835472,1 82 | 94.83450672430196,45.69430680250754,1 83 | 67.31925746917527,66.58935317747915,1 84 | 57.23870631569862,59.51428198012956,1 85 | 80.36675600171273,90.96014789746954,1 86 | 
68.46852178591112,85.59430710452014,1 87 | 42.0754545384731,78.84478600148043,0 88 | 75.47770200533905,90.42453899753964,1 89 | 78.63542434898018,96.64742716885644,1 90 | 52.34800398794107,60.76950525602592,0 91 | 94.09433112516793,77.15910509073893,1 92 | 90.44855097096364,87.50879176484702,1 93 | 55.48216114069585,35.57070347228866,0 94 | 74.49269241843041,84.84513684930135,1 95 | 89.84580670720979,45.35828361091658,1 96 | 83.48916274498238,48.38028579728175,1 97 | 42.2617008099817,87.10385094025457,1 98 | 99.31500880510394,68.77540947206617,1 99 | 55.34001756003703,64.9319380069486,1 100 | 74.77589300092767,89.52981289513276,1 101 | --------------------------------------------------------------------------------
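A final, self-contained sketch (synthetic data, not marks.txt) exercising LogisticRegressionUsingGD the same way main.py does; the blob centres and seed are arbitrary choices for illustration:

import numpy as np
from LogisticRegression.Model import LogisticRegressionUsingGD

np.random.seed(1)
# two overlapping Gaussian blobs, one per class
x_neg = np.random.randn(50, 2)
x_pos = np.random.randn(50, 2) + 2.0
X = np.vstack([x_neg, x_pos])
y = np.hstack([np.zeros(50), np.ones(50)])[:, np.newaxis]

# bias column and zero-initialised weights, mirroring LogisticRegression/main.py
X = np.c_[np.ones((X.shape[0], 1)), X]
theta = np.zeros((X.shape[1], 1))

model = LogisticRegressionUsingGD().fit(X, y, theta)
print(model.accuracy(X, y.flatten()))   # expected well above 50 (chance level)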