├── Linear_Regression
│   ├── linear_reg_1.py
│   ├── linear_reg_2.py
│   ├── reg1.png
│   └── reidual_error.png
└── Logistic_Regression
    ├── dataset1.csv
    ├── log_reg_1.py
    ├── log_reg_2.py
    ├── result_1.png
    └── sigmoid.png

/Linear_Regression/linear_reg_1.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt

def estimate_coef(x, y):
    # number of observations/points
    n = np.size(x)

    # means of the x and y vectors
    m_x, m_y = np.mean(x), np.mean(y)

    # cross-deviation and deviation about x:
    # SS_xy = sum(x*y) - n*m_x*m_y, SS_xx = sum(x^2) - n*m_x^2
    # (the sums must close before the n*mean terms are subtracted)
    SS_xy = np.sum(y*x) - n*m_y*m_x
    SS_xx = np.sum(x*x) - n*m_x*m_x

    # regression coefficients
    b_1 = SS_xy / SS_xx
    b_0 = m_y - b_1*m_x

    return (b_0, b_1)

def plot_regression_line(x, y, b):
    # plotting the actual points as a scatter plot
    plt.scatter(x, y, color="m", marker="o", s=30)

    # predicted response vector
    y_pred = b[0] + b[1]*x

    # plotting the regression line
    plt.plot(x, y_pred, color="g")

    # putting labels
    plt.xlabel('x')
    plt.ylabel('y')

    # show plot
    plt.show()

def main():
    # observations
    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

    # estimating coefficients
    b = estimate_coef(x, y)
    print("Estimated coefficients:\nb_0 = {}\nb_1 = {}".format(b[0], b[1]))

    # plotting regression line
    plot_regression_line(x, y, b)

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/Linear_Regression/linear_reg_2.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split

# load the California housing dataset
# (the Boston housing dataset used originally was removed in scikit-learn 1.2)
housing = datasets.fetch_california_housing()

# defining feature matrix (X) and response vector (y)
X = housing.data
y = housing.target

# splitting X and y into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,
                                                    random_state=1)

# create linear regression object
reg = linear_model.LinearRegression()

# train the model using the training set
reg.fit(X_train, y_train)

# regression coefficients
print('Coefficients:\n', reg.coef_)

# R^2 score: 1 means perfect prediction
print('R^2 score: {}'.format(reg.score(X_test, y_test)))

# plot for residual errors

## setting plot style
plt.style.use('fivethirtyeight')

## predicted responses for the train and test sets
y_train_pred = reg.predict(X_train)
y_test_pred = reg.predict(X_test)

## plotting residual errors in training data
plt.scatter(y_train_pred, y_train_pred - y_train,
            color="green", s=10, label='Train data')

## plotting residual errors in test data
plt.scatter(y_test_pred, y_test_pred - y_test,
            color="blue", s=10, label='Test data')

## plotting line for zero residual error
plt.hlines(y=0, xmin=min(y_train_pred.min(), y_test_pred.min()),
           xmax=max(y_train_pred.max(), y_test_pred.max()), linewidth=2)

## plotting legend
plt.legend(loc='upper right')

## plot title
plt.title("Residual errors")

## show plot
plt.show()
--------------------------------------------------------------------------------
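Beyond the R^2 score printed by linear_reg_2.py, held-out error in the target's own units is often easier to interpret. A minimal sketch reporting test-set mean squared error with sklearn.metrics, reusing the same data, split, and model as the script above:

from sklearn import datasets, linear_model, metrics
from sklearn.model_selection import train_test_split

# same data and 60/40 split as linear_reg_2.py
X, y = datasets.fetch_california_housing(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,
                                                    random_state=1)

reg = linear_model.LinearRegression().fit(X_train, y_train)

# mean squared error on the held-out test set
print("Test MSE:", metrics.mean_squared_error(y_test, reg.predict(X_test)))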
/Linear_Regression/reg1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nikhilkumarsingh/Machine-Learning-Samples/6e3e9a02ab421b7fc51ae00e7d441980d5bee786/Linear_Regression/reg1.png
--------------------------------------------------------------------------------
/Linear_Regression/reidual_error.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nikhilkumarsingh/Machine-Learning-Samples/6e3e9a02ab421b7fc51ae00e7d441980d5bee786/Linear_Regression/reidual_error.png
--------------------------------------------------------------------------------
/Logistic_Regression/dataset1.csv:
--------------------------------------------------------------------------------
4.5192,2.6487,1.0
2.4443,1.5438,1.0
4.2409,1.899,1.0
5.8097,2.4711,1.0
6.4423,3.359,1.0
5.8097,3.2406,1.0
6.3917,3.8128,1.0
6.8725,4.4441,1.0
6.7966,3.6747,1.0
8.163,4.7401,1.0
7.4038,3.8917,1.0
7.6316,4.602,1.0
7.7581,5.7265,1.0
6.5688,4.9571,1.0
5.3543,3.9903,1.0
4.4686,3.0236,1.0
2.9757,2.0568,1.0
2.4443,1.2676,1.0
0.9008,1.169,1.0
2.1154,1.7411,1.0
3.2794,1.386,1.0
4.165,1.5636,1.0
4.8482,1.8793,1.0
3.33,2.7868,1.0
5.1518,3.5563,1.0
6.2652,4.0693,1.0
6.2652,4.3849,1.0
7.2014,1.5438,1.0
7.6569,2.412,1.0
6.1387,1.7806,1.0
4.4939,1.4057,1.0
4.8735,2.6093,1.0
5.5314,3.0828,1.0
6.0121,3.9311,1.0
7.1508,4.7598,1.0
7.7075,5.3122,1.0
8.3148,5.7068,1.0
8.5172,5.1149,1.0
8.7449,5.4109,1.0
7.8593,3.8128,1.0
6.999,3.2406,1.0
5.5061,2.9052,1.0
4.9241,2.6882,1.0
6.6447,3.8325,1.0
7.6822,4.5428,1.0
8.0364,5.7857,1.0
8.9221,6.5552,1.0
7.8593,5.253,1.0
6.5941,5.2333,1.0
6.0374,4.7598,1.0
2.7227,4.5822,0.0
1.9383,3.6549,0.0
1.6852,2.9841,0.0
4.3168,4.4244,0.0
3.4312,3.7536,0.0
5.4808,5.2728,0.0
4.1144,4.8387,0.0
3.2034,4.4244,0.0
4.1144,5.3911,0.0
5.1012,6.0817,0.0
4.8988,5.5687,0.0
5.9615,6.4565,0.0
5.7591,6.0028,0.0
6.6953,6.7722,0.0
5.7338,6.6538,0.0
6.6194,7.1471,0.0
7.2014,7.5219,0.0
7.2014,6.8314,0.0
8.5931,7.6206,0.0
7.7581,7.1865,0.0
7.7581,7.7784,0.0
5.1012,7.6009,0.0
4.2156,6.496,0.0
3.4818,5.8055,0.0
2.3684,5.0163,0.0
1.7864,4.1876,0.0
0.9008,3.4379,0.0
0.9008,5.7857,0.0
1.9636,6.3382,0.0
1.4069,4.9571,0.0
2.419,6.8511,0.0
2.8745,6.0817,0.0
4.0132,7.1668,0.0
4.6711,7.226,0.0
5.1771,8.1533,0.0
6.2146,7.4825,0.0
5.4555,7.0484,0.0
5.9868,8.5084,0.0
4.0891,7.5417,0.0
2.3937,7.2063,0.0
1.331,6.5355,0.0
1.7358,5.4503,0.0
2.4443,5.8449,0.0
3.1781,4.8979,0.0
4.6711,5.8055,0.0
5.9868,7.3641,0.0
4.6711,6.2592,0.0
7.581,8.3703,0.0
4.6457,8.5676,0.0
4.6457,8.1676,0.0
--------------------------------------------------------------------------------
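Each row of dataset1.csv is two real-valued features followed by a 0/1 class label (50 rows per class). As a sketch, the loadCSV helper defined in log_reg_1.py below could equivalently be a single NumPy call:

import numpy as np

# one-line equivalent of loadCSV in log_reg_1.py
dataset = np.loadtxt('dataset1.csv', delimiter=',')
X_raw, y = dataset[:, :-1], dataset[:, -1]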
/Logistic_Regression/log_reg_1.py:
--------------------------------------------------------------------------------
import csv
import numpy as np
import matplotlib.pyplot as plt


def loadCSV(filename):
    '''
    function to load the dataset
    '''
    with open(filename, "r") as csvfile:
        lines = csv.reader(csvfile)
        dataset = list(lines)
    for i in range(len(dataset)):
        dataset[i] = [float(x) for x in dataset[i]]
    return np.array(dataset)


def normalize(X):
    '''
    min-max normalize the feature matrix X to the range [0, 1]
    '''
    mins = np.min(X, axis=0)
    maxs = np.max(X, axis=0)
    rng = maxs - mins
    norm_X = 1 - ((maxs - X) / rng)   # equivalent to (X - mins) / rng
    return norm_X


def logistic_func(beta, X):
    '''
    logistic (sigmoid) function
    '''
    return 1.0 / (1 + np.exp(-np.dot(X, beta.T)))


def log_gradient(beta, X, y):
    '''
    logistic gradient: X^T (h(X) - y)
    '''
    first_calc = logistic_func(beta, X) - y.reshape(X.shape[0], -1)
    final_calc = np.dot(first_calc.T, X)
    return final_calc


def cost_func(beta, X, y):
    '''
    cost function, J (mean cross-entropy)
    '''
    # squeeze both vectors to shape (n,) so the products stay element-wise
    log_func_v = np.squeeze(logistic_func(beta, X))
    y = np.squeeze(y)
    step1 = y * np.log(log_func_v)
    step2 = (1 - y) * np.log(1 - log_func_v)
    final = -step1 - step2
    return np.mean(final)


def grad_desc(X, y, beta, lr=.01, converge_change=.001):
    '''
    gradient descent function
    '''
    cost = cost_func(beta, X, y)
    change_cost = 1
    num_iter = 1

    while change_cost > converge_change:
        old_cost = cost
        beta = beta - (lr * log_gradient(beta, X, y))
        cost = cost_func(beta, X, y)
        change_cost = old_cost - cost
        num_iter += 1

    return beta, num_iter


def pred_values(beta, X):
    '''
    function to predict labels
    '''
    pred_prob = logistic_func(beta, X)
    pred_value = np.where(pred_prob >= .5, 1, 0)
    return np.squeeze(pred_value)


def plot_reg(X, y, beta):
    '''
    function to plot the decision boundary
    '''
    # labelled observations
    x_0 = X[np.where(y == 0.0)]
    x_1 = X[np.where(y == 1.0)]

    # plotting points with a different color for each label
    plt.scatter(x_0[:, 1], x_0[:, 2], c='b', label='y = 0')
    plt.scatter(x_1[:, 1], x_1[:, 2], c='r', label='y = 1')

    # plotting the decision boundary: beta_0 + beta_1*x1 + beta_2*x2 = 0
    x1 = np.arange(0, 1, 0.1)
    x2 = -(beta[0, 0] + beta[0, 1] * x1) / beta[0, 2]
    plt.plot(x1, x2, c='k', label='reg line')

    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.legend()
    plt.show()


if __name__ == "__main__":
    # load the dataset
    dataset = loadCSV('dataset1.csv')

    # normalizing the feature matrix
    X = normalize(dataset[:, :-1])

    # stacking a column of all ones onto the feature matrix (intercept term)
    X = np.hstack((np.ones((X.shape[0], 1)), X))

    # response vector
    y = dataset[:, -1]

    # initial beta values as a 1 x (n_features + 1) row vector
    # (a plain 2-D array; np.matrix is deprecated)
    beta = np.zeros((1, X.shape[1]))

    # beta values after running gradient descent
    beta, num_iter = grad_desc(X, y, beta)

    # estimated beta values and number of iterations
    print("Estimated regression coefficients:", beta)
    print("No. of iterations:", num_iter)

    # predicted labels
    y_pred = pred_values(beta, X)

    # number of correctly predicted labels
    print("Correctly predicted labels:", np.sum(y == y_pred))

    # plotting the decision boundary
    plot_reg(X, y, beta)
--------------------------------------------------------------------------------
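As a cross-check on the hand-rolled gradient descent above, a minimal sketch fitting scikit-learn's LogisticRegression to the same min-max-normalized features; because scikit-learn applies L2 regularization by default, its coefficients should be similar but not identical:

import numpy as np
from sklearn.linear_model import LogisticRegression

dataset = np.loadtxt('dataset1.csv', delimiter=',')
X, y = dataset[:, :-1], dataset[:, -1]

# same min-max normalization as normalize() in log_reg_1.py
X = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))

clf = LogisticRegression().fit(X, y)
print("Estimated coefficients:", clf.intercept_, clf.coef_)
print("Correctly predicted labels:", np.sum(clf.predict(X) == y))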
of iterations:", num_iter) 130 | 131 | # predicted labels 132 | y_pred = pred_values(beta, X) 133 | 134 | # number of correctly predicted labels 135 | print("Correctly predicted labels:", np.sum(y == y_pred)) 136 | 137 | # plotting regression line 138 | plot_reg(X, y, beta) -------------------------------------------------------------------------------- /Logistic_Regression/log_reg_2.py: -------------------------------------------------------------------------------- 1 | from sklearn import datasets, linear_model, metrics 2 | 3 | # load the digit dataset 4 | digits = datasets.load_digits() 5 | 6 | # defining feature matrix(X) and response vector(y) 7 | X = digits.data 8 | y = digits.target 9 | 10 | # splitting X and y into training and testing sets 11 | from sklearn.model_selection import train_test_split 12 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, 13 | random_state=1) 14 | 15 | # create logistic regression object 16 | reg = linear_model.LogisticRegression() 17 | 18 | # train the model using the training sets 19 | reg.fit(X_train, y_train) 20 | 21 | # making predictions on the testing set 22 | y_pred = reg.predict(X_test) 23 | 24 | # comparing actual response values (y_test) with predicted response values (y_pred) 25 | print("Logistic Regression model accuracy(in %):", metrics.accuracy_score(y_test, y_pred)*100) -------------------------------------------------------------------------------- /Logistic_Regression/result_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nikhilkumarsingh/Machine-Learning-Samples/6e3e9a02ab421b7fc51ae00e7d441980d5bee786/Logistic_Regression/result_1.png -------------------------------------------------------------------------------- /Logistic_Regression/sigmoid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nikhilkumarsingh/Machine-Learning-Samples/6e3e9a02ab421b7fc51ae00e7d441980d5bee786/Logistic_Regression/sigmoid.png --------------------------------------------------------------------------------