├── Linear_Regression
│   ├── linear_reg_1.py
│   ├── linear_reg_2.py
│   ├── reg1.png
│   └── reidual_error.png
└── Logistic_Regression
    ├── dataset1.csv
    ├── log_reg_1.py
    ├── log_reg_2.py
    ├── result_1.png
    └── sigmoid.png

/Linear_Regression/linear_reg_1.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt

def estimate_coef(x, y):
    # number of observations/points
    n = np.size(x)

    # means of the x and y vectors
    m_x, m_y = np.mean(x), np.mean(y)

    # cross-deviation and deviation about x:
    # SS_xy = sum(x*y) - n*m_x*m_y, SS_xx = sum(x^2) - n*m_x^2
    # (the sums must close before the n*mean terms are subtracted)
    SS_xy = np.sum(y*x) - n*m_y*m_x
    SS_xx = np.sum(x*x) - n*m_x*m_x

    # regression coefficients
    b_1 = SS_xy / SS_xx
    b_0 = m_y - b_1*m_x

    return (b_0, b_1)

def plot_regression_line(x, y, b):
    # plotting the actual points as a scatter plot
    plt.scatter(x, y, color="m", marker="o", s=30)

    # predicted response vector
    y_pred = b[0] + b[1]*x

    # plotting the regression line
    plt.plot(x, y_pred, color="g")

    # putting labels
    plt.xlabel('x')
    plt.ylabel('y')

    # show plot
    plt.show()

def main():
    # observations
    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

    # estimating coefficients
    b = estimate_coef(x, y)
    print("Estimated coefficients:\nb_0 = {}\nb_1 = {}".format(b[0], b[1]))

    # plotting regression line
    plot_regression_line(x, y, b)

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/Linear_Regression/linear_reg_2.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split

# load the California housing dataset
# (the Boston housing dataset used originally was removed in scikit-learn 1.2)
housing = datasets.fetch_california_housing()

# defining feature matrix (X) and response vector (y)
X = housing.data
y = housing.target

# splitting X and y into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,
                                                    random_state=1)

# create linear regression object
reg = linear_model.LinearRegression()

# train the model using the training set
reg.fit(X_train, y_train)

# regression coefficients
print('Coefficients:\n', reg.coef_)

# R^2 score: 1 means perfect prediction
print('R^2 score: {}'.format(reg.score(X_test, y_test)))

# plot for residual errors

## setting plot style
plt.style.use('fivethirtyeight')

## predicted responses for the train and test sets
y_train_pred = reg.predict(X_train)
y_test_pred = reg.predict(X_test)

## plotting residual errors in training data
plt.scatter(y_train_pred, y_train_pred - y_train,
            color="green", s=10, label='Train data')

## plotting residual errors in test data
plt.scatter(y_test_pred, y_test_pred - y_test,
            color="blue", s=10, label='Test data')

## plotting line for zero residual error
plt.hlines(y=0, xmin=min(y_train_pred.min(), y_test_pred.min()),
           xmax=max(y_train_pred.max(), y_test_pred.max()), linewidth=2)

## plotting legend
plt.legend(loc='upper right')

## plot title
plt.title("Residual errors")

## show plot
plt.show()
--------------------------------------------------------------------------------
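Beyond the R^2 score printed by linear_reg_2.py, held-out error in the target's own units is often easier to interpret. A minimal sketch reporting test-set mean squared error with sklearn.metrics, reusing the same data, split, and model as the script above:

from sklearn import datasets, linear_model, metrics
from sklearn.model_selection import train_test_split

# same data and 60/40 split as linear_reg_2.py
X, y = datasets.fetch_california_housing(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,
                                                    random_state=1)

reg = linear_model.LinearRegression().fit(X_train, y_train)

# mean squared error on the held-out test set
print("Test MSE:", metrics.mean_squared_error(y_test, reg.predict(X_test)))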
/Linear_Regression/reg1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nikhilkumarsingh/Machine-Learning-Samples/6e3e9a02ab421b7fc51ae00e7d441980d5bee786/Linear_Regression/reg1.png
--------------------------------------------------------------------------------
/Linear_Regression/reidual_error.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nikhilkumarsingh/Machine-Learning-Samples/6e3e9a02ab421b7fc51ae00e7d441980d5bee786/Linear_Regression/reidual_error.png
--------------------------------------------------------------------------------
/Logistic_Regression/dataset1.csv:
--------------------------------------------------------------------------------
4.5192,2.6487,1.0
2.4443,1.5438,1.0
4.2409,1.899,1.0
5.8097,2.4711,1.0
6.4423,3.359,1.0
5.8097,3.2406,1.0
6.3917,3.8128,1.0
6.8725,4.4441,1.0
6.7966,3.6747,1.0
8.163,4.7401,1.0
7.4038,3.8917,1.0
7.6316,4.602,1.0
7.7581,5.7265,1.0
6.5688,4.9571,1.0
5.3543,3.9903,1.0
4.4686,3.0236,1.0
2.9757,2.0568,1.0
2.4443,1.2676,1.0
0.9008,1.169,1.0
2.1154,1.7411,1.0
3.2794,1.386,1.0
4.165,1.5636,1.0
4.8482,1.8793,1.0
3.33,2.7868,1.0
5.1518,3.5563,1.0
6.2652,4.0693,1.0
6.2652,4.3849,1.0
7.2014,1.5438,1.0
7.6569,2.412,1.0
6.1387,1.7806,1.0
4.4939,1.4057,1.0
4.8735,2.6093,1.0
5.5314,3.0828,1.0
6.0121,3.9311,1.0
7.1508,4.7598,1.0
7.7075,5.3122,1.0
8.3148,5.7068,1.0
8.5172,5.1149,1.0
8.7449,5.4109,1.0
7.8593,3.8128,1.0
6.999,3.2406,1.0
5.5061,2.9052,1.0
4.9241,2.6882,1.0
6.6447,3.8325,1.0
7.6822,4.5428,1.0
8.0364,5.7857,1.0
8.9221,6.5552,1.0
7.8593,5.253,1.0
6.5941,5.2333,1.0
6.0374,4.7598,1.0
2.7227,4.5822,0.0
1.9383,3.6549,0.0
1.6852,2.9841,0.0
4.3168,4.4244,0.0
3.4312,3.7536,0.0
5.4808,5.2728,0.0
4.1144,4.8387,0.0
3.2034,4.4244,0.0
4.1144,5.3911,0.0
5.1012,6.0817,0.0
4.8988,5.5687,0.0
5.9615,6.4565,0.0
5.7591,6.0028,0.0
6.6953,6.7722,0.0
5.7338,6.6538,0.0
6.6194,7.1471,0.0
7.2014,7.5219,0.0
7.2014,6.8314,0.0
8.5931,7.6206,0.0
7.7581,7.1865,0.0
7.7581,7.7784,0.0
5.1012,7.6009,0.0
4.2156,6.496,0.0
3.4818,5.8055,0.0
2.3684,5.0163,0.0
1.7864,4.1876,0.0
0.9008,3.4379,0.0
0.9008,5.7857,0.0
1.9636,6.3382,0.0
1.4069,4.9571,0.0
2.419,6.8511,0.0
2.8745,6.0817,0.0
4.0132,7.1668,0.0
4.6711,7.226,0.0
5.1771,8.1533,0.0
6.2146,7.4825,0.0
5.4555,7.0484,0.0
5.9868,8.5084,0.0
4.0891,7.5417,0.0
2.3937,7.2063,0.0
1.331,6.5355,0.0
1.7358,5.4503,0.0
2.4443,5.8449,0.0
3.1781,4.8979,0.0
4.6711,5.8055,0.0
5.9868,7.3641,0.0
4.6711,6.2592,0.0
7.581,8.3703,0.0
4.6457,8.5676,0.0
4.6457,8.1676,0.0
--------------------------------------------------------------------------------
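Each row of dataset1.csv is two real-valued features followed by a 0/1 class label (50 rows per class). As a sketch, the loadCSV helper defined in log_reg_1.py below could equivalently be a single NumPy call:

import numpy as np

# one-line equivalent of loadCSV in log_reg_1.py
dataset = np.loadtxt('dataset1.csv', delimiter=',')
X_raw, y = dataset[:, :-1], dataset[:, -1]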
/Logistic_Regression/log_reg_1.py:
--------------------------------------------------------------------------------
import csv
import numpy as np
import matplotlib.pyplot as plt


def loadCSV(filename):
    '''
    function to load the dataset
    '''
    with open(filename, "r") as csvfile:
        lines = csv.reader(csvfile)
        dataset = list(lines)
    for i in range(len(dataset)):
        dataset[i] = [float(x) for x in dataset[i]]
    return np.array(dataset)


def normalize(X):
    '''
    min-max normalize the feature matrix X to the range [0, 1]
    '''
    mins = np.min(X, axis=0)
    maxs = np.max(X, axis=0)
    rng = maxs - mins
    norm_X = 1 - ((maxs - X) / rng)   # equivalent to (X - mins) / rng
    return norm_X


def logistic_func(beta, X):
    '''
    logistic (sigmoid) function
    '''
    return 1.0 / (1 + np.exp(-np.dot(X, beta.T)))


def log_gradient(beta, X, y):
    '''
    logistic gradient: X^T (h(X) - y)
    '''
    first_calc = logistic_func(beta, X) - y.reshape(X.shape[0], -1)
    final_calc = np.dot(first_calc.T, X)
    return final_calc


def cost_func(beta, X, y):
    '''
    cost function, J (mean cross-entropy)
    '''
    # squeeze both vectors to shape (n,) so the products stay element-wise
    log_func_v = np.squeeze(logistic_func(beta, X))
    y = np.squeeze(y)
    step1 = y * np.log(log_func_v)
    step2 = (1 - y) * np.log(1 - log_func_v)
    final = -step1 - step2
    return np.mean(final)


def grad_desc(X, y, beta, lr=.01, converge_change=.001):
    '''
    gradient descent function
    '''
    cost = cost_func(beta, X, y)
    change_cost = 1
    num_iter = 1

    while change_cost > converge_change:
        old_cost = cost
        beta = beta - (lr * log_gradient(beta, X, y))
        cost = cost_func(beta, X, y)
        change_cost = old_cost - cost
        num_iter += 1

    return beta, num_iter


def pred_values(beta, X):
    '''
    function to predict labels
    '''
    pred_prob = logistic_func(beta, X)
    pred_value = np.where(pred_prob >= .5, 1, 0)
    return np.squeeze(pred_value)


def plot_reg(X, y, beta):
    '''
    function to plot the decision boundary
    '''
    # labelled observations
    x_0 = X[np.where(y == 0.0)]
    x_1 = X[np.where(y == 1.0)]

    # plotting points with a different color for each label
    plt.scatter(x_0[:, 1], x_0[:, 2], c='b', label='y = 0')
    plt.scatter(x_1[:, 1], x_1[:, 2], c='r', label='y = 1')

    # plotting the decision boundary: beta_0 + beta_1*x1 + beta_2*x2 = 0
    x1 = np.arange(0, 1, 0.1)
    x2 = -(beta[0, 0] + beta[0, 1] * x1) / beta[0, 2]
    plt.plot(x1, x2, c='k', label='reg line')

    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.legend()
    plt.show()


if __name__ == "__main__":
    # load the dataset
    dataset = loadCSV('dataset1.csv')

    # normalizing the feature matrix
    X = normalize(dataset[:, :-1])

    # stacking a column of all ones onto the feature matrix (intercept term)
    X = np.hstack((np.ones((X.shape[0], 1)), X))

    # response vector
    y = dataset[:, -1]

    # initial beta values as a 1 x (n_features + 1) row vector
    # (a plain 2-D array; np.matrix is deprecated)
    beta = np.zeros((1, X.shape[1]))

    # beta values after running gradient descent
    beta, num_iter = grad_desc(X, y, beta)

    # estimated beta values and number of iterations
    print("Estimated regression coefficients:", beta)
    print("No. of iterations:", num_iter)

    # predicted labels
    y_pred = pred_values(beta, X)

    # number of correctly predicted labels
    print("Correctly predicted labels:", np.sum(y == y_pred))

    # plotting the decision boundary
    plot_reg(X, y, beta)
--------------------------------------------------------------------------------
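As a cross-check on the hand-rolled gradient descent above, a minimal sketch fitting scikit-learn's LogisticRegression to the same min-max-normalized features; because scikit-learn applies L2 regularization by default, its coefficients should be similar but not identical:

import numpy as np
from sklearn.linear_model import LogisticRegression

dataset = np.loadtxt('dataset1.csv', delimiter=',')
X, y = dataset[:, :-1], dataset[:, -1]

# same min-max normalization as normalize() in log_reg_1.py
X = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))

clf = LogisticRegression().fit(X, y)
print("Estimated coefficients:", clf.intercept_, clf.coef_)
print("Correctly predicted labels:", np.sum(clf.predict(X) == y))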
of iterations:", num_iter) 130 | 131 | # predicted labels 132 | y_pred = pred_values(beta, X) 133 | 134 | # number of correctly predicted labels 135 | print("Correctly predicted labels:", np.sum(y == y_pred)) 136 | 137 | # plotting regression line 138 | plot_reg(X, y, beta) -------------------------------------------------------------------------------- /Logistic_Regression/log_reg_2.py: -------------------------------------------------------------------------------- 1 | from sklearn import datasets, linear_model, metrics 2 | 3 | # load the digit dataset 4 | digits = datasets.load_digits() 5 | 6 | # defining feature matrix(X) and response vector(y) 7 | X = digits.data 8 | y = digits.target 9 | 10 | # splitting X and y into training and testing sets 11 | from sklearn.model_selection import train_test_split 12 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, 13 | random_state=1) 14 | 15 | # create logistic regression object 16 | reg = linear_model.LogisticRegression() 17 | 18 | # train the model using the training sets 19 | reg.fit(X_train, y_train) 20 | 21 | # making predictions on the testing set 22 | y_pred = reg.predict(X_test) 23 | 24 | # comparing actual response values (y_test) with predicted response values (y_pred) 25 | print("Logistic Regression model accuracy(in %):", metrics.accuracy_score(y_test, y_pred)*100) -------------------------------------------------------------------------------- /Logistic_Regression/result_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nikhilkumarsingh/Machine-Learning-Samples/6e3e9a02ab421b7fc51ae00e7d441980d5bee786/Logistic_Regression/result_1.png -------------------------------------------------------------------------------- /Logistic_Regression/sigmoid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nikhilkumarsingh/Machine-Learning-Samples/6e3e9a02ab421b7fc51ae00e7d441980d5bee786/Logistic_Regression/sigmoid.png --------------------------------------------------------------------------------