├── mlwithpytorch.png
├── Day-30-Loss-Functions
│   └── loss.py
├── Day-26-Normalization
│   └── normalization.py
├── Day-09-PCA
│   └── pca.py
├── Day-01-Linear-Regression
│   └── LinearRegression.py
├── Day-17-K-Medoids
│   ├── utility.py
│   └── PAM.py
├── Day-23-Gradient-Descent
│   └── gd.py
├── Day-06-KNN
│   └── KNN.py
├── Day-19-ElasticNet
│   └── ElasticNetRegression.py
├── Day-12-LDA
│   ├── lda.py
│   └── NaiveBayes.py
├── README.md
├── Day-21-LatentDirichlet
│   └── LDA_TopicModeling.py
├── Day-28-Activations
│   ├── activation.py
│   └── MLP.py
├── Day-15-MultiClassLDA
│   └── multi-class-LDA.py
├── Day-02-Logistic-Regression
│   └── LogisticRegression.py
├── Day-22-AffinityPropagation
│   └── AffinityPropagation.py
├── Day-05-Naive-Bayes
│   └── NaiveBayes.py
├── Day-29-Optimizers
│   └── optimizer.py
├── Day-07-SVM
│   └── svm.py
├── Day-25-RANSAC
│   └── ransac.py
├── Day-24-Regularization
│   └── regularization.py
├── Day-08-tf-idf
│   └── tfidf.py
├── Day-14-DBSCAN
│   └── dbscan.py
├── Day-27-MLP
│   └── mlp.py
├── Day-13-Adaboost
│   └── adaboost.py
├── Day-20-SpectralClustering
│   └── spectralClustering.py
├── Day-04-KMeans-Clustering
│   └── KMeans.py
├── Day-18-TSNE
│   └── tsne.py
├── Day-10-Lasso-Ridge-Regression
│   └── Lasso_Ridge_Regression.py
├── Day-11-Gaussian-Mixture-Model
│   └── gmm.py
├── Day-03-Decision-Tree
│   └── DecisionTree.py
└── Day-16-Bayesian-Regression
    └── BayesianRegression.py
/mlwithpytorch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mayurji/MLWithPytorch/HEAD/mlwithpytorch.png
--------------------------------------------------------------------------------
/Day-30-Loss-Functions/loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | class MeanSquareLoss:
4 | def __init__(self): pass
5 |
6 | def loss(self, y, y_pred):
7 |         return torch.sum(torch.pow(y - y_pred, 2), dim=1) / y.shape[0]
8 |
9 | def gradient(self, y, y_pred):
10 | return -(y - y_pred)
11 |
12 | class CrossEntropy:
13 | def __init__(self): pass
14 |
15 | def loss(self, y, p):
16 | # Avoid division by zero
17 |         p = torch.clip(p, 1e-15, 1 - 1e-15)
18 | return - y * torch.log(p) - (1 - y) * torch.log(1 - p)
19 |
20 | def gradient(self, y, p):
21 | # Avoid division by zero
22 | p = torch.clip(p, 1e-15, 1 - 1e-15)
23 | return - (y / p) + (1 - y) / (1 - p)
24 |
25 | class MeanAbsoluteLoss:
26 | def __init__(self): pass
27 |
28 | def loss(self, y, y_pred):
29 | return torch.sum(torch.abs(y - y_pred), dim=1) / y.shape[0]
30 |
31 | def gradient(self, y, y_pred):
32 |         return -torch.sign(y - y_pred)  # subgradient of |y - y_pred| w.r.t. y_pred
33 |
34 | class HuberLoss:
35 | def __init__(self):pass
36 |
37 |     def loss(self, y, y_pred, delta):
38 |         abs_error = torch.abs(y - y_pred)
39 |         return torch.where(abs_error <= delta,
40 |                            0.5 * torch.pow(y - y_pred, 2),
41 |                            delta * abs_error - 0.5 * delta ** 2)
42 |
43 | class HingeLoss:
44 | def __init__(self):
45 | pass
46 |
47 | def loss(self, y, y_pred):
48 |         return torch.clamp(1 - y * y_pred, min=0)  # max(0, 1 - y * y_pred) with labels in {-1, +1}
49 |
50 | class KLDivergence:
51 | def __init__(self):
52 | pass
53 |
54 | def loss(self, y, y_pred):
55 | return torch.sum(y_pred * torch.log((y_pred / y)))
56 |
--------------------------------------------------------------------------------
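
A minimal usage sketch (not part of the repository) comparing the squared and Huber losses on a batch with one outlier; it assumes the snippet is run from Day-30-Loss-Functions so that loss.py is importable:

import torch
from loss import MeanSquareLoss, HuberLoss  # assumes Day-30-Loss-Functions is the working directory

y      = torch.tensor([[1.0, 2.0, 3.0, 100.0]])   # the last target is an outlier
y_pred = torch.tensor([[1.1, 1.9, 3.2,   4.0]])

# The squared loss is dominated by the outlier ...
print(MeanSquareLoss().loss(y, y_pred))
# ... while the Huber loss only grows linearly once |y - y_pred| exceeds delta.
print(HuberLoss().loss(y, y_pred, delta=1.0))
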
/Day-26-Normalization/normalization.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.neighbors import KNeighborsClassifier
3 | from sklearn.datasets import load_iris
4 | from sklearn.metrics import accuracy_score
5 | class Normalization:
6 | def __init__(self, X):
7 | self.X = X
8 |
9 | def z_score(self):
10 | mean = torch.mean(self.X, dim=0)
11 | return self.X.subtract(mean)/ torch.std(self.X, dim=0)
12 |
13 | def min_max(self):
14 | min = torch.min(self.X, dim=0)
15 | max = torch.max(self.X, dim=0)
16 | return self.X.subtract(min.values) / (max.values - min.values)
17 |
18 | def log_scaling(self):
19 | return torch.log(self.X)
20 |
21 |     def clipping(self, max, min):
22 |         """
23 |         Clip feature values into the range [min, max].
24 |         :param max: upper bound
25 |         :param min: lower bound
26 |         """
27 |         self.X = torch.where(self.X > max, torch.full_like(self.X, float(max)), self.X)
28 |         self.X = torch.where(self.X < min, torch.full_like(self.X, float(min)), self.X)
29 | 
30 |         return self.X
31 |
32 | if __name__ == '__main__':
33 | data = load_iris()
34 | X = torch.tensor(data.data)
35 | y = torch.tensor(data.target).unsqueeze(1)
36 | cls = KNeighborsClassifier()
37 | normalizer = Normalization(X)
38 | X_transform = normalizer.z_score()
39 | cls.fit(X, y)
40 | y_pred = cls.predict(X)
41 | print('Without Normalization',accuracy_score(y, y_pred))
42 | cls.fit(X_transform, y)
43 | y_pred = cls.predict(X_transform)
44 | print('Z-Score Normalization' ,accuracy_score(y, y_pred))
45 | X_transform = normalizer.min_max()
46 | cls.fit(X_transform, y)
47 | y_pred = cls.predict(X_transform)
48 | print('Min-Max Normalization' ,accuracy_score(y, y_pred))
49 | X_transform = normalizer.log_scaling()
50 | cls.fit(X_transform, y)
51 | y_pred = cls.predict(X_transform)
52 | print('Log Scaling', accuracy_score(y, y_pred))
53 |
--------------------------------------------------------------------------------
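
A small standalone check (not from the repository) of what the z-score and min-max transforms above do to a single feature column:

import torch

x = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
z_score = (x - torch.mean(x, dim=0)) / torch.std(x, dim=0)   # zero mean, unit (sample) std
min_max = (x - torch.min(x, dim=0).values) / (torch.max(x, dim=0).values - torch.min(x, dim=0).values)  # squashed into [0, 1]
print(z_score.flatten())   # tensor([-1.1619, -0.3873,  0.3873,  1.1619])
print(min_max.flatten())   # tensor([0.0000, 0.3333, 0.6667, 1.0000])
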
/Day-09-PCA/pca.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.datasets import load_iris
3 | import seaborn as sb
4 | import matplotlib.pyplot as plt
5 |
6 | class pca:
7 | def __init__(self, n_components):
8 | """
9 |         :param n_components: Number of principal components the data should be reduced to.
10 | """
11 | self.components = n_components
12 |
13 | def fit_transform(self, X):
14 | """
15 | * Centering our inputs with mean
16 | * Finding covariance matrix using centered tensor
17 | * Finding eigen value and eigen vector using torch.eig()
18 | * Sorting eigen values in descending order and finding index of high eigen values
19 | * Using sorted index, get the eigen vectors
20 |         * Transforming the input vectors with n columns into PCA components with reduced dimension
21 | :param X: Input tensor with n columns.
22 | :return: Output tensor with reduced principal components
23 | """
24 | centering_X = X - torch.mean(X, dim=0)
25 | covariance_matrix = torch.mm(centering_X.T, centering_X)/(centering_X.shape[0] - 1)
26 | eigen_values, eigen_vectors = torch.eig(covariance_matrix, eigenvectors=True)
27 | eigen_sorted_index = torch.argsort(eigen_values[:,0],descending=True)
28 | eigen_vectors_sorted = eigen_vectors[:,eigen_sorted_index]
29 | component_vector = eigen_vectors_sorted[:,0:self.components]
30 | transformed = torch.mm(component_vector.T, centering_X.T).T
31 | return transformed
32 |
33 | if __name__ == '__main__':
34 | data = load_iris()
35 | X = torch.tensor(data.data,dtype=torch.double)
36 | y = torch.tensor(data.target)
37 | pca = pca(n_components=2)
38 | pca_vector = pca.fit_transform(X)
39 | plt.figure(figsize=(6, 6))
40 | sb.scatterplot(pca_vector[:, 0], pca_vector[:, 1], hue=y, s=60, palette='icefire')
41 | plt.show()
42 |
--------------------------------------------------------------------------------
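
A standalone check (not from the repository) of the same steps the docstring lists, using torch.linalg.eigh, which applies here because the covariance matrix is symmetric:

import torch

torch.manual_seed(0)
X = torch.randn(100, 3, dtype=torch.double)

# Center, build the covariance matrix, eigendecompose, sort, project.
Xc = X - torch.mean(X, dim=0)
cov = torch.mm(Xc.T, Xc) / (Xc.shape[0] - 1)
eigen_values, eigen_vectors = torch.linalg.eigh(cov)
components = eigen_vectors[:, torch.argsort(eigen_values, descending=True)][:, :2]
print(torch.mm(Xc, components).shape)   # torch.Size([100, 2])
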
/Day-01-Linear-Regression/LinearRegression.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | class LinearRegression:
4 |
5 | def __init__(self):
6 | """
7 | :desc lr: Learning Rate
8 | :desc iteration: Number of iterations over complete data set
9 | """
10 |
11 | self.lr = 0.01
12 | self.iterations = 1000
13 |
14 | def y_pred(self, X, w):
15 | """
16 | :desc w: weight tensor
17 | :desc X: input tensor
18 | """
19 | return torch.mm(torch.transpose(w, 0, 1), X)
20 |
21 | def loss(self, ypred, y):
22 | """
23 | :desc c: cost function - to measure the loss between estimated vs ground truth
24 | """
25 | l = 1 / self.m * torch.sum(torch.pow(ypred - y, 2))
26 | return l
27 |
28 | def gradient_descent(self, w, X, y, ypred):
29 | """
30 | :desc dCdW: derivative of cost function
31 | :desc w_update: change in weight tensor after each iteration
32 | """
33 | dCdW = 2 / self.m * torch.mm(X, torch.transpose(ypred - y, 0, 1))
34 | w_update = w - self.lr * dCdW
35 | return w_update
36 |
37 | def run(self, X, y):
38 | """
39 | :type y: tensor object
40 | :type X: tensor object
41 | """
42 | bias = torch.ones((1, X.shape[1]))
43 | X = torch.cat((bias, X), dim=0)
44 | self.m = X.shape[1]
45 | self.n = X.shape[0]
46 | w = torch.zeros((self.n, 1))
47 |
48 | for iteration in range(1, self.iterations + 1):
49 | ypred = self.y_pred(X, w)
50 | cost = self.loss(ypred, y)
51 |
52 | if iteration % 100 == 0:
53 | print(f'Loss at iteration {iteration} is {cost}')
54 | w = self.gradient_descent(w, X, y, ypred)
55 |
56 | return w
57 |
58 |
59 | if __name__ == '__main__':
60 | """
61 | :desc X: random initialization of input tensor
62 | :desc y: random initialization of output tensor
63 | """
64 | X = torch.rand(1, 500)
65 | y = 2 * X + 3 + torch.randn(1, 500) * 0.1
66 | regression = LinearRegression()
67 | w = regression.run(X, y)
68 |
--------------------------------------------------------------------------------
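
A quick sanity check (not part of the repository): on the same synthetic data, the closed-form least-squares solution is roughly [3, 2] (intercept, slope), which the gradient-descent weights returned by run() should approach:

import torch

torch.manual_seed(0)
X = torch.rand(1, 500)
y = 2 * X + 3 + torch.randn(1, 500) * 0.1

# run() stacks a bias row on top of X, so the design matrix A is (2, 500) with samples as
# columns and the normal equations read (A A^T) w = A y^T.
A = torch.cat((torch.ones((1, X.shape[1])), X), dim=0)
w_closed_form = torch.linalg.solve(torch.mm(A, A.T), torch.mm(A, y.T))
print(w_closed_form.flatten())   # approximately tensor([3.0, 2.0])
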
/Day-17-K-Medoids/utility.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.metrics import accuracy_score
3 | import numpy as np
4 |
5 | class SquareLoss:
6 | def __init__(self):
7 | pass
8 |
9 | def loss(self, y, y_pred):
10 | return 0.5 * torch.pow((y - y_pred), 2)
11 |
12 | def gradient(self, y, y_pred):
13 | return -(y - y_pred)
14 |
15 | class CrossEntropy:
16 | def __init__(self):
17 | pass
18 |
19 | def loss(self, y, p):
20 | p = torch.clip(p, 1e-15, 1 - 1e-15)
21 | return - y * torch.log(p) - (1 - y) * torch.log(1 - p)
22 |
23 | def accuracy(self, y, p):
24 | return accuracy_score(torch.argmax(y, dim=1), torch.argmax(p, dim=1))
25 |
26 | def gradient(self, y, p):
27 | p = torch.clip(p, 1e-15, 1 - 1e-15)
28 | return -(y/p) + (1-y) / (1-p)
29 |
30 | def euclidean_distance(x1, x2):
31 | """
32 | :param x1: input tensor
33 | :param x2: input tensor
34 | :return: distance between tensors
35 | """
36 |
37 | return torch.cdist(x1.unsqueeze(0), x2.unsqueeze(0))
38 |
39 | def to_categorical(X, n_columns=None):
40 | if not n_columns:
41 | n_columns = torch.amax(X) + 1
42 | one_hot = torch.zeros((X.shape[0], n_columns))
43 |     one_hot[torch.arange(X.shape[0]), X] = 1
44 | return one_hot
45 |
46 | def mean_squared_error(y_true, y_pred):
47 | mse = torch.mean(torch.pow(y_true - y_pred, 2))
48 | return mse
49 |
50 | def divide_on_feature(X, feature_i, threshold):
51 |
52 | split_func = None
53 | if isinstance(threshold, int) or isinstance(threshold, float):
54 | split_func = lambda sample: sample[feature_i] >= threshold
55 | else:
56 | split_func = lambda sample: sample[feature_i] == threshold
57 |
58 |
59 | X_1 = torch.tensor([sample.numpy() for sample in X if split_func(sample)])
60 | X_2 = torch.tensor([sample.numpy() for sample in X if not split_func(sample)])
61 |
62 | return np.array([X_1.numpy(), X_2.numpy()], dtype='object')
63 |
64 | def calculate_variance(X):
65 | mean = torch.ones(X.shape) * torch.mean(X, dim=0)
66 | n_samples = X.shape[0]
67 | variance = (1/ n_samples) * torch.diag(torch.mm((X-mean).T, (X-mean)))
68 | return variance
69 |
--------------------------------------------------------------------------------
/Day-23-Gradient-Descent/gd.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from sklearn.datasets import load_boston
4 |
5 | class GradientDescent:
6 | def __init__(self, learning_rate=0.01, max_iterations=100):
7 | self.lr = learning_rate
8 | self.max_iterations = max_iterations
9 |
10 | def normalization(self, X):
11 | """
12 | :param X: Input tensor
13 | :return: Normalized input using l2 norm.
14 | """
15 | l2 = torch.norm(X, p=2, dim=-1)
16 | l2[l2 == 0] = 1
17 | return X / l2.unsqueeze(1)
18 |
19 |     def compute_error(self, b, m, X, y):
20 |         # Mean squared error of y against X @ m.T + b over all samples.
21 |         residual = y - (torch.mm(X, m.T) + b)
22 |         total_error = torch.sum(residual ** 2)
23 |         return total_error / float(X.shape[0])
24 |
25 |     def step(self, b_curr, m_curr, X, y, learning_rate):
26 |         # Full-batch gradients of the mean squared error with respect to the
27 |         # bias b and the slope vector m.
28 |         N = float(X.shape[0])
29 |         residual = y - (torch.mm(X, m_curr.T) + b_curr)
30 |         b_gradient = -(2 / N) * torch.sum(residual, dim=0)
31 |         m_gradient = -(2 / N) * torch.mm(X.T, residual).T
32 |
33 | new_b = b_curr - (learning_rate * b_gradient)
34 | new_m = m_curr - (learning_rate * m_gradient)
35 | return [new_b, new_m]
36 |
37 | def gradient_descent(self, X, y, start_b, start_m):
38 | b = start_b
39 | m = start_m
40 | for i in range(self.max_iterations):
41 | b, m = self.step(b_curr=b, m_curr=m, X=X, y=y, learning_rate=self.lr)
42 |
43 | return b, m
44 |
45 | if __name__ == '__main__':
46 | data = load_boston()
47 | X = torch.tensor(data.data)
48 | y = torch.tensor(data.target).unsqueeze(1)
49 | initial_b = 0.0
50 | initial_m = torch.zeros((X.shape[1], 1), dtype=torch.double).T
51 |     nn.init.normal_(initial_m)
52 | gd = GradientDescent(learning_rate=0.0001,max_iterations=100)
53 | gd.compute_error(X=gd.normalization(X), y=y, b=initial_b, m=initial_m)
54 | bias, slope = gd.gradient_descent(gd.normalization(X), y, start_b=initial_b, start_m=initial_m)
55 | X = gd.normalization(X)
56 | print('y: ', y[0].item())
57 | print('y_pred: ', (torch.mm(slope, X[0].unsqueeze(0).T)+bias).item())
58 |
59 |
--------------------------------------------------------------------------------
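
A tiny standalone illustration (not from the repository) of the same full-batch update on noiseless y = 2x + 3 data, showing the bias and slope converging:

import torch

torch.manual_seed(0)
X = torch.rand(100, 1)
y = 2 * X + 3
b, m = 0.0, torch.zeros(1, 1)
lr = 0.5

for _ in range(200):
    residual = y - (torch.mm(X, m.T) + b)
    b = b - lr * (-(2 / 100) * torch.sum(residual))
    m = m - lr * (-(2 / 100) * torch.mm(X.T, residual).T)

print(b.item(), m.item())   # approaches 3 and 2 respectively
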
/Day-06-KNN/KNN.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from scipy.stats import mode
3 | from sklearn.datasets import load_iris
4 | from sklearn.model_selection import train_test_split
5 | from sklearn.metrics import accuracy_score
6 |
7 | class KNN:
8 | def __init__(self, k, X):
9 | """
10 | :param k: Number of Neighbors
11 | """
12 | self.k = k
13 |
14 | def distance(self, point_1, point_2, default='euclidean', p=2):
15 | if default == 'euclidean':
16 | return torch.norm(point_1 - point_2, 2, 0)
17 | elif default == 'manhattan':
18 | return torch.sum(torch.abs(point_1 - point_2))
19 | elif default == "minkowski":
20 | return torch.pow(torch.sum(torch.abs(point_1 - point_2)**p), 1/p)
21 | else:
22 | raise ValueError("Unknown similarity distance type")
23 |
24 | def fit_predict(self, X, y, item):
25 | """
26 |         * Iterate through each data point (item/y_test) that needs to be classified
27 |         * Find the distance between every training data point and that data point (item/y_test)
28 |         using euclidean distance
29 |         * Sort the distances using argsort, which gives the indices of the nearest training points
30 |         * Find the majority label among the k training points closest to each data point of y_test.
31 |
32 |
33 | :param X: Input tensor
34 | :param y: Ground truth label
35 | :param item: tensors to be classified
36 | :return: predicted labels
37 | """
38 | y_predict = []
39 | for i in item:
40 | point_distances = []
41 | for ipt in range(X.shape[0]):
42 | distances = self.distance(X[ipt, :], i)
43 | point_distances.append(distances)
44 |
45 | point_distances = torch.tensor(point_distances)
46 | k_neighbors = torch.argsort(point_distances)[:self.k]
47 | y_label = y[k_neighbors]
48 | major_class = mode(y_label)
49 | major_class = major_class.mode[0]
50 | y_predict.append(major_class)
51 |
52 | return torch.tensor(y_predict)
53 |
54 | if __name__ == '__main__':
55 | iris = load_iris()
56 | X = torch.tensor(iris.data)
57 | y = torch.tensor(iris.target)
58 | torch.manual_seed(0)
59 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
60 | knn = KNN(k=5, X=x_train)
61 | y_pred = knn.fit_predict(x_train, y_train, x_test)
62 | print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
63 |
--------------------------------------------------------------------------------
/Day-19-ElasticNet/ElasticNetRegression.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.datasets import load_boston
3 | from sklearn.model_selection import train_test_split
4 |
5 | class ElasticNetRegression:
6 | def __init__(self, learning_rate, max_iterations, l1_penality, l2_penality):
7 | self.lr = learning_rate
8 | self.max_iterations = max_iterations
9 | self.l1_penality = l1_penality
10 | self.l2_penality = l2_penality
11 |
12 | def normalization(self, X):
13 | """
14 | :param X: Input tensor
15 | :return: Normalized input using l2 norm.
16 | """
17 | l2 = torch.norm(X, p=2, dim=-1)
18 | l2[l2 == 0] = 1
19 | return X / l2.unsqueeze(1)
20 |
21 | def fit(self, X, y):
22 | self.m, self.n = X.shape
23 | self.w = torch.zeros(self.n, dtype=torch.double).unsqueeze(1)
24 | self.b = 0.0
25 | self.X = X
26 | self.y = y
27 | for i in range(self.max_iterations):
28 | self.update_weights()
29 |
30 | return self
31 |
32 | def update_weights(self):
33 | y_pred = self.predict(self.X)
34 | dw = torch.zeros(self.n).unsqueeze(1)
35 | for j in range(self.n):
36 | if self.w[j] > 0:
37 | dw[j] = ( - (2* torch.mm(self.X[:, j].unsqueeze(0), (self.y - y_pred)) + self.l1_penality + 2 * self.l2_penality * self.w[j])) / self.m
38 | else:
39 | dw[j] = (- (2 * torch.mm(self.X[:, j].unsqueeze(0), (self.y - y_pred)) - self.l1_penality + 2 * self.l2_penality * self.w[j])) / self.m
40 |
41 | db = -2 * torch.sum(self.y - y_pred) / self.m
42 | self.w = self.w - self.lr * dw
43 | self.b = self.b - self.lr * db
44 | return self
45 |
46 | def predict(self, X):
47 | return torch.mm(X, self.w) + self.b
48 |
49 | if __name__ == '__main__':
50 | data = load_boston()
51 | regression = ElasticNetRegression(max_iterations=1000, learning_rate=0.001, l1_penality=500, l2_penality=1)
52 | X, y = regression.normalization(torch.tensor(data.data, dtype=torch.double)), torch.tensor(data.target).unsqueeze(1)
53 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
54 | regression.fit(x_train, y_train)
55 | Y_pred = regression.predict(x_test)
56 | print("Predicted values: ", torch.round(Y_pred[:3]))
57 | print("Real values: ", y_test[:3])
58 | print("Trained W: ", torch.round(regression.w[0]))
59 | print("Trained b: ", torch.round(regression.b))
60 |
61 |
--------------------------------------------------------------------------------
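
A standalone sketch (not from the repository) of the objective that the update rule above descends: the mean squared error plus an L1 and an L2 penalty on the weights:

import torch

def elastic_net_objective(X, y, w, b, l1, l2):
    residual = y - (torch.mm(X, w) + b)
    mse = torch.mean(residual ** 2)
    return mse + l1 * torch.sum(torch.abs(w)) + l2 * torch.sum(w ** 2)

X = torch.randn(10, 3, dtype=torch.double)
w_true = torch.tensor([[1.0], [0.0], [-2.0]], dtype=torch.double)
y = torch.mm(X, w_true)
# With the true weights the residual is zero, so only the penalties remain: 0.1*3 + 0.01*5 = 0.35.
print(elastic_net_objective(X, y, w_true, 0.0, l1=0.1, l2=0.01))
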
/Day-12-LDA/lda.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from NaiveBayes import NaiveBayes
3 | from sklearn.datasets import load_breast_cancer
4 | from sklearn.preprocessing import MinMaxScaler
5 | from sklearn.model_selection import train_test_split
6 | from sklearn.metrics import accuracy_score
7 |
8 | class LDA:
9 | def __init__(self):
10 | self.w = None
11 |
12 | def covariance_matrix(self, X):
13 | """
14 | :param X: Input tensor
15 |         :return: covariance of input tensor
16 | """
17 | centering_X = X - torch.mean(X, dim=0)
18 | cov = torch.mm(centering_X.T, centering_X) / (centering_X.shape[0] - 1)
19 | return cov
20 |
21 | def fit(self, X, y):
22 | """
23 | :param X: Input tensor
24 | :param y: output tensor
25 | :return: transformation vector - to convert high dimensional input space into lower dimensional
26 | subspace.
27 | X1, X2 are samples based on class. cov_1 and cov_2 measures how features of samples of each class are related.
28 |
29 | """
30 | X1 = X[y==0]
31 | X2 = X[y==1]
32 | cov_1 = self.covariance_matrix(X1)
33 | cov_2 = self.covariance_matrix(X2)
34 | cov_total = cov_1 + cov_2
35 | mean1 = torch.mean(X1, dim=0)
36 | mean2 = torch.mean(X2, dim=0)
37 | mean_diff = mean1 - mean2
38 |
39 | # Determine the vector which when X is projected onto it best separates the
40 | # data by class. w = (mean1 - mean2) / (cov1 + cov2)
41 | self.w = torch.mm(torch.pinverse(cov_total), mean_diff.unsqueeze(1))
42 |
43 | def transform(self, X, y):
44 | self.fit(X, y)
45 | X_transformed = torch.mm(X, self.w)
46 | return X_transformed
47 |
48 | def predict(self, X):
49 | y_pred = []
50 | for sample in X:
51 | h = torch.mm(sample.unsqueeze(0), self.w)
52 | y = 1 * (h < 0)
53 | y_pred.append(y)
54 |
55 | return y_pred
56 |
57 | if __name__ == '__main__':
58 | breast_cancer = load_breast_cancer()
59 | X = breast_cancer.data
60 | X_normalized = MinMaxScaler().fit_transform(X)
61 | X = torch.tensor(X_normalized)
62 | y = torch.tensor(breast_cancer.target)#.unsqueeze(1)
63 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
64 | lda = LDA()
65 | X_transformed = lda.transform(x_train, y_train)
66 | GNB = NaiveBayes(X_transformed, y_train)
67 | GNB.find_mu_and_sigma(X_transformed, y_train)
68 | X_test_transformed = lda.transform(x_test, y_test)
69 | y_pred = GNB.predict_probability(X_test_transformed)
70 | print(f'Accuracy Score: {accuracy_score(y_test, y_pred)}')
71 |
--------------------------------------------------------------------------------
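
A tiny standalone check (not from the repository) of the projection rule described in fit(), w = (cov1 + cov2)^-1 (mean1 - mean2), on two well-separated Gaussian blobs:

import torch

torch.manual_seed(0)
X1 = torch.randn(50, 2) + torch.tensor([2.0, 0.0])
X2 = torch.randn(50, 2) + torch.tensor([-2.0, 0.0])

def cov(X):
    Xc = X - torch.mean(X, dim=0)
    return torch.mm(Xc.T, Xc) / (Xc.shape[0] - 1)

mean_diff = (torch.mean(X1, dim=0) - torch.mean(X2, dim=0)).unsqueeze(1)
w = torch.linalg.solve(cov(X1) + cov(X2), mean_diff)
# The two classes end up on opposite sides along the learned direction.
print(torch.mm(X1, w).mean().item() > torch.mm(X2, w).mean().item())   # True
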
/README.md:
--------------------------------------------------------------------------------
1 | ## Machine Learning Using Pytorch
2 |
3 |
4 |
5 | The objective of the repository is to learn and build machine learning models using Pytorch.
6 |
7 | 
8 | 
9 | 
10 | 
11 |
12 | 
13 | 
14 | 
15 | 
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | 
25 |
26 | **List of Algorithms Covered**
27 |
28 | 📌 Day 1 - Linear Regression \
29 | 📌 Day 2 - Logistic Regression \
30 | 📌 Day 3 - Decision Tree \
31 | 📌 Day 4 - KMeans Clustering \
32 | 📌 Day 5 - Naive Bayes \
33 | 📌 Day 6 - K Nearest Neighbour (KNN) \
34 | 📌 Day 7 - Support Vector Machine \
35 | 📌 Day 8 - Tf-Idf Model \
36 | 📌 Day 9 - Principal Components Analysis \
37 | 📌 Day 10 - Lasso and Ridge Regression \
38 | 📌 Day 11 - Gaussian Mixture Model \
39 | 📌 Day 12 - Linear Discriminant Analysis \
40 | 📌 Day 13 - Adaboost Algorithm \
41 | 📌 Day 14 - DBScan Clustering \
42 | 📌 Day 15 - Multi-Class LDA \
43 | 📌 Day 16 - Bayesian Regression \
44 | 📌 Day 17 - K-Medoids \
45 | 📌 Day 18 - TSNE \
46 | 📌 Day 19 - ElasticNet Regression \
47 | 📌 Day 20 - Spectral Clustering \
48 | 📌 Day 21 - Latent Dirichlet \
49 | 📌 Day 22 - Affinity Propagation \
50 | 📌 Day 23 - Gradient Descent Algorithm \
51 | 📌 Day 24 - Regularization Techniques \
52 | 📌 Day 25 - RANSAC Algorithm \
53 | 📌 Day 26 - Normalizations \
54 | 📌 Day 27 - Multi-Layer Perceptron \
55 | 📌 Day 28 - Activations \
56 | 📌 Day 29 - Optimizers \
57 | 📌 Day 30 - Loss Functions
58 |
59 | ### Let me know if any corrections are needed. Feedback is welcome.
60 |
61 | ## References
62 |
63 | * Sklearn Library
64 | * ML-Glossary
65 | * ML From Scratch (Github)
66 |
--------------------------------------------------------------------------------
/Day-21-LatentDirichlet/LDA_TopicModeling.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | from sklearn.datasets import fetch_20newsgroups
5 | from sklearn.feature_extraction.text import CountVectorizer
6 |
7 | class LatentDirichlet:
8 | def __init__(self, D, V, T):
9 | self.D = len(D)
10 | self.V = len(V)
11 | self.T = T
12 | self.alpha = 1 / T
13 | self.beta = 1 / T
14 |
15 | def fit_transform(self, documents):
16 | z_d_n = [[0 for _ in range(len(d))] for d in documents]
17 | theta_d_z = torch.zeros((self.D, self.T))
18 | phi_z_w = torch.zeros((self.T, self.V))
19 | n_z = torch.zeros((self.T))
20 | n_d = torch.zeros((self.D))
21 |
22 | for d, doc in enumerate(documents):
23 | for n, w in enumerate(doc):
24 | z_d_n[d][n] = n % self.T
25 | z = z_d_n[d][n]
26 | theta_d_z[d][z] += 1
27 | phi_z_w[z, w] += 1
28 | n_z[z] += 1
29 | n_d[d] += 1
30 |
31 | for iter in range(10):
32 | for d, doc in enumerate(documents):
33 | for n,w in enumerate(doc):
34 | z = z_d_n[d][n]
35 | theta_d_z[d][z] -= 1
36 | phi_z_w[z, w] -= 1
37 | n_z[z] -= 1
38 | p_d_t = (theta_d_z[d] + self.alpha) / (n_d[d] - 1 + self.T * self.alpha)
39 | p_t_w = (phi_z_w[:, w] + self.beta) / (n_z + self.V * self.beta)
40 | p_z = p_d_t * p_t_w
41 | p_z /= torch.sum(p_z)
42 | new_z = torch.multinomial(p_z, 1)
43 | z_d_n[d][n] = new_z[0]
44 | theta_d_z[d][new_z] += 1
45 | phi_z_w[new_z, w] += 1
46 | n_z[new_z] += 1
47 |
48 | return theta_d_z, phi_z_w
49 |
50 | if __name__ == '__main__':
51 | n_samples = 10000
52 | documents = []
53 | data, _ = fetch_20newsgroups(shuffle=True, random_state=2,
54 | remove=('headers', 'footers', 'quotes'), return_X_y=True)
55 | data_samples = data[:n_samples]
56 | cnt_vectorizer = CountVectorizer(max_df=0.95, min_df=2,
57 | max_features=10000,
58 | stop_words='english')
59 | vectorizer = cnt_vectorizer.fit_transform(data_samples)
60 | vocabulary = cnt_vectorizer.vocabulary_
61 | for row in vectorizer.toarray():
62 | present_words = np.where(row != 0)[0].tolist()
63 | present_words_with_count = []
64 | for w_i in present_words:
65 | for count in range(row[w_i]):
66 | present_words_with_count.append(w_i)
67 | documents.append(present_words_with_count)
68 |
69 | LD = LatentDirichlet(D=documents, V=vocabulary, T=20)
70 | topic_distribution, word_distribution = LD.fit_transform(documents)
71 | i = 1
72 | plt.plot(topic_distribution[i] / sum(topic_distribution[i]));
73 |     plt.title(r"Topic distribution $\theta_i$ for document {}".format(i));
74 | plt.show()
75 |
--------------------------------------------------------------------------------
/Day-28-Activations/activation.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.datasets import load_digits
3 | from sklearn.model_selection import train_test_split
4 | from MLP import MultiLayerPerceptron, CrossEntropy, normalization, accuracy_score, to_categorical
5 |
6 | class Sigmoid:
7 | def __call__(self, X):
8 | return 1 / (1 + torch.exp(-X))
9 |
10 | def gradient(self, X):
11 | return self.__call__(X) * (1 - self.__call__(X))
12 |
13 | class Softmax:
14 | def __call__(self, X):
15 | e_x = torch.exp(X - torch.max(X, dim=-1, keepdim=True).values)
16 | return e_x / torch.sum(e_x, dim=1, keepdim=True)
17 |
18 | def gradient(self, X):
19 | p = self.__call__(X)
20 | return p * (1 - p)
21 |
22 | class TanH:
23 | def __call__(self, X):
24 | return 2 / (1 + torch.exp(-2 * X)) - 1
25 |
26 | def gradient(self,X):
27 | return 1 - torch.pow(self.__call__(X), 2)
28 |
29 | class Relu:
30 | def __call__(self, X):
31 | return torch.where(X>0.0, X, 0.0)
32 |
33 | def gradient(self, X):
34 | return torch.where(X >=0.0, 1.0, 0.0)
35 |
36 | class LeakyRelu:
37 | def __init__(self, alpha):
38 | self.alpha = alpha
39 |
40 | def __call__(self, X):
41 | return torch.where(X > 0.0, X, self.alpha * X)
42 |
43 | def gradient(self, X):
44 | return torch.where(X > 0.0, 1.0, self.alpha)
45 |
46 | class ELU:
47 | def __init__(self, alpha):
48 | self.alpha = alpha
49 |
50 | def __call__(self, X):
51 | return torch.where(X>=0.0, X, self.alpha * (torch.exp(X) - 1))
52 |
53 | def gradient(self, X):
54 | return torch.where(X >= 0.0, 1.0, self.__call__(X) + self.alpha)
55 |
56 | class SELU():
57 | def __init__(self):
58 | self.alpha = 1.6732632423543772848170429916717
59 | self.scale = 1.0507009873554804934193349852946
60 |
61 | def __call__(self, x):
62 | return self.scale * torch.where(x >= 0.0, x, self.alpha*(torch.exp(x)-1))
63 |
64 | def gradient(self, x):
65 | return self.scale * torch.where(x >= 0.0, 1.0, self.alpha * torch.exp(x))
66 |
67 | class SoftPlus():
68 | def __call__(self, x):
69 | return torch.log(1 + torch.exp(x))
70 |
71 | def gradient(self, x):
72 | return 1 / (1 + torch.exp(-x))
73 |
74 | if __name__ == '__main__':
75 | data = load_digits()
76 | X = normalization(torch.tensor(data.data, dtype=torch.double))
77 | y = torch.tensor(data.target)
78 |
79 | # Convert the nominal y values to binary
80 | y = to_categorical(y)
81 |
82 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1)
83 | # MLP
84 | clf = MultiLayerPerceptron(n_hidden=16,
85 | n_iterations=1000,
86 | learning_rate=0.01, activation_function_hidden_layer=Sigmoid(),
87 | activation_function_output_layer=Softmax())
88 |
89 | clf.fit(X_train, y_train)
90 | y_pred = torch.argmax(clf.predict(X_test), dim=1)
91 | y_test = torch.argmax(y_test, dim=1)
92 |
93 | accuracy = accuracy_score(y_test, y_pred)
94 | print("Accuracy:", accuracy)
95 |
--------------------------------------------------------------------------------
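
A standalone sketch (not from the repository) of the property that motivates LeakyRelu above: for negative inputs ReLU's gradient is exactly zero ("dying ReLU"), while LeakyReLU keeps a small slope alpha:

import torch

x = torch.tensor([-2.0, -0.5, 0.5, 2.0])
relu_grad  = (x > 0).float()
leaky_grad = torch.where(x > 0, torch.ones_like(x), torch.full_like(x, 0.1))   # alpha = 0.1
print(relu_grad)    # tensor([0., 0., 1., 1.])
print(leaky_grad)   # tensor([0.1000, 0.1000, 1.0000, 1.0000])
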
/Day-15-MultiClassLDA/multi-class-LDA.py:
--------------------------------------------------------------------------------
1 | """
2 | Checkout below url on Multi-Class LDA
3 | https://multivariatestatsjl.readthedocs.io/en/latest/mclda.html
4 | """
5 | import torch
6 | from sklearn.datasets import load_iris
7 | import matplotlib.pyplot as plt
8 |
9 | class MultiClassLDA:
10 | def __init__(self, solver='svd'):
11 | self.solver = solver
12 |
13 | def covariance_matrix(self, X):
14 | """
15 | :param X: Input tensor
16 |         :return: covariance of input tensor
17 | """
18 | centering_X = X - torch.mean(X, dim=0)
19 | cov = torch.mm(centering_X.T, centering_X) / (centering_X.shape[0] - 1)
20 | return cov
21 |
22 | def scatter_matrix(self, X, y):
23 | """
24 | :param X: Input tensor
25 | :param y: Output tensor
26 | :return: How features are related to each other in within-class distribution and between class distribution
27 | """
28 | n_features = X.shape[1]
29 | labels = y.unique()
30 |
31 |         # Within-Class Scatter Matrix
32 |         sw = torch.zeros((n_features, n_features), dtype=X.dtype)
33 |         for label in labels:
34 |             X_class = X[y==label]
35 |             sw += (X_class.shape[0] - 1) * self.covariance_matrix(X_class)
36 |
37 |         # Between-Class Scatter Matrix: spread of the class means around the overall mean
38 |         overall_mean = torch.mean(X, dim=0).unsqueeze(0)
39 |         sb = torch.zeros((n_features, n_features), dtype=X.dtype)
40 |         for label in labels:
41 |             X_class = X[y==label]
42 |             mean_class = torch.mean(X_class, dim=0).unsqueeze(0)
43 |
44 |             sb += (X_class.shape[0]) * torch.mm((mean_class - overall_mean).T, (mean_class - overall_mean))
45 |
46 |         return sw, sb
47 |
48 | def transform(self, X, y, n_components):
49 | """
50 |         Why the inverse? There is no concept of division for matrices, so multiplying by the inverse
51 |         matrix achieves what division would.
52 | :param X:
53 | :param y:
54 | :param n_components: Transforming from high dimension data to lower dimension n_components.
55 | :return: Transformed set of low dimensional X matrix
56 | """
57 | sw, sb = self.scatter_matrix(X, y)
58 | A = torch.mm(torch.pinverse(sw), sb)
59 | eigen_values, eigen_vectors = torch.eig(A, eigenvectors=True)
60 | eigen_sorted_index = torch.argsort(eigen_values[:, 0], descending=True)
61 | eigen_vectors_sorted = eigen_vectors[:, eigen_sorted_index]
62 | component_vector = eigen_vectors_sorted[:, 0:n_components]
63 | component_vector = component_vector.type(torch.DoubleTensor)
64 | transformed = torch.mm(X, component_vector)
65 | return transformed
66 |
67 | def plot_in_2d(self, X, y, title=None):
68 | """ Plot the dataset X and the corresponding labels y in 2D using the LDA
69 | transformation."""
70 | X_transformed = self.transform(X, y, n_components=2)
71 | x1 = X_transformed[:, 0]
72 | x2 = X_transformed[:, 1]
73 | plt.scatter(x1, x2, c=y)
74 | if title: plt.title(title)
75 | plt.show()
76 |
77 |
78 | if __name__ == '__main__':
79 | data = load_iris()
80 | X = torch.tensor(data.data, dtype=torch.double)
81 | y = torch.tensor(data.target)
82 | mclda = MultiClassLDA()
83 | mclda.plot_in_2d(X, y)
84 |
--------------------------------------------------------------------------------
/Day-02-Logistic-Regression/LogisticRegression.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.datasets import make_blobs
3 |
4 | class LogisticRegression:
5 | def __init__(self, X):
6 | """
7 | :param X: Input tensor
8 | :keyword lr: learning rate
9 | :keyword epochs: number of times the model iterates over complete dataset
10 | :keyword weights: parameters learned during training
11 | :keyword bias: parameter learned during training
12 | """
13 | self.lr = 0.1
14 | self.epochs = 1000
15 | self.m, self.n = X.shape
16 | self.weights = torch.zeros((self.n, 1), dtype=torch.double)
17 | self.bias = 0
18 |
19 | def sigmoid(self, z):
20 | """
21 | :param z: latent variable represents (wx + b)
22 | :return: squashes the real value between 0 and 1 representing probability score.
23 | """
24 | return 1 / (1 + torch.exp(-z))
25 |
26 | def loss(self, yhat):
27 | """
28 | :param yhat: Estimated y
29 |         :return: Log loss - when y=1 the (1 - y) term vanishes and only -log(yhat) contributes, and vice versa when y=0
30 | """
31 | return -(1 / self.m) * torch.sum(y * torch.log(yhat) + (1 - y) * torch.log(1 - yhat))
32 |
33 | def gradient(self, y_predict):
34 | """
35 | :param y_predict: Estimated y
36 | :return: gradient is calculated to find how much change is required in parameters to reduce the loss.
37 | """
38 | dw = 1 / self.m * torch.mm(X.T, (y_predict - y))
39 | db = 1 / self.m * torch.sum(y_predict - y)
40 | return dw, db
41 |
42 | def run(self, X, y):
43 | """
44 | :param X: Input tensor
45 | :param y: Output tensor
46 | :var y_predict: Predicted tensor
47 | :var cost: Difference between ground truth and predicted
48 | :var dw, db: Weight and bias update for weight tensor and bias scalar
49 | :return: updated weights and bias
50 | """
51 | for epoch in range(1, self.epochs + 1):
52 |
53 | y_predict = self.sigmoid(torch.mm(X, self.weights) + self.bias)
54 | cost = self.loss(y_predict)
55 | dw, db = self.gradient(y_predict)
56 |
57 | self.weights -= self.lr * dw
58 | self.bias -= self.lr * db
59 |
60 | if epoch % 100 == 0:
61 | print(f"Cost after iteration {epoch}: {cost}")
62 |
63 | return self.weights, self.bias
64 |
65 | def predict(self, X):
66 | """
67 | :param X: Input tensor
68 | :var y_predict_labels: Converts float value to int/bool true(1) or false(0)
69 | :return: outputs labels as 0 and 1
70 | """
71 | y_predict = self.sigmoid(torch.mm(X, self.weights) + self.bias)
72 | y_predict_labels = y_predict > 0.5
73 |
74 | return y_predict_labels
75 |
76 | if __name__ == '__main__':
77 | """
78 | :var manual_seed: for reproducing the results
79 | :desc unsqueeze: adds a dimension to the tensor at specified position.
80 | """
81 | torch.manual_seed(0)
82 | X, y = make_blobs(n_samples=1000, centers=2)
83 | X = torch.tensor(X)
84 | y = torch.tensor(y).unsqueeze(1)
85 | lr = LogisticRegression(X)
86 | w, b = lr.run(X, y)
87 | y_predict = lr.predict(X)
88 |
89 |     print(f"Accuracy: {torch.sum(y == y_predict) / X.shape[0]}")
90 |
--------------------------------------------------------------------------------
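
A standalone worked example (not from the repository) of the log loss used above: when y = 1 only -log(yhat) contributes, and when y = 0 only -log(1 - yhat):

import torch

yhat = torch.tensor([0.9, 0.2])          # predicted probabilities
y    = torch.tensor([1.0, 0.0])          # ground-truth labels
loss = -(y * torch.log(yhat) + (1 - y) * torch.log(1 - yhat))
print(loss)          # tensor([0.1054, 0.2231])
print(loss.mean())   # 0.1643, the averaged cost computed in loss()
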
/Day-22-AffinityPropagation/AffinityPropagation.py:
--------------------------------------------------------------------------------
1 | """
2 | Reading: https://en.wikipedia.org/wiki/Affinity_propagation
3 | Applicable: Human Face Clustering
4 | """
5 | import torch
6 | from scipy.spatial.distance import pdist,squareform
7 | from sklearn.datasets import load_breast_cancer
8 |
9 | class AffinityPropagation:
10 |     def __init__(self, similarity_matrix, max_iteration=200, num_iter=5, alpha=0.5, print_every=100):
11 |         """
12 |         :param similarity_matrix: pairwise similarity between data points
13 |         :param max_iteration: number of responsibility/availability updates to run
14 |         :param num_iter:
15 |         :param alpha: damping factor applied when updating r and a
16 |         :param print_every:
17 |         """
18 |         self.s = similarity_matrix
19 | self.max_iteration = max_iteration
20 | self.alpha = alpha
21 | self.print_every = print_every
22 | N, N = self.s.shape
23 | self.r = torch.zeros((N, N))
24 | self.a = torch.zeros((N, N))
25 |
26 | def step(self):
27 | """
28 |         r is the responsibility matrix: for each data point x_i, how well-suited x_k is as its
29 |         exemplar, relative to the other candidate exemplars.
30 |         a is the availability matrix: how appropriate it is for x_i to pick x_k as its exemplar,
31 |         taking into account the other data points that also keep x_k as their exemplar.
32 |         :return: updated responsibility and availability matrices
33 | """
34 | N, N = self.s.shape
35 | old_r = self.r
36 | old_a = self.a
37 | a_plus_s = self.a + self.s
38 |
39 | first_max = torch.max(a_plus_s, dim=1)
40 | first_max_indices = torch.argmax(a_plus_s, dim=1)
41 | first_max = torch.reshape(torch.repeat_interleave(first_max.values, N), (N, N))
42 | a_plus_s[range(N), first_max_indices] = float('-inf')
43 | second_max = torch.max(a_plus_s, dim=1).values
44 | # responsibility Update
45 | r = self.s - first_max
46 | r[range(N), first_max_indices] = self.s[range(N), first_max_indices] - second_max[range(N)]
47 | r = self.alpha * old_r + (1 - self.alpha) * r
48 | rp = torch.maximum(r, torch.scalar_tensor(0))
49 | m = rp.size(0)
50 | rp.as_strided([m], [m + 1]).copy_(torch.diag(r))
51 | a = torch.reshape(torch.repeat_interleave(torch.sum(rp, dim=0), N),(N, N)).T - rp
52 | da = torch.diag(a)
53 | a = torch.minimum(a, torch.scalar_tensor(0))
54 | k = a.size(0)
55 | a.as_strided([k], [k+1]).copy_(da)
56 |         # Availability Update
57 | a = self.alpha * old_a + (1 - self.alpha) * a
58 |
59 | return r, a
60 |
61 | def solve(self):
62 | for i in range(self.max_iteration):
63 | self.r, self.a = self.step()
64 |
65 | e = self.r + self.a
66 |
67 | N, N = e.shape
68 | I = torch.where(torch.diag(e) > 0)[0]
69 | K = len(I)
70 |
71 | c = self.s[:, I]
72 | c = torch.argmax(c, dim=1)
73 | c[I] = torch.arange(0, K)
74 | idx = I[c]
75 | exemplar_indices = I
76 | exemplar_assignment = idx
77 | return exemplar_indices, exemplar_assignment
78 |
79 | if __name__ == "__main__":
80 | """
81 | :param similarity_matrix
82 | It finds the similarity between data points.
83 |
84 | """
85 | data = load_breast_cancer()
86 | x = torch.tensor(data.data, dtype=torch.double)
87 |
88 | similarity_matrix = squareform(pdist(x, metric='euclidean'))
89 | similarity_matrix = torch.from_numpy(similarity_matrix)
90 | max_iteration = 3000
91 | affinity_prop = AffinityPropagation(similarity_matrix, max_iteration=max_iteration,
92 | alpha=0.5)
93 | indices, assignment = affinity_prop.solve()
94 |
--------------------------------------------------------------------------------
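
A standalone numeric illustration (not from the repository) of the responsibility update described in step(): r(i, k) = s(i, k) - max over k' != k of (a(i, k') + s(i, k')), shown here with the availabilities still at zero:

import torch

s = torch.tensor([[ 0.0, -5.0, -2.0],
                  [-5.0,  0.0, -1.0],
                  [-2.0, -1.0,  0.0]])
a = torch.zeros(3, 3)
a_plus_s = a + s

r = torch.empty_like(s)
for i in range(3):
    for k in range(3):
        others = torch.cat((a_plus_s[i, :k], a_plus_s[i, k + 1:]))
        r[i, k] = s[i, k] - torch.max(others)
print(r)   # e.g. r[0, 0] = 0 - (-2) = 2: point 0 is a far better exemplar for itself than any other point
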
/Day-12-LDA/NaiveBayes.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from sklearn.datasets import load_iris
4 | from sklearn.metrics import accuracy_score
5 | from sklearn.model_selection import train_test_split
6 |
7 | class NaiveBayes:
8 | def __init__(self, X, y):
9 | """
10 | :param X: input tensor
11 | :param y: target tensor
12 | :var total_samples: Number of Samples
13 | :var feature_count: Number of Features
14 | :var mu: mean
15 | :var sigma: variance
16 | :var e: epsilon
17 | :var n_classes: number of classes
18 |
19 | why e - epsilon ?
20 | # If the ratio of data variance between dimensions is too small, it
21 | # will cause numerical errors. To address this, we artificially
22 | # boost the variance by epsilon, a small fraction of the standard
23 | # deviation of the largest dimension.
24 | """
25 | self.total_samples, self.feature_count = X.shape[0], X.shape[1]
26 | self.mu = {}
27 | self.sigma = {}
28 | self.prior_probability_X = {}
29 | self.e = 1e-4
30 | self.n_classes = len(y.unique())
31 |
32 | def find_mu_and_sigma(self, X, y):
33 | """
34 | Bayes Theorem:
35 | P(Y|X) = P(X|Y) * P(Y) / P(X)
36 |
37 | :type mu: dict
38 | :type sigma: dict
39 | :type prior_probability: dict
40 | :describe mu: keys are class label and values are feature's mean values.
41 | :describe sigma: keys are class label and values are feature's variance values.
42 |         :describe prior probability of x: It calculates the prior probability of X for each class. P(X).
43 | :return:
44 | """
45 | for cls in range(self.n_classes):
46 | X_class = X[y==cls]
47 | self.mu[cls] = torch.mean(X_class, dim=0)
48 | self.sigma[cls] = torch.var(X_class, dim=0)
49 | self.prior_probability_X[cls] = X_class.shape[0] / X.shape[0]
50 |
51 | def gaussian_naive_bayes(self, X, mu, sigma):
52 | """
53 | :return: Multivariate normal(gaussian) distribution - Maximum Likelihood Estimation
54 | https://www.statlect.com/fundamentals-of-statistics/multivariate-normal-distribution-maximum-likelihood
55 |
56 | Log Likelihood Function = Constant - probability
57 | """
58 | constant = -self.feature_count / 2 * torch.log(2 * torch.tensor(np.pi)) - 0.5 * torch.sum(torch.log(sigma+self.e))
59 | probability = 0.5 * torch.sum(torch.pow(X-mu, 2) / (sigma + self.e), dim=1)
60 | return constant - probability
61 |
62 | def predict_probability(self, X):
63 | """
64 | Calculating probabilities for each sample input in X using prior probability
65 | and gaussian density function.
66 | torch.argmax: To find the class with max-probability.
67 |         Note: We calculate log probabilities, as in sklearn's predict_log_proba, which is why there is a + sign between
68 |         the prior probabilities and the likelihood (class probability).
69 |
70 | :return:
71 | """
72 | probabilities = torch.zeros((X.shape[0], self.n_classes))
73 | for cls in range(self.n_classes):
74 | class_probability = self.gaussian_naive_bayes(X, self.mu[cls], self.sigma[cls])
75 | probabilities[:, cls] = class_probability + torch.log(torch.scalar_tensor(self.prior_probability_X[cls]))
76 |
77 |
78 | return torch.argmax(probabilities, dim=1)
79 |
80 | if __name__ == '__main__':
81 | iris = load_iris()
82 | X = torch.tensor(iris.data)
83 | y = torch.tensor(iris.target)
84 | torch.manual_seed(0)
85 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
86 | GNB = NaiveBayes(x_train, y_train)
87 | GNB.find_mu_and_sigma(x_train, y_train)
88 | y_pred = GNB.predict_probability(x_test)
89 | print(f'Accuracy Score: {accuracy_score(y_test, y_pred)}')
90 |
--------------------------------------------------------------------------------
/Day-28-Activations/MLP.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.datasets import load_digits
3 | from sklearn.model_selection import train_test_split
4 | def accuracy_score(y, p):
5 | accuracy = torch.sum(y == p, dim=0) / len(y)
6 | return accuracy
7 |
8 | def to_categorical(X, n_col=None):
9 | if not n_col:
10 | n_col = torch.amax(X) + 1
11 |
12 | one_hot = torch.zeros((X.shape[0], n_col))
13 | one_hot[torch.arange(X.shape[0]), X] = 1
14 | return one_hot
15 |
16 | def normalization(X):
17 | """
18 | :param X: Input tensor
19 | :return: Normalized input using l2 norm.
20 | """
21 | l2 = torch.norm(X, p=2, dim=-1)
22 | l2[l2 == 0] = 1
23 | return X / l2.unsqueeze(1)
24 |
25 | class CrossEntropy:
26 | def __init__(self):
27 | pass
28 | def loss(self, y, p):
29 | p = torch.clip(p, 1e-15, 1-1e-15)
30 | return - y * torch.log(p) - (1 -y) * torch.log(1 - p)
31 |
32 | def accuracy_score(self, y, p):
33 | return accuracy_score(torch.argmax(y, dim=1), torch.argmax(p, dim=1))
34 |
35 | def gradient(self, y, p):
36 | p = torch.clip(p, 1e-15, 1 - 1e-15)
37 | return - (y / p) + (1 - y) / (1 -p)
38 | class MultiLayerPerceptron:
39 | def __init__(self, n_hidden, activation_function_hidden_layer, activation_function_output_layer, n_iterations=1000, learning_rate=0.001):
40 | self.n_hidden = n_hidden
41 | self.n_iterations = n_iterations
42 | self.learning_rate = learning_rate
43 | self.hidden_activation = activation_function_hidden_layer
44 | self.output_activation = activation_function_output_layer
45 | self.loss = CrossEntropy()
46 |
47 | def initalize_weight(self, X, y):
48 | n_samples, n_features = X.shape
49 | _, n_outputs = y.shape
50 | limit = 1 / torch.sqrt(torch.scalar_tensor(n_features))
51 | self.W = torch.DoubleTensor(n_features, self.n_hidden).uniform_(-limit, limit)
52 |
53 |         self.W0 = torch.zeros((1, self.n_hidden), dtype=torch.double)
54 | limit = 1 / torch.sqrt(torch.scalar_tensor(self.n_hidden))
55 | self.V = torch.DoubleTensor(self.n_hidden, n_outputs).uniform_(-limit, limit)
56 |         self.V0 = torch.zeros((1, n_outputs), dtype=torch.double)
57 |
58 | def fit(self, X, y):
59 | self.initalize_weight(X, y)
60 | for i in range(self.n_iterations):
61 | hidden_input = torch.mm(X, self.W) + self.W0
62 | hidden_output = self.hidden_activation(hidden_input)
63 |
64 | output_layer_input = torch.mm(hidden_output, self.V) + self.V0
65 | y_pred = self.output_activation(output_layer_input)
66 |
67 | grad_wrt_first_output = self.loss.gradient(y, y_pred) * self.output_activation.gradient(output_layer_input)
68 | grad_v = torch.mm(hidden_output.T, grad_wrt_first_output)
69 | grad_v0 = torch.sum(grad_wrt_first_output, dim=0, keepdim=True)
70 |
71 | grad_wrt_first_hidden = torch.mm(grad_wrt_first_output, self.V.T) * self.hidden_activation.gradient(hidden_input)
72 | grad_w = torch.mm(X.T, grad_wrt_first_hidden)
73 | grad_w0 = torch.sum(grad_wrt_first_hidden, dim=0, keepdim=True)
74 |
75 | # Update weights (by gradient descent)
76 | # Move against the gradient to minimize loss
77 | self.V -= self.learning_rate * grad_v
78 | self.V0 -= self.learning_rate * grad_v0
79 | self.W -= self.learning_rate * grad_w
80 | self.W0 -= self.learning_rate * grad_w0
81 |
82 | # Use the trained model to predict labels of X
83 |
84 | def predict(self, X):
85 | # Forward pass:
86 | hidden_input = torch.mm(X,self.W) + self.W0
87 | hidden_output = self.hidden_activation(hidden_input)
88 | output_layer_input = torch.mm(hidden_output, self.V) + self.V0
89 | y_pred = self.output_activation(output_layer_input)
90 | return y_pred
91 |
92 |
93 |
94 |
95 |
--------------------------------------------------------------------------------
/Day-05-Naive-Bayes/NaiveBayes.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from sklearn.datasets import load_iris
4 | from sklearn.metrics import accuracy_score
5 | from sklearn.model_selection import train_test_split
6 |
7 | class NaiveBayes:
8 | def __init__(self, X, y):
9 | """
10 | why e - epsilon ?
11 | # If the ratio of data variance between dimensions is too small, it
12 | # will cause numerical errors. To address this, we artificially
13 | # boost the variance by epsilon, a small fraction of the standard
14 | # deviation of the largest dimension.
15 |
16 | :param X: input tensor
17 | :param y: target tensor
18 | :var total_samples: Number of Samples
19 | :var feature_count: Number of Features
20 | :var mu: mean
21 | :var sigma: variance
22 | :var e: epsilon
23 | :var n_classes: number of classes
24 | """
25 | self.total_samples, self.feature_count = X.shape[0], X.shape[1]
26 | self.mu = {}
27 | self.sigma = {}
28 | self.prior_probability_X = {}
29 | self.e = 1e-4
30 | self.n_classes = len(y.unique())
31 |
32 | def find_mu_and_sigma(self, X, y):
33 | """
34 | Bayes Theorem:
35 | P(Y|X) = P(X|Y) * P(Y) / P(X)
36 |
37 | :type mu: dict
38 | :type sigma: dict
39 | :type prior_probability: dict
40 | :describe mu: keys are class label and values are feature's mean values.
41 | :describe sigma: keys are class label and values are feature's variance values.
42 |         :describe prior probability of x: It calculates the prior probability of X for each class. P(X).
43 | :return:
44 | """
45 | for cls in range(self.n_classes):
46 | X_class = X[y==cls]
47 | self.mu[cls] = torch.mean(X_class, dim=0)
48 | self.sigma[cls] = torch.var(X_class, dim=0)
49 | self.prior_probability_X[cls] = X_class.shape[0] / X.shape[0]
50 |
51 | def gaussian_naive_bayes(self, X, mu, sigma):
52 | """
53 | :return: Multivariate normal(gaussian) distribution - Maximum Likelihood Estimation
54 | https://www.statlect.com/fundamentals-of-statistics/multivariate-normal-distribution-maximum-likelihood
55 |
56 | Log Likelihood Function = Constant - probability
57 | """
58 | constant = - self.feature_count / 2 * torch.log(2 * torch.tensor(np.pi)) - 0.5 * torch.sum(torch.log(sigma+self.e))
59 | probability = 0.5 * torch.sum(torch.pow(X-mu, 2) / (sigma + self.e), dim=1)
60 | return constant - probability
61 |
62 | def predict_probability(self, X):
63 | """
64 | Calculating probabilities for each sample input in X using prior probability
65 | and gaussian density function.
66 |
67 | torch.argmax: To find the class with max-probability.
68 |
69 |         Note: We calculate log probabilities, as in sklearn's predict_log_proba, which is why there is a + sign between
70 |         the prior probabilities and the likelihood (class probability).
71 |
72 | :return:
73 | """
74 | probabilities = torch.zeros((X.shape[0], self.n_classes))
75 | for cls in range(self.n_classes):
76 | class_probability = self.gaussian_naive_bayes(X, self.mu[cls], self.sigma[cls])
77 | probabilities[:, cls] = class_probability + torch.log(torch.scalar_tensor(self.prior_probability_X[cls]))
78 |
79 |
80 | return torch.argmax(probabilities, dim=1)
81 |
82 | if __name__ == '__main__':
83 | iris = load_iris()
84 | X = torch.tensor(iris.data)
85 | y = torch.tensor(iris.target)
86 | torch.manual_seed(0)
87 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
88 | GNB = NaiveBayes(x_train, y_train)
89 | GNB.find_mu_and_sigma(x_train, y_train)
90 | y_pred = GNB.predict_probability(x_test)
91 | print(f'Accuracy Score: {accuracy_score(y_test, y_pred)}')
92 |
--------------------------------------------------------------------------------
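
A standalone worked example (not from the repository) of the per-class log-likelihood used above ("constant - probability"), for a single feature with mu = 0 and sigma = 1; the result is simply log N(x | 0, 1):

import torch
import numpy as np

x = torch.tensor([[0.0], [1.0], [2.0]])
mu, sigma, n_features = 0.0, 1.0, 1
constant = -n_features / 2 * torch.log(2 * torch.tensor(np.pi)) - 0.5 * np.log(sigma)
probability = 0.5 * torch.sum((x - mu) ** 2 / sigma, dim=1)
print(constant - probability)   # tensor([-0.9189, -1.4189, -2.9189])
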
/Day-29-Optimizers/optimizer.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | class StochasticGradientDescentWithMomentum:
4 | def __init__(self, learning_rate=0.001, momentum=0):
5 | self.lr = learning_rate
6 | self.momentum = momentum
7 | self.w_update = None
8 |
9 | def update(self, w, gradient_wrt_w):
10 | if self.w_update is None:
11 | self.w_update = torch.zeros(w.shape)
12 |
13 | self.w_update = self.momentum * self.w_update + (1 - self.momentum) * gradient_wrt_w
14 | return w - self.lr * self.w_update
15 |
16 | class NesterovAcceleratedGradient:
17 | def __init__(self, learning_rate=0.001, momentum=0.4):
18 | self.lr = learning_rate
19 | self.momentum = momentum
20 | self.w_update = torch.tensor([])
21 |
22 |     def update(self, w, gradient_function):
23 |         if not self.w_update.any():
24 |             self.w_update = torch.zeros(w.shape)
25 |
26 |         approx_future_gradient = torch.clip(gradient_function(w - self.momentum * self.w_update), -1, 1)
27 |
28 | self.w_update = self.momentum * self.w_update + self.lr * approx_future_gradient
29 | return w - self.w_update
30 |
31 | class Adagrad:
32 | def __init__(self, learning_rate=0.01):
33 | self.lr = learning_rate
34 | self.G = None
35 | self.eps = 1e-8
36 |
37 | def update(self, w, gradient_wrt_w):
38 | if self.G is None:
39 | self.G = torch.zeros(w.shape)
40 |
41 | self.G += torch.pow(gradient_wrt_w, 2)
42 | return w - self.lr * gradient_wrt_w / torch.sqrt(self.G + self.eps)
43 |
44 | class Adadelta:
45 | def __init__(self, rho=0.95, eps=1e-6):
46 | self.E_W_update = None
47 | self.E_gradient = None
48 | self.w_update = None
49 | self.eps = eps
50 | self.rho = rho
51 |
52 | def update(self, w, gradient_wrt_w):
53 | if self.w_update is None:
54 | self.w_update = torch.zeros(w.shape)
55 | self.E_gradient = torch.zeros(gradient_wrt_w.shape)
56 | self.E_W_update = torch.zeros(w.shape)
57 |
58 | self.E_gradient = self.rho * self.E_gradient + (1 - self.rho) * torch.pow(gradient_wrt_w, 2)
59 | RMS_Delta_W = torch.sqrt(self.E_W_update + self.eps)
60 | RMS_gradient = torch.sqrt(self.E_gradient + self.eps)
61 |
62 | adaptive_lr = RMS_Delta_W / RMS_gradient
63 | self.w_update = adaptive_lr * gradient_wrt_w
64 | self.E_W_update = self.rho * self.E_W_update + (1 - self.rho) * torch.pow(self.w_update, 2)
65 | return w - self.w_update
66 |
67 | class RMSprop:
68 | def __init__(self, learning_rate=0.01, rho=0.9):
69 | self.lr = learning_rate
70 | self.Eg = None
71 | self.eps = 1e-8
72 | self.rho = rho
73 |
74 | def update(self, w, gradient_wrt_w):
75 | if self.Eg is None:
76 | self.Eg = torch.zeros(gradient_wrt_w.shape)
77 |
78 | self.Eg = self.rho * self.Eg + (1 - self.rho) * torch.pow(gradient_wrt_w, 2)
79 | return w - self.lr * gradient_wrt_w / torch.sqrt(self.Eg + self.eps)
80 |
81 | class Adam:
82 | def __init__(self, learning_rate=0.001, b1=0.9, b2=0.999):
83 | self.lr = learning_rate
84 | self.eps = 1e-8
85 | self.m = None
86 | self.v = None
87 | self.b1 = b1
88 | self.b2 = b2
89 |
90 | def update(self, w, gradient_wrt_w):
91 | if self.m is None:
92 | self.m = torch.zeros(gradient_wrt_w.shape)
93 | self.v = torch.zeros(gradient_wrt_w.shape)
94 |
95 | self.m = self.b1 * self.m + (1 - self.b1) * gradient_wrt_w
96 | self.v = self.b2 * self.v + (1 - self.b2) * torch.pow(gradient_wrt_w, 2)
97 |
98 | m_hat = self.m / (1 - self.b1)
99 | v_hat = self.v / (1 - self.b2)
100 |
101 |         self.w_update = self.lr * m_hat / (torch.sqrt(v_hat) + self.eps)
102 |
103 | return w - self.w_update
104 |
--------------------------------------------------------------------------------
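
A minimal usage sketch (not part of the repository): each optimizer above exposes update(w, gradient_wrt_w), so minimizing f(w) = (w - 3)^2 with Adam looks like this (assumes Day-29-Optimizers is the working directory so optimizer.py is importable):

import torch
from optimizer import Adam

w = torch.tensor([0.0])
adam = Adam(learning_rate=0.1)
for _ in range(500):
    grad = 2 * (w - 3.0)       # gradient of (w - 3)^2
    w = adam.update(w, grad)
print(w)                        # close to tensor([3.])
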
/Day-07-SVM/svm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.preprocessing import MinMaxScaler
3 | from sklearn.model_selection import train_test_split
4 | from sklearn.datasets import load_breast_cancer
5 | from sklearn.metrics import accuracy_score
6 | from sklearn.utils import shuffle
7 |
8 | class SVM:
9 | def __init__(self, X, y, C=1.0):
10 | self.total_samples, self.features_count = X.shape[0], X.shape[1]
11 | self.n_classes = len(y.unique())
12 | self.learning_rate = 0.001
13 | self.C = C
14 |
15 | def loss(self, X, W, y):
16 | """
17 | C parameter tells the SVM optimization how much you want to avoid misclassifying each training
18 | example. For large values of C, the optimization will choose a smaller-margin hyperplane if that
19 | hyperplane does a better job of getting all the training points classified correctly. Conversely,
20 | a very small value of C will cause the optimizer to look for a larger-margin separating hyperplane,
21 | even if that hyperplane misclassifies more points. For very tiny values of C, you should get
22 | misclassified examples, often even if your training data is linearly separable.
23 |
24 | :param X:
25 | :param W:
26 | :param y:
27 | :return:
28 | """
29 | num_samples = X.shape[0]
30 | distances = 1 - y * (torch.mm(X, W.T))
31 |
32 | distances[distances < 0] = 0
33 |         hinge_loss = self.C * (torch.sum(distances) / num_samples)
34 | cost = 1 / 2 * torch.mm(W, W.T) + hinge_loss
35 | return cost
36 |
37 | def gradient_update(self, W, X, y):
38 | """
39 | :param W: Weight Matrix
40 | :param X: Input Tensor
41 | :param y: Ground truth tensor
42 | :return: change in weight
43 | """
44 | distance = 1 - (y * torch.mm(X, W.T))
45 | dw = torch.zeros((1, X.shape[1]),dtype=torch.double)
46 | for idx, dist in enumerate(distance):
47 | if max(0, dist) == 0:
48 | di = W
49 | else:
50 | di = W - (self.C * y[idx] * X[idx])
51 |
52 | dw += di
53 |
54 | dw = dw / len(y)
55 | return dw
56 |
57 | def fit(self, X, y, max_epochs):
58 | """
59 | :param X: Input Tensor
60 | :param y: Output tensor
61 | :param max_epochs: Number of epochs the complete dataset is passed through the model
62 | :return: learned weight of the svm model
63 | """
64 | weight = torch.randn((1, X.shape[1]), dtype=torch.double) * torch.sqrt(torch.scalar_tensor(1./X.shape[1]))
65 | cost_threshold = 0.0001
66 | previous_cost = float('inf')
67 | nth = 0
68 | for epoch in range(1, max_epochs+1):
69 | X, y = shuffle(X, y)
70 | for idx, x in enumerate(X):
71 | weight_update = self.gradient_update(weight, torch.tensor(x).unsqueeze(0), y[idx])
72 | weight = weight - (self.learning_rate * weight_update)
73 |
74 | if epoch % 100 == 0:
75 | cost = self.loss(X, weight, y)
76 | print(f'Loss at epoch {epoch}: {cost}')
77 | if abs(previous_cost - cost) < cost_threshold * previous_cost:
78 | return weight
79 | previous_cost = cost
80 | nth += 1
81 | return weight
82 |
83 | if __name__ == '__main__':
84 | num_epochs = 1000
85 | breast_cancer = load_breast_cancer()
86 | X = breast_cancer.data
87 | X_normalized = MinMaxScaler().fit_transform(X)
88 | X = torch.tensor(X_normalized)
89 | y = torch.tensor(breast_cancer.target).unsqueeze(1)
90 | bias = torch.ones((X.shape[0], 1))
91 | X = torch.cat((bias, X), dim=1)
92 | torch.manual_seed(0)
93 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
94 | svm = SVM(x_train, y_train)
95 | model_weights = svm.fit(x_train, y_train, max_epochs=num_epochs)
96 | y_pred = torch.sign(torch.mm(x_test, model_weights.T))
97 | print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
98 |
--------------------------------------------------------------------------------
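
A standalone illustration (not from the repository) of the hinge term behind the C trade-off described in loss(): correctly classified points beyond the margin cost nothing, while points inside the margin or on the wrong side are penalised linearly, scaled by C:

import torch

y      = torch.tensor([1.0, 1.0, -1.0])          # labels in {-1, +1}
scores = torch.tensor([2.0, 0.5,  0.5])          # w.x for three samples
hinge  = torch.clamp(1 - y * scores, min=0)
print(hinge)                                      # tensor([0.0000, 0.5000, 1.5000])
print(0.1 * torch.mean(hinge), 10 * torch.mean(hinge))   # a small C tolerates violations, a large C punishes them
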
/Day-25-RANSAC/ransac.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import math
3 | import random
4 | from sklearn.datasets import make_regression
5 | import matplotlib.pyplot as plt
6 | import pandas as pd
7 |
8 | class LeastSquareModel:
9 | def fit(self, A, Y):
10 | A_T = A.T
11 | A_T_A = torch.mm(A_T, A)
12 | A_T_Y = torch.mm(A_T, Y)
13 | model = torch.mm(torch.pinverse(A_T_A),A_T_Y)
14 |
15 | return model
16 |
17 | class RansacModel:
18 | def __init__(self, curve_fitting_model):
19 | self.curve_fitting_model = curve_fitting_model
20 |
21 | def fit(self, A, Y, num_sample, threshold):
22 | num_iterations = math.inf
23 | iterations_done = 0
24 | num_samples = 3
25 | max_inlier_count = 0
26 | best_model = None
27 | probability_outlier = torch.scalar_tensor(0.5, dtype=torch.double)
28 | desired_prob = torch.scalar_tensor(0.95, dtype=torch.double)
29 | total_data = torch.column_stack((A, Y))
30 | data_size = len(total_data)
31 |
32 | while num_iterations > iterations_done:
33 |
34 | random.shuffle(total_data)
35 | sample_data = total_data[:num_samples, :]
36 | estimated_model = self.curve_fitting_model.fit(sample_data[:, :-1], sample_data[:, -1:])
37 | y_cap = torch.mm(A, estimated_model)
38 | error = torch.abs(Y - y_cap.T)
39 | inlier_count = torch.count_nonzero(error < threshold)
40 |
41 | if inlier_count > max_inlier_count:
42 | max_inlier_count = inlier_count
43 | best_model = estimated_model
44 |
45 | probability_outlier = 1 - inlier_count/data_size
46 | #print('# inliers:', inlier_count)
47 | #print('# prob_outlier:', probability_outlier)
48 | num_iterations = torch.log(1 - desired_prob) / torch.log(1 - (1 - probability_outlier) ** num_sample)
49 | iterations_done = iterations_done + 1
50 |
51 | #print('# s:', iterations_done)
52 | #print('# n:', num_iterations)
53 | #print('# max_inlier_count: ', max_inlier_count)
54 |
55 | return best_model
56 |
57 | def fit_curve(X, y):
58 | x_square = torch.pow(X, 2)
59 |
60 | A = torch.stack((x_square, X, torch.ones(X.shape[0]).unsqueeze(1)),dim=1)
61 | A = A.squeeze(2)
62 | threshold = torch.std(y) / 5
63 | ls_model = LeastSquareModel()
64 | ls_model_estimate = ls_model.fit(A, y)
65 |
66 | ls_model_y = torch.mm(A, ls_model_estimate)
67 |
68 | ransac_model = RansacModel(ls_model)
69 | ransac_model_estimate = ransac_model.fit(A, y, 3, threshold)
70 | ransac_model_y = torch.mm(A, ransac_model_estimate)
71 |
72 | return ls_model_y, ransac_model_y
73 |
74 | if __name__ == '__main__':
75 | X1, y1 = make_regression(n_features=1, n_targets=1)
76 | X2, y2 = make_regression(n_features=1, n_targets=1)
77 |
78 | # X1, y1 = data1['x '], data1['y']
79 | # X2, y2 = data2['X'], data2['y']
80 | X1, y1 = torch.tensor(X1, dtype=torch.double), torch.tensor(y1, dtype=torch.double).unsqueeze(1)
81 | X2, y2 = torch.tensor(X2, dtype=torch.double), torch.tensor(y2, dtype=torch.double).unsqueeze(1)
82 | ls_model_y1, ransac_model_y1 = fit_curve(X1, y1)
83 | ls_model_y2, ransac_model_y2 = fit_curve(X2, y2)
84 |
85 | fig, (ax1, ax2) = plt.subplots(1, 2)
86 |
87 | ax1.set_title('Dataset-1')
88 | ax1.scatter(X1, y1, marker='o', color=(0, 1, 0), label='data points')
89 | ax1.plot(X1, ls_model_y1, color='red', label='Least square model')
90 | ax1.plot(X1, ransac_model_y1, color='blue', label='Ransac model')
91 | ax1.set(xlabel='x-axis', ylabel='y-axis')
92 | ax1.legend()
93 |
94 | ax2.set_title('Dataset-2')
95 | ax2.scatter(X2, y2, marker='o', color=(0, 1, 0), label='data points')
96 | ax2.plot(X2, ls_model_y2, color='red', label='Least square model')
97 | ax2.plot(X2, ransac_model_y2, color='blue', label='Ransac model')
98 | ax2.set(xlabel='x-axis', ylabel='y-axis')
99 | ax2.legend()
100 |
101 | plt.show()
102 |
--------------------------------------------------------------------------------
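The adaptive iteration count in RansacModel.fit above follows the standard RANSAC bound
N = log(1 - p) / log(1 - (1 - e)^s), where p is the desired success probability, e the (estimated) outlier
ratio and s the minimal sample size. A small sketch with assumed values (p = 0.95, e = 0.5, s = 3):

import math

p = 0.95   # desired probability of drawing at least one all-inlier minimal sample
e = 0.5    # assumed outlier ratio
s = 3      # points per minimal sample, as in the file

n_iterations = math.log(1 - p) / math.log(1 - (1 - e) ** s)
print(math.ceil(n_iterations))   # roughly 23 iterations for these assumptions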
/Day-24-Regularization/regularization.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.datasets import load_iris
3 |
4 | class Regularization:
5 | def __init__(self, X):
6 | self.X = X
7 |
8 | def dropout(self, drop_probability):
9 | """
10 | Dropout is a regularization technique for neural networks that drops a unit (along with connections) at
11 | training time with a specified probability P (a common value is P = 0.5). At test time, all units are present,
12 | but with weights scaled by p (i.e. w becomes pw).
13 | The idea is to prevent co-adaptation, where the neural network becomes too reliant on particular
14 | connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating
15 | an implicit ensemble of neural networks.
16 | :param drop_probability: float value between 0 to 1
17 | """
18 | if drop_probability < 1.0:
19 | keep_probability = 1 - drop_probability
20 | masker = torch.FloatTensor(self.X.shape).uniform_(0, 1)
21 | masked = masker < keep_probability
22 |
23 | if keep_probability > 0.0:
24 | scale = 1 / keep_probability
25 | else:
26 | scale = 0.0
27 |
28 | return masked * self.X * scale
29 |
30 | def L2_Regularization(self, y, W, lambda_value):
31 | """
32 | Weight Decay, or L2 Regularization, is a regularization technique applied to the weights of a neural network.
33 | We minimize a loss function comprising both the primary loss function and a penalty on the L2 norm of the
34 | weights:
35 | L_new(w) = L_original(w) + lambda * W^T * W
36 | where lambda is a value determining the strength of the penalty (encouraging smaller weights).
37 | Weight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining
38 | it through the objective function. Often weight decay refers to the implementation where we specify it directly
39 | in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the
40 | objective function).
41 | """
42 | Regularization_term = (lambda_value * torch.mm(W, W.T)).type(torch.DoubleTensor) / (2 * y.shape[0])
43 | output = torch.sum((y - torch.mm(self.X, W.T))**2, dim=0) + Regularization_term
44 | return output
45 |
46 | def L1_Regularization(self, y, W, lambda_value):
47 | """
48 | L1 Regularization is a regularization technique applied to the weights of a neural network. We minimize a loss
49 | function comprising both the primary loss function and a penalty on the L1 norm of the weights:
50 | L_new(w) = L_original(w) + lambda * ||W||_1
51 | where lambda is a value determining the strength of the penalty. In contrast to weight decay, L1 regularization promotes
52 | sparsity; i.e. some parameters have an optimal value of zero.
53 | """
54 | Regularization_term = torch.sum((lambda_value * torch.abs(W)).type(torch.DoubleTensor) / (2 * y.shape[0]),dim=1)
55 | output = torch.sum((y - torch.mm(self.X, W.T))**2, dim=0) + Regularization_term
56 | return output
57 |
58 |
59 | if __name__ == '__main__':
60 | """
61 | Dropout:
62 |
63 | A = torch.arange(20).reshape((5, 4))
64 | print(A)
65 | Regularizer = Regularization(X=A)
66 | print(Regularizer.dropout(drop_probability=0.5))
67 |
68 | L2 Regularization or Weight Decay:
69 |
70 | data = load_iris()
71 | X = torch.tensor(data.data, dtype=torch.double)
72 | y = torch.tensor(data.target).unsqueeze(1)
73 | W = torch.FloatTensor(X.shape[1]).uniform_(0, 1).unsqueeze(0).type(torch.DoubleTensor)
74 | Regularizer = Regularization(X)
75 | Regularizer.L2_Regularization(y=y, W=W, lambda_value=0.7)
76 |
77 | L1 Regularization:
78 |
79 | data = load_iris()
80 | X = torch.tensor(data.data, dtype=torch.double)
81 | y = torch.tensor(data.target).unsqueeze(1)
82 | W = torch.FloatTensor(X.shape[1]).uniform_(0, 1).unsqueeze(0).type(torch.DoubleTensor)
83 | Regularizer = Regularization(X)
84 | print(Regularizer.L1_Regularization(y=y, W=W, lambda_value=0.7))
85 | """
86 |
--------------------------------------------------------------------------------
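To see why dropout() in Day-24-Regularization/regularization.py rescales by 1 / keep_probability
(inverted dropout), here is a small sketch showing that, on average, the masked-and-rescaled activations
match the originals. The tensor shape and keep probability are arbitrary.

import torch

torch.manual_seed(0)
X = torch.ones(10000, 4)
keep_probability = 0.5

mask = (torch.rand_like(X) < keep_probability).float()
dropped = mask * X / keep_probability            # inverted dropout: surviving units are scaled up
print(X.mean().item(), dropped.mean().item())    # both close to 1.0 in expectation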
/Day-08-tf-idf/tfidf.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from nltk.tokenize import word_tokenize
3 |
4 | class TF_IDF:
5 | """
6 | TF - Term Frequency: count of specific word in document / total no. of words in document
7 | IDF - Inverse Document Frequency: Log ratio of (Total no. of documents / no. of documents containing words)
8 | """
9 | def text_processing(self, X):
10 | """
11 | text processing: We clean the text by removing special characters and lower-casing each word; each
12 | sentence is converted into a list of words, and we then collect the unique words across all the
13 | documents combined.
14 | :param X: List of documents
15 | :return: Unique words (Vocabulary), all documents [[d_1], [d_2], ..[d_n]]
16 | """
17 | documents = []
18 | vocabulary = []
19 | for document in X:
20 | document_words = [word.lower() for word in word_tokenize(document) if word.isalpha()]
21 | documents.append(document_words)
22 | for word in document_words:
23 | if word not in vocabulary:
24 | vocabulary.append(word)
25 |
26 | vocabulary = set(vocabulary)
27 | return vocabulary, documents
28 |
29 | def strtoint(self, vocabulary):
30 | """
31 | :param vocabulary: all unique words in the documents
32 | :return: mapping words to integer such as {'the': 1}
33 | """
34 | wordToInt = {}
35 | for i, vocab in enumerate(vocabulary):
36 | wordToInt[vocab] = i
37 |
38 | return wordToInt
39 |
40 | def vocab_frequency(self, vocabulary, documents):
41 | """
42 | :param vocabulary: all unique words in the documents
43 | :param documents: all the documents
44 | :return: Frequency of word in all the documents combined together
45 | """
46 | word_frequency = {}
47 | for word in vocabulary:
48 | word_frequency[word] = 0
49 | for document in documents:
50 | if word in document:
51 | word_frequency[word] += 1
52 |
53 | return word_frequency
54 |
55 | def tf(self, input_document, word):
56 | """
57 | Calculating term_frequency
58 | :param input_document: test document
59 | :param word: each word in the test document
60 | :return: tf value (refer the formula above)
61 | """
62 | num_words = len(input_document)
63 | word_frequency = len([token for token in input_document if token==word])
64 | return word_frequency/num_words
65 |
66 | def idf(self, word, word_frequency, documents):
67 | """
68 | :param word: words of the test input document
69 | :param word_frequency: word frequency w.r.t all the documents available.
70 | :param documents: all the documents
71 | :return: idf value
72 | """
73 | try:
74 | word_frequency = word_frequency[word] + 1
75 | except:
76 | word_frequency = 1
77 |
78 | return torch.log(torch.scalar_tensor(len(documents))/word_frequency)
79 |
80 | def fit_transform(self, document, vocabulary, wordToInt, word_frequency, documents):
81 | """
82 | :param document: test input document
83 | :param vocabulary: all unique words
84 | :param wordToInt: word to int mapping
85 | :param word_frequency: each word frequency throughout all the documents
86 | :param documents: all the documents
87 | :return: tf_idf vector for test input document
88 | """
89 | tfidf_vector = torch.zeros((len(vocabulary), ), dtype=torch.double)
90 | for word in document:
91 | tf = self.tf(document, word)
92 | idf = self.idf(word, word_frequency, documents)
93 | tfidf_values = tf * idf
94 | tfidf_vector[wordToInt[word]] = tfidf_values
95 |
96 | return tfidf_vector
97 |
98 | if __name__ == '__main__':
99 | vectors = []
100 | documents = ['Hi, how are you?',
101 | 'What are you doing?',
102 | 'what is your name?',
103 | 'who are you?']
104 |
105 | tfidf_vectorizer = TF_IDF()
106 | vocabulary, processed_documents = tfidf_vectorizer.text_processing(documents)
107 | wordToInt = tfidf_vectorizer.strtoint(vocabulary)
108 | vocab_frequency = tfidf_vectorizer.vocab_frequency(vocabulary, processed_documents)
109 | _, new_document = tfidf_vectorizer.text_processing([documents[0]])
110 | print(tfidf_vectorizer.fit_transform(new_document[0], vocabulary, wordToInt, vocab_frequency, documents))
111 |
--------------------------------------------------------------------------------
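A tiny worked example of the TF and IDF formulas from the TF_IDF docstring above, computed by hand for an
assumed 4-document corpus. The corpus and query word are illustrative only; natural log is used, matching
torch.log in the file, and the +1 mirrors the smoothing applied in idf().

import math

documents = [['hi', 'how', 'are', 'you'],
             ['what', 'are', 'you', 'doing'],
             ['what', 'is', 'your', 'name'],
             ['who', 'are', 'you']]
word, doc = 'are', documents[0]

tf = doc.count(word) / len(doc)                        # 1 / 4
docs_with_word = sum(word in d for d in documents)     # 3 documents contain 'are'
idf = math.log(len(documents) / (docs_with_word + 1))  # log(4 / 4) = 0 with the +1 smoothing
print(tf, idf, tf * idf)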
/Day-14-DBSCAN/dbscan.py:
--------------------------------------------------------------------------------
1 | """
2 | Check out this Density-Based Spatial Clustering (DBSCAN) blog:
3 | https://blog.dominodatalab.com/topology-and-density-based-clustering/
4 |
5 | - Compared to centroid-based clustering like k-means, density-based clustering works by
6 | identifying “dense” clusters of points, allowing it to learn clusters of arbitrary shape
7 | and identify outliers in the data.
8 | """
9 | import torch
10 | from sklearn.datasets import load_iris
11 | from sklearn.model_selection import train_test_split
12 | from sklearn.metrics import accuracy_score
13 | from sklearn import datasets
14 |
15 | class DBScan:
16 | def __init__(self, eps = 2.5, min_points=30):
17 | """
18 | eps - radius distance around which a cluster is considered.
19 | min_points - Number of points to be present inside the radius
20 | (check out density reachable or border points from blog to understand how cluster points are considered)
21 | """
22 | self.eps = eps
23 | self.minimum_points = min_points
24 |
25 | def euclidean_distance(self, x1, x2):
26 | """
27 | :param x1: input tensor
28 | :param x2: input tensor
29 | :return: distance between tensors
30 | """
31 | return torch.cdist(x1, x2)
32 |
33 | def direct_neighbours(self, sample):
34 | """
35 | :param sample: Sample whose neighbors needs to be identified
36 | :return: all the neighbors within eps distance
37 | """
38 | neighbors = []
39 | idxs = torch.arange(self.X.shape[0])
40 | for i, _sample in enumerate(self.X[idxs != sample]):
41 |
42 | distance = self.euclidean_distance(self.X[sample].unsqueeze(0), _sample.unsqueeze(0))
43 | if distance < self.eps:
44 | neighbors.append(i)
45 |
46 | return torch.tensor(neighbors)
47 |
48 | def density_neighbors(self, sample, neighbors):
49 | """
50 | Recursive method which expands the cluster until we have reached the border
51 | of the dense area (density determined by eps and min_samples)
52 |
53 | :param sample: Sample whose border points to be identified
54 | :param neighbors: samples and its neighbors within eps distance
55 | :return: It updates the number of points assigned to each cluster, by finding
56 | border points and its relative points. In a sense, it expands cluster.
57 | """
58 | cluster = [sample]
59 | for neighbor_i in neighbors:
60 | if not neighbor_i in self.visited_samples:
61 | self.visited_samples.append(neighbor_i)
62 | self.neighbors[neighbor_i] = self.direct_neighbours(neighbor_i)
63 |
64 | if len(self.neighbors[neighbor_i]) >= self.minimum_points:
65 | expanded_cluster = self.density_neighbors(
66 | neighbor_i, self.neighbors[neighbor_i])
67 | cluster = cluster + expanded_cluster
68 | else:
69 | cluster.append(neighbor_i)
70 |
71 | return cluster
72 |
73 | def get_cluster_label(self):
74 | """
75 | :return: assign cluster label based on expanded clusters
76 | """
77 | labels = torch.zeros(self.X.shape[0]).fill_(len(self.clusters))
78 | for cluster_i, cluster in enumerate(self.clusters):
79 | for sample_i in cluster:
80 | labels[sample_i] = cluster_i
81 |
82 | return labels
83 |
84 | def predict(self, X):
85 | """
86 | :param X: input tensor
87 | :return: predicted labels of samples depending on their distance from the clusters
88 | """
89 | self.X = X
90 | self.clusters = []
91 | self.visited_samples = []
92 | self.neighbors = {}
93 | n_samples = X.shape[0]
94 |
95 | for sample_i in range(n_samples):
96 | if sample_i in self.visited_samples:
97 | continue
98 | self.neighbors[sample_i] = self.direct_neighbours(sample_i)
99 | if len(self.neighbors[sample_i]) >= self.minimum_points:
100 | self.visited_samples.append(sample_i)
101 | new_cluster = self.density_neighbors(
102 | sample_i, self.neighbors[sample_i])
103 | self.clusters.append(new_cluster)
104 |
105 | cluster_labels = self.get_cluster_label()
106 | return cluster_labels
107 |
108 | if __name__ == '__main__':
109 | iris = load_iris()
110 | torch.manual_seed(0)
111 | X = torch.tensor(iris.data, dtype=torch.float)
112 | y = torch.tensor(iris.target)
113 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
114 | dbscan = DBScan(eps=0.25, min_points=20)
115 | ypred = dbscan.predict(x_train)
116 | print(f'Accuracy Score: {accuracy_score(y_train, ypred)}')
117 |
--------------------------------------------------------------------------------
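For intuition about the eps / min_points rule in the DBScan class above, here is a minimal sketch that
checks whether one point of a toy tensor is a core point (at least min_points neighbours within eps).
The data, eps and min_points values are made up for illustration.

import torch

X = torch.tensor([[0.0, 0.0], [0.1, 0.0], [0.0, 0.1], [5.0, 5.0]])
eps, min_points = 0.5, 2

sample = 0
dists = torch.cdist(X[sample].unsqueeze(0), X).squeeze(0)   # distances from point 0 to every point
neighbours = (dists < eps).sum() - 1                        # exclude the point itself
print(bool(neighbours >= min_points))                       # True: point 0 is a core point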
/Day-27-MLP/mlp.py:
--------------------------------------------------------------------------------
1 | """
2 | Multi-Layer Perceptron
3 | """
4 | import torch
5 | from sklearn.datasets import load_digits
6 | from sklearn.model_selection import train_test_split
7 |
8 | class Sigmoid:
9 | def __call__(self, X):
10 | return 1 / (1 + torch.exp(-X))
11 |
12 | def gradient(self, X):
13 | return self.__call__(X) * (1 - self.__call__(X))
14 |
15 | class Softmax:
16 | def __call__(self, X):
17 | e_x = torch.exp(X - torch.max(X, dim=-1, keepdim=True).values)
18 | return e_x / torch.sum(e_x, dim=1, keepdim=True)
19 |
20 | def gradient(self, X):
21 | p = self.__call__(X)
22 | return p * (1 - p)
23 |
24 | def accuracy_score(y, p):
25 | accuracy = torch.sum(y == p, dim=0) / len(y)
26 | return accuracy
27 |
28 | def to_categorical(X, n_col=None):
29 | if not n_col:
30 | n_col = torch.amax(X) + 1
31 |
32 | one_hot = torch.zeros((X.shape[0], n_col))
33 | one_hot[torch.arange(X.shape[0]), X] = 1
34 | return one_hot
35 |
36 | def normalization(X):
37 | """
38 | :param X: Input tensor
39 | :return: Normalized input using l2 norm.
40 | """
41 | l2 = torch.norm(X, p=2, dim=-1)
42 | l2[l2 == 0] = 1
43 | return X / l2.unsqueeze(1)
44 |
45 | class CrossEntropy:
46 | def __init__(self):
47 | pass
48 | def loss(self, y, p):
49 | p = torch.clip(p, 1e-15, 1-1e-15)
50 | return - y * torch.log(p) - (1 -y) * torch.log(1 - p)
51 |
52 | def accuracy_score(self, y, p):
53 | return accuracy_score(torch.argmax(y, dim=1), torch.argmax(p, dim=1))
54 |
55 | def gradient(self, y, p):
56 | p = torch.clip(p, 1e-15, 1 - 1e-15)
57 | return - (y / p) + (1 - y) / (1 -p)
58 |
59 | class MultiLayerPerceptron:
60 | def __init__(self, n_hidden, n_iterations=1000, learning_rate=0.001):
61 | self.n_hidden = n_hidden
62 | self.n_iterations = n_iterations
63 | self.learning_rate = learning_rate
64 | self.hidden_activation = Sigmoid()
65 | self.output_activation = Softmax()
66 | self.loss = CrossEntropy()
67 |
68 | def initialize_weight(self, X, y):
69 | n_samples, n_features = X.shape
70 | _, n_outputs = y.shape
71 | limit = 1 / torch.sqrt(torch.scalar_tensor(n_features))
72 | self.W = torch.DoubleTensor(n_features, self.n_hidden).uniform_(-limit, limit)
73 |
74 | self.W0 = torch.zeros((1, self.n_hidden))
75 | limit = 1 / torch.sqrt(torch.scalar_tensor(self.n_hidden))
76 | self.V = torch.DoubleTensor(self.n_hidden, n_outputs).uniform_(-limit, limit)
77 | self.V0 = torch.zeros((1, n_outputs))
78 |
79 | def fit(self, X, y):
80 | self.initialize_weight(X, y)
81 | for i in range(self.n_iterations):
82 | hidden_input = torch.mm(X, self.W) + self.W0
83 | hidden_output = self.hidden_activation(hidden_input)
84 |
85 | output_layer_input = torch.mm(hidden_output, self.V) + self.V0
86 | y_pred = self.output_activation(output_layer_input)
87 |
88 | grad_wrt_first_output = self.loss.gradient(y, y_pred) * self.output_activation.gradient(output_layer_input)
89 | grad_v = torch.mm(hidden_output.T, grad_wrt_first_output)
90 | grad_v0 = torch.sum(grad_wrt_first_output, dim=0, keepdim=True)
91 |
92 | grad_wrt_first_hidden = torch.mm(grad_wrt_first_output, self.V.T) * self.hidden_activation.gradient(hidden_input)
93 | grad_w = torch.mm(X.T, grad_wrt_first_hidden)
94 | grad_w0 = torch.sum(grad_wrt_first_hidden, dim=0, keepdim=True)
95 |
96 | # Update weights (by gradient descent)
97 | # Move against the gradient to minimize loss
98 | self.V -= self.learning_rate * grad_v
99 | self.V0 -= self.learning_rate * grad_v0
100 | self.W -= self.learning_rate * grad_w
101 | self.W0 -= self.learning_rate * grad_w0
102 |
103 | # Use the trained model to predict labels of X
104 |
105 | def predict(self, X):
106 | # Forward pass:
107 | hidden_input = torch.mm(X,self.W) + self.W0
108 | hidden_output = self.hidden_activation(hidden_input)
109 | output_layer_input = torch.mm(hidden_output, self.V) + self.V0
110 | y_pred = self.output_activation(output_layer_input)
111 | return y_pred
112 |
113 |
114 | if __name__ == '__main__':
115 | data = load_digits()
116 | X = normalization(torch.tensor(data.data, dtype=torch.double))
117 | y = torch.tensor(data.target)
118 |
119 | # Convert the nominal y values to binary
120 | y = to_categorical(y)
121 |
122 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1)
123 | # MLP
124 | clf = MultiLayerPerceptron(n_hidden=16,
125 | n_iterations=1000,
126 | learning_rate=0.01)
127 |
128 | clf.fit(X_train, y_train)
129 | y_pred = torch.argmax(clf.predict(X_test), dim=1)
130 | y_test = torch.argmax(y_test, dim=1)
131 |
132 | accuracy = accuracy_score(y_test, y_pred)
133 | print("Accuracy:", accuracy)
134 |
135 |
136 |
--------------------------------------------------------------------------------
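A quick numerical check of the Sigmoid.gradient formula used for backpropagation in Day-27-MLP/mlp.py,
sigma'(x) = sigma(x) * (1 - sigma(x)), compared against a central finite difference. The test points are
arbitrary.

import torch

def sigmoid(x):
    return 1 / (1 + torch.exp(-x))

x = torch.tensor([-2.0, 0.0, 1.5])
analytic = sigmoid(x) * (1 - sigmoid(x))

h = 1e-4
numeric = (sigmoid(x + h) - sigmoid(x - h)) / (2 * h)   # central difference approximation
print(torch.allclose(analytic, numeric, atol=1e-6))      # True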
/Day-13-Adaboost/adaboost.py:
--------------------------------------------------------------------------------
1 | """
2 | Adaboost Algorithm Blog post:
3 | https://www.mygreatlearning.com/blog/adaboost-algorithm/
4 | """
5 | import torch
6 | from sklearn.datasets import load_breast_cancer
7 | from sklearn.model_selection import train_test_split
8 | from sklearn.metrics import accuracy_score
9 |
10 | class stump:
11 | "Each Stump is a weak classifier and combination of them are referred as Boosting Mechanism"
12 | def __init__(self):
13 | """
14 | * Polarity is used to classify sample as either 1 or -1
15 | * feature index is for identifying node for separating classes
16 | * features are compared against threshold value
17 | * Alpha value indicates the classifier accuracy
18 | """
19 | self.polarity = 1
20 | self.feature_index = None
21 | self.threshold = None
22 | self.alpha = None
23 |
24 | class Adaboost:
25 | def __init__(self, num_classifiers):
26 | """
27 | :param num_classifiers: Number of weak classifiers
28 | """
29 | self.num_classifiers = num_classifiers
30 |
31 | def fit(self, X, y):
32 | """
33 | :param X: Input tensor
34 | :param y: output tensor
35 | :return: Creates a list of weak classifier with set of properties as
36 | mentioned in stump class.
37 | * Initialize weights to 1/N, N is number of samples
38 | * Iterate through different weak classifiers
39 | * Minimum error given for using a certain feature value threshold for predicting sample label
40 | * Iterate through each feature and its unique values to find the threshold value
41 | * Label samples with value less than threshold as -1
42 | * Error, Sum of weights of misclassified samples
43 | * If the error is over 50% we flip the polarity so that samples that were classified as -1 are
44 | classified as 1, and vice versa. E.g. error = 0.8 => (1 - error) = 0.2
45 | * If this threshold resulted in the smallest error we save the configuration
46 | * Calculate the alpha which is used to update the sample weights,
47 | Alpha is also an approximation of this classifier's proficiency
48 | * set all predictions to '1' initially
49 | * The indexes where the sample values are below threshold, label them as -1
50 | * Updated weights and normalize to one
51 | * save each weak classifier
52 | """
53 | n_samples, n_features = X.shape[0], X.shape[1]
54 | weight = torch.zeros(n_samples).fill_(1/n_samples)
55 | self.clfs = []
56 | for _ in range(self.num_classifiers):
57 | clf = stump()
58 | minimum_error = float('inf')
59 | for feature_i in range(n_features):
60 | feature_values = X[:, feature_i].unsqueeze(1)
61 | unique_values = feature_values.unique()
62 | for threshold in unique_values:
63 | p = 1
64 | prediction = torch.ones(y.shape)
65 | prediction[X[:, feature_i] < threshold] = -1
66 | error = torch.sum(weight[y != prediction])
67 | if error > 0.5:
68 | error = 1 - error
69 | p = -1
70 |
71 | if error < minimum_error:
72 | clf.polarity = p
73 | clf.threshold = threshold
74 | clf.feature_index = feature_i
75 | minimum_error = error
76 |
77 | clf.alpha = 0.5 * torch.log((1.0 - minimum_error) / (minimum_error + 1e-10))
78 | predictions = torch.ones(y.shape)
79 | negative_idx = (clf.polarity * X[:, clf.feature_index] < clf.polarity * clf.threshold)
80 | predictions[negative_idx] = -1
81 |
82 | weight *= torch.exp(-clf.alpha * y * predictions)
83 | weight /= torch.sum(weight)
84 |
85 | self.clfs.append(clf)
86 |
87 | def predict(self, X):
88 | """
89 | same process as mentioned above.
90 | :param X:
91 | :return: predicted estimate of ground truth.
92 | """
93 | n_samples = X.shape[0]
94 | y_pred = torch.zeros((n_samples, 1))
95 | for clf in self.clfs:
96 | predictions = torch.ones(y_pred.shape)
97 | negative_idx = (clf.polarity * X[:, clf.feature_index] < clf.polarity * clf.threshold)
98 | predictions[negative_idx] = -1
99 | y_pred += clf.alpha * predictions
100 |
101 | print(y_pred)
102 | y_pred = torch.sign(y_pred).flatten()
103 | print(y_pred)
104 | return y_pred
105 |
106 | if __name__ == '__main__':
107 | breast_cancer = load_breast_cancer()
108 | torch.manual_seed(0)
109 | X = torch.tensor(breast_cancer.data, dtype=torch.float)
110 | y = torch.tensor(breast_cancer.target) * 2 - 1  # map labels {0, 1} -> {-1, +1} as AdaBoost expects
111 | n_classes = len(torch.unique(y))
112 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
113 | clf = Adaboost(num_classifiers=20)
114 | clf.fit(x_train, y_train)
115 | y_pred = clf.predict(x_test)
116 |
117 | accuracy = accuracy_score(y_test, y_pred)
118 | print ("Accuracy:", accuracy)
119 |
--------------------------------------------------------------------------------
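The classifier weight in Adaboost.fit above follows the usual AdaBoost rule alpha = 1/2 * ln((1 - error) / error)
(with a small epsilon for numerical safety). As a sanity check with assumed error values:

import math

for error in (0.1, 0.3, 0.5):
    alpha = 0.5 * math.log((1 - error) / (error + 1e-10))
    print(error, round(alpha, 3))
# error 0.1 -> alpha ~ 1.10 (strong vote), error 0.3 -> ~ 0.42, error 0.5 -> ~ 0 (no better than chance)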
/Day-20-SpectralClustering/spectralClustering.py:
--------------------------------------------------------------------------------
1 | """
2 | Reference: https://en.wikipedia.org/wiki/Spectral_clustering
3 | Blog Post: https://towardsdatascience.com/spectral-clustering-aba2640c0d5b
4 | """
5 | import torch
6 | from sklearn.datasets import make_moons
7 | import matplotlib.pyplot as plt
8 | from scipy.spatial.distance import pdist, squareform
9 | from sklearn.cluster import KMeans
10 |
11 | def SpectralClustering(X, K=8, adj=True, metric='euclidean', sim_graph='fully_connect', sigma=1, knn=10, epsilon=0.5, normalized=1):
12 | """
13 | :param X: Input tensor
14 | :param K: cluster to look out for using KMeans
15 | :param adj: whether X is already an adjacency/distance matrix
16 | :param metric: distance metric used to build the adjacency matrix when adj is False
17 | :param sim_graph: Technique to create edges between nodes in the graph.
18 | :param sigma: Parameter for the RBF kernel
19 | :param knn: number of nearest neighbors to connect with edges
20 | :param epsilon: Threshold distance for creating edges in the eps_neighbor graph
21 | :param normalized: 1 for the random-walk normalized Laplacian, 2 for the symmetric normalized one, otherwise unnormalized
22 | :return: fitted sklearn KMeans object on the spectral embedding
23 | """
24 |
25 | # To convert the adjacency matrix into a connected graph, we can use a technique like KNN.
26 |
27 | if not adj:
28 | adjacency_matrix = squareform(pdist(X, metric=metric))
29 | else:
30 | adjacency_matrix = X
31 |
32 | if sim_graph == 'fully_connect':
33 | adjacency_matrix = torch.from_numpy(adjacency_matrix)
34 | w = torch.exp(-adjacency_matrix/ (2 * sigma))
35 |
36 | elif sim_graph =='eps_neighbor':
37 | adjacency_matrix = torch.from_numpy(adjacency_matrix)
38 | w = (adjacency_matrix <= epsilon).type(torch.DoubleTensor)
39 | elif sim_graph == 'knn':
40 | adjacency_matrix = torch.from_numpy(adjacency_matrix)
41 | w = torch.zeros(adjacency_matrix.shape)
42 | adjacency_sort = torch.argsort(adjacency_matrix, dim=1)
43 | for i in range(adjacency_sort.shape[0]):
44 | w[i, adjacency_sort[i, :][:(knn+1)]] = 1
45 | elif sim_graph == 'mutual_knn':
46 | adjacency_matrix = torch.from_numpy(adjacency_matrix)
47 | w1 = torch.zeros(adjacency_matrix.shape)
48 | adjacency_sort = torch.argsort(adjacency_matrix, dim=1)
49 | for i in range(adjacency_matrix.shape[0]):
50 | for j in adjacency_sort[i, :][:(knn+1)]:
51 | if i==j:
52 | w1[i, i] = 1
53 | elif w1[i, j] == 0 and w1[j, i]==0:
54 | w1[i, j] = 0.5
55 | else:
56 | w1[i, j] = w1[j, i] = 1
57 | w = (w1 > 0.5).type(torch.DoubleTensor)  # keep only mutual (weight 1) edges
58 | else:
59 | raise ValueError("The 'sim_graph' argument should be one of the strings, 'fully_connect', 'eps_neighbor', 'knn', or 'mutual_knn'!")
60 |
61 | #Degree Matrix
62 | D = torch.diag(torch.sum(w, dim=1))
63 |
64 | #Graph Laplacian
65 | L = D - w
66 |
67 | # Finding eigen Value of Graph Laplacian Matrix,
68 | """
69 | In the referenced blog's example, the eigenvalues of the Laplacian indicated that there were four clusters.
70 | The vectors associated with those eigenvalues contain information on how to segment the nodes.
71 | """
72 | if normalized == 1:
73 | D_INV = torch.diag(1/torch.diag(D))
74 | lambdas, V = torch.eig(torch.mm(D_INV, L), eigenvectors=True)
75 | ind = torch.argsort(torch.norm(torch.reshape(lambdas[:,0], (1, len(lambdas))), dim=0))
76 | V_K = V[:, ind[:K]]
77 |
78 | elif normalized == 2:
79 | D_INV_SQRT = torch.diag(1/torch.sqrt(torch.diag(D)))
80 | lambdas, V = torch.eig(torch.matmul(torch.matmul(D_INV_SQRT, L), D_INV_SQRT), eigenvectors=True)
81 | ind = torch.argsort(torch.norm(torch.reshape(lambdas[:,0], (1, len(lambdas))), dim=0))
82 | V_K = torch.real(V[:, ind[:K]])
83 | if any(V_K.sum(dim=1) == 0):
84 | raise ValueError("Can't normalize the matrix with the first K eigenvectors as columns! Perhaps the \
85 | number of clusters K or the number of neighbors in k-NN is too small.")
86 | V_K = V_K/torch.reshape(torch.norm(V_K, dim=1), (V_K.shape[0], 1))
87 | else:
88 | lambdas, V = torch.eig(L, eigenvectors=True)
89 | ind = torch.argsort(torch.norm(torch.reshape(lambdas[:,0], (1, len(lambdas))), dim=0))
90 | V_K = torch.real(V[:, ind[:K]])
91 |
92 | # KMeans is used for assigning the labels to the clusters.
93 | kmeans = KMeans(n_clusters=K, init='k-means++', random_state=0).fit(V_K)
94 | return kmeans
95 |
96 | if __name__ == '__main__':
97 | moon_data, moon_labels = make_moons(100, noise=0.05)
98 | moon_data = torch.tensor(moon_data)
99 | moon_labels = torch.tensor(moon_labels)
100 | # Compute the adjacency matrix, Similarity Matrix.
101 | Adj_mat = squareform(pdist(moon_data, metric='euclidean', p=2))
102 | # Spectral clustering...
103 | spec_re1 = SpectralClustering(Adj_mat, K=2, sim_graph='fully_connect', sigma=0.01, normalized=1)
104 | spec_re2 = SpectralClustering(Adj_mat, K=2, sim_graph='knn', knn=10, normalized=1)
105 |
106 | # Often need to change figsize when doing subplots
107 | plt.figure(figsize=(8, 4))
108 | plt.subplot(1, 2, 1)
109 | plt.scatter(x=moon_data[:, 0], y=moon_data[:, 1], c=spec_re1.labels_, s=2)
110 | plt.colorbar()
111 | plt.title('Fully connected graph with RBF kernel ($\sigma=0.01$)')
112 |
113 | plt.subplot(1, 2, 2)
114 | plt.scatter(x=moon_data[:, 0], y=moon_data[:, 1], c=spec_re2.labels_, s=2)
115 | plt.colorbar()
116 | plt.title('$k$-Nearest Neighbor graphs ($k=10$)')
117 |
118 | plt.suptitle('Spectral Clustering', y=-0.01)
119 |
120 | # Automatically adjust padding between subplots
121 | plt.tight_layout()
122 | plt.show()
123 |
124 |
125 |
126 |
--------------------------------------------------------------------------------
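To make the 'Graph Laplacian' step above concrete, here is a small sketch on a hypothetical 4-node graph
made of two disconnected pairs: the number of (near-)zero eigenvalues of L = D - W equals the number of
connected components, which is why the smallest eigenvectors reveal the clusters. This uses
torch.linalg.eigvalsh (available in recent PyTorch versions) rather than the older torch.eig API used in
the file.

import torch

# adjacency of two disconnected edges: {0, 1} and {2, 3}
W = torch.tensor([[0., 1., 0., 0.],
                  [1., 0., 0., 0.],
                  [0., 0., 0., 1.],
                  [0., 0., 1., 0.]], dtype=torch.double)
D = torch.diag(W.sum(dim=1))   # degree matrix
L = D - W                      # unnormalized graph Laplacian

print(torch.linalg.eigvalsh(L))   # two eigenvalues ~0 -> two components / clusters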
/Day-17-K-Medoids/PAM.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from utility import euclidean_distance
3 | from sklearn.datasets import load_iris
4 | from sklearn.metrics import accuracy_score
5 | from sklearn.model_selection import train_test_split
6 | """
7 | K-Medoids, also known as Partitioning Around Medoids (PAM).
8 | """
9 | class PAM:
10 | def __init__(self, k=2):
11 | """
12 | :param k: Number of clusters to be formed using Medoids
13 | """
14 | self.k = k
15 |
16 | def random_medoids(self, X):
17 | """
18 | Similar to KMeans, selecting random samples from the dataset as medoids
19 | :param X: Input tensor
20 | :return: For iris dataset, three medoids are selected.
21 | """
22 | n_samples, n_features = X.shape[0], X.shape[1]
23 | medoids = torch.zeros((self.k, n_features))
24 | for i in range(self.k):
25 | idx = torch.randperm(len(X))[1]
26 | medoid = X[idx]
27 | medoids[i] = medoid
28 |
29 | return medoids
30 |
31 | def closest_medoid(self, sample, medoids):
32 | """
33 | Calculate distance between each sample and every medoids
34 | :param sample: Data point
35 | :param medoids: Similar to centroid in KMeans.
36 | :return: Assigning a medoid to each sample
37 | """
38 | closest_i = None
39 | closest_distance = float('inf')
40 | for i, medoid in enumerate(medoids):
41 | distance = euclidean_distance(sample, medoid)
42 | if distance < closest_distance:
43 | closest_i = i
44 | closest_distance = distance
45 | return closest_i
46 |
47 | def create_clusters(self, X, medoids):
48 | """
49 | Creating clusters after assigning samples to each medoid
50 | :return:
51 | """
52 | clusters = [[] for _ in range(self.k)]
53 | for sample_i, sample in enumerate(X):
54 | medoid_i = self.closest_medoid(sample, medoids)
55 | clusters[medoid_i].append(sample_i)
56 |
57 | return clusters
58 |
59 | def calculate_cost(self, X, clusters, medoids):
60 | """
61 | Total distance between samples and their medoid
62 | :param clusters: Three medoids with samples assigned to each of them
63 | :return: Total distance as mentioned above
64 | """
65 | cost = 0
66 | for i, cluster in enumerate(clusters):
67 | medoid = medoids[i]
68 | for sample_i in cluster:
69 | cost += euclidean_distance(X[sample_i], medoid)
70 |
71 | return cost
72 |
73 | def get_non_medoids(self, X, medoids):
74 | """
75 | Medoids are points in the cluster that act as references for all other points (non-medoids)
76 | when computing distances between them.
77 | :return: all the data point which are not medoids.
78 | """
79 | non_medoids = []
80 | for sample in X:
81 | if not sample in medoids:
82 | non_medoids.append(sample)
83 |
84 | return non_medoids
85 |
86 | def get_cluster_label(self, clusters, X):
87 | """
88 | Assigning each sample as index to a medoid.
89 | """
90 | y_pred = torch.zeros(X.shape[0])
91 | for cluster_i in range(len(clusters)):
92 | cluster = clusters[cluster_i]
93 | for sample_i in cluster:
94 | y_pred[sample_i] = cluster_i
95 |
96 | return y_pred
97 |
98 | def predict(self, X):
99 | """
100 | Do Partitioning Around Medoids and return the cluster labels
101 | * First, randomly select medoids
102 | * Create cluster based on medoids selected and samples
103 | * Cost(distance) of the existing cluster and the samples in it.
104 | * Iterate, until we find the least cost with best medoids.
105 | * Find all non-medoids
106 | :return: Predicting medoid for test sample or a data point.
107 | """
108 |
109 | medoids = self.random_medoids(X)
110 | clusters = self.create_clusters(X, medoids)
111 | cost = self.calculate_cost(X, clusters, medoids)
112 |
113 | while True:
114 | best_medoids = medoids
115 | lowest_cost = cost
116 | for medoid in medoids:
117 | non_medoids = self.get_non_medoids(X, medoids)
118 | # Calculate the cost when swapping medoid and samples
119 | for sample in non_medoids:
120 | # Swap sample with the medoid
121 | new_medoids = medoids.clone()
122 | new_medoids[(medoids == medoid).all(dim=1)] = sample  # replace the swapped medoid row
123 | # Assign samples to new medoids
124 | new_clusters = self.create_clusters(X, new_medoids)
125 | # Calculate the cost with the new set of medoids
126 | new_cost = self.calculate_cost(X, new_clusters, new_medoids)
127 | # If the swap gives us a lower cost we save the medoids and cost
128 | if new_cost < lowest_cost:
129 | lowest_cost = new_cost
130 | best_medoids = new_medoids
131 | # If there was a swap that resulted in a lower cost we save the
132 | # resulting medoids from the best swap and the new cost
133 | if lowest_cost < cost:
134 | cost = lowest_cost
135 | medoids = best_medoids
136 | else:
137 | break
138 |
139 | final_clusters = self.create_clusters(X, medoids)
140 | # Return the samples cluster indices as labels
141 | return self.get_cluster_label(final_clusters, X)
142 |
143 |
144 | if __name__ == '__main__':
145 | data = load_iris()
146 | X = data.data
147 | y = data.target
148 | # Cluster the data using K-Medoids
149 | X = torch.tensor(X, dtype=torch.float)
150 | y = torch.tensor(y)
151 | clf = PAM(k=3)
152 | y_pred = clf.predict(X)
153 | print(accuracy_score(y_pred, y))
154 |
155 |
156 |
--------------------------------------------------------------------------------
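A minimal illustration of the swap criterion in PAM.predict above: the configuration cost is the summed
distance of every point to its medoid, and a swap is accepted only when it lowers that sum. The 1-D toy
points are made up for illustration.

import torch

points = torch.tensor([0.0, 1.0, 2.0, 10.0])

def cost(medoid):
    return torch.abs(points - medoid).sum()

print(cost(points[1]).item())   # medoid = 1.0  -> cost 11.0
print(cost(points[3]).item())   # medoid = 10.0 -> cost 27.0, so this swap would be rejected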
/Day-04-KMeans-Clustering/KMeans.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import scipy
3 | import numpy as np
4 | from sklearn.datasets import load_iris
5 | from sklearn.model_selection import train_test_split
6 | from sklearn.metrics import accuracy_score
7 |
8 |
9 | class KMeans:
10 | def __init__(self, X, k, iterations):
11 | """
12 | :param X: input tensor
13 | :param k: Number of clusters
14 | :variable samples: Number of samples
15 | :variable features: Number of features
16 | """
17 | self.k = k
18 | self.max_iterations = iterations
19 | self.samples = X.shape[0]
20 | self.features = X.shape[1]
21 | self.KMeans_Centroids = []
22 |
23 | # def initialize_centroid(self, X):
24 | # return X[torch.randint(X.shape[0], (self.k,))]
25 |
26 | def initialize_centroid(self, X, K):
27 | """
28 | Initialization Technique is KMeans++. Thanks to stackoverflow.
29 | https://stackoverflow.com/questions/5466323/how-could-one-implement-the-k-means-algorithm
30 | :param X: Input Tensor
31 | :param K: Number of clusters to build
32 | :return: Selection of three centroid vector from X
33 | """
34 | I = [0]
35 | C = [X[0]]
36 | for k in range(1, K):
37 | D2 = np.array([min([np.inner(c - x, c - x) for c in C]) for x in X])
38 | probs = D2 / D2.sum()
39 | cumprobs = probs.cumsum()
40 | r = torch.rand(1).item()
41 |
42 | for j, p in enumerate(cumprobs):
43 | if r < p:
44 | i = j
45 | break
46 | I.append(i)
47 | return X[I]
48 |
49 | def distance(self, sample, centroid, dim=None, default="euclidean"):
50 | if default == "euclidean":
51 | return torch.norm(sample - centroid, 2, 0)
52 | elif default == "manhattan":
53 | return torch.sum(torch.abs(sample - centroid))
54 | elif default == "cosine":
55 | return torch.sum(sample * centroid) / (torch.norm(sample) * torch.norm(centroid))
56 | else:
57 | raise ValueError("Unknown similarity distance type")
58 |
59 | def closest_centroid(self, sample, centroids):
60 | """
61 | :param sample: sample whose distance from centroid is to be measured
62 | :param centroids: all the centroids of all the clusters
63 | :return: centroid's index is passed for each sample
64 | """
65 | closest = None
66 | min_distance = float('inf')
67 | for idx, centroid in enumerate(centroids):
68 | distance = self.distance(sample, centroid)
69 | if distance < min_distance:
70 | closest = idx
71 | min_distance = distance
72 |
73 | return closest
74 |
75 | def create_clusters(self, centroids, X):
76 | """
77 | :param centroids: Centroids of all clusters
78 | :param X: Input tensor
79 | :return: Assigning each sample to a cluster.
80 | """
81 | n_samples = X.shape[0]
82 | k_clusters = [[] for _ in range(self.k)]
83 | for idx, sample in enumerate(X):
84 | centroid_index = self.closest_centroid(sample, centroids)
85 | k_clusters[centroid_index].append(idx)
86 |
87 | return k_clusters
88 |
89 | def update_centroids(self, clusters, X):
90 | """
91 | :return: Updating centroids after each iteration.
92 | """
93 | centroids = torch.zeros((self.k, self.features))
94 | for idx, cluster in enumerate(clusters):
95 | centroid = torch.mean(X[cluster], dim=0)
96 | centroids[idx] = centroid
97 |
98 | return centroids
99 |
100 | def label_clusters(self, clusters, X):
101 | """
102 | Labeling the samples with index of clusters
103 | :return: labeled samples
104 | """
105 | y_pred = torch.zeros(X.shape[0])
106 | for idx, cluster in enumerate(clusters):
107 | for sample_idx in cluster:
108 | y_pred[sample_idx] = idx
109 |
110 | return y_pred
111 |
112 | def fit(self, X):
113 | """
114 | Initialize centroids using KMeans++, then find the distance between each sample and the initial centroids and assign
115 | cluster labels based on the minimum distance. Repeat this process for max_iterations, updating the
116 | centroids each time from the newly assigned clusters. Convergence happens when the difference between the
117 | previous and updated centroids is zero.
118 | :return: updated centroids of the cluster after max_iterations.
119 | """
120 | centroids = self.initialize_centroid(X, self.k)
121 | for _ in range(self.max_iterations):
122 | clusters = self.create_clusters(centroids, X)
123 | previous_centroids = centroids
124 | centroids = self.update_centroids(clusters, X)
125 | difference = centroids - previous_centroids
126 |
127 | # print(difference)
128 | if not difference.numpy().any():
129 | break
130 |
131 | self.KMeans_Centroids = centroids
132 | return centroids
133 |
134 | def predict(self, X):
135 | """
136 | :return: label/cluster number for each input sample is returned
137 | """
138 | if not self.KMeans_Centroids.numpy().any():
139 | raise Exception("No Centroids Found. Run KMeans fit")
140 |
141 | clusters = self.create_clusters(self.KMeans_Centroids, X)
142 | labels = self.label_clusters(clusters, X)
143 |
144 | return labels
145 |
146 |
147 | if __name__ == '__main__':
148 | iris = load_iris()
149 | torch.manual_seed(0)
150 | X = torch.tensor(iris.data, dtype=torch.float)
151 | y = torch.tensor(iris.target)
152 | n_classes = len(torch.unique(y))
153 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
154 | kmeans = KMeans(x_train, k=n_classes, iterations=300)
155 | kmeans.fit(x_train)
156 | ypred = kmeans.predict(x_test)
157 | print(f'Accuracy Score: {accuracy_score(y_test, ypred)}')
158 |
--------------------------------------------------------------------------------
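The k-means++ seeding in initialize_centroid above picks each new centroid with probability proportional
to its squared distance from the nearest centroid chosen so far (D^2 weighting). A small, self-contained
sketch of that single step on an arbitrary toy tensor:

import torch

torch.manual_seed(0)
X = torch.tensor([[0.0, 0.0], [0.2, 0.1], [5.0, 5.0], [5.1, 4.9]])
centroids = [X[0]]   # first centroid: an arbitrary starting point

d2 = torch.stack([((X - c) ** 2).sum(dim=1) for c in centroids]).min(dim=0).values
probs = d2 / d2.sum()                     # D^2 weighting
next_idx = torch.multinomial(probs, 1).item()
print(probs, next_idx)                    # far-away points are much more likely to be chosen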
/Day-18-TSNE/tsne.py:
--------------------------------------------------------------------------------
1 | """
2 | Reference: https://towardsdatascience.com/t-sne-clearly-explained-d84c537f53a
3 | Playground: https://distill.pub/2016/misread-tsne/
4 | Wiki: https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding
5 | """
6 | import torch
7 | import logging
8 | from sklearn.datasets import load_iris, load_digits, load_diabetes
9 | class TSNE:
10 | """
11 | The goal is to take a set of points in a high-dimensional space and find a faithful representation of those
12 | points in a lower-dimensional space, typically the 2D plane. The algorithm is non-linear and adapts to the
13 | underlying data, performing different transformations on different regions. Those differences can be a major
14 | source of confusion.
15 | """
16 | def __init__(self, n_components=2, preplexity=5.0, max_iter=1, learning_rate=200):
17 | """
18 | :param n_components:
19 | :param preplexity: how to balance attention between local and global aspects of your data. The parameter is,
20 | in a sense, a guess about the number of close neighbors each point has. Typical values are between 5 and 50.
21 | With a small value of perplexity, local groups are formed; as perplexity increases, more global structure
22 | emerges. Perplexity is more or less a target number of neighbors for our central point.
23 | :param max_iter: Iterations to stabilize the results and converge.
24 | :param learning_rate:
25 | """
26 | self.max_iter = max_iter
27 | self.preplexity = preplexity
28 | self.n_components = n_components
29 | self.initial_momentum = 0.5
30 | self.final_momentum = 0.8
31 | self.min_gain = 0.01
32 | self.lr = learning_rate
33 | self.tol = 1e-5
34 | self.preplexity_tries = 50
35 |
36 | def l2_distance(self, X):
37 | """
38 | :return: Distance between two vectors
39 | """
40 | sum_X = torch.sum(X * X, dim=1)
41 | return (-2* torch.mm(X, X.T) + sum_X).T + sum_X
42 |
43 | def get_pairwise_affinities(self, X):
44 | """
45 | :param X: High dimensional input
46 | :return: a (Gaussian) probability distribution over pairs of high-dimensional objects in such a way that similar
47 | objects are assigned a higher probability while dissimilar points are assigned a lower probability. To find
48 | variance for this distribution we use Binary search. The variance is calculated between fixed preplexity given
49 | by the user.
50 | """
51 | affines = torch.zeros((self.n_samples, self.n_samples), dtype=torch.float32)
52 | target_entropy = torch.log(torch.scalar_tensor(self.preplexity))
53 | distance = self.l2_distance(X)
54 | for i in range(self.n_samples):
55 | affines[i, :] = self.binary_search(distance[i], target_entropy)
56 |
57 | #affines = torch.diagonal(affines).fill_(1.0e-12)
58 | affines[torch.eye(affines.shape[0]).byte()] = 1.0e-12
59 | affines = affines.clip(min=1e-100)
60 | affines = (affines + affines.T)/(2*self.n_samples)
61 | return affines
62 |
63 | def q_distribution(self, D):
64 | """
65 | A Student t-distribution is learnt in the lower-dimensional space of shape (n_samples, n_components)
66 | (2 or 3 dimensions), and, similar to the method 'get_pairwise_affinities' above, we find the probability of the
67 | data points, with high probability for closer points and lower probability for dissimilar points.
68 | """
69 | Q = 1.0 / (1.0 + D)
70 | Q[torch.eye(Q.shape[0]).byte()] = 0.0
71 | Q = Q.clip(min=1e-100)
72 | return Q
73 |
74 | def binary_search(self, dist, target_entropy):
75 | """
76 | SNE performs a binary search for the value of sigma that produces probability distribution with a fixed
77 | perplexity that is specified by the user.
78 | """
79 | precision_minimum = 0
80 | precision_maximum = 1.0e15
81 | precision = 1.0e5
82 |
83 | for _ in range(self.preplexity_tries):
84 | denominator = torch.sum(torch.exp(-dist[dist > 0.0] / precision))
85 | beta = torch.exp(-dist / precision) / denominator
86 |
87 | g_beta = beta[beta > 0.0]
88 | # Shannon Entropy
89 | entropy = -torch.sum(g_beta * torch.log2(g_beta))
90 | error = entropy - target_entropy
91 |
92 | if error > 0:
93 | precision_maximum = precision
94 | precision = (precision + precision_minimum) / 2.0
95 | else:
96 | precision_minimum = precision
97 | precision = (precision + precision_maximum) / 2.0
98 |
99 | if torch.abs(error) < self.tol:
100 | break
101 |
102 | return beta
103 |
104 | def fit_transform(self, X):
105 | self.n_samples, self.n_features = X.shape[0], X.shape[1]
106 | Y = torch.randn(self.n_samples, self.n_components)
107 | velocity = torch.zeros_like(Y)
108 | gains = torch.ones_like(Y)
109 | P = self.get_pairwise_affinities(X)
110 |
111 | iter_num = 0
112 | while iter_num < self.max_iter:
113 | iter_num += 1
114 | D = self.l2_distance(Y)
115 | Q = self.q_distribution(D)
116 | Q_n = Q /torch.sum(Q)
117 |
118 | pmul = 4.0 if iter_num < 100 else 1.0
119 | momentum = 0.5 if iter_num < 20 else 0.8
120 |
121 | grads = torch.zeros(Y.shape)
122 | for i in range(self.n_samples):
123 | """
124 | Gradient step that pulls the estimated Q distribution toward the true P distribution.
125 | """
126 | grad = 4 * torch.mm(((pmul * P[i] - Q_n[i]) * Q[i]).unsqueeze(0), Y[i] -Y)
127 | grads[i] = grad
128 |
129 | gains = (gains + 0.2) * ((grads > 0) != (velocity > 0)) + (gains * 0.8) * ((grads > 0) == (velocity > 0))
130 | gains = gains.clip(min=self.min_gain)
131 |
132 | velocity = momentum * velocity - self.lr * (gains * grads)
133 | Y += velocity
134 | Y = Y - torch.mean(Y, 0)
135 | error = torch.sum(P * torch.log(P/Q_n))
136 | print("Iteration %s, error %s" % (iter_num, error))
137 | return Y
138 |
139 | if __name__ == '__main__':
140 | data = load_diabetes()
141 | torch.manual_seed(42)
142 | X = torch.tensor(data.data, dtype=torch.double)
143 | print(max(X[1,:]))
144 | y = torch.tensor(data.target)
145 | print(y.shape)
146 | tsne = TSNE(n_components=2)
147 | tsne.fit_transform(X)
148 |
--------------------------------------------------------------------------------
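The binary search in TSNE.binary_search above targets a neighbour distribution whose Shannon entropy
matches the requested perplexity, i.e. perplexity = 2^H(P_i) when entropy is measured in bits. A short
sketch with an assumed neighbour distribution:

import torch

p = torch.tensor([0.5, 0.25, 0.125, 0.125])    # hypothetical neighbour probabilities for one point
entropy_bits = -(p * torch.log2(p)).sum()
perplexity = 2 ** entropy_bits
print(entropy_bits.item(), perplexity.item())  # H = 1.75 bits -> perplexity = 2**1.75 ~ 3.36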
/Day-10-Lasso-Ridge-Regression/Lasso_Ridge_Regression.py:
--------------------------------------------------------------------------------
1 | """
2 | Reference: https://github.com/eriklindernoren/ML-From-Scratch
3 | This GitHub repository implements high-quality code, similar to what you see in official libraries like sklearn.
4 | It is a great reference to kickstart your journey in ML programming.
5 | """
6 | import torch
7 | from sklearn.datasets import load_boston
8 | from itertools import combinations_with_replacement
9 | from sklearn.metrics import accuracy_score
10 | from sklearn.model_selection import train_test_split
11 | import seaborn as sb
12 | import matplotlib.pyplot as plt
13 |
14 | class LassoRegularization:
15 | def __init__(self, alpha):
16 | """
17 | :param alpha:
18 | * When 0, lasso regression turns into Linear Regression
19 | * As it increases towards infinity, it drives feature coefficients to exactly zero.
20 | * Try out different values to find the optimal one.
21 | """
22 | self.alpha = alpha
23 |
24 | def __call__(self, w):
25 | """
26 | :param w: Weight vector
27 | :return: Penalization value for MSE
28 | """
29 | return self.alpha * torch.norm(w, p=1)
30 |
31 | def grad(self, w):
32 | """
33 | :param w: weight vector
34 | :return: weight update based on sign value, it helps in removing coefficients from W vector
35 | torch.sign:
36 | a
37 | tensor([ 0.7000, -1.2000, 0.0000, 2.3000])
38 | torch.sign(a)
39 | tensor([ 1., -1., 0., 1.])
40 | """
41 | return self.alpha * torch.sign(w)
42 |
43 | class RidgeRegularization:
44 | def __init__(self, alpha):
45 | """
46 | :param alpha:
47 | * When 0, ridge regression turns into Linear Regression
48 | * As it increases towards infinity, it shrinks the feature coefficients towards (but not exactly to) zero.
49 | * Try out different values to find the optimal one.
50 | """
51 | self.alpha = alpha
52 |
53 | def __call__(self, w):
54 | """
55 | :param w: Weight vector
56 | :return: Penalization value for MSE
57 | """
58 | return self.alpha * 0.5 * torch.mm(w.T, w)
59 |
60 | def grad(self, w):
61 | """
62 | :param w: weight vector
63 | :return: weight update proportional to w; it helps in shrinking the coefficient values in the W vector
64 | """
65 | return self.alpha * w
66 |
67 | class Regression:
68 | def __init__(self, learning_rate, epochs, regression_type='lasso'):
69 | """
70 | :param learning_rate: constant step while updating weight
71 | :param epochs: Number of epochs the data is passed through the model
72 | Initializing the regularizer for Lasso or Ridge Regression.
73 | """
74 | self.lr = learning_rate
75 | self.epochs = epochs
76 | if regression_type == 'lasso':
77 | self.regularization = LassoRegularization(alpha=1.0)
78 | else:
79 | self.regularization = RidgeRegularization(alpha=2.0)
80 |
81 | def normalization(self, X):
82 | """
83 | :param X: Input tensor
84 | :return: Normalized input using l2 norm.
85 | """
86 | l2 = torch.norm(X, p=2, dim=-1)
87 | l2[l2 == 0] = 1
88 | return X / l2.unsqueeze(1)
89 |
90 | def polynomial_features(self, X, degree):
91 | """
92 | It creates polynomial features from existing set of features. For instance,
93 | X_1, X_2, X_3 are available features; polynomial features then takes combinations of
94 | these features to create new features such as X_1*X_2, X_1*X_3, X_2*X_3.
95 |
96 | combinations output: [(), (0,), (1,), (2,), (3,), (0, 0), (0, 1), (0, 2), (0, 3),
97 | (1, 1), (1, 2), (1, 3), (2, 2), (2, 3), (3, 3)]
98 | :param X: Input tensor (For Iris Dataset, (150, 4))
99 | :param degree: Polynomial degree of 2, i.e we'll have product of two feature vector at max.
100 | :return: Output tensor (After adding polynomial features, the number of features increases to 15)
101 | """
102 | n_samples, n_features = X.shape[0], X.shape[1]
103 | def index_combination():
104 | combinations = [combinations_with_replacement(range(n_features), i) for i in range(0, degree+1)]
105 | flat_combinations = [item for sublists in combinations for item in sublists]
106 | return flat_combinations
107 |
108 | combinations = index_combination()
109 | n_output_features = len(combinations)
110 | X_new = torch.empty((n_samples, n_output_features))
111 |
112 | for i, index_combs in enumerate(combinations):
113 | X_new[:, i] = torch.prod(X[:, index_combs], dim=1)
114 |
115 | X_new = X_new.type(torch.DoubleTensor)
116 | return X_new
117 |
118 | def weight_initialization(self, n_features):
119 | """
120 | :param n_features: Number of features in the data
121 | :return: creating weight vector using uniform distribution.
122 | """
123 | limit = 1 / torch.sqrt(torch.scalar_tensor(n_features))
124 | #self.w = torch.FloatTensor((n_features,)).uniform(-limit, limit)
125 | self.w = torch.distributions.uniform.Uniform(-limit, limit).sample((n_features, 1))
126 | self.w = self.w.type(torch.DoubleTensor)
127 |
128 | def fit(self, X, y):
129 | """
130 | :param X: Input tensor
131 | :param y: ground truth tensor
132 | :return: updated weight vector for prediction
133 | """
134 | self.training_error = {}
135 | self.weight_initialization(n_features=X.shape[1])
136 | for epoch in range(1, self.epochs+1):
137 | y_pred = torch.mm(X, self.w)
138 | mse = torch.mean(0.5 * (y - y_pred)**2 + self.regularization(self.w))
139 | self.training_error[epoch] = mse.item()
140 | grad_w = torch.mm(-(y - y_pred).T, X).T + self.regularization.grad(self.w)
141 | self.w -= self.lr * grad_w
142 |
143 |
144 | def predict(self, X):
145 | """
146 | :param X: input tensor
147 | :return: predicted output using learned weight vector
148 | """
149 | y_pred = torch.mm(X, self.w)
150 | return y_pred
151 |
152 | if __name__ == '__main__':
153 | boston = load_boston()
154 | torch.manual_seed(0)
155 | X = torch.tensor(boston.data, dtype=torch.double)
156 | y = torch.tensor(boston.target, dtype=torch.double).unsqueeze(1)
157 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
158 | regression = Regression(learning_rate=0.0001, epochs=3000, regression_type='lasso')
159 | regression.fit(regression.normalization(regression.polynomial_features(x_train, degree=1)), y_train)
160 | y_pred = regression.predict(regression.normalization(regression.polynomial_features(x_test, degree=1)))
161 | plt.figure(figsize=(6, 6))
162 | sb.scatterplot(list(regression.training_error.keys()), list(regression.training_error.values()))
163 | plt.show()
164 |
--------------------------------------------------------------------------------
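The two grad() methods above differ only in the penalty derivative: lasso adds alpha * sign(w), a
constant-magnitude push that can drive small weights exactly to zero, while ridge adds alpha * w, a push
proportional to the weight itself. A tiny comparison on an arbitrary weight vector:

import torch

w = torch.tensor([[0.7], [-1.2], [0.0], [2.3]])
alpha = 0.1

lasso_grad = alpha * torch.sign(w)   # same magnitude for every non-zero weight
ridge_grad = alpha * w               # scales with the weight
print(lasso_grad.flatten(), ridge_grad.flatten())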
/Day-11-Gaussian-Mixture-Model/gmm.py:
--------------------------------------------------------------------------------
1 | """
2 | Blog post GMM: https://brilliant.org/wiki/gaussian-mixture-model/
3 | """
4 | import torch
5 | import math
6 | from sklearn.datasets import load_iris
7 | from sklearn.metrics import accuracy_score
8 | from sklearn.model_selection import train_test_split
9 |
10 | class GMM:
11 | def __init__(self, k, max_epochs=100, tolerance=1e-8):
12 | """
13 | :param k: the number of clusters the algorithm will form.
14 | :param max_epochs: The number of iterations the algorithm will run for if it does
15 | not converge before that.
16 | :param tolerance: float
17 | If the difference of the results from one iteration to the next is
18 | smaller than this value we will say that the algorithm has converged.
19 | """
20 | self.k = k
21 | self.parameters = []
22 | self.max_epochs = max_epochs
23 | self.tolerance = tolerance
24 | self.responsibility = None
25 | self.responsibilities = []
26 | self.sample_assignments = None
27 |
28 | def normalization(self, X):
29 | """
30 | :param X: Input tensor
31 | :return: Normalized input using l2 norm.
32 | """
33 | l2 = torch.norm(X, p=2, dim=-1)
34 | l2[l2 == 0] = 1
35 | return X / l2.unsqueeze(1)
36 |
37 | def covariance_matrix(self, X):
38 | """
39 | :param X: Input tensor
40 | :return: covariance of the input tensor
41 | """
42 | centering_X = X - torch.mean(X, dim=0)
43 | cov = torch.mm(centering_X.T, centering_X) / (centering_X.shape[0] - 1)
44 | return cov
45 |
46 | def random_gaussian_initialization(self, X):
47 | """
48 | Since we are using the iris dataset, we know the number of classes is 3.
49 | We create three gaussian distribution representing each class with
50 | random sampling of data to find parameters like μ and 𝚺/N (covariance matrix)
51 | for each class
52 | :param X: input tensor
53 | :return: 3 randomly selected mean and covariance of X, each act as a separate cluster
54 | """
55 | n_samples = X.shape[0]
56 | self.prior = (1 / self.k) * torch.ones(self.k)
57 | for cls in range(self.k):
58 | parameter = {}
59 | parameter['mean'] = X[torch.randperm(n_samples)[:1]]
60 | parameter['cov'] = self.covariance_matrix(X)
61 | self.parameters.append(parameter)
62 |
63 | def multivariate_gaussian_distribution(self, X, parameters):
64 | """
65 | Checkout the equation from Multi-Dimensional Model from blog link posted above.
66 | We find the likelihood of each sample w.r.t to the parameters initialized above for each separate cluster.
67 | :param X: Input tensor
68 | :param parameters: mean, cov of the randomly initialized gaussian
69 | :return: Likelihood of each sample belonging to a cluster with random initialization of mean and cov.
70 | Since it is a multivariate problem we have covariance and not variance.
71 | """
72 | n_features = X.shape[1]
73 | mean = parameters['mean']
74 | cov = parameters['cov']
75 | determinant = torch.det(cov)
76 | likelihoods = torch.zeros(X.shape[0])
77 | for i, sample in enumerate(X):
78 | dim = torch.scalar_tensor(n_features, dtype=torch.float)
79 | coefficients = 1.0/ torch.sqrt(torch.pow((2.0 * math.pi), dim) * determinant)
80 | exponent = torch.exp( -0.5 * torch.mm(torch.mm((sample - mean) ,torch.pinverse(cov)) , (sample - mean).T))
81 | likelihoods[i] = coefficients * exponent
82 |
83 | return likelihoods
84 |
85 | def get_likelihood(self, X):
86 | """
87 | Previously, we initialized 3 different means and covariances in random_gaussian_initialization(). Now, around
88 | each of these means and covariances, we compute the likelihood of each sample using the multivariate gaussian distribution.
89 | :param X:
90 | :return: Storing the likelihood of each sample belonging to a cluster with random initialization of mean and cov.
91 | Since it is a multivariate problem we have covariance and not variance.
92 | """
93 | n_samples = X.shape[0]
94 | likelihoods_cls = torch.zeros((n_samples, self.k))
95 | for cls in range(self.k):
96 | likelihoods_cls[:, cls] = self.multivariate_gaussian_distribution(X, self.parameters[cls])
97 |
98 | return likelihoods_cls
99 |
100 | def expectation(self, X):
101 | """
102 |         The Expectation-Maximization algorithm is used to optimize the randomly initialized means and covariances.
103 |         The expectation step computes, for each sample in X, the probability (responsibility) of belonging to each cluster.
104 |         :param X: input tensor
105 |         :return: None. Stores the responsibilities and assigns each sample to the cluster with the highest probability.
106 | """
107 | weighted_likelihood = self.get_likelihood(X) * self.prior
108 | sum_likelihood = torch.sum(weighted_likelihood, dim=1).unsqueeze(1)
109 | # Determine responsibility as P(X|y)*P(y)/P(X)
110 | # responsibility stores each sample's probability score corresponding to each class
111 |         self.responsibility = weighted_likelihood / sum_likelihood
112 | # Assign samples to cluster that has largest probability
113 | self.sample_assignments = self.responsibility.argmax(dim=1)
114 | # Save value for convergence check
115 | self.responsibilities.append(torch.max(self.responsibility, dim=1))
116 |
117 | def maximization(self, X):
118 | """
119 |         Iterate through the clusters, updating each mean and covariance.
120 |         The updated mean and covariance are weighted by each sample's responsibility (probability score) for the cluster.
121 |         :param X: input tensor
122 |         :return: None. Updates the means, covariances and priors in place.
123 | """
124 | for i in range(self.k):
125 | resp = self.responsibility[:, i].unsqueeze(1)
126 | mean = torch.sum(resp * X, dim=0) / torch.sum(resp)
127 | covariance = torch.mm((X - mean).T, (X - mean) * resp) / resp.sum()
128 | self.parameters[i]['mean'], self.parameters[i]['cov'] = mean.unsqueeze(0), covariance
129 |
130 | n_samples = X.shape[0]
131 | self.prior = self.responsibility.sum(dim=0) / n_samples
132 |
133 | def convergence(self, X):
134 |         """Converged if || likelihood - last_likelihood || < tolerance."""
135 | if len(self.responsibilities) < 2:
136 | return False
137 | difference = torch.norm(self.responsibilities[-1].values - self.responsibilities[-2].values)
138 | return difference <= self.tolerance
139 |
140 | def predict(self, X):
141 | self.random_gaussian_initialization(X)
142 |
143 | for _ in range(self.max_epochs):
144 | self.expectation(X)
145 | self.maximization(X)
146 | 
147 |             if self.convergence(X):
148 |                 break
149 | 
150 |
151 | self.expectation(X)
152 | return self.sample_assignments
153 |
154 | if __name__ == '__main__':
155 | iris = load_iris()
156 | torch.manual_seed(0)
157 | X = torch.tensor(iris.data, dtype=torch.float)
158 | y = torch.tensor(iris.target)
159 | n_classes = len(torch.unique(y))
160 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
161 | gmm = GMM(k=n_classes, max_epochs=2000)
162 | y_pred = gmm.predict(x_train)
163 | print(f'Accuracy Score: {accuracy_score(y_train, y_pred)}')
164 |
165 |
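166 | # Note (a suggestion, not part of the original script): GMM returns cluster indices,
167 | # which are not aligned with the iris class labels, so accuracy_score on raw
168 | # sample_assignments can look low even when the clustering is good. A
169 | # label-permutation-invariant metric is a safer sanity check, e.g.:
170 | #
171 | #     from sklearn.metrics import adjusted_rand_score
172 | #     print(f'ARI: {adjusted_rand_score(y_train, y_pred)}')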
--------------------------------------------------------------------------------
/Day-03-Decision-Tree/DecisionTree.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.datasets import load_breast_cancer
3 | from sklearn.model_selection import train_test_split
4 | from sklearn.metrics import accuracy_score
5 |
6 |
7 | class Node:
8 | def __init__(self, gini, num_samples, num_samples_per_class, predicted_class):
9 | self.gini = gini
10 | self.num_samples = num_samples
11 | self.num_samples_per_class = num_samples_per_class
12 | self.predicted_class = predicted_class
13 | self.feature_index = 0
14 | self.threshold = 0
15 | self.left = None
16 | self.right = None
17 |
18 | class DecisionTree_CART:
19 | def __init__(self, max_depth=None):
20 | self.max_depth = max_depth
21 |
22 | def fit(self, X, y):
23 | """Build decision tree classifier
24 | :argument X: Input Tensor
25 | :argument y: ground truth Tensor
26 | :variable n_classes_: Number of Classes in target variable
27 | :variable n_features_: Number of features
28 | :variable tree_: Making decision tree based on X, y along with max_depth
29 | """
30 | self.n_classes_ = len(y.unique()) # classes are assumed to go from 0 to n-1
31 | self.n_features_ = X.shape[1]
32 | self.tree_ = self._grow_tree(X, y)
33 |
34 | def _gini(self, y):
35 | """Compute Gini impurity of a non-empty node.
36 | Gini impurity is defined as Σ p(1-p) over all classes, with p the frequency of a
37 | class within the node. Since Σ p = 1, this is equivalent to 1 - Σ p^2.
38 |
39 | :var m: Sample Size
40 | """
41 | m = y.shape[0]
42 |
43 |         return 1.0 - sum((torch.sum(y == c).item() / m) ** 2 for c in range(self.n_classes_))
44 |
45 | def _best_split(self, X, y):
46 | """Find the best split for a node.
47 | "Best" means that the average impurity of the two children, weighted by their
48 | population, is the smallest possible. Additionally it must be less than the
49 | impurity of the current node.
50 | To find the best split, we loop through all the features, and consider all the
51 | midpoints between adjacent training samples as possible thresholds. We compute
52 | the Gini impurity of the split generated by that particular feature/threshold
53 | pair, and return the pair with smallest impurity.
54 | Returns:
55 | best_idx: Index of the feature for best split, or None if no split is found.
56 | best_thr: Threshold to use for the split, or None if no split is found.
57 | """
58 | # Need at least two elements to split a node.
59 | m = y.shape[0]
60 | if m <= 1:
61 | return None, None
62 |
63 | # Count of each class in the current node.
64 | num_parent = [torch.sum(y == c).item() for c in range(self.n_classes_)]
65 | print(f'num_parent {num_parent}')
66 |
67 | # Gini of current node.
68 |         best_gini = 1.0 - sum((n / m) ** 2 for n in num_parent)
69 | best_idx, best_thr = None, None
70 |
71 | # Loop through all features.
72 | for idx in range(self.n_features_):
73 | # Sort data along selected feature.
74 | thresholds, classes = zip(*sorted(zip(X[:, idx], y)))
75 |
76 | # We could actually split the node according to each feature/threshold pair
77 | # and count the resulting population for each class in the children, but
78 | # instead we compute them in an iterative fashion, making this for loop
79 | # linear rather than quadratic.
80 | num_left = [0] * self.n_classes_
81 | num_right = num_parent.copy()
82 | for i in range(1, m): # possible split positions
83 | c = classes[i - 1]
84 | num_left[c] += 1
85 | num_right[c] -= 1
86 | gini_left = 1.0 - sum(
87 | (num_left[x] / i) ** 2 for x in range(self.n_classes_)
88 | )
89 | gini_right = 1.0 - sum(
90 |                     (num_right[x] / (m - i)) ** 2 for x in range(self.n_classes_)
91 | )
92 |
93 | # The Gini impurity of a split is the weighted average of the Gini
94 | # impurity of the children.
95 | gini = (i * gini_left + (m - i) * gini_right) / m
96 |
97 | # The following condition is to make sure we don't try to split two
98 | # points with identical values for that feature, as it is impossible
99 | # (both have to end up on the same side of a split).
100 | if thresholds[i] == thresholds[i - 1]:
101 | continue
102 |
103 | if gini < best_gini:
104 | best_gini = gini
105 | best_idx = idx
106 | best_thr = (thresholds[i] + thresholds[i - 1]) / 2 # midpoint
107 |
108 | print("Best Index and Threshold",best_idx, best_thr)
109 |
110 | return best_idx, best_thr
111 |
112 | def _grow_tree(self, X, y, depth=0):
113 | """Build a decision tree by recursively finding the best split."""
114 | # Population for each class in current node. The predicted class is the one with
115 | # largest population.
116 | num_samples_per_class = torch.tensor([torch.sum(y == i) for i in range(self.n_classes_)])
117 | predicted_class = torch.argmax(num_samples_per_class)
118 | node = Node(
119 | gini=self._gini(y),
120 | num_samples=y.shape[0],
121 | num_samples_per_class=num_samples_per_class,
122 | predicted_class=predicted_class,
123 | )
124 |
125 | # Split recursively until maximum depth is reached.
126 |         if self.max_depth is None or depth < self.max_depth:
127 | idx, thr = self._best_split(X, y)
128 | if idx is not None:
129 | indices_left = X[:, idx] < thr
130 | X_left, y_left = X[indices_left], y[indices_left]
131 | X_right, y_right = X[~indices_left], y[~indices_left]
132 | node.feature_index = idx
133 | node.threshold = thr
134 | node.left = self._grow_tree(X_left, y_left, depth + 1)
135 | node.right = self._grow_tree(X_right, y_right, depth + 1)
136 | return node
137 |
138 | def predict(self, X):
139 | return [self._predict(inputs) for inputs in X]
140 |
141 | def _predict(self, inputs):
142 | """Predict class for a single sample."""
143 | node = self.tree_
144 | while node.left:
145 | if inputs[node.feature_index] < node.threshold:
146 | node = node.left
147 | else:
148 | node = node.right
149 | return node.predicted_class
150 |
151 | if __name__ == "__main__":
152 | """
153 | :variable X: Input tensor with 30 features
154 | :target y: Output tensor with 2 classes
155 |
156 | * Converting Numpy array into torch tensor.
157 | * Creating DecisionTree Object with max_depth 5.
158 | * Fit and predict with DecisionTree Object.
159 | """
160 | breast_cancer = load_breast_cancer()
161 | X = breast_cancer['data']
162 | y = breast_cancer['target']
163 | X = torch.tensor(X)
164 | y = torch.tensor(y)
165 |
166 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
167 | classifier = DecisionTree_CART(max_depth=5)
168 | classifier.fit(x_train, y_train)
169 | y_predict = classifier.predict(x_test)
170 |
171 | print(f'Accuracy: {accuracy_score(y_test, y_predict)}')
172 |
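173 | # Worked example (illustration only, not part of the original file): a node holding
174 | # 10 samples with class counts [2, 8] has class frequencies p = [0.2, 0.8], so
175 | #
176 | #     Gini = 1 - (0.2 ** 2 + 0.8 ** 2) = 1 - 0.68 = 0.32
177 | #
178 | # which is what _gini returns once the class counts are divided with true division (/)
179 | # rather than floor division (//).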
--------------------------------------------------------------------------------
/Day-16-Bayesian-Regression/BayesianRegression.py:
--------------------------------------------------------------------------------
1 | """
2 | Checkout the below url to understand, how Bayesian regression differs from Linear Regression
3 | https://towardsdatascience.com/introduction-to-bayesian-linear-regression-e66e60791ea7
4 | https://dzone.com/articles/bayesian-learning-for-machine-learning-part-ii-lin
5 | """
6 | import pandas as pd
7 | import torch
8 | from scipy.stats import chi2, multivariate_normal
9 | from sklearn.model_selection import train_test_split
10 | from itertools import combinations_with_replacement
11 | import matplotlib.pyplot as plt
12 |
13 | def mean_squared_error(y_true, y_pred):
14 | """ Returns the mean squared error between y_true and y_pred """
15 | mse = torch.mean(torch.pow(y_true - y_pred, 2))
16 | return mse
17 |
18 | def polynomial_features(X, degree):
19 | """
20 |     Creates polynomial features from the existing set of features. For instance,
21 |     if X_1, X_2, X_3 are the available features, polynomial features are formed from combinations
22 |     of these features, e.g. X_1*X_2, X_1*X_3, X_2*X_3.
23 | 
24 |     For degree 2:
25 |     combinations output: [(), (0,), (1,), (2,), (3,), (0, 0), (0, 1), (0, 2), (0, 3),
26 |     (1, 1), (1, 2), (1, 3), (2, 2), (2, 3), (3, 3)]
27 |     :param X: Input tensor (e.g. for the Iris dataset, shape (150, 4))
28 |     :param degree: Polynomial degree, e.g. 2 means a product of at most two feature vectors.
29 |     :return: Output tensor (for the 4-feature example above, the number of features increases to 15)
30 | """
31 | n_samples, n_features = X.shape[0], X.shape[1]
32 | def index_combination():
33 | combinations = [combinations_with_replacement(range(n_features), i) for i in range(0, degree+1)]
34 | flat_combinations = [item for sublists in combinations for item in sublists]
35 | return flat_combinations
36 |
37 | combinations = index_combination()
38 | n_output_features = len(combinations)
39 | X_new = torch.empty((n_samples, n_output_features))
40 |
41 | for i, index_combs in enumerate(combinations):
42 | X_new[:, i] = torch.prod(X[:, index_combs], dim=1)
43 |
44 | X_new = X_new.type(torch.DoubleTensor)
45 | return X_new
46 |
47 |
48 | class BayesianRegression:
49 | def __init__(self, n_draws, mu_0, omega_0, nu_0, sigma_sq_0, polynomial_degree=0, credible_interval=95):
50 | """
51 |         Bayesian regression model. If polynomial_degree is specified, the features are
52 |         transformed with a polynomial basis function, which allows for polynomial
53 |         regression. Assumes a Normal prior and likelihood for the weights and a scaled inverse
54 | chi-squared prior and likelihood for the variance of the weights.
55 |
56 | :param n_draws: The number of simulated draws from the posterior of the parameters.
57 | :param mu_0: The mean values of the prior Normal distribution of the parameters.
58 | :param omega_0: The precision matrix of the prior Normal distribution of the parameters.
59 | :param nu_0: The degrees of freedom of the prior scaled inverse chi squared distribution.
60 | :param sigma_sq_0: The scale parameter of the prior scaled inverse chi squared distribution.
61 | :param polynomial_degree: The polynomial degree that the features should be transformed to. Allows
62 | for polynomial regression.
63 |         :param credible_interval: The credible interval (ETI in this impl.), given as a fraction: 0.95 => 95% credible interval
64 |         of the posterior of the parameters.
65 | """
66 | self.n_draws = n_draws
67 | self.polynomial_degree = polynomial_degree
68 | self.credible_interval = credible_interval
69 |
70 | # Prior parameters
71 | self.mu_0 = mu_0
72 | self.omega_0 = omega_0
73 | self.nu_0 = nu_0
74 | self.sigma_sq_0 = sigma_sq_0
75 |
76 | def scaled_inverse_chi_square(self, n, df, scale):
77 | """
78 | Allows for simulation from the scaled inverse chi squared
79 | distribution. Assumes the variance is distributed according to
80 | this distribution.
81 |         :param n: number of draws to simulate
82 |         :param df: degrees of freedom
83 |         :param scale: scale parameter of the distribution
84 |         :return: simulated draws of the variance sigma^2
85 | """
86 | X = chi2.rvs(size=n, df=df)
87 | sigma_sq = df * scale / X
88 | return sigma_sq
89 |
90 | def fit(self, X, y):
91 | # For polynomial transformation
92 | if self.polynomial_degree:
93 | X = polynomial_features(X, degree=self.polynomial_degree)
94 |
95 | n_samples, n_features = X.shape[0], X.shape[1]
96 | X_X_T = torch.mm(X.T, X)
97 |
98 |         # Least squares approximation of beta
99 | beta_hat = torch.mm(torch.mm(torch.pinverse(X_X_T), X.T), y)
100 |
101 | # The posterior parameters can be determined analytically since we assume
102 | # conjugate priors for the likelihoods.
103 | # Normal prior / likelihood => Normal posterior
104 | mu_n = torch.mm(torch.pinverse(X_X_T + self.omega_0), torch.mm(X_X_T, beta_hat) + torch.mm(self.omega_0, self.mu_0.unsqueeze(1)))
105 | omega_n = X_X_T + self.omega_0
106 | nu_n = self.nu_0 + n_samples
107 |
108 | # Scaled inverse chi-squared prior / likelihood => Scaled inverse chi-squared posterior
109 | sigma_sq_n = (1.0/nu_n) * (self.nu_0 * self.sigma_sq_0 + torch.mm(y.T, y) + torch.mm(torch.mm(self.mu_0.unsqueeze(1).T, self.omega_0), self.mu_0.unsqueeze(1)) - torch.mm(mu_n.T, torch.mm(omega_n, mu_n)))
110 |
111 | # Simulate parameter values for n_draws
112 | beta_draws = torch.empty((self.n_draws, n_features))
113 | for i in range(self.n_draws):
114 | sigma_sq = self.scaled_inverse_chi_square(n=1, df=nu_n, scale=sigma_sq_n)
115 | beta = multivariate_normal.rvs(size=1, mean=mu_n[:,0], cov=sigma_sq * torch.pinverse(omega_n))
116 |             beta_draws[i, :] = torch.tensor(beta, dtype=torch.float)
117 |
118 | # Select the mean of the simulated variables as the ones used to make predictions
119 | self.w = torch.mean(beta_draws, dim=0, dtype=torch.double)
120 |
121 | # Lower and upper boundary of the credible interval
122 | l_eti = 0.50 - self.credible_interval / 2
123 | u_eti = 0.50 + self.credible_interval / 2
124 | self.eti = torch.tensor([[torch.quantile(beta_draws[:, i], q=l_eti), torch.quantile(beta_draws[:, i], q=u_eti)] for i in range(n_features)], dtype=torch.double)
125 |
126 | def predict(self, X, eti=False):
127 | if self.polynomial_degree:
128 | X = polynomial_features(X, degree=self.polynomial_degree)
129 | y_pred = torch.mm(X, self.w.unsqueeze(1))
130 |         # If the lower and upper boundaries of the credible
131 |         # (equal-tail) interval should be returned
132 | if eti:
133 | lower_w = self.eti[:, 0]
134 | upper_w = self.eti[:, 1]
135 |
136 | y_lower_prediction = torch.mm(X, lower_w.unsqueeze(1))
137 | y_upper_prediction = torch.mm(X, upper_w.unsqueeze(1))
138 |
139 | return y_pred, y_lower_prediction, y_upper_prediction
140 |
141 | return y_pred
142 |
143 | if __name__ == '__main__':
144 | data = pd.read_csv('temp.txt', sep="\t")
145 | X = torch.tensor(data["time"].values).unsqueeze(0).T
146 | y = torch.tensor(data["temp"].values).unsqueeze(0).T
147 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
148 | n_samples, n_features = X.shape[0], X.shape[1]
149 | mu_0 = torch.zeros(n_features, dtype=torch.double)
150 | omega_0 = torch.diag(torch.tensor([0.0001] * n_features, dtype=torch.double))
151 | nu_0 = 1
152 | sigma_sq_0 = 100
153 | credible_interval = 0.40
154 | classifier = BayesianRegression(n_draws=2000,
155 | polynomial_degree=4,
156 | mu_0=mu_0,
157 | omega_0=omega_0,
158 | nu_0=nu_0,
159 | sigma_sq_0=sigma_sq_0,
160 | credible_interval=credible_interval)
161 | classifier.fit(x_train, y_train)
162 | y_pred = classifier.predict(x_test)
163 | mse = mean_squared_error(y_test, y_pred)
164 | y_pred_, y_lower_, y_upper_ = classifier.predict(X=X, eti=True)
165 | print("Mean Squared Error:", mse)
166 | #
167 | # Color map
168 | cmap = plt.get_cmap('viridis')
169 |
170 | # Plot the results
171 | m1 = plt.scatter(366 * x_train, y_train, color=cmap(0.9), s=10)
172 | m2 = plt.scatter(366 * x_test, y_test, color=cmap(0.5), s=10)
173 | p1 = plt.plot(366 * X, y_pred_, color="black", linewidth=2, label="Prediction")
174 |     p2 = plt.plot(366 * X, y_lower_, color="gray", linewidth=2, label="{0:.0f}% Credible Interval".format(100 * credible_interval))
175 | p3 = plt.plot(366 * X, y_upper_, color="gray", linewidth=2)
176 | plt.axis((0, 366, -20, 25))
177 | plt.suptitle("Bayesian Regression")
178 | plt.title("MSE: %.2f" % mse, fontsize=10)
179 | plt.xlabel('Day')
180 |     plt.ylabel('Temperature in Celsius')
181 |     # plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
182 |     plt.legend(loc='lower right')
183 | 
184 |
185 | plt.show()
186 |
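187 | # For reference (restating what fit() computes above, not new behaviour): with conjugate
188 | # priors the posterior parameters have closed forms,
189 | #
190 | #     mu_n      = inv(X^T X + omega_0) (X^T X beta_hat + omega_0 mu_0)
191 | #     omega_n   = X^T X + omega_0
192 | #     nu_n      = nu_0 + n_samples
193 | #     sigma^2_n = (1 / nu_n) * (nu_0 * sigma^2_0 + y^T y + mu_0^T omega_0 mu_0 - mu_n^T omega_n mu_n)
194 | #
195 | # The script also assumes 'temp.txt' is a tab-separated file with "time" and "temp" columns;
196 | # any file in that shape can be substituted.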
--------------------------------------------------------------------------------