├── mlwithpytorch.png ├── Day-30-Loss-Functions └── loss.py ├── Day-26-Normalization └── normalization.py ├── Day-09-PCA └── pca.py ├── Day-01-Linear-Regression └── LinearRegression.py ├── Day-17-K-Medoids ├── utility.py └── PAM.py ├── Day-23-Gradient-Descent └── gd.py ├── Day-06-KNN └── KNN.py ├── Day-19-ElasticNet └── ElasticNetRegression.py ├── Day-12-LDA ├── lda.py └── NaiveBayes.py ├── README.md ├── Day-21-LatentDirichlet └── LDA_TopicModeling.py ├── Day-28-Activations ├── activation.py └── MLP.py ├── Day-15-MultiClassLDA └── multi-class-LDA.py ├── Day-02-Logistic-Regression └── LogisticRegression.py ├── Day-22-AffinityPropagation └── AffinityPropagation.py ├── Day-05-Naive-Bayes └── NaiveBayes.py ├── Day-29-Optimizers └── optimizer.py ├── Day-07-SVM └── svm.py ├── Day-25-RANSAC └── ransac.py ├── Day-24-Regularization └── regularization.py ├── Day-08-tf-idf └── tfidf.py ├── Day-14-DBSCAN └── dbscan.py ├── Day-27-MLP └── mlp.py ├── Day-13-Adaboost └── adaboost.py ├── Day-20-SpectralClustering └── spectralClustering.py ├── Day-04-KMeans-Clustering └── KMeans.py ├── Day-18-TSNE └── tsne.py ├── Day-10-Lasso-Ridge-Regression └── Lasso_Ridge_Regression.py ├── Day-11-Gaussian-Mixture-Model └── gmm.py ├── Day-03-Decision-Tree └── DecisionTree.py └── Day-16-Bayesian-Regression └── BayesianRegression.py /mlwithpytorch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mayurji/MLWithPytorch/HEAD/mlwithpytorch.png -------------------------------------------------------------------------------- /Day-30-Loss-Functions/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class MeanSquareLoss: 4 | def __init__(self): pass 5 | 6 | def loss(self, y, y_pred): 7 | return torch.sum(torch.power((y - y_pred), 2),dim=1) / y.shape[0] 8 | 9 | def gradient(self, y, y_pred): 10 | return -(y - y_pred) 11 | 12 | class CrossEntropy: 13 | def __init__(self): pass 14 | 15 | def loss(self, y, p): 16 | # Avoid division by zero 17 | p = np.clip(p, 1e-15, 1 - 1e-15) 18 | return - y * torch.log(p) - (1 - y) * torch.log(1 - p) 19 | 20 | def gradient(self, y, p): 21 | # Avoid division by zero 22 | p = torch.clip(p, 1e-15, 1 - 1e-15) 23 | return - (y / p) + (1 - y) / (1 - p) 24 | 25 | class MeanAbsoluteLoss: 26 | def __init__(self): pass 27 | 28 | def loss(self, y, y_pred): 29 | return torch.sum(torch.abs(y - y_pred), dim=1) / y.shape[0] 30 | 31 | def gradient(self, y, y_pred): 32 | return -(y - y_pred) 33 | 34 | class HuberLoss: 35 | def __init__(self):pass 36 | 37 | def loss(self, y, y_pred, delta): 38 | if torch.abs(y - y_pred) <=delta: 39 | return 0.5 * torch.pow(y - y_pred, 2) 40 | else: 41 | return (delta * torch.abs(y - y_pred)) - (0.5 * torch.pow(delta, 2)) 42 | 43 | class HingeLoss: 44 | def __init__(self): 45 | pass 46 | 47 | def loss(self, y, y_pred): 48 | return torch.max(0, (1-y) * y_pred).values 49 | 50 | class KLDivergence: 51 | def __init__(self): 52 | pass 53 | 54 | def loss(self, y, y_pred): 55 | return torch.sum(y_pred * torch.log((y_pred / y))) 56 | -------------------------------------------------------------------------------- /Day-26-Normalization/normalization.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from sklearn.neighbors import KNeighborsClassifier 3 | from sklearn.datasets import load_iris 4 | from sklearn.metrics import accuracy_score 5 | class Normalization: 6 | def __init__(self, X): 7 | self.X = X 8 | 
9 | def z_score(self): 10 | mean = torch.mean(self.X, dim=0) 11 | return self.X.subtract(mean)/ torch.std(self.X, dim=0) 12 | 13 | def min_max(self): 14 | min = torch.min(self.X, dim=0) 15 | max = torch.max(self.X, dim=0) 16 | return self.X.subtract(min.values) / (max.values - min.values) 17 | 18 | def log_scaling(self): 19 | return torch.log(self.X) 20 | 21 | def clipping(self, max, min): 22 | if self. X > max: 23 | mask = self. X > max 24 | self.X = self.X * mask 25 | 26 | if self. X < min: 27 | mask = self. X < min 28 | self.X = self.X * mask 29 | 30 | return self.X 31 | 32 | if __name__ == '__main__': 33 | data = load_iris() 34 | X = torch.tensor(data.data) 35 | y = torch.tensor(data.target).unsqueeze(1) 36 | cls = KNeighborsClassifier() 37 | normalizer = Normalization(X) 38 | X_transform = normalizer.z_score() 39 | cls.fit(X, y) 40 | y_pred = cls.predict(X) 41 | print('Without Normalization',accuracy_score(y, y_pred)) 42 | cls.fit(X_transform, y) 43 | y_pred = cls.predict(X_transform) 44 | print('Z-Score Normalization' ,accuracy_score(y, y_pred)) 45 | X_transform = normalizer.min_max() 46 | cls.fit(X_transform, y) 47 | y_pred = cls.predict(X_transform) 48 | print('Min-Max Normalization' ,accuracy_score(y, y_pred)) 49 | X_transform = normalizer.log_scaling() 50 | cls.fit(X_transform, y) 51 | y_pred = cls.predict(X_transform) 52 | print('Log Scaling', accuracy_score(y, y_pred)) 53 | -------------------------------------------------------------------------------- /Day-09-PCA/pca.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from sklearn.datasets import load_iris 3 | import seaborn as sb 4 | import matplotlib.pyplot as plt 5 | 6 | class pca: 7 | def __init__(self, n_components): 8 | """ 9 | :param n_components: Number of principal components the data should be reduced too. 10 | """ 11 | self.components = n_components 12 | 13 | def fit_transform(self, X): 14 | """ 15 | * Centering our inputs with mean 16 | * Finding covariance matrix using centered tensor 17 | * Finding eigen value and eigen vector using torch.eig() 18 | * Sorting eigen values in descending order and finding index of high eigen values 19 | * Using sorted index, get the eigen vectors 20 | * Tranforming the Input vectors with n columns into PCA components with reduced dimension 21 | :param X: Input tensor with n columns. 
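        (Illustrative usage, based on the __main__ block below: pca(n_components=2).fit_transform(X) on the
        (150, 4) iris tensor returns a (150, 2) tensor. Note that torch.eig is deprecated in newer PyTorch
        releases; for this symmetric covariance matrix, torch.linalg.eigh(covariance_matrix) is the closest
        replacement and returns real eigenvalues in ascending order.)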
22 | :return: Output tensor with reduced principal components 23 | """ 24 | centering_X = X - torch.mean(X, dim=0) 25 | covariance_matrix = torch.mm(centering_X.T, centering_X)/(centering_X.shape[0] - 1) 26 | eigen_values, eigen_vectors = torch.eig(covariance_matrix, eigenvectors=True) 27 | eigen_sorted_index = torch.argsort(eigen_values[:,0],descending=True) 28 | eigen_vectors_sorted = eigen_vectors[:,eigen_sorted_index] 29 | component_vector = eigen_vectors_sorted[:,0:self.components] 30 | transformed = torch.mm(component_vector.T, centering_X.T).T 31 | return transformed 32 | 33 | if __name__ == '__main__': 34 | data = load_iris() 35 | X = torch.tensor(data.data,dtype=torch.double) 36 | y = torch.tensor(data.target) 37 | pca = pca(n_components=2) 38 | pca_vector = pca.fit_transform(X) 39 | plt.figure(figsize=(6, 6)) 40 | sb.scatterplot(pca_vector[:, 0], pca_vector[:, 1], hue=y, s=60, palette='icefire') 41 | plt.show() 42 | -------------------------------------------------------------------------------- /Day-01-Linear-Regression/LinearRegression.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class LinearRegression: 4 | 5 | def __init__(self): 6 | """ 7 | :desc lr: Learning Rate 8 | :desc iteration: Number of iterations over complete data set 9 | """ 10 | 11 | self.lr = 0.01 12 | self.iterations = 1000 13 | 14 | def y_pred(self, X, w): 15 | """ 16 | :desc w: weight tensor 17 | :desc X: input tensor 18 | """ 19 | return torch.mm(torch.transpose(w, 0, 1), X) 20 | 21 | def loss(self, ypred, y): 22 | """ 23 | :desc c: cost function - to measure the loss between estimated vs ground truth 24 | """ 25 | l = 1 / self.m * torch.sum(torch.pow(ypred - y, 2)) 26 | return l 27 | 28 | def gradient_descent(self, w, X, y, ypred): 29 | """ 30 | :desc dCdW: derivative of cost function 31 | :desc w_update: change in weight tensor after each iteration 32 | """ 33 | dCdW = 2 / self.m * torch.mm(X, torch.transpose(ypred - y, 0, 1)) 34 | w_update = w - self.lr * dCdW 35 | return w_update 36 | 37 | def run(self, X, y): 38 | """ 39 | :type y: tensor object 40 | :type X: tensor object 41 | """ 42 | bias = torch.ones((1, X.shape[1])) 43 | X = torch.cat((bias, X), dim=0) 44 | self.m = X.shape[1] 45 | self.n = X.shape[0] 46 | w = torch.zeros((self.n, 1)) 47 | 48 | for iteration in range(1, self.iterations + 1): 49 | ypred = self.y_pred(X, w) 50 | cost = self.loss(ypred, y) 51 | 52 | if iteration % 100 == 0: 53 | print(f'Loss at iteration {iteration} is {cost}') 54 | w = self.gradient_descent(w, X, y, ypred) 55 | 56 | return w 57 | 58 | 59 | if __name__ == '__main__': 60 | """ 61 | :desc X: random initialization of input tensor 62 | :desc y: random initialization of output tensor 63 | """ 64 | X = torch.rand(1, 500) 65 | y = 2 * X + 3 + torch.randn(1, 500) * 0.1 66 | regression = LinearRegression() 67 | w = regression.run(X, y) 68 | -------------------------------------------------------------------------------- /Day-17-K-Medoids/utility.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from sklearn.metrics import accuracy_score 3 | import numpy as np 4 | 5 | class SquareLoss: 6 | def __init__(self): 7 | pass 8 | 9 | def loss(self, y, y_pred): 10 | return 0.5 * torch.pow((y - y_pred), 2) 11 | 12 | def gradient(self, y, y_pred): 13 | return -(y - y_pred) 14 | 15 | class CrossEntropy: 16 | def __init__(self): 17 | pass 18 | 19 | def loss(self, y, p): 20 | p = torch.clip(p, 1e-15, 1 - 1e-15) 21 | return 
- y * torch.log(p) - (1 - y) * torch.log(1 - p) 22 | 23 | def accuracy(self, y, p): 24 | return accuracy_score(torch.argmax(y, dim=1), torch.argmax(p, dim=1)) 25 | 26 | def gradient(self, y, p): 27 | p = torch.clip(p, 1e-15, 1 - 1e-15) 28 | return -(y/p) + (1-y) / (1-p) 29 | 30 | def euclidean_distance(x1, x2): 31 | """ 32 | :param x1: input tensor 33 | :param x2: input tensor 34 | :return: distance between tensors 35 | """ 36 | 37 | return torch.cdist(x1.unsqueeze(0), x2.unsqueeze(0)) 38 | 39 | def to_categorical(X, n_columns=None): 40 | if not n_columns: 41 | n_columns = torch.amax(X) + 1 42 | one_hot = torch.zeros((X.shape[0], n_columns)) 43 | one_hot[torch.arange(X.shape[0])] = 1 44 | return one_hot 45 | 46 | def mean_squared_error(y_true, y_pred): 47 | mse = torch.mean(torch.pow(y_true - y_pred, 2)) 48 | return mse 49 | 50 | def divide_on_feature(X, feature_i, threshold): 51 | 52 | split_func = None 53 | if isinstance(threshold, int) or isinstance(threshold, float): 54 | split_func = lambda sample: sample[feature_i] >= threshold 55 | else: 56 | split_func = lambda sample: sample[feature_i] == threshold 57 | 58 | 59 | X_1 = torch.tensor([sample.numpy() for sample in X if split_func(sample)]) 60 | X_2 = torch.tensor([sample.numpy() for sample in X if not split_func(sample)]) 61 | 62 | return np.array([X_1.numpy(), X_2.numpy()], dtype='object') 63 | 64 | def calculate_variance(X): 65 | mean = torch.ones(X.shape) * torch.mean(X, dim=0) 66 | n_samples = X.shape[0] 67 | variance = (1/ n_samples) * torch.diag(torch.mm((X-mean).T, (X-mean))) 68 | return variance 69 | -------------------------------------------------------------------------------- /Day-23-Gradient-Descent/gd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from sklearn.datasets import load_boston 4 | 5 | class GradientDescent: 6 | def __init__(self, learning_rate=0.01, max_iterations=100): 7 | self.lr = learning_rate 8 | self.max_iterations = max_iterations 9 | 10 | def normalization(self, X): 11 | """ 12 | :param X: Input tensor 13 | :return: Normalized input using l2 norm. 
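        For example, a row [3., 4.] has an L2 norm of 5.0 and is scaled to [0.6, 0.8], so every row of the
        returned tensor has unit length (rows whose norm is zero are left unchanged).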
14 | """ 15 | l2 = torch.norm(X, p=2, dim=-1) 16 | l2[l2 == 0] = 1 17 | return X / l2.unsqueeze(1) 18 | 19 | def compute_error(self, b, m, X, y): 20 | total_error = 0 21 | for i in range(0, X.shape[0]): 22 | total_error += (y - (torch.mm(m , X.T)) + b) ** 2 23 | return total_error / float(X.shape[0]) 24 | 25 | def step(self, b_curr, m_curr, X, y, learning_rate): 26 | b_gradient = 0 27 | m_gradient = 0 28 | N = float(X.shape[0]) 29 | for i in range(X.shape[0]): 30 | b_gradient += -(2/N) * torch.sum(y - (torch.mm(X, m_curr.T) + b_curr), dim=0) 31 | m_gradient += -(2/N) * torch.sum(torch.mm(X.T, (y - (torch.mm(X, m_curr.T) + b_curr))), dim=0) 32 | 33 | new_b = b_curr - (learning_rate * b_gradient) 34 | new_m = m_curr - (learning_rate * m_gradient) 35 | return [new_b, new_m] 36 | 37 | def gradient_descent(self, X, y, start_b, start_m): 38 | b = start_b 39 | m = start_m 40 | for i in range(self.max_iterations): 41 | b, m = self.step(b_curr=b, m_curr=m, X=X, y=y, learning_rate=self.lr) 42 | 43 | return b, m 44 | 45 | if __name__ == '__main__': 46 | data = load_boston() 47 | X = torch.tensor(data.data) 48 | y = torch.tensor(data.target).unsqueeze(1) 49 | initial_b = 0.0 50 | initial_m = torch.zeros((X.shape[1], 1), dtype=torch.double).T 51 | nn.init.normal(initial_m) 52 | gd = GradientDescent(learning_rate=0.0001,max_iterations=100) 53 | gd.compute_error(X=gd.normalization(X), y=y, b=initial_b, m=initial_m) 54 | bias, slope = gd.gradient_descent(gd.normalization(X), y, start_b=initial_b, start_m=initial_m) 55 | X = gd.normalization(X) 56 | print('y: ', y[0].item()) 57 | print('y_pred: ', (torch.mm(slope, X[0].unsqueeze(0).T)+bias).item()) 58 | 59 | -------------------------------------------------------------------------------- /Day-06-KNN/KNN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from scipy.stats import mode 3 | from sklearn.datasets import load_iris 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.metrics import accuracy_score 6 | 7 | class KNN: 8 | def __init__(self, k, X): 9 | """ 10 | :param k: Number of Neighbors 11 | """ 12 | self.k = k 13 | 14 | def distance(self, point_1, point_2, default='euclidean', p=2): 15 | if default == 'euclidean': 16 | return torch.norm(point_1 - point_2, 2, 0) 17 | elif default == 'manhattan': 18 | return torch.sum(torch.abs(point_1 - point_2)) 19 | elif default == "minkowski": 20 | return torch.pow(torch.sum(torch.abs(point_1 - point_2)**p), 1/p) 21 | else: 22 | raise ValueError("Unknown similarity distance type") 23 | 24 | def fit_predict(self, X, y, item): 25 | """ 26 | * Iterate through each datapoints (item/y_test) that needs to be classified 27 | * Find distance between all train data points and each datapoint (item/y_test) 28 | using euclidean distance 29 | * Sort the distance using argsort, it gives indices of the y_test 30 | * Find the majority label whose distance closest to each datapoint of y_test. 
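        * For example, with k = 3, if the three nearest training points carry the labels [0, 2, 0], the
          majority vote (computed here with scipy.stats.mode) predicts class 0.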
31 | 32 | 33 | :param X: Input tensor 34 | :param y: Ground truth label 35 | :param item: tensors to be classified 36 | :return: predicted labels 37 | """ 38 | y_predict = [] 39 | for i in item: 40 | point_distances = [] 41 | for ipt in range(X.shape[0]): 42 | distances = self.distance(X[ipt, :], i) 43 | point_distances.append(distances) 44 | 45 | point_distances = torch.tensor(point_distances) 46 | k_neighbors = torch.argsort(point_distances)[:self.k] 47 | y_label = y[k_neighbors] 48 | major_class = mode(y_label) 49 | major_class = major_class.mode[0] 50 | y_predict.append(major_class) 51 | 52 | return torch.tensor(y_predict) 53 | 54 | if __name__ == '__main__': 55 | iris = load_iris() 56 | X = torch.tensor(iris.data) 57 | y = torch.tensor(iris.target) 58 | torch.manual_seed(0) 59 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2) 60 | knn = KNN(k=5, X=x_train) 61 | y_pred = knn.fit_predict(x_train, y_train, x_test) 62 | print(f'Accuracy: {accuracy_score(y_test, y_pred)}') 63 | -------------------------------------------------------------------------------- /Day-19-ElasticNet/ElasticNetRegression.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from sklearn.datasets import load_boston 3 | from sklearn.model_selection import train_test_split 4 | 5 | class ElasticNetRegression: 6 | def __init__(self, learning_rate, max_iterations, l1_penality, l2_penality): 7 | self.lr = learning_rate 8 | self.max_iterations = max_iterations 9 | self.l1_penality = l1_penality 10 | self.l2_penality = l2_penality 11 | 12 | def normalization(self, X): 13 | """ 14 | :param X: Input tensor 15 | :return: Normalized input using l2 norm. 16 | """ 17 | l2 = torch.norm(X, p=2, dim=-1) 18 | l2[l2 == 0] = 1 19 | return X / l2.unsqueeze(1) 20 | 21 | def fit(self, X, y): 22 | self.m, self.n = X.shape 23 | self.w = torch.zeros(self.n, dtype=torch.double).unsqueeze(1) 24 | self.b = 0.0 25 | self.X = X 26 | self.y = y 27 | for i in range(self.max_iterations): 28 | self.update_weights() 29 | 30 | return self 31 | 32 | def update_weights(self): 33 | y_pred = self.predict(self.X) 34 | dw = torch.zeros(self.n).unsqueeze(1) 35 | for j in range(self.n): 36 | if self.w[j] > 0: 37 | dw[j] = ( - (2* torch.mm(self.X[:, j].unsqueeze(0), (self.y - y_pred)) + self.l1_penality + 2 * self.l2_penality * self.w[j])) / self.m 38 | else: 39 | dw[j] = (- (2 * torch.mm(self.X[:, j].unsqueeze(0), (self.y - y_pred)) - self.l1_penality + 2 * self.l2_penality * self.w[j])) / self.m 40 | 41 | db = -2 * torch.sum(self.y - y_pred) / self.m 42 | self.w = self.w - self.lr * dw 43 | self.b = self.b - self.lr * db 44 | return self 45 | 46 | def predict(self, X): 47 | return torch.mm(X, self.w) + self.b 48 | 49 | if __name__ == '__main__': 50 | data = load_boston() 51 | regression = ElasticNetRegression(max_iterations=1000, learning_rate=0.001, l1_penality=500, l2_penality=1) 52 | X, y = regression.normalization(torch.tensor(data.data, dtype=torch.double)), torch.tensor(data.target).unsqueeze(1) 53 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3) 54 | regression.fit(x_train, y_train) 55 | Y_pred = regression.predict(x_test) 56 | print("Predicted values: ", torch.round(Y_pred[:3])) 57 | print("Real values: ", y_test[:3]) 58 | print("Trained W: ", torch.round(regression.w[0])) 59 | print("Trained b: ", torch.round(regression.b)) 60 | 61 | -------------------------------------------------------------------------------- /Day-12-LDA/lda.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | from NaiveBayes import NaiveBayes 3 | from sklearn.datasets import load_breast_cancer 4 | from sklearn.preprocessing import MinMaxScaler 5 | from sklearn.model_selection import train_test_split 6 | from sklearn.metrics import accuracy_score 7 | 8 | class LDA: 9 | def __init__(self): 10 | self.w = None 11 | 12 | def covariance_matrix(self, X): 13 | """ 14 | :param X: Input tensor 15 | :return: cavariance of input tensor 16 | """ 17 | centering_X = X - torch.mean(X, dim=0) 18 | cov = torch.mm(centering_X.T, centering_X) / (centering_X.shape[0] - 1) 19 | return cov 20 | 21 | def fit(self, X, y): 22 | """ 23 | :param X: Input tensor 24 | :param y: output tensor 25 | :return: transformation vector - to convert high dimensional input space into lower dimensional 26 | subspace. 27 | X1, X2 are samples based on class. cov_1 and cov_2 measures how features of samples of each class are related. 28 | 29 | """ 30 | X1 = X[y==0] 31 | X2 = X[y==1] 32 | cov_1 = self.covariance_matrix(X1) 33 | cov_2 = self.covariance_matrix(X2) 34 | cov_total = cov_1 + cov_2 35 | mean1 = torch.mean(X1, dim=0) 36 | mean2 = torch.mean(X2, dim=0) 37 | mean_diff = mean1 - mean2 38 | 39 | # Determine the vector which when X is projected onto it best separates the 40 | # data by class. w = (mean1 - mean2) / (cov1 + cov2) 41 | self.w = torch.mm(torch.pinverse(cov_total), mean_diff.unsqueeze(1)) 42 | 43 | def transform(self, X, y): 44 | self.fit(X, y) 45 | X_transformed = torch.mm(X, self.w) 46 | return X_transformed 47 | 48 | def predict(self, X): 49 | y_pred = [] 50 | for sample in X: 51 | h = torch.mm(sample.unsqueeze(0), self.w) 52 | y = 1 * (h < 0) 53 | y_pred.append(y) 54 | 55 | return y_pred 56 | 57 | if __name__ == '__main__': 58 | breast_cancer = load_breast_cancer() 59 | X = breast_cancer.data 60 | X_normalized = MinMaxScaler().fit_transform(X) 61 | X = torch.tensor(X_normalized) 62 | y = torch.tensor(breast_cancer.target)#.unsqueeze(1) 63 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2) 64 | lda = LDA() 65 | X_transformed = lda.transform(x_train, y_train) 66 | GNB = NaiveBayes(X_transformed, y_train) 67 | GNB.find_mu_and_sigma(X_transformed, y_train) 68 | X_test_transformed = lda.transform(x_test, y_test) 69 | y_pred = GNB.predict_probability(X_test_transformed) 70 | print(f'Accuracy Score: {accuracy_score(y_test, y_pred)}') 71 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Machine Learning Using Pytorch 2 | 3 | 4 | 5 | The objective of the repository is to learn and build machine learning models using Pytorch. 
6 | 7 | ![GitHub stars](https://img.shields.io/github/stars/Mayurji/MLWithPytorch?style=social) 8 | ![GitHub forks](https://img.shields.io/github/forks/Mayurji/MLWithPytorch?style=social) 9 | ![GitHub watchers](https://img.shields.io/github/watchers/Mayurji/MLWithPytorch?style=social) 10 | ![GitHub followers](https://img.shields.io/github/followers/Mayurji?style=social) 11 | 12 | ![GitHub repo size](https://img.shields.io/github/repo-size/Mayurji/MLWithPytorch?style=plastic) 13 | ![GitHub language count](https://img.shields.io/github/languages/count/Mayurji/MLWithPytorch?style=plastic) 14 | ![GitHub top language](https://img.shields.io/github/languages/top/Mayurji/MLWithPytorch?style=plastic) 15 | ![GitHub last commit](https://img.shields.io/github/last-commit/Mayurji/MLWithPytorch?color=red&style=plastic) 16 | 17 | 18 | Buy Me A Coffee donate button 19 | 20 | 21 | Patreon donate button 22 | 23 | 24 | ![MLWithPyTorch](/mlwithpytorch.png) 25 | 26 | **List of Algorithms Covered** 27 | 28 | 📌 Day 1 - Linear Regression \ 29 | 📌 Day 2 - Logistic Regression \ 30 | 📌 Day 3 - Decision Tree \ 31 | 📌 Day 4 - KMeans Clustering \ 32 | 📌 Day 5 - Naive Bayes \ 33 | 📌 Day 6 - K Nearest Neighbour (KNN) \ 34 | 📌 Day 7 - Support Vector Machine \ 35 | 📌 Day 8 - Tf-Idf Model \ 36 | 📌 Day 9 - Principal Components Analysis \ 37 | 📌 Day 10 - Lasso and Ridge Regression \ 38 | 📌 Day 11 - Gaussian Mixture Model \ 39 | 📌 Day 12 - Linear Discriminant Analysis \ 40 | 📌 Day 13 - Adaboost Algorithm \ 41 | 📌 Day 14 - DBScan Clustering \ 42 | 📌 Day 15 - Multi-Class LDA \ 43 | 📌 Day 16 - Bayesian Regression \ 44 | 📌 Day 17 - K-Medoids \ 45 | 📌 Day 18 - TSNE \ 46 | 📌 Day 19 - ElasticNet Regression \ 47 | 📌 Day 20 - Spectral Clustering \ 48 | 📌 Day 21 - Latent Dirichlet \ 49 | 📌 Day 22 - Affinity Propagation \ 50 | 📌 Day 23 - Gradient Descent Algorithm \ 51 | 📌 Day 24 - Regularization Techniques \ 52 | 📌 Day 25 - RANSAC Algorithm \ 53 | 📌 Day 26 - Normalizations \ 54 | 📌 Day 27 - Multi-Layer Perceptron \ 55 | 📌 Day 28 - Activations \ 56 | 📌 Day 29 - Optimizers \ 57 | 📌 Day 30 - Loss Functions 58 | 59 | ### Let me know if there is any correction. Feedback is welcomed. 
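
Each day's folder is self-contained; once the dependencies used across the scripts (torch, scikit-learn,
scipy, matplotlib, seaborn) are installed, a script can be run directly, for example:

    python Day-01-Linear-Regression/LinearRegression.py
    python Day-06-KNN/KNN.py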
60 | 61 | ## References 62 | 63 | * Sklearn Library 64 | * ML-Glossary 65 | * ML From Scratch (Github) 66 | -------------------------------------------------------------------------------- /Day-21-LatentDirichlet/LDA_TopicModeling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from sklearn.datasets import fetch_20newsgroups 5 | from sklearn.feature_extraction.text import CountVectorizer 6 | 7 | class LatentDirichlet: 8 | def __init__(self, D, V, T): 9 | self.D = len(D) 10 | self.V = len(V) 11 | self.T = T 12 | self.alpha = 1 / T 13 | self.beta = 1 / T 14 | 15 | def fit_transform(self, documents): 16 | z_d_n = [[0 for _ in range(len(d))] for d in documents] 17 | theta_d_z = torch.zeros((self.D, self.T)) 18 | phi_z_w = torch.zeros((self.T, self.V)) 19 | n_z = torch.zeros((self.T)) 20 | n_d = torch.zeros((self.D)) 21 | 22 | for d, doc in enumerate(documents): 23 | for n, w in enumerate(doc): 24 | z_d_n[d][n] = n % self.T 25 | z = z_d_n[d][n] 26 | theta_d_z[d][z] += 1 27 | phi_z_w[z, w] += 1 28 | n_z[z] += 1 29 | n_d[d] += 1 30 | 31 | for iter in range(10): 32 | for d, doc in enumerate(documents): 33 | for n,w in enumerate(doc): 34 | z = z_d_n[d][n] 35 | theta_d_z[d][z] -= 1 36 | phi_z_w[z, w] -= 1 37 | n_z[z] -= 1 38 | p_d_t = (theta_d_z[d] + self.alpha) / (n_d[d] - 1 + self.T * self.alpha) 39 | p_t_w = (phi_z_w[:, w] + self.beta) / (n_z + self.V * self.beta) 40 | p_z = p_d_t * p_t_w 41 | p_z /= torch.sum(p_z) 42 | new_z = torch.multinomial(p_z, 1) 43 | z_d_n[d][n] = new_z[0] 44 | theta_d_z[d][new_z] += 1 45 | phi_z_w[new_z, w] += 1 46 | n_z[new_z] += 1 47 | 48 | return theta_d_z, phi_z_w 49 | 50 | if __name__ == '__main__': 51 | n_samples = 10000 52 | documents = [] 53 | data, _ = fetch_20newsgroups(shuffle=True, random_state=2, 54 | remove=('headers', 'footers', 'quotes'), return_X_y=True) 55 | data_samples = data[:n_samples] 56 | cnt_vectorizer = CountVectorizer(max_df=0.95, min_df=2, 57 | max_features=10000, 58 | stop_words='english') 59 | vectorizer = cnt_vectorizer.fit_transform(data_samples) 60 | vocabulary = cnt_vectorizer.vocabulary_ 61 | for row in vectorizer.toarray(): 62 | present_words = np.where(row != 0)[0].tolist() 63 | present_words_with_count = [] 64 | for w_i in present_words: 65 | for count in range(row[w_i]): 66 | present_words_with_count.append(w_i) 67 | documents.append(present_words_with_count) 68 | 69 | LD = LatentDirichlet(D=documents, V=vocabulary, T=20) 70 | topic_distribution, word_distribution = LD.fit_transform(documents) 71 | i = 1 72 | plt.plot(topic_distribution[i] / sum(topic_distribution[i])); 73 | plt.title("Topic distribution $theta_i$ for document {}".format(i)); 74 | plt.show() 75 | -------------------------------------------------------------------------------- /Day-28-Activations/activation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from sklearn.datasets import load_digits 3 | from sklearn.model_selection import train_test_split 4 | from MLP import MultiLayerPerceptron, CrossEntropy, normalization, accuracy_score, to_categorical 5 | 6 | class Sigmoid: 7 | def __call__(self, X): 8 | return 1 / (1 + torch.exp(-X)) 9 | 10 | def gradient(self, X): 11 | return self.__call__(X) * (1 - self.__call__(X)) 12 | 13 | class Softmax: 14 | def __call__(self, X): 15 | e_x = torch.exp(X - torch.max(X, dim=-1, keepdim=True).values) 16 | return e_x / torch.sum(e_x, dim=1, keepdim=True) 17 | 
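    # Note on __call__ above (illustrative): subtracting the row-wise max before exponentiating keeps
    # torch.exp from overflowing; a row such as [1000., 1001.] would otherwise produce inf/nan, while the
    # shifted row [-1., 0.] yields the same softmax values [0.2689, 0.7311].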
18 | def gradient(self, X): 19 | p = self.__call__(X) 20 | return p * (1 - p) 21 | 22 | class TanH: 23 | def __call__(self, X): 24 | return 2 / (1 + torch.exp(-2 * X)) - 1 25 | 26 | def gradient(self,X): 27 | return 1 - torch.pow(self.__call__(X), 2) 28 | 29 | class Relu: 30 | def __call__(self, X): 31 | return torch.where(X>0.0, X, 0.0) 32 | 33 | def gradient(self, X): 34 | return torch.where(X >=0.0, 1.0, 0.0) 35 | 36 | class LeakyRelu: 37 | def __init__(self, alpha): 38 | self.alpha = alpha 39 | 40 | def __call__(self, X): 41 | return torch.where(X > 0.0, X, self.alpha * X) 42 | 43 | def gradient(self, X): 44 | return torch.where(X > 0.0, 1.0, self.alpha) 45 | 46 | class ELU: 47 | def __init__(self, alpha): 48 | self.alpha = alpha 49 | 50 | def __call__(self, X): 51 | return torch.where(X>=0.0, X, self.alpha * (torch.exp(X) - 1)) 52 | 53 | def gradient(self, X): 54 | return torch.where(X >= 0.0, 1.0, self.__call__(X) + self.alpha) 55 | 56 | class SELU(): 57 | def __init__(self): 58 | self.alpha = 1.6732632423543772848170429916717 59 | self.scale = 1.0507009873554804934193349852946 60 | 61 | def __call__(self, x): 62 | return self.scale * torch.where(x >= 0.0, x, self.alpha*(torch.exp(x)-1)) 63 | 64 | def gradient(self, x): 65 | return self.scale * torch.where(x >= 0.0, 1.0, self.alpha * torch.exp(x)) 66 | 67 | class SoftPlus(): 68 | def __call__(self, x): 69 | return torch.log(1 + torch.exp(x)) 70 | 71 | def gradient(self, x): 72 | return 1 / (1 + torch.exp(-x)) 73 | 74 | if __name__ == '__main__': 75 | data = load_digits() 76 | X = normalization(torch.tensor(data.data, dtype=torch.double)) 77 | y = torch.tensor(data.target) 78 | 79 | # Convert the nominal y values to binary 80 | y = to_categorical(y) 81 | 82 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1) 83 | # MLP 84 | clf = MultiLayerPerceptron(n_hidden=16, 85 | n_iterations=1000, 86 | learning_rate=0.01, activation_function_hidden_layer=Sigmoid(), 87 | activation_function_output_layer=Softmax()) 88 | 89 | clf.fit(X_train, y_train) 90 | y_pred = torch.argmax(clf.predict(X_test), dim=1) 91 | y_test = torch.argmax(y_test, dim=1) 92 | 93 | accuracy = accuracy_score(y_test, y_pred) 94 | print("Accuracy:", accuracy) 95 | -------------------------------------------------------------------------------- /Day-15-MultiClassLDA/multi-class-LDA.py: -------------------------------------------------------------------------------- 1 | """ 2 | Checkout below url on Multi-Class LDA 3 | https://multivariatestatsjl.readthedocs.io/en/latest/mclda.html 4 | """ 5 | import torch 6 | from sklearn.datasets import load_iris 7 | import matplotlib.pyplot as plt 8 | 9 | class MultiClassLDA: 10 | def __init__(self, solver='svd'): 11 | self.solver = solver 12 | 13 | def covariance_matrix(self, X): 14 | """ 15 | :param X: Input tensor 16 | :return: cavariance of input tensor 17 | """ 18 | centering_X = X - torch.mean(X, dim=0) 19 | cov = torch.mm(centering_X.T, centering_X) / (centering_X.shape[0] - 1) 20 | return cov 21 | 22 | def scatter_matrix(self, X, y): 23 | """ 24 | :param X: Input tensor 25 | :param y: Output tensor 26 | :return: How features are related to each other in within-class distribution and between class distribution 27 | """ 28 | n_features = X.shape[1] 29 | labels = y.unique() 30 | 31 | # Within-Class Scatter Matrix 32 | sw = torch.zeros((n_features, n_features)) 33 | for label in labels: 34 | X_class = X[y==label] 35 | sw += (X_class.shape[0] - 1) * self.covariance_matrix(X_class) 36 | 37 | # 
Between-Class Scatter Matrix 38 | n_samples_mean = torch.sum(X, dim=0) 39 | sb = torch.zeros((n_features, n_features)) 40 | for label in labels: 41 | X_class = X[y==label] 42 | mean_class = torch.mean(X_class, dim=0).unsqueeze(0) 43 | 44 | sb += (X_class.shape[0]) * torch.mm((mean_class - n_samples_mean), (mean_class - n_samples_mean).T) 45 | 46 | return sw, sb 47 | 48 | def transform(self, X, y, n_components): 49 | """ 50 | And Why Inverse, In matrices, there is no concepts of division, thus multiplying with inverse 51 | matrix helps in acheiving what division does. 52 | :param X: 53 | :param y: 54 | :param n_components: Transforming from high dimension data to lower dimension n_components. 55 | :return: Transformed set of low dimensional X matrix 56 | """ 57 | sw, sb = self.scatter_matrix(X, y) 58 | A = torch.mm(torch.pinverse(sw), sb) 59 | eigen_values, eigen_vectors = torch.eig(A, eigenvectors=True) 60 | eigen_sorted_index = torch.argsort(eigen_values[:, 0], descending=True) 61 | eigen_vectors_sorted = eigen_vectors[:, eigen_sorted_index] 62 | component_vector = eigen_vectors_sorted[:, 0:n_components] 63 | component_vector = component_vector.type(torch.DoubleTensor) 64 | transformed = torch.mm(X, component_vector) 65 | return transformed 66 | 67 | def plot_in_2d(self, X, y, title=None): 68 | """ Plot the dataset X and the corresponding labels y in 2D using the LDA 69 | transformation.""" 70 | X_transformed = self.transform(X, y, n_components=2) 71 | x1 = X_transformed[:, 0] 72 | x2 = X_transformed[:, 1] 73 | plt.scatter(x1, x2, c=y) 74 | if title: plt.title(title) 75 | plt.show() 76 | 77 | 78 | if __name__ == '__main__': 79 | data = load_iris() 80 | X = torch.tensor(data.data, dtype=torch.double) 81 | y = torch.tensor(data.target) 82 | mclda = MultiClassLDA() 83 | mclda.plot_in_2d(X, y) 84 | -------------------------------------------------------------------------------- /Day-02-Logistic-Regression/LogisticRegression.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from sklearn.datasets import make_blobs 3 | 4 | class LogisticRegression: 5 | def __init__(self, X): 6 | """ 7 | :param X: Input tensor 8 | :keyword lr: learning rate 9 | :keyword epochs: number of times the model iterates over complete dataset 10 | :keyword weights: parameters learned during training 11 | :keyword bias: parameter learned during training 12 | """ 13 | self.lr = 0.1 14 | self.epochs = 1000 15 | self.m, self.n = X.shape 16 | self.weights = torch.zeros((self.n, 1), dtype=torch.double) 17 | self.bias = 0 18 | 19 | def sigmoid(self, z): 20 | """ 21 | :param z: latent variable represents (wx + b) 22 | :return: squashes the real value between 0 and 1 representing probability score. 23 | """ 24 | return 1 / (1 + torch.exp(-z)) 25 | 26 | def loss(self, yhat): 27 | """ 28 | :param yhat: Estimated y 29 | :return: Log loss - When y=1, it cancels out half function, remaining half is considered for loss calculation and vice-versa 30 | """ 31 | return -(1 / self.m) * torch.sum(y * torch.log(yhat) + (1 - y) * torch.log(1 - yhat)) 32 | 33 | def gradient(self, y_predict): 34 | """ 35 | :param y_predict: Estimated y 36 | :return: gradient is calculated to find how much change is required in parameters to reduce the loss. 
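        For example, with m = 2 samples, predictions y_predict = [0.9, 0.2] and labels y = [1, 0], the bias
        gradient is db = ((0.9 - 1) + (0.2 - 0)) / 2 = 0.05, and dw averages the same errors weighted by each
        sample's features.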
37 | """ 38 | dw = 1 / self.m * torch.mm(X.T, (y_predict - y)) 39 | db = 1 / self.m * torch.sum(y_predict - y) 40 | return dw, db 41 | 42 | def run(self, X, y): 43 | """ 44 | :param X: Input tensor 45 | :param y: Output tensor 46 | :var y_predict: Predicted tensor 47 | :var cost: Difference between ground truth and predicted 48 | :var dw, db: Weight and bias update for weight tensor and bias scalar 49 | :return: updated weights and bias 50 | """ 51 | for epoch in range(1, self.epochs + 1): 52 | 53 | y_predict = self.sigmoid(torch.mm(X, self.weights) + self.bias) 54 | cost = self.loss(y_predict) 55 | dw, db = self.gradient(y_predict) 56 | 57 | self.weights -= self.lr * dw 58 | self.bias -= self.lr * db 59 | 60 | if epoch % 100 == 0: 61 | print(f"Cost after iteration {epoch}: {cost}") 62 | 63 | return self.weights, self.bias 64 | 65 | def predict(self, X): 66 | """ 67 | :param X: Input tensor 68 | :var y_predict_labels: Converts float value to int/bool true(1) or false(0) 69 | :return: outputs labels as 0 and 1 70 | """ 71 | y_predict = self.sigmoid(torch.mm(X, self.weights) + self.bias) 72 | y_predict_labels = y_predict > 0.5 73 | 74 | return y_predict_labels 75 | 76 | if __name__ == '__main__': 77 | """ 78 | :var manual_seed: for reproducing the results 79 | :desc unsqueeze: adds a dimension to the tensor at specified position. 80 | """ 81 | torch.manual_seed(0) 82 | X, y = make_blobs(n_samples=1000, centers=2) 83 | X = torch.tensor(X) 84 | y = torch.tensor(y).unsqueeze(1) 85 | lr = LogisticRegression(X) 86 | w, b = lr.run(X, y) 87 | y_predict = lr.predict(X) 88 | 89 | print(f"Accuracy: {torch.sum(y == y_predict) // X.shape[0]}") 90 | -------------------------------------------------------------------------------- /Day-22-AffinityPropagation/AffinityPropagation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Reading: https://en.wikipedia.org/wiki/Affinity_propagation 3 | Applicable: Human Face Clustering 4 | """ 5 | import torch 6 | from scipy.spatial.distance import pdist,squareform 7 | from sklearn.datasets import load_breast_cancer 8 | 9 | class AffinityPropagation: 10 | def __init__(self, similariy_matrix, max_iteration=200, num_iter=5, alpha=0.5, print_every=100): 11 | """ 12 | :param similariy_matrix: 13 | :param max_iteration: 14 | :param num_iter: 15 | :param alpha: 16 | :param print_every: 17 | """ 18 | self.s = similariy_matrix 19 | self.max_iteration = max_iteration 20 | self.alpha = alpha 21 | self.print_every = print_every 22 | N, N = self.s.shape 23 | self.r = torch.zeros((N, N)) 24 | self.a = torch.zeros((N, N)) 25 | 26 | def step(self): 27 | """ 28 | :param r is responsiblity matrix, For each data point x_i, how well-suited is x_k as it exempler along with 29 | other exemplars. 30 | :param a is availability matrix, For appropriate is x_k as exemplers for x_i, while keeping other data points 31 | who keeps x_k as exemplar. 
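        The updates below follow the standard affinity-propagation rules (each damped by the alpha factor):
        r(i, k) <- s(i, k) - max over k' != k of [ a(i, k') + s(i, k') ]
        a(i, k) <- min(0, r(k, k) + sum over i' not in {i, k} of max(0, r(i', k))), for i != k
        a(k, k) <- sum over i' != k of max(0, r(i', k))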
32 | :return: 33 | """ 34 | N, N = self.s.shape 35 | old_r = self.r 36 | old_a = self.a 37 | a_plus_s = self.a + self.s 38 | 39 | first_max = torch.max(a_plus_s, dim=1) 40 | first_max_indices = torch.argmax(a_plus_s, dim=1) 41 | first_max = torch.reshape(torch.repeat_interleave(first_max.values, N), (N, N)) 42 | a_plus_s[range(N), first_max_indices] = float('-inf') 43 | second_max = torch.max(a_plus_s, dim=1).values 44 | # responsibility Update 45 | r = self.s - first_max 46 | r[range(N), first_max_indices] = self.s[range(N), first_max_indices] - second_max[range(N)] 47 | r = self.alpha * old_r + (1 - self.alpha) * r 48 | rp = torch.maximum(r, torch.scalar_tensor(0)) 49 | m = rp.size(0) 50 | rp.as_strided([m], [m + 1]).copy_(torch.diag(r)) 51 | a = torch.reshape(torch.repeat_interleave(torch.sum(rp, dim=0), N),(N, N)).T - rp 52 | da = torch.diag(a) 53 | a = torch.minimum(a, torch.scalar_tensor(0)) 54 | k = a.size(0) 55 | a.as_strided([k], [k+1]).copy_(da) 56 | # Availibility Update 57 | a = self.alpha * old_a + (1 - self.alpha) * a 58 | 59 | return r, a 60 | 61 | def solve(self): 62 | for i in range(self.max_iteration): 63 | self.r, self.a = self.step() 64 | 65 | e = self.r + self.a 66 | 67 | N, N = e.shape 68 | I = torch.where(torch.diag(e) > 0)[0] 69 | K = len(I) 70 | 71 | c = self.s[:, I] 72 | c = torch.argmax(c, dim=1) 73 | c[I] = torch.arange(0, K) 74 | idx = I[c] 75 | exemplar_indices = I 76 | exemplar_assignment = idx 77 | return exemplar_indices, exemplar_assignment 78 | 79 | if __name__ == "__main__": 80 | """ 81 | :param similarity_matrix 82 | It finds the similarity between data points. 83 | 84 | """ 85 | data = load_breast_cancer() 86 | x = torch.tensor(data.data, dtype=torch.double) 87 | 88 | similarity_matrix = squareform(pdist(x, metric='euclidean')) 89 | similarity_matrix = torch.from_numpy(similarity_matrix) 90 | max_iteration = 3000 91 | affinity_prop = AffinityPropagation(similarity_matrix, max_iteration=max_iteration, 92 | alpha=0.5) 93 | indices, assignment = affinity_prop.solve() 94 | -------------------------------------------------------------------------------- /Day-12-LDA/NaiveBayes.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from sklearn.datasets import load_iris 4 | from sklearn.metrics import accuracy_score 5 | from sklearn.model_selection import train_test_split 6 | 7 | class NaiveBayes: 8 | def __init__(self, X, y): 9 | """ 10 | :param X: input tensor 11 | :param y: target tensor 12 | :var total_samples: Number of Samples 13 | :var feature_count: Number of Features 14 | :var mu: mean 15 | :var sigma: variance 16 | :var e: epsilon 17 | :var n_classes: number of classes 18 | 19 | why e - epsilon ? 20 | # If the ratio of data variance between dimensions is too small, it 21 | # will cause numerical errors. To address this, we artificially 22 | # boost the variance by epsilon, a small fraction of the standard 23 | # deviation of the largest dimension. 24 | """ 25 | self.total_samples, self.feature_count = X.shape[0], X.shape[1] 26 | self.mu = {} 27 | self.sigma = {} 28 | self.prior_probability_X = {} 29 | self.e = 1e-4 30 | self.n_classes = len(y.unique()) 31 | 32 | def find_mu_and_sigma(self, X, y): 33 | """ 34 | Bayes Theorem: 35 | P(Y|X) = P(X|Y) * P(Y) / P(X) 36 | 37 | :type mu: dict 38 | :type sigma: dict 39 | :type prior_probability: dict 40 | :describe mu: keys are class label and values are feature's mean values. 
41 | :describe sigma: keys are class label and values are feature's variance values. 42 | :describe prior probability of x: It calculates the prior prabability of X for each class. P(X). 43 | :return: 44 | """ 45 | for cls in range(self.n_classes): 46 | X_class = X[y==cls] 47 | self.mu[cls] = torch.mean(X_class, dim=0) 48 | self.sigma[cls] = torch.var(X_class, dim=0) 49 | self.prior_probability_X[cls] = X_class.shape[0] / X.shape[0] 50 | 51 | def gaussian_naive_bayes(self, X, mu, sigma): 52 | """ 53 | :return: Multivariate normal(gaussian) distribution - Maximum Likelihood Estimation 54 | https://www.statlect.com/fundamentals-of-statistics/multivariate-normal-distribution-maximum-likelihood 55 | 56 | Log Likelihood Function = Constant - probability 57 | """ 58 | constant = -self.feature_count / 2 * torch.log(2 * torch.tensor(np.pi)) - 0.5 * torch.sum(torch.log(sigma+self.e)) 59 | probability = 0.5 * torch.sum(torch.pow(X-mu, 2) / (sigma + self.e), dim=1) 60 | return constant - probability 61 | 62 | def predict_probability(self, X): 63 | """ 64 | Calculating probabilities for each sample input in X using prior probability 65 | and gaussian density function. 66 | torch.argmax: To find the class with max-probability. 67 | Note: We are calculate log probabilities as in Sklearn's predict_log_proba, that why we have + sign between 68 | prior probabilites and likelihood (class probability). 69 | 70 | :return: 71 | """ 72 | probabilities = torch.zeros((X.shape[0], self.n_classes)) 73 | for cls in range(self.n_classes): 74 | class_probability = self.gaussian_naive_bayes(X, self.mu[cls], self.sigma[cls]) 75 | probabilities[:, cls] = class_probability + torch.log(torch.scalar_tensor(self.prior_probability_X[cls])) 76 | 77 | 78 | return torch.argmax(probabilities, dim=1) 79 | 80 | if __name__ == '__main__': 81 | iris = load_iris() 82 | X = torch.tensor(iris.data) 83 | y = torch.tensor(iris.target) 84 | torch.manual_seed(0) 85 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2) 86 | GNB = NaiveBayes(x_train, y_train) 87 | GNB.find_mu_and_sigma(x_train, y_train) 88 | y_pred = GNB.predict_probability(x_test) 89 | print(f'Accuracy Score: {accuracy_score(y_test, y_pred)}') 90 | -------------------------------------------------------------------------------- /Day-28-Activations/MLP.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from sklearn.datasets import load_digits 3 | from sklearn.model_selection import train_test_split 4 | def accuracy_score(y, p): 5 | accuracy = torch.sum(y == p, dim=0) / len(y) 6 | return accuracy 7 | 8 | def to_categorical(X, n_col=None): 9 | if not n_col: 10 | n_col = torch.amax(X) + 1 11 | 12 | one_hot = torch.zeros((X.shape[0], n_col)) 13 | one_hot[torch.arange(X.shape[0]), X] = 1 14 | return one_hot 15 | 16 | def normalization(X): 17 | """ 18 | :param X: Input tensor 19 | :return: Normalized input using l2 norm. 
20 | """ 21 | l2 = torch.norm(X, p=2, dim=-1) 22 | l2[l2 == 0] = 1 23 | return X / l2.unsqueeze(1) 24 | 25 | class CrossEntropy: 26 | def __init__(self): 27 | pass 28 | def loss(self, y, p): 29 | p = torch.clip(p, 1e-15, 1-1e-15) 30 | return - y * torch.log(p) - (1 -y) * torch.log(1 - p) 31 | 32 | def accuracy_score(self, y, p): 33 | return accuracy_score(torch.argmax(y, dim=1), torch.argmax(p, dim=1)) 34 | 35 | def gradient(self, y, p): 36 | p = torch.clip(p, 1e-15, 1 - 1e-15) 37 | return - (y / p) + (1 - y) / (1 -p) 38 | class MultiLayerPerceptron: 39 | def __init__(self, n_hidden, activation_function_hidden_layer, activation_function_output_layer, n_iterations=1000, learning_rate=0.001): 40 | self.n_hidden = n_hidden 41 | self.n_iterations = n_iterations 42 | self.learning_rate = learning_rate 43 | self.hidden_activation = activation_function_hidden_layer 44 | self.output_activation = activation_function_output_layer 45 | self.loss = CrossEntropy() 46 | 47 | def initalize_weight(self, X, y): 48 | n_samples, n_features = X.shape 49 | _, n_outputs = y.shape 50 | limit = 1 / torch.sqrt(torch.scalar_tensor(n_features)) 51 | self.W = torch.DoubleTensor(n_features, self.n_hidden).uniform_(-limit, limit) 52 | 53 | self.W0 = torch.zeros((1, self.n_hidden)) 54 | limit = 1 / torch.sqrt(torch.scalar_tensor(self.n_hidden)) 55 | self.V = torch.DoubleTensor(self.n_hidden, n_outputs).uniform_(-limit, limit) 56 | self.V0 = torch.zeros((1, n_outputs)) 57 | 58 | def fit(self, X, y): 59 | self.initalize_weight(X, y) 60 | for i in range(self.n_iterations): 61 | hidden_input = torch.mm(X, self.W) + self.W0 62 | hidden_output = self.hidden_activation(hidden_input) 63 | 64 | output_layer_input = torch.mm(hidden_output, self.V) + self.V0 65 | y_pred = self.output_activation(output_layer_input) 66 | 67 | grad_wrt_first_output = self.loss.gradient(y, y_pred) * self.output_activation.gradient(output_layer_input) 68 | grad_v = torch.mm(hidden_output.T, grad_wrt_first_output) 69 | grad_v0 = torch.sum(grad_wrt_first_output, dim=0, keepdim=True) 70 | 71 | grad_wrt_first_hidden = torch.mm(grad_wrt_first_output, self.V.T) * self.hidden_activation.gradient(hidden_input) 72 | grad_w = torch.mm(X.T, grad_wrt_first_hidden) 73 | grad_w0 = torch.sum(grad_wrt_first_hidden, dim=0, keepdim=True) 74 | 75 | # Update weights (by gradient descent) 76 | # Move against the gradient to minimize loss 77 | self.V -= self.learning_rate * grad_v 78 | self.V0 -= self.learning_rate * grad_v0 79 | self.W -= self.learning_rate * grad_w 80 | self.W0 -= self.learning_rate * grad_w0 81 | 82 | # Use the trained model to predict labels of X 83 | 84 | def predict(self, X): 85 | # Forward pass: 86 | hidden_input = torch.mm(X,self.W) + self.W0 87 | hidden_output = self.hidden_activation(hidden_input) 88 | output_layer_input = torch.mm(hidden_output, self.V) + self.V0 89 | y_pred = self.output_activation(output_layer_input) 90 | return y_pred 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /Day-05-Naive-Bayes/NaiveBayes.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from sklearn.datasets import load_iris 4 | from sklearn.metrics import accuracy_score 5 | from sklearn.model_selection import train_test_split 6 | 7 | class NaiveBayes: 8 | def __init__(self, X, y): 9 | """ 10 | why e - epsilon ? 11 | # If the ratio of data variance between dimensions is too small, it 12 | # will cause numerical errors. 
To address this, we artificially 13 | # boost the variance by epsilon, a small fraction of the standard 14 | # deviation of the largest dimension. 15 | 16 | :param X: input tensor 17 | :param y: target tensor 18 | :var total_samples: Number of Samples 19 | :var feature_count: Number of Features 20 | :var mu: mean 21 | :var sigma: variance 22 | :var e: epsilon 23 | :var n_classes: number of classes 24 | """ 25 | self.total_samples, self.feature_count = X.shape[0], X.shape[1] 26 | self.mu = {} 27 | self.sigma = {} 28 | self.prior_probability_X = {} 29 | self.e = 1e-4 30 | self.n_classes = len(y.unique()) 31 | 32 | def find_mu_and_sigma(self, X, y): 33 | """ 34 | Bayes Theorem: 35 | P(Y|X) = P(X|Y) * P(Y) / P(X) 36 | 37 | :type mu: dict 38 | :type sigma: dict 39 | :type prior_probability: dict 40 | :describe mu: keys are class label and values are feature's mean values. 41 | :describe sigma: keys are class label and values are feature's variance values. 42 | :describe prior probability of x: It calculates the prior prabability of X for each class. P(X). 43 | :return: 44 | """ 45 | for cls in range(self.n_classes): 46 | X_class = X[y==cls] 47 | self.mu[cls] = torch.mean(X_class, dim=0) 48 | self.sigma[cls] = torch.var(X_class, dim=0) 49 | self.prior_probability_X[cls] = X_class.shape[0] / X.shape[0] 50 | 51 | def gaussian_naive_bayes(self, X, mu, sigma): 52 | """ 53 | :return: Multivariate normal(gaussian) distribution - Maximum Likelihood Estimation 54 | https://www.statlect.com/fundamentals-of-statistics/multivariate-normal-distribution-maximum-likelihood 55 | 56 | Log Likelihood Function = Constant - probability 57 | """ 58 | constant = - self.feature_count / 2 * torch.log(2 * torch.tensor(np.pi)) - 0.5 * torch.sum(torch.log(sigma+self.e)) 59 | probability = 0.5 * torch.sum(torch.pow(X-mu, 2) / (sigma + self.e), dim=1) 60 | return constant - probability 61 | 62 | def predict_probability(self, X): 63 | """ 64 | Calculating probabilities for each sample input in X using prior probability 65 | and gaussian density function. 66 | 67 | torch.argmax: To find the class with max-probability. 68 | 69 | Note: We are calculate log probabilities as in Sklearn's predict_log_proba, that why we have + sign between 70 | prior probabilites and likelihood (class probability). 
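        Concretely, each entry is the (unnormalized) log-posterior
        log P(y = c | x) = log P(y = c) + sum_j [ -0.5 * log(2 * pi * sigma_cj) - (x_j - mu_cj)^2 / (2 * sigma_cj) ]
        (up to the epsilon smoothing term), where sigma_cj is the per-class feature variance; the argmax over
        classes gives the predicted label.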
71 | 72 | :return: 73 | """ 74 | probabilities = torch.zeros((X.shape[0], self.n_classes)) 75 | for cls in range(self.n_classes): 76 | class_probability = self.gaussian_naive_bayes(X, self.mu[cls], self.sigma[cls]) 77 | probabilities[:, cls] = class_probability + torch.log(torch.scalar_tensor(self.prior_probability_X[cls])) 78 | 79 | 80 | return torch.argmax(probabilities, dim=1) 81 | 82 | if __name__ == '__main__': 83 | iris = load_iris() 84 | X = torch.tensor(iris.data) 85 | y = torch.tensor(iris.target) 86 | torch.manual_seed(0) 87 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2) 88 | GNB = NaiveBayes(x_train, y_train) 89 | GNB.find_mu_and_sigma(x_train, y_train) 90 | y_pred = GNB.predict_probability(x_test) 91 | print(f'Accuracy Score: {accuracy_score(y_test, y_pred)}') 92 | -------------------------------------------------------------------------------- /Day-29-Optimizers/optimizer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class StochasticGradientDescentWithMomentum: 4 | def __init__(self, learning_rate=0.001, momentum=0): 5 | self.lr = learning_rate 6 | self.momentum = momentum 7 | self.w_update = None 8 | 9 | def update(self, w, gradient_wrt_w): 10 | if self.w_update is None: 11 | self.w_update = torch.zeros(w.shape) 12 | 13 | self.w_update = self.momentum * self.w_update + (1 - self.momentum) * gradient_wrt_w 14 | return w - self.lr * self.w_update 15 | 16 | class NesterovAcceleratedGradient: 17 | def __init__(self, learning_rate=0.001, momentum=0.4): 18 | self.lr = learning_rate 19 | self.momentum = momentum 20 | self.w_update = torch.tensor([]) 21 | 22 | def update(self, w, gradient_function): 23 | approx_future_gradient = torch.clip(gradient_function(w - self.momentum * self.w_update), -1, 1) 24 | 25 | if not self.w_update.any(): 26 | self.w_update = torch.zeros(w.shape) 27 | 28 | self.w_update = self.momentum * self.w_update + self.lr * approx_future_gradient 29 | return w - self.w_update 30 | 31 | class Adagrad: 32 | def __init__(self, learning_rate=0.01): 33 | self.lr = learning_rate 34 | self.G = None 35 | self.eps = 1e-8 36 | 37 | def update(self, w, gradient_wrt_w): 38 | if self.G is None: 39 | self.G = torch.zeros(w.shape) 40 | 41 | self.G += torch.pow(gradient_wrt_w, 2) 42 | return w - self.lr * gradient_wrt_w / torch.sqrt(self.G + self.eps) 43 | 44 | class Adadelta: 45 | def __init__(self, rho=0.95, eps=1e-6): 46 | self.E_W_update = None 47 | self.E_gradient = None 48 | self.w_update = None 49 | self.eps = eps 50 | self.rho = rho 51 | 52 | def update(self, w, gradient_wrt_w): 53 | if self.w_update is None: 54 | self.w_update = torch.zeros(w.shape) 55 | self.E_gradient = torch.zeros(gradient_wrt_w.shape) 56 | self.E_W_update = torch.zeros(w.shape) 57 | 58 | self.E_gradient = self.rho * self.E_gradient + (1 - self.rho) * torch.pow(gradient_wrt_w, 2) 59 | RMS_Delta_W = torch.sqrt(self.E_W_update + self.eps) 60 | RMS_gradient = torch.sqrt(self.E_gradient + self.eps) 61 | 62 | adaptive_lr = RMS_Delta_W / RMS_gradient 63 | self.w_update = adaptive_lr * gradient_wrt_w 64 | self.E_W_update = self.rho * self.E_W_update + (1 - self.rho) * torch.pow(self.w_update, 2) 65 | return w - self.w_update 66 | 67 | class RMSprop: 68 | def __init__(self, learning_rate=0.01, rho=0.9): 69 | self.lr = learning_rate 70 | self.Eg = None 71 | self.eps = 1e-8 72 | self.rho = rho 73 | 74 | def update(self, w, gradient_wrt_w): 75 | if self.Eg is None: 76 | self.Eg = torch.zeros(gradient_wrt_w.shape) 77 | 78 
| self.Eg = self.rho * self.Eg + (1 - self.rho) * torch.pow(gradient_wrt_w, 2) 79 | return w - self.lr * gradient_wrt_w / torch.sqrt(self.Eg + self.eps) 80 | 81 | class Adam: 82 | def __init__(self, learning_rate=0.001, b1=0.9, b2=0.999): 83 | self.lr = learning_rate 84 | self.eps = 1e-8 85 | self.m = None 86 | self.v = None 87 | self.b1 = b1 88 | self.b2 = b2 89 | 90 | def update(self, w, gradient_wrt_w): 91 | if self.m is None: 92 | self.m = torch.zeros(gradient_wrt_w.shape) 93 | self.v = torch.zeros(gradient_wrt_w.shape) 94 | 95 | self.m = self.b1 * self.m + (1 - self.b1) * gradient_wrt_w 96 | self.v = self.b2 * self.v + (1 - self.b2) * torch.pow(gradient_wrt_w, 2) 97 | 98 | m_hat = self.m / (1 - self.b1) 99 | v_hat = self.v / (1 - self.b2) 100 | 101 | self.w_update = self.lr * m_hat / torch.sqrt(v_hat) + self.eps 102 | 103 | return w - self.w_update 104 | -------------------------------------------------------------------------------- /Day-07-SVM/svm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from sklearn.preprocessing import MinMaxScaler 3 | from sklearn.model_selection import train_test_split 4 | from sklearn.datasets import load_breast_cancer 5 | from sklearn.metrics import accuracy_score 6 | from sklearn.utils import shuffle 7 | 8 | class SVM: 9 | def __init__(self, X, y, C=1.0): 10 | self.total_samples, self.features_count = X.shape[0], X.shape[1] 11 | self.n_classes = len(y.unique()) 12 | self.learning_rate = 0.001 13 | self.C = C 14 | 15 | def loss(self, X, W, y): 16 | """ 17 | C parameter tells the SVM optimization how much you want to avoid misclassifying each training 18 | example. For large values of C, the optimization will choose a smaller-margin hyperplane if that 19 | hyperplane does a better job of getting all the training points classified correctly. Conversely, 20 | a very small value of C will cause the optimizer to look for a larger-margin separating hyperplane, 21 | even if that hyperplane misclassifies more points. For very tiny values of C, you should get 22 | misclassified examples, often even if your training data is linearly separable. 
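        The cost returned by this method follows the standard soft-margin objective for labels y_i in {-1, +1}:
        J(W) = 0.5 * ||W||^2 + C * (1/N) * sum_i max(0, 1 - y_i * (W . x_i)).
        For example, a sample with y_i = +1 and W . x_i = 0.3 contributes a hinge term of 0.7, while any sample
        with y_i * (W . x_i) >= 1 contributes nothing.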
23 | 24 | :param X: 25 | :param W: 26 | :param y: 27 | :return: 28 | """ 29 | num_samples = X.shape[0] 30 | distances = 1 - y * (torch.mm(X, W.T)) 31 | 32 | distances[distances < 0] = 0 33 | hinge_loss = self.C * (torch.sum(distances) // num_samples) 34 | cost = 1 / 2 * torch.mm(W, W.T) + hinge_loss 35 | return cost 36 | 37 | def gradient_update(self, W, X, y): 38 | """ 39 | :param W: Weight Matrix 40 | :param X: Input Tensor 41 | :param y: Ground truth tensor 42 | :return: change in weight 43 | """ 44 | distance = 1 - (y * torch.mm(X, W.T)) 45 | dw = torch.zeros((1, X.shape[1]),dtype=torch.double) 46 | for idx, dist in enumerate(distance): 47 | if max(0, dist) == 0: 48 | di = W 49 | else: 50 | di = W - (self.C * y[idx] * X[idx]) 51 | 52 | dw += di 53 | 54 | dw = dw / len(y) 55 | return dw 56 | 57 | def fit(self, X, y, max_epochs): 58 | """ 59 | :param X: Input Tensor 60 | :param y: Output tensor 61 | :param max_epochs: Number of epochs the complete dataset is passed through the model 62 | :return: learned weight of the svm model 63 | """ 64 | weight = torch.randn((1, X.shape[1]), dtype=torch.double) * torch.sqrt(torch.scalar_tensor(1./X.shape[1])) 65 | cost_threshold = 0.0001 66 | previous_cost = float('inf') 67 | nth = 0 68 | for epoch in range(1, max_epochs+1): 69 | X, y = shuffle(X, y) 70 | for idx, x in enumerate(X): 71 | weight_update = self.gradient_update(weight, torch.tensor(x).unsqueeze(0), y[idx]) 72 | weight = weight - (self.learning_rate * weight_update) 73 | 74 | if epoch % 100 == 0: 75 | cost = self.loss(X, weight, y) 76 | print(f'Loss at epoch {epoch}: {cost}') 77 | if abs(previous_cost - cost) < cost_threshold * previous_cost: 78 | return weight 79 | previous_cost = cost 80 | nth += 1 81 | return weight 82 | 83 | if __name__ == '__main__': 84 | num_epochs = 1000 85 | breast_cancer = load_breast_cancer() 86 | X = breast_cancer.data 87 | X_normalized = MinMaxScaler().fit_transform(X) 88 | X = torch.tensor(X_normalized) 89 | y = torch.tensor(breast_cancer.target).unsqueeze(1) 90 | bias = torch.ones((X.shape[0], 1)) 91 | X = torch.cat((bias, X), dim=1) 92 | torch.manual_seed(0) 93 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2) 94 | svm = SVM(x_train, y_train) 95 | model_weights = svm.fit(x_train, y_train, max_epochs=num_epochs) 96 | y_pred = torch.sign(torch.mm(x_test, model_weights.T)) 97 | print(f'Accuracy: {accuracy_score(y_test, y_pred)}') 98 | -------------------------------------------------------------------------------- /Day-25-RANSAC/ransac.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import random 4 | from sklearn.datasets import make_regression 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | class LeastSquareModel: 9 | def fit(self, A, Y): 10 | A_T = A.T 11 | A_T_A = torch.mm(A_T, A) 12 | A_T_Y = torch.mm(A_T, Y) 13 | model = torch.mm(torch.pinverse(A_T_A),A_T_Y) 14 | 15 | return model 16 | 17 | class RansacModel: 18 | def __init__(self, curve_fitting_model): 19 | self.curve_fitting_model = curve_fitting_model 20 | 21 | def fit(self, A, Y, num_sample, threshold): 22 | num_iterations = math.inf 23 | iterations_done = 0 24 | num_samples = 3 25 | max_inlier_count = 0 26 | best_model = None 27 | probability_outlier = torch.scalar_tensor(0.5, dtype=torch.double) 28 | desired_prob = torch.scalar_tensor(0.95, dtype=torch.double) 29 | total_data = torch.column_stack((A, Y)) 30 | data_size = len(total_data) 31 | 32 | while num_iterations > 
iterations_done:

            random.shuffle(total_data)
            sample_data = total_data[:num_samples, :]
            estimated_model = self.curve_fitting_model.fit(sample_data[:, :-1], sample_data[:, -1:])
            y_cap = torch.mm(A, estimated_model)
            error = torch.abs(Y - y_cap.T)
            inlier_count = torch.count_nonzero(error < threshold)

            if inlier_count > max_inlier_count:
                max_inlier_count = inlier_count
                best_model = estimated_model

            probability_outlier = 1 - inlier_count/data_size
            #print('# inliers:', inlier_count)
            #print('# prob_outlier:', probability_outlier)
            num_iterations = torch.log(1 - desired_prob) / torch.log(1 - (1 - probability_outlier) ** num_sample)
            iterations_done = iterations_done + 1

        #print('# s:', iterations_done)
        #print('# n:', num_iterations)
        #print('# max_inlier_count: ', max_inlier_count)

        return best_model

def fit_curve(X, y):
    x_square = torch.pow(X, 2)

    A = torch.stack((x_square, X, torch.ones(X.shape[0]).unsqueeze(1)), dim=1)
    A = A.squeeze(2)
    threshold = torch.std(y) / 5
    ls_model = LeastSquareModel()
    ls_model_estimate = ls_model.fit(A, y)

    ls_model_y = torch.mm(A, ls_model_estimate)

    ransac_model = RansacModel(ls_model)
    ransac_model_estimate = ransac_model.fit(A, y, 3, threshold)
    ransac_model_y = torch.mm(A, ransac_model_estimate)

    return ls_model_y, ransac_model_y

if __name__ == '__main__':
    X1, y1 = make_regression(n_features=1, n_targets=1)
    X2, y2 = make_regression(n_features=1, n_targets=1)

    # X1, y1 = data1['x '], data1['y']
    # X2, y2 = data2['X'], data2['y']
    X1, y1 = torch.tensor(X1, dtype=torch.double), torch.tensor(y1, dtype=torch.double).unsqueeze(1)
    X2, y2 = torch.tensor(X2, dtype=torch.double), torch.tensor(y2, dtype=torch.double).unsqueeze(1)
    ls_model_y1, ransac_model_y1 = fit_curve(X1, y1)
    ls_model_y2, ransac_model_y2 = fit_curve(X2, y2)

    fig, (ax1, ax2) = plt.subplots(1, 2)

    ax1.set_title('Dataset-1')
    ax1.scatter(X1, y1, marker='o', color=(0, 1, 0), label='data points')
    ax1.plot(X1, ls_model_y1, color='red', label='Least square model')
    ax1.plot(X1, ransac_model_y1, color='blue', label='Ransac model')
    ax1.set(xlabel='x-axis', ylabel='y-axis')
    ax1.legend()

    ax2.set_title('Dataset-2')
    ax2.scatter(X2, y2, marker='o', color=(0, 1, 0), label='data points')
    ax2.plot(X2, ls_model_y2, color='red', label='Least square model')
    ax2.plot(X2, ransac_model_y2, color='blue', label='Ransac model')
    ax2.set(xlabel='x-axis', ylabel='y-axis')
    ax2.legend()

    plt.show()
--------------------------------------------------------------------------------
/Day-24-Regularization/regularization.py:
--------------------------------------------------------------------------------
import torch
from sklearn.datasets import load_iris

class Regularization:
    def __init__(self, X):
        self.X = X

    def dropout(self, drop_probability):
        """
        Dropout is a regularization technique for neural networks that drops a unit (along with its connections) at
        training time with a specified probability P (a common value is P = 0.5). At test time, all units are present,
        but with weights scaled by p (i.e. w becomes pw).
        The idea is to prevent co-adaptation, where the neural network becomes too reliant on particular
        connections, as this could be symptomatic of overfitting.
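        (As a quick check of the scaling applied below: with drop_probability = 0.5, roughly half of the entries of X
        are zeroed and the surviving entries are multiplied by 1 / 0.5 = 2, so the expected value of each activation
        is preserved; this is the "inverted dropout" convention of scaling at training time rather than at test time.)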
        Intuitively, dropout can be thought of as creating an implicit ensemble of neural networks.
        :param drop_probability: float value between 0 and 1
        """
        if drop_probability < 1.0:
            keep_probability = 1 - drop_probability
            masker = torch.FloatTensor(self.X.shape).uniform_(0, 1)
            masked = masker < keep_probability

            if keep_probability > 0.0:
                scale = 1 / keep_probability
            else:
                scale = 0.0

            return masked * self.X * scale

    def L2_Regularization(self, y, W, lambda_value):
        """
        Weight Decay, or L2 Regularization, is a regularization technique applied to the weights of a neural network.
        We minimize a loss function comprising both the primary loss function and a penalty on the L2 norm of the
        weights:
            L_new(w) = L_original(w) + lambda * W^T * W
        where lambda is a value determining the strength of the penalty (encouraging smaller weights).
        Weight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining
        it through the objective function. Often "weight decay" refers to the implementation where we specify it
        directly in the weight update rule, whereas "L2 regularization" is usually the implementation specified in the
        objective function.
        """
        Regularization_term = (lambda_value * torch.mm(W, W.T)).type(torch.DoubleTensor) / (2 * y.shape[0])
        output = torch.sum((y - torch.mm(self.X, W.T))**2, dim=0) + Regularization_term
        return output

    def L1_Regularization(self, y, W, lambda_value):
        """
        L1 Regularization is a regularization technique applied to the weights of a neural network. We minimize a loss
        function comprising both the primary loss function and a penalty on the L1 norm of the weights:
            L_new(w) = L_original(w) + lambda * ||W||_1
        where lambda is a value determining the strength of the penalty. In contrast to weight decay, L1 regularization
        promotes sparsity, i.e. some parameters have an optimal value of zero.
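        As a small worked example (hypothetical numbers): with lambda_value = 0.7, n_samples = 150 and
        W = [[0.2, -0.4, 0.1, 0.3]], the penalty term computed below is
        0.7 * (0.2 + 0.4 + 0.1 + 0.3) / (2 * 150) = 0.7 / 300 ≈ 0.0023.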
        """
        Regularization_term = torch.sum((lambda_value * torch.abs(W)).type(torch.DoubleTensor) / (2 * y.shape[0]), dim=1)
        output = torch.sum((y - torch.mm(self.X, W.T))**2, dim=0) + Regularization_term
        return output


if __name__ == '__main__':
    """
    Dropout:

    A = torch.arange(20).reshape((5, 4))
    print(A)
    Regularizer = Regularization(X=A)
    print(Regularizer.dropout(drop_probability=0.5))

    L2 Regularization or Weight Decay:

    data = load_iris()
    X = torch.tensor(data.data, dtype=torch.double)
    y = torch.tensor(data.target).unsqueeze(1)
    W = torch.FloatTensor(X.shape[1]).uniform_(0, 1).unsqueeze(0).type(torch.DoubleTensor)
    Regularizer = Regularization(X)
    Regularizer.L2_Regularization(y=y, W=W, lambda_value=0.7)

    L1 Regularization:

    data = load_iris()
    X = torch.tensor(data.data, dtype=torch.double)
    y = torch.tensor(data.target).unsqueeze(1)
    W = torch.FloatTensor(X.shape[1]).uniform_(0, 1).unsqueeze(0).type(torch.DoubleTensor)
    Regularizer = Regularization(X)
    print(Regularizer.L1_Regularization(y=y, W=W, lambda_value=0.7))
    """
--------------------------------------------------------------------------------
/Day-08-tf-idf/tfidf.py:
--------------------------------------------------------------------------------
import torch
from nltk.tokenize import word_tokenize

class TF_IDF:
    """
    TF - Term Frequency: count of a specific word in a document / total no. of words in the document
    IDF - Inverse Document Frequency: log ratio of (total no. of documents / no. of documents containing the word)
    """
    def text_processing(self, X):
        """
        Text processing: we clean the text by removing special characters and lower-casing, each sentence is
        converted into a list of words, and we then collect the unique words across all the
        documents combined together.
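        For example, with the documents used in __main__ below, 'Hi, how are you?' is tokenized, the punctuation
        tokens are dropped by isalpha(), and lower-casing yields ['hi', 'how', 'are', 'you'].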
14 | :param X: List of documents 15 | :return: Unique words (Vocabulary), all documents [[d_1], [d_2], ..[d_n]] 16 | """ 17 | documents = [] 18 | vocabulary = [] 19 | for document in X: 20 | document_words = [word.lower() for word in word_tokenize(document) if word.isalpha()] 21 | documents.append(document_words) 22 | for word in document_words: 23 | if word not in vocabulary: 24 | vocabulary.append(word) 25 | 26 | vocabulary = set(vocabulary) 27 | return vocabulary, documents 28 | 29 | def strtoint(self, vocabulary): 30 | """ 31 | :param vocabulary: all unique in the documents 32 | :return: mapping words to integer such as {'the': 1} 33 | """ 34 | wordToInt = {} 35 | for i, vocab in enumerate(vocabulary): 36 | wordToInt[vocab] = i 37 | 38 | return wordToInt 39 | 40 | def vocab_frequency(self, vocabulary, documents): 41 | """ 42 | :param vocabulary: all unique in the documents 43 | :param documents: all the documents 44 | :return: Frequency of word in all the documents combined together 45 | """ 46 | word_frequency = {} 47 | for word in vocabulary: 48 | word_frequency[word] = 0 49 | for document in documents: 50 | if word in document: 51 | word_frequency[word] += 1 52 | 53 | return word_frequency 54 | 55 | def tf(self, input_document, word): 56 | """ 57 | Calculating term_frequency 58 | :param input_document: test document 59 | :param word: each word in the test document 60 | :return: tf value (refer the formula above) 61 | """ 62 | num_words = len(input_document) 63 | word_frequency = len([token for token in input_document if token==word]) 64 | return word_frequency/num_words 65 | 66 | def idf(self, word, word_frequency, documents): 67 | """ 68 | :param word: words of the test input document 69 | :param word_frequency: word frequency w.r.t all the documents available. 
70 | :param documents: all the documents 71 | :return: idf value 72 | """ 73 | try: 74 | word_frequency = word_frequency[word] + 1 75 | except: 76 | word_frequency = 1 77 | 78 | return torch.log(torch.scalar_tensor(len(documents))/word_frequency) 79 | 80 | def fit_tranform(self, document, vocabulary, wordToInt, word_frequency, documents): 81 | """ 82 | :param document: test input document 83 | :param vocabulary: all unique words 84 | :param wordToInt: word to int mapping 85 | :param word_frequency: each word frequency throughout all the documents 86 | :param documents: all the documents 87 | :return: tf_idf vector for test input document 88 | """ 89 | tfidf_vector = torch.zeros((len(vocabulary), ), dtype=torch.double) 90 | for word in document: 91 | tf = self.tf(document, word) 92 | idf = self.idf(word, word_frequency, documents) 93 | tfidf_values = tf * idf 94 | tfidf_vector[wordToInt[word]] = tfidf_values 95 | 96 | return tfidf_vector 97 | 98 | if __name__ == '__main__': 99 | vectors = [] 100 | documents = ['Hi, how are you?', 101 | 'What are you doing?', 102 | 'what is your name?', 103 | 'who are you?'] 104 | 105 | tfidf_vectorizer = TF_IDF() 106 | vocabulary, processed_documents = tfidf_vectorizer.text_processing(documents) 107 | wordToInt = tfidf_vectorizer.strtoint(vocabulary) 108 | vocab_frequecy = tfidf_vectorizer.vocab_frequency(vocabulary, processed_documents) 109 | _, new_document = tfidf_vectorizer.text_processing([documents[0]]) 110 | print(tfidf_vectorizer.fit_tranform(new_document[0],vocabulary, wordToInt, vocab_frequecy, documents)) 111 | -------------------------------------------------------------------------------- /Day-14-DBSCAN/dbscan.py: -------------------------------------------------------------------------------- 1 | """ 2 | Checkout Density Based Spectral Clustering Blag: 3 | https://blog.dominodatalab.com/topology-and-density-based-clustering/ 4 | 5 | - Compared to centroid-based clustering like k-means, density-based clustering works by 6 | identifying “dense” clusters of points, allowing it to learn clusters of arbitrary shape 7 | and identify outliers in the data. 8 | """ 9 | import torch 10 | from sklearn.datasets import load_iris 11 | from sklearn.model_selection import train_test_split 12 | from sklearn.metrics import accuracy_score 13 | from sklearn import datasets 14 | 15 | class DBScan: 16 | def __init__(self, eps = 2.5, min_points=30): 17 | """ 18 | eps - radius distance around which a cluster is considered. 
19 | min_points - Number of points to be present inside the radius 20 | (check out density reachable or border points from blog to understand how cluster points are considered) 21 | """ 22 | self.eps = eps 23 | self.minimum_points = min_points 24 | 25 | def euclidean_distance(self, x1, x2): 26 | """ 27 | :param x1: input tensor 28 | :param x2: input tensor 29 | :return: distance between tensors 30 | """ 31 | return torch.cdist(x1, x2) 32 | 33 | def direct_neighbours(self, sample): 34 | """ 35 | :param sample: Sample whose neighbors needs to be identified 36 | :return: all the neighbors within eps distance 37 | """ 38 | neighbors = [] 39 | idxs = torch.arange(self.X.shape[0]) 40 | for i, _sample in enumerate(self.X[idxs != sample]): 41 | 42 | distance = self.euclidean_distance(self.X[sample].unsqueeze(0), _sample.unsqueeze(0)) 43 | if distance < self.eps: 44 | neighbors.append(i) 45 | 46 | return torch.tensor(neighbors) 47 | 48 | def density_neighbors(self, sample, neighbors): 49 | """ 50 | Recursive method which expands the cluster until we have reached the border 51 | of the dense area (density determined by eps and min_samples) 52 | 53 | :param sample: Sample whose border points to be identified 54 | :param neighbors: samples and its neighbors within eps distance 55 | :return: It updates the number of points assigned to each cluster, by finding 56 | border points and its relative points. In a sense, it expands cluster. 57 | """ 58 | cluster = [sample] 59 | for neighbor_i in neighbors: 60 | if not neighbor_i in self.visited_samples: 61 | self.visited_samples.append(neighbor_i) 62 | self.neighbors[neighbor_i] = self.direct_neighbours(neighbor_i) 63 | 64 | if len(self.neighbors[neighbor_i]) >= self.minimum_points: 65 | expanded_cluster = self.density_neighbors( 66 | neighbor_i, self.neighbors[neighbor_i]) 67 | cluster = cluster + expanded_cluster 68 | else: 69 | cluster.append(neighbor_i) 70 | 71 | return cluster 72 | 73 | def get_cluster_label(self): 74 | """ 75 | :return: assign cluster label based on expanded clusters 76 | """ 77 | labels = torch.zeros(self.X.shape[0]).fill_(len(self.clusters)) 78 | for cluster_i, cluster in enumerate(self.clusters): 79 | for sample_i in cluster: 80 | labels[sample_i] = cluster_i 81 | 82 | return labels 83 | 84 | def predict(self, X): 85 | """ 86 | :param X: input tensor 87 | :return: predicting the labels os samples depending on its distance from clusters 88 | """ 89 | self.X = X 90 | self.clusters = [] 91 | self.visited_samples = [] 92 | self.neighbors = {} 93 | n_samples = X.shape[0] 94 | 95 | for sample_i in range(n_samples): 96 | if sample_i in self.visited_samples: 97 | continue 98 | self.neighbors[sample_i] = self.direct_neighbours(sample_i) 99 | if len(self.neighbors[sample_i]) >= self.minimum_points: 100 | self.visited_samples.append(sample_i) 101 | new_cluster = self.density_neighbors( 102 | sample_i, self.neighbors[sample_i]) 103 | self.clusters.append(new_cluster) 104 | 105 | cluster_labels = self.get_cluster_label() 106 | return cluster_labels 107 | 108 | if __name__ == '__main__': 109 | iris = load_iris() 110 | torch.manual_seed(0) 111 | X = torch.tensor(iris.data, dtype=torch.float) 112 | y = torch.tensor(iris.target) 113 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3) 114 | dbscan = DBScan(eps=0.25, min_points=20) 115 | ypred = dbscan.predict(x_train) 116 | print(f'Accuracy Score: {accuracy_score(y_train, ypred)}') 117 | -------------------------------------------------------------------------------- 
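A quick way to sanity-check the from-scratch DBSCAN above is to cluster the same data with scikit-learn's
implementation and compare the groupings (cluster ids may be permuted, and scikit-learn marks noise points as -1).
A minimal sketch, assuming the same iris setup used in the __main__ block above:

    import torch
    from sklearn.cluster import DBSCAN as SkDBSCAN
    from sklearn.datasets import load_iris

    iris = load_iris()
    X = torch.tensor(iris.data, dtype=torch.float)
    # Same radius / min-points settings as the from-scratch run above
    sk_labels = SkDBSCAN(eps=0.25, min_samples=20).fit_predict(X.numpy())
    print(sk_labels)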
/Day-27-MLP/mlp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Multi-Layer Preceptron 3 | """ 4 | import torch 5 | from sklearn.datasets import load_digits 6 | from sklearn.model_selection import train_test_split 7 | 8 | class Sigmoid: 9 | def __call__(self, X): 10 | return 1 / (1 + torch.exp(-X)) 11 | 12 | def gradient(self, X): 13 | return self.__call__(X) * (1 - self.__call__(X)) 14 | 15 | class Softmax: 16 | def __call__(self, X): 17 | e_x = torch.exp(X - torch.max(X, dim=-1, keepdim=True).values) 18 | return e_x / torch.sum(e_x, dim=1, keepdim=True) 19 | 20 | def gradient(self, X): 21 | p = self.__call__(X) 22 | return p * (1 - p) 23 | 24 | def accuracy_score(y, p): 25 | accuracy = torch.sum(y == p, dim=0) / len(y) 26 | return accuracy 27 | 28 | def to_categorical(X, n_col=None): 29 | if not n_col: 30 | n_col = torch.amax(X) + 1 31 | 32 | one_hot = torch.zeros((X.shape[0], n_col)) 33 | one_hot[torch.arange(X.shape[0]), X] = 1 34 | return one_hot 35 | 36 | def normalization(X): 37 | """ 38 | :param X: Input tensor 39 | :return: Normalized input using l2 norm. 40 | """ 41 | l2 = torch.norm(X, p=2, dim=-1) 42 | l2[l2 == 0] = 1 43 | return X / l2.unsqueeze(1) 44 | 45 | class CrossEntropy: 46 | def __init__(self): 47 | pass 48 | def loss(self, y, p): 49 | p = torch.clip(p, 1e-15, 1-1e-15) 50 | return - y * torch.log(p) - (1 -y) * torch.log(1 - p) 51 | 52 | def accuracy_score(self, y, p): 53 | return accuracy_score(torch.argmax(y, dim=1), torch.argmax(p, dim=1)) 54 | 55 | def gradient(self, y, p): 56 | p = torch.clip(p, 1e-15, 1 - 1e-15) 57 | return - (y / p) + (1 - y) / (1 -p) 58 | 59 | class MultiLayerPerceptron: 60 | def __init__(self, n_hidden, n_iterations=1000, learning_rate=0.001): 61 | self.n_hidden = n_hidden 62 | self.n_iterations = n_iterations 63 | self.learning_rate = learning_rate 64 | self.hidden_activation = Sigmoid() 65 | self.output_activation = Softmax() 66 | self.loss = CrossEntropy() 67 | 68 | def initalize_weight(self, X, y): 69 | n_samples, n_features = X.shape 70 | _, n_outputs = y.shape 71 | limit = 1 / torch.sqrt(torch.scalar_tensor(n_features)) 72 | self.W = torch.DoubleTensor(n_features, self.n_hidden).uniform_(-limit, limit) 73 | 74 | self.W0 = torch.zeros((1, self.n_hidden)) 75 | limit = 1 / torch.sqrt(torch.scalar_tensor(self.n_hidden)) 76 | self.V = torch.DoubleTensor(self.n_hidden, n_outputs).uniform_(-limit, limit) 77 | self.V0 = torch.zeros((1, n_outputs)) 78 | 79 | def fit(self, X, y): 80 | self.initalize_weight(X, y) 81 | for i in range(self.n_iterations): 82 | hidden_input = torch.mm(X, self.W) + self.W0 83 | hidden_output = self.hidden_activation(hidden_input) 84 | 85 | output_layer_input = torch.mm(hidden_output, self.V) + self.V0 86 | y_pred = self.output_activation(output_layer_input) 87 | 88 | grad_wrt_first_output = self.loss.gradient(y, y_pred) * self.output_activation.gradient(output_layer_input) 89 | grad_v = torch.mm(hidden_output.T, grad_wrt_first_output) 90 | grad_v0 = torch.sum(grad_wrt_first_output, dim=0, keepdim=True) 91 | 92 | grad_wrt_first_hidden = torch.mm(grad_wrt_first_output, self.V.T) * self.hidden_activation.gradient(hidden_input) 93 | grad_w = torch.mm(X.T, grad_wrt_first_hidden) 94 | grad_w0 = torch.sum(grad_wrt_first_hidden, dim=0, keepdim=True) 95 | 96 | # Update weights (by gradient descent) 97 | # Move against the gradient to minimize loss 98 | self.V -= self.learning_rate * grad_v 99 | self.V0 -= self.learning_rate * grad_v0 100 | self.W -= self.learning_rate * grad_w 101 
| self.W0 -= self.learning_rate * grad_w0 102 | 103 | # Use the trained model to predict labels of X 104 | 105 | def predict(self, X): 106 | # Forward pass: 107 | hidden_input = torch.mm(X,self.W) + self.W0 108 | hidden_output = self.hidden_activation(hidden_input) 109 | output_layer_input = torch.mm(hidden_output, self.V) + self.V0 110 | y_pred = self.output_activation(output_layer_input) 111 | return y_pred 112 | 113 | 114 | if __name__ == '__main__': 115 | data = load_digits() 116 | X = normalization(torch.tensor(data.data, dtype=torch.double)) 117 | y = torch.tensor(data.target) 118 | 119 | # Convert the nominal y values to binary 120 | y = to_categorical(y) 121 | 122 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1) 123 | # MLP 124 | clf = MultiLayerPerceptron(n_hidden=16, 125 | n_iterations=1000, 126 | learning_rate=0.01) 127 | 128 | clf.fit(X_train, y_train) 129 | y_pred = torch.argmax(clf.predict(X_test), dim=1) 130 | y_test = torch.argmax(y_test, dim=1) 131 | 132 | accuracy = accuracy_score(y_test, y_pred) 133 | print("Accuracy:", accuracy) 134 | 135 | 136 | -------------------------------------------------------------------------------- /Day-13-Adaboost/adaboost.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adaboost Algorithm Blog post: 3 | https://www.mygreatlearning.com/blog/adaboost-algorithm/ 4 | """ 5 | import torch 6 | from sklearn.datasets import load_breast_cancer 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.metrics import accuracy_score 9 | 10 | class stump: 11 | "Each Stump is a weak classifier and combination of them are referred as Boosting Mechanism" 12 | def __init__(self): 13 | """ 14 | * Polarity is used to classify sample as either 1 or -1 15 | * feature index is for identifying node for separating classes 16 | * features are compared against threshold value 17 | * Alpha value indicates the classifier accuracy 18 | """ 19 | self.polarity = 1 20 | self.feature_index = None 21 | self.threshold = None 22 | self.alpha = None 23 | 24 | class Adaboost: 25 | def __init__(self, num_classifiers): 26 | """ 27 | :param num_classifiers: Number of weak classifiers 28 | """ 29 | self.num_classifiers = num_classifiers 30 | 31 | def fit(self, X, y): 32 | """ 33 | :param X: Input tensor 34 | :param y: output tensor 35 | :return: Creates a list of weak classifier with set of properties as 36 | mentioned in stump class. 37 | * Initialize weights to 1/N, N is number of samples 38 | * Iterate through different weak classifiers 39 | * Minimum error given for using a certain feature value threshold for predicting sample label 40 | * Iterate through each feature and its unique values to find the threshold value 41 | * Label samples with value less than threshold as -1 42 | * Error, Sum of weights of misclassified samples 43 | * If the error is over 50% we flip the polarity so that samples that were classified as 0 are 44 | classified as 1, and vice versa. 
        E.g. error = 0.8 => (1 - error) = 0.2
        * If this threshold resulted in the smallest error we save the configuration
        * Calculate the alpha which is used to update the sample weights,
          alpha is also an approximation of this classifier's proficiency
        * set all predictions to '1' initially
        * The indexes where the sample values are below threshold, label them as -1
        * Update weights and normalize them to sum to one
        * save each weak classifier
        """
        n_samples, n_features = X.shape[0], X.shape[1]
        weight = torch.zeros(n_samples).fill_(1/n_samples)
        self.clfs = []
        for _ in range(self.num_classifiers):
            clf = stump()
            minimum_error = float('inf')
            for feature_i in range(n_features):
                feature_values = X[:, feature_i].unsqueeze(1)
                unique_values = feature_values.unique()
                for threshold in unique_values:
                    p = 1
                    prediction = torch.ones(y.shape)
                    prediction[X[:, feature_i] < threshold] = -1
                    error = torch.sum(weight[y != prediction])
                    if error > 0.5:
                        error = 1 - error
                        p = -1

                    if error < minimum_error:
                        clf.polarity = p
                        clf.threshold = threshold
                        clf.feature_index = feature_i
                        minimum_error = error

            # alpha = 0.5 * ln((1 - error) / error); the whole ratio sits inside the log
            clf.alpha = 0.5 * torch.log((1.0 - minimum_error) / (minimum_error + 1e-10))
            predictions = torch.ones(y.shape)
            negative_idx = (clf.polarity * X[:, clf.feature_index] < clf.polarity * clf.threshold)
            predictions[negative_idx] = -1

            weight *= torch.exp(-clf.alpha * y * predictions)
            weight /= torch.sum(weight)

            self.clfs.append(clf)

    def predict(self, X):
        """
        Apply every saved weak classifier, weight its vote by alpha and take the sign of the weighted sum.
        :param X: Input tensor
        :return: predicted estimate of ground truth.
        """
        n_samples = X.shape[0]
        y_pred = torch.zeros((n_samples, 1))
        for clf in self.clfs:
            predictions = torch.ones(y_pred.shape)
            negative_idx = (clf.polarity * X[:, clf.feature_index] < clf.polarity * clf.threshold)
            predictions[negative_idx] = -1
            y_pred += clf.alpha * predictions

        y_pred = torch.sign(y_pred).flatten()
        return y_pred

if __name__ == '__main__':
    breast_cancer = load_breast_cancer()
    torch.manual_seed(0)
    X = torch.tensor(breast_cancer.data, dtype=torch.float)
    y = torch.tensor(breast_cancer.target)
    y[y == 0] = -1  # AdaBoost as implemented above expects labels in {-1, +1}
    n_classes = len(torch.unique(y))
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    clf = Adaboost(num_classifiers=20)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
--------------------------------------------------------------------------------
/Day-20-SpectralClustering/spectralClustering.py:
--------------------------------------------------------------------------------
"""
Reference: https://en.wikipedia.org/wiki/Spectral_clustering
Blog Post: https://towardsdatascience.com/spectral-clustering-aba2640c0d5b
"""
import torch
from sklearn.datasets import make_moons
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform
from sklearn.cluster import KMeans

def SpectralClustering(X, K=8, adj=True, metric='euclidean', sim_graph='fully_connect', sigma=1, knn=10, epsilon=0.5, normalized=1):
    """
    :param X: Input tensor
    :param K: number of clusters to form with KMeans
    :param adj: whether X is already a pairwise-distance (adjacency) matrix
    :param
metric: 17 | :param sim_graph: Technique to create edges between nodes in graph. 18 | :param sigma: Parameter for RBF kernel 19 | :param knn: To connect with 10 nearest neighors with edges 20 | :param epsilon:Parameter for finding edges 21 | :param normalized: 22 | :return: 23 | """ 24 | 25 | # To convert our adjacency matrix as connected graph we can use technique like KNN. 26 | 27 | if not adj: 28 | adjacency_matrix = squareform(X, metric=metric) 29 | else: 30 | adjacency_matrix = X 31 | 32 | if sim_graph == 'fully_connect': 33 | adjacency_matrix = torch.from_numpy(adjacency_matrix) 34 | w = torch.exp(-adjacency_matrix/ (2 * sigma)) 35 | 36 | elif sim_graph =='eps_neighbor': 37 | adjacency_matrix = torch.from_numpy(adjacency_matrix) 38 | w = (adjacency_matrix <= epsilon).type(torch.DoubleTensor) 39 | elif sim_graph == 'knn': 40 | adjacency_matrix = torch.from_numpy(adjacency_matrix) 41 | w = torch.zeros(adjacency_matrix.shape) 42 | adjacency_sort = torch.argsort(adjacency_matrix, dim=1) 43 | for i in range(adjacency_sort.shape[0]): 44 | w[i, adjacency_sort[i, :][:(knn+1)]] = 1 45 | elif sim_graph == 'mutual_knn': 46 | adjacency_matrix = torch.from_numpy(adjacency_matrix) 47 | w1 = torch.zeros(adjacency_matrix.shape) 48 | adjacency_sort = torch.argsort(adjacency_matrix, dim=1) 49 | for i in range(adjacency_matrix.shape[0]): 50 | for j in adjacency_sort[i, :][:(knn+1)]: 51 | if i==j: 52 | w1[i, i] = 1 53 | elif w1[i, j] == 0 and w1[j, i]==0: 54 | w1[i, j] = 0.5 55 | else: 56 | w1[i, j] = w1[j, i] = 1 57 | w = w1[w1>0.5].type(torch.DoubleTensor).clone 58 | else: 59 | raise ValueError("The 'sim_graph' argument should be one of the strings, 'fully_connect', 'eps_neighbor', 'knn', or 'mutual_knn'!") 60 | 61 | #Degree Matrix 62 | D = torch.diag(torch.sum(w, dim=1)) 63 | 64 | #Graph Laplacian 65 | L = D - w 66 | 67 | # Finding eigen Value of Graph Laplacian Matrix, 68 | """ 69 | The eigenvalues of the Laplacian indicated that there were four clusters. 70 | The vectors associated with those eigenvalues contain information on how to segment the nodes. 71 | """ 72 | if normalized == 1: 73 | D_INV = torch.diag(1/torch.diag(D)) 74 | lambdas, V = torch.eig(torch.mm(D_INV, L), eigenvectors=True) 75 | ind = torch.argsort(torch.norm(torch.reshape(lambdas[:,0], (1, len(lambdas))), dim=0)) 76 | V_K = V[:, ind[:K]] 77 | 78 | elif normalized == 2: 79 | D_INV_SQRT = torch.diag(1/torch.sqrt(torch.diag(D))) 80 | lambdas, V = torch.eig(torch.matmul(torch.matmul(D_INV_SQRT, L), D_INV_SQRT)) 81 | ind = torch.argsort(torch.norm(torch.reshape(lambdas[:,0], (1, len(lambdas))), dim=0)) 82 | V_K = torch.real(V[:, ind[:,K]]) 83 | if any(V_K.sum(dim=1) == 0): 84 | raise ValueError("Can't normalize the matrix with the first K eigenvectors as columns! Perhaps the \ 85 | number of clusters K or the number of neighbors in k-NN is too small.") 86 | V_K = V_K/torch.reshape(torch.norm(V_K, dim=1), (V_K.shape[0], 1)) 87 | else: 88 | lambdas, V = torch.eig(L) 89 | ind = torch.argsort(torch.norm(torch.reshape(lambdas[:,0], (1, len(lambdas))), dim=0)) 90 | V_K = torch.real(V[:, ind[:K]]) 91 | 92 | # KMeans is used for assigning the labels to the clusters. 93 | kmeans = KMeans(n_clusters=K, init='k-means++', random_state=0).fit(V_K) 94 | return kmeans 95 | 96 | if __name__ == '__main__': 97 | moon_data, moon_labels = make_moons(100, noise=0.05) 98 | moon_data = torch.tensor(moon_data) 99 | moon_labels = torch.tensor(moon_labels) 100 | # Compute the adjacency matrix, Similarity Matrix. 
101 | Adj_mat = squareform(pdist(moon_data, metric='euclidean', p=2)) 102 | # Spectral clustering... 103 | spec_re1 = SpectralClustering(Adj_mat, K=2, sim_graph='fully_connect', sigma=0.01, normalized=1) 104 | spec_re2 = SpectralClustering(Adj_mat, K=2, sim_graph='knn', knn=10, normalized=1) 105 | 106 | # Often need to change figsize when doing subplots 107 | plt.figure(figsize=(8, 4)) 108 | plt.subplot(1, 2, 1) 109 | plt.scatter(x=moon_data[:, 0], y=moon_data[:, 1], c=spec_re1.labels_, s=2) 110 | plt.colorbar() 111 | plt.title('Fully connected graph with RBF kernel ($\sigma=0.01$)') 112 | 113 | plt.subplot(1, 2, 2) 114 | plt.scatter(x=moon_data[:, 0], y=moon_data[:, 1], c=spec_re2.labels_, s=2) 115 | plt.colorbar() 116 | plt.title('$k$-Nearest Neighbor graphs ($k=10$)') 117 | 118 | plt.suptitle('Spectral Clustering', y=-0.01) 119 | 120 | # Automatrically adjust padding between subpots 121 | plt.tight_layout() 122 | plt.show() 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /Day-17-K-Medoids/PAM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from utility import euclidean_distance 3 | from sklearn.datasets import load_iris 4 | from sklearn.metrics import accuracy_score 5 | from sklearn.model_selection import train_test_split 6 | """ 7 | K-Medoids also known as Partitioned Around Medoids. 8 | """ 9 | class PAM: 10 | def __init__(self, k=2): 11 | """ 12 | :param k: Number of clusters to be formed using Medoids 13 | """ 14 | self.k = k 15 | 16 | def random_medoids(self, X): 17 | """ 18 | Similar to KMeans, selecting a random samples from dataset as medoids 19 | :param X: Input tensor 20 | :return: For iris dataset, three medoids are selected. 21 | """ 22 | n_samples, n_features = X.shape[0], X.shape[1] 23 | medoids = torch.zeros((self.k, n_features)) 24 | for i in range(self.k): 25 | idx = torch.randperm(len(X))[1] 26 | medoid = X[idx] 27 | medoids[i] = medoid 28 | 29 | return medoids 30 | 31 | def closest_medoid(self, sample, medoids): 32 | """ 33 | Calculate distance between each sample and every medoids 34 | :param sample: Data point 35 | :param medoids: Similar to centroid in KMeans. 36 | :return: Assigining medoid to each sample 37 | """ 38 | closest_i = None 39 | closest_distance = float('inf') 40 | for i, medoid in enumerate(medoids): 41 | distance = euclidean_distance(sample, medoid) 42 | if distance < closest_distance: 43 | closest_i = i 44 | closest_distance = distance 45 | return closest_i 46 | 47 | def create_clusters(self, X, medoids): 48 | """ 49 | Creating clusters after assigning samples to each medoid 50 | :return: 51 | """ 52 | clusters = [[] for _ in range(self.k)] 53 | for sample_i, sample in enumerate(X): 54 | medoid_i = self.closest_medoid(sample, medoids) 55 | clusters[medoid_i].append(sample_i) 56 | 57 | return clusters 58 | 59 | def calculate_cost(self, X, clusters, medoids): 60 | """ 61 | Total distance between samples and their medoid 62 | :param clusters: Three medoids with samples assigned to each of them 63 | :return: Total distance as mentioned above 64 | """ 65 | cost = 0 66 | for i, cluster in enumerate(clusters): 67 | medoid = medoids[i] 68 | for sample_i in cluster: 69 | cost += euclidean_distance(X[sample_i], medoid) 70 | 71 | return cost 72 | 73 | def get_non_medoids(self, X, medoids): 74 | """ 75 | Mediods are points in cluster acts reference for all other points(non-medoids) 76 | to find distance between them. 
77 | :return: all the data point which are not medoids. 78 | """ 79 | non_medoids = [] 80 | for sample in X: 81 | if not sample in medoids: 82 | non_medoids.append(sample) 83 | 84 | return non_medoids 85 | 86 | def get_cluster_label(self, clusters, X): 87 | """ 88 | Assigning each sample as index to a medoid. 89 | """ 90 | y_pred = torch.zeros(X.shape[0]) 91 | for cluster_i in range(len(clusters)): 92 | cluster = clusters[cluster_i] 93 | for sample_i in cluster: 94 | y_pred[sample_i] = cluster_i 95 | 96 | return y_pred 97 | 98 | def predict(self, X): 99 | """ 100 | Do Partitioning Around Medoids and return the cluster labels 101 | * First, randomly selection medoids 102 | * Create cluster based on medoids selected and samples 103 | * Cost(distance) of the existing cluster and the samples in it. 104 | * Iterate, until we find the least cost with best medoids. 105 | * Find all non-medoids 106 | :return: Predicting medoid for test sample or a data point. 107 | """ 108 | 109 | medoids = self.random_medoids(X) 110 | clusters = self.create_clusters(X, medoids) 111 | cost = self.calculate_cost(X, clusters, medoids) 112 | 113 | while True: 114 | best_medoids = medoids 115 | lowest_cost = cost 116 | for medoid in medoids: 117 | non_medoids = self.get_non_medoids(X, medoids) 118 | # Calculate the cost when swapping medoid and samples 119 | for sample in non_medoids: 120 | # Swap sample with the medoid 121 | new_medoids = medoids.clone() 122 | new_medoids[medoids == medoid][:4] = sample 123 | # Assign samples to new medoids 124 | new_clusters = self.create_clusters(X, new_medoids) 125 | # Calculate the cost with the new set of medoids 126 | new_cost = self.calculate_cost(X, new_clusters, new_medoids) 127 | # If the swap gives us a lower cost we save the medoids and cost 128 | if new_cost < lowest_cost: 129 | lowest_cost = new_cost 130 | best_medoids = new_medoids 131 | # If there was a swap that resultet in a lower cost we save the 132 | # resulting medoids from the best swap and the new cost 133 | if lowest_cost < cost: 134 | cost = lowest_cost 135 | medoids = best_medoids 136 | else: 137 | break 138 | 139 | final_clusters = self.create_clusters(X, medoids) 140 | # Return the samples cluster indices as labels 141 | return self.get_cluster_label(final_clusters, X) 142 | 143 | 144 | if __name__ == '__main__': 145 | data = load_iris() 146 | X = data.data 147 | y = data.target 148 | # Cluster the data using K-Medoids 149 | X = torch.tensor(X, dtype=torch.float) 150 | y = torch.tensor(y) 151 | clf = PAM(k=3) 152 | y_pred = clf.predict(X) 153 | print(accuracy_score(y_pred, y)) 154 | 155 | 156 | -------------------------------------------------------------------------------- /Day-04-KMeans-Clustering/KMeans.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import scipy 3 | import numpy as np 4 | from sklearn.datasets import load_iris 5 | from sklearn.model_selection import train_test_split 6 | from sklearn.metrics import accuracy_score 7 | 8 | 9 | class KMeans: 10 | def __init__(self, X, k, iterations): 11 | """ 12 | :param X: input tensor 13 | :param k: Number of clusters 14 | :variable samples: Number of samples 15 | :variable features: Number of features 16 | """ 17 | self.k = k 18 | self.max_iterations = iterations 19 | self.samples = X.shape[0] 20 | self.features = X.shape[1] 21 | self.KMeans_Centroids = [] 22 | 23 | # def initialize_centroid(self, X): 24 | # return X[torch.randint(X.shape[0], (self.k,))] 25 | 26 | def initialize_centroid(self, 
X, K): 27 | """ 28 | Initialization Technique is KMeans++. Thanks to stackoverflow. 29 | https://stackoverflow.com/questions/5466323/how-could-one-implement-the-k-means-algorithm 30 | :param X: Input Tensor 31 | :param K: Number of clusters to build 32 | :return: Selection of three centroid vector from X 33 | """ 34 | I = [0] 35 | C = [X[0]] 36 | for k in range(1, K): 37 | D2 = np.array([min([np.inner(c - x, c - x) for c in C]) for x in X]) 38 | probs = D2 / D2.sum() 39 | cumprobs = probs.cumsum() 40 | r = torch.rand(1).item() 41 | 42 | for j, p in enumerate(cumprobs): 43 | if r < p: 44 | i = j 45 | break 46 | I.append(i) 47 | return X[I] 48 | 49 | def distance(self, sample, centroid, dim=None, default="euclidean"): 50 | if default == "euclidean": 51 | return torch.norm(sample - centroid, 2, 0) 52 | elif default == "manhattan": 53 | return torch.sum(torch.abs(sample - centroid)) 54 | elif default == "cosine": 55 | return torch.sum(sample * centroid) / (torch.norm(sample) * torch.norm(centroid)) 56 | else: 57 | raise ValueError("Unknown similarity distance type") 58 | 59 | def closest_centroid(self, sample, centroids): 60 | """ 61 | :param sample: sample whose distance from centroid is to be measured 62 | :param centroids: all the centroids of all the clusters 63 | :return: centroid's index is passed for each sample 64 | """ 65 | closest = None 66 | min_distance = float('inf') 67 | for idx, centroid in enumerate(centroids): 68 | distance = self.distance(sample, centroid) 69 | if distance < min_distance: 70 | closest = idx 71 | min_distance = distance 72 | 73 | return closest 74 | 75 | def create_clusters(self, centroids, X): 76 | """ 77 | :param centroids: Centroids of all clusters 78 | :param X: Input tensor 79 | :return: Assigning each sample to a cluster. 80 | """ 81 | n_samples = X.shape[0] 82 | k_clusters = [[] for _ in range(self.k)] 83 | for idx, sample in enumerate(X): 84 | centroid_index = self.closest_centroid(sample, centroids) 85 | k_clusters[centroid_index].append(idx) 86 | 87 | return k_clusters 88 | 89 | def update_centroids(self, clusters, X): 90 | """ 91 | :return: Updating centroids after each iteration. 92 | """ 93 | centroids = torch.zeros((self.k, self.features)) 94 | for idx, cluster in enumerate(clusters): 95 | centroid = torch.mean(X[cluster], dim=0) 96 | centroids[idx] = centroid 97 | 98 | return centroids 99 | 100 | def label_clusters(self, clusters, X): 101 | """ 102 | Labeling the samples with index of clusters 103 | :return: labeled samples 104 | """ 105 | y_pred = torch.zeros(X.shape[0]) 106 | for idx, cluster in enumerate(clusters): 107 | for sample_idx in cluster: 108 | y_pred[sample_idx] = idx 109 | 110 | return y_pred 111 | 112 | def fit(self, X): 113 | """ 114 | Initializing centroid using Kmeans++, then find distance between each sample and initial centroids, then assign 115 | cluster label based on min_distance, repeat this process for max_iteration and simultaneously updating 116 | centroid by calculating distance between sample and updated centroid. Convergence happen when difference between 117 | previous and updated centroid is None. 118 | :return: updated centroids of the cluster after max_iterations. 
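        KMeans++ seeding picks each new centroid with probability proportional to D(x)^2, the squared distance
        from x to the nearest already-chosen centroid, i.e. P(x) = D(x)^2 / Σ D(x')^2, which spreads the initial
        centroids out and usually speeds up convergence.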
119 | """ 120 | centroids = self.initialize_centroid(X, self.k) 121 | for _ in range(self.max_iterations): 122 | clusters = self.create_clusters(centroids, X) 123 | previous_centroids = centroids 124 | centroids = self.update_centroids(clusters, X) 125 | difference = centroids - previous_centroids 126 | 127 | # print(difference) 128 | if not difference.numpy().any(): 129 | break 130 | 131 | self.KMeans_Centroids = centroids 132 | return centroids 133 | 134 | def predict(self, X): 135 | """ 136 | :return: label/cluster number for each input sample is returned 137 | """ 138 | if not self.KMeans_Centroids.numpy().any(): 139 | raise Exception("No Centroids Found. Run KMeans fit") 140 | 141 | clusters = self.create_clusters(self.KMeans_Centroids, X) 142 | labels = self.label_clusters(clusters, X) 143 | 144 | return labels 145 | 146 | 147 | if __name__ == '__main__': 148 | iris = load_iris() 149 | torch.manual_seed(0) 150 | X = torch.tensor(iris.data, dtype=torch.float) 151 | y = torch.tensor(iris.target) 152 | n_classes = len(torch.unique(y)) 153 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.1) 154 | kmeans = KMeans(x_train, k=n_classes, iterations=300) 155 | kmeans.fit(x_train) 156 | ypred = kmeans.predict(x_test) 157 | print(f'Accuracy Score: {accuracy_score(y_test, ypred)}') 158 | -------------------------------------------------------------------------------- /Day-18-TSNE/tsne.py: -------------------------------------------------------------------------------- 1 | """ 2 | Reference: https://towardsdatascience.com/t-sne-clearly-explained-d84c537f53a 3 | Playground: https://distill.pub/2016/misread-tsne/ 4 | Wiki: https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding 5 | """ 6 | import torch 7 | import logging 8 | from sklearn.datasets import load_iris, load_digits, load_diabetes 9 | class TSNE: 10 | """ 11 | The goal is to take a set of points in a high-dimensional space and find a faithful representation of those 12 | points in a lower-dimensional space, typically the 2D plane. The algorithm is non-linear and adapts to the 13 | underlying data, performing different transformations on different regions. Those differences can be a major 14 | source of confusion. 15 | """ 16 | def __init__(self, n_components=2, preplexity=5.0, max_iter=1, learning_rate=200): 17 | """ 18 | :param n_components: 19 | :param preplexity: how to balance attention between local and global aspects of your data. The parameter is, 20 | in a sense, a guess about the number of close neighbors each point has. Typical value between 5 to 50. 21 | With small value of preplexity, the local groups are formed and with increasing preplexity global groups are 22 | formed. A perplexity is more or less a target number of neighbors for our central point. 23 | :param max_iter: Iterations to stabilize the results and converge. 
24 | :param learning_rate: 25 | """ 26 | self.max_iter = max_iter 27 | self.preplexity = preplexity 28 | self.n_components = n_components 29 | self.initial_momentum = 0.5 30 | self.final_momentum = 0.8 31 | self.min_gain = 0.01 32 | self.lr = learning_rate 33 | self.tol = 1e-5 34 | self.preplexity_tries = 50 35 | 36 | def l2_distance(self, X): 37 | """ 38 | :return: Distance between two vectors 39 | """ 40 | sum_X = torch.sum(X * X, dim=1) 41 | return (-2* torch.mm(X, X.T) + sum_X).T + sum_X 42 | 43 | def get_pairwise_affinities(self, X): 44 | """ 45 | :param X: High dimensional input 46 | :return: a (Gaussian) probability distribution over pairs of high-dimensional objects in such a way that similar 47 | objects are assigned a higher probability while dissimilar points are assigned a lower probability. To find 48 | variance for this distribution we use Binary search. The variance is calculated between fixed preplexity given 49 | by the user. 50 | """ 51 | affines = torch.zeros((self.n_samples, self.n_samples), dtype=torch.float32) 52 | target_entropy = torch.log(torch.scalar_tensor(self.preplexity)) 53 | distance = self.l2_distance(X) 54 | for i in range(self.n_samples): 55 | affines[i, :] = self.binary_search(distance[i], target_entropy) 56 | 57 | #affines = torch.diagonal(affines).fill_(1.0e-12) 58 | affines[torch.eye(affines.shape[0]).byte()] = 1.0e-12 59 | affines = affines.clip(min=1e-100) 60 | affines = (affines + affines.T)/(2*self.n_samples) 61 | return affines 62 | 63 | def q_distribution(self, D): 64 | """ 65 | A (Student t-distirbution)distribution is learnt in lower dimensional space, n_samples and n_components 66 | (2 or 3 dimension), and similar to above method 'get_pairwise_affinities', we find the probability of the 67 | data points with high probability for closer points and less probability for disimilar points. 68 | """ 69 | Q = 1.0 / (1.0 + D) 70 | Q[torch.eye(Q.shape[0]).byte()] = 0.0 71 | Q = Q.clip(min=1e-100) 72 | return Q 73 | 74 | def binary_search(self, dist, target_entropy): 75 | """ 76 | SNE performs a binary search for the value of sigma that produces probability distribution with a fixed 77 | perplexity that is specified by the user. 
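        Perplexity is defined as Perp(P_i) = 2^H(P_i), where H(P_i) = -Σ_j p_(j|i) * log2(p_(j|i)) is the Shannon
        entropy of the conditional distribution, so the search effectively tunes the per-point bandwidth until each
        point "sees" roughly `preplexity` effective neighbors.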
78 | """ 79 | precision_minimum = 0 80 | precision_maximum = 1.0e15 81 | precision = 1.0e5 82 | 83 | for _ in range(self.preplexity_tries): 84 | denominator = torch.sum(torch.exp(-dist[dist > 0.0] / precision)) 85 | beta = torch.exp(-dist / precision) / denominator 86 | 87 | g_beta = beta[beta > 0.0] 88 | # Shannon Entropy 89 | entropy = -torch.sum(g_beta * torch.log2(g_beta)) 90 | error = entropy - target_entropy 91 | 92 | if error > 0: 93 | precision_maximum = precision 94 | precision = (precision + precision_minimum) / 2.0 95 | else: 96 | precision_minimum = precision 97 | precision = (precision + precision_maximum) / 2.0 98 | 99 | if torch.abs(error) < self.tol: 100 | break 101 | 102 | return beta 103 | 104 | def fit_transform(self, X): 105 | self.n_samples, self.n_features = X.shape[0], X.shape[1] 106 | Y = torch.randn(self.n_samples, self.n_components) 107 | velocity = torch.zeros_like(Y) 108 | gains = torch.ones_like(Y) 109 | P = self.get_pairwise_affinities(X) 110 | 111 | iter_num = 0 112 | while iter_num < self.max_iter: 113 | iter_num += 1 114 | D = self.l2_distance(Y) 115 | Q = self.q_distribution(D) 116 | Q_n = Q /torch.sum(Q) 117 | 118 | pmul = 4.0 if iter_num < 100 else 1.0 119 | momentum = 0.5 if iter_num < 20 else 0.8 120 | 121 | grads = torch.zeros(Y.shape) 122 | for i in range(self.n_samples): 123 | """ 124 | Optimization using gradient to converge between the true P and estimated Q distrbution. 125 | """ 126 | grad = 4 * torch.mm(((pmul * P[i] - Q_n[i]) * Q[i]).unsqueeze(0), Y[i] -Y) 127 | grads[i] = grad 128 | 129 | gains = (gains + 0.2) * ((grads > 0) != (velocity > 0)) + (gains * 0.8) * ((grads > 0) == (velocity > 0)) 130 | gains = gains.clip(min=self.min_gain) 131 | 132 | velocity = momentum * velocity - self.lr * (gains * grads) 133 | Y += velocity 134 | Y = Y - torch.mean(Y, 0) 135 | error = torch.sum(P * torch.log(P/Q_n)) 136 | print("Iteration %s, error %s" % (iter_num, error)) 137 | return Y 138 | 139 | if __name__ == '__main__': 140 | data = load_diabetes() 141 | torch.manual_seed(42) 142 | X = torch.tensor(data.data, dtype=torch.double) 143 | print(max(X[1,:])) 144 | y = torch.tensor(data.target) 145 | print(y.shape) 146 | tsne = TSNE(n_components=2) 147 | tsne.fit_transform(X) 148 | -------------------------------------------------------------------------------- /Day-10-Lasso-Ridge-Regression/Lasso_Ridge_Regression.py: -------------------------------------------------------------------------------- 1 | """ 2 | Reference: https://github.com/eriklindernoren/ML-From-Scratch 3 | This github repository implements high quality code as we see in official libraries like sklearn etc. 4 | Great reference to kickstart your journey for ML programming. 5 | """ 6 | import torch 7 | from sklearn.datasets import load_boston 8 | from itertools import combinations_with_replacement 9 | from sklearn.metrics import accuracy_score 10 | from sklearn.model_selection import train_test_split 11 | import seaborn as sb 12 | import matplotlib.pyplot as plt 13 | 14 | class LassoRegularization: 15 | def __init__(self, alpha): 16 | """ 17 | :param alpha: 18 | * When 0, the lasso regression turns into Linear Regression 19 | * When increases towards infinity, it turns features coefficients into zero. 20 | * Try out different value to find out optimized values. 
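        Combined with the MSE loss used in Regression.fit below, the full objective being minimized is roughly
            L(w) = mean(0.5 * (y - Xw)^2) + alpha * ||w||_1.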
21 | """ 22 | self.alpha = alpha 23 | 24 | def __call__(self, w): 25 | """ 26 | :param w: Weight vector 27 | :return: Penalization value for MSE 28 | """ 29 | return self.alpha * torch.norm(w, p=1) 30 | 31 | def grad(self, w): 32 | """ 33 | :param w: weight vector 34 | :return: weight update based on sign value, it helps in removing coefficients from W vector 35 | torch.sign: 36 | a 37 | tensor([ 0.7000, -1.2000, 0.0000, 2.3000]) 38 | torch.sign(a) 39 | tensor([ 1., -1., 0., 1.]) 40 | """ 41 | return self.alpha * torch.sign(w) 42 | 43 | class RidgeRegularization: 44 | def __init__(self, alpha): 45 | """ 46 | :param alpha: 47 | * When 0, the lasso regression turns into Linear Regression 48 | * When increases towards infinity, it turns features coefficients into zero. 49 | * Try out different value to find out optimized values. 50 | """ 51 | self.alpha = alpha 52 | 53 | def __call__(self, w): 54 | """ 55 | :param w: Weight vector 56 | :return: Penalization value for MSE 57 | """ 58 | return self.alpha * 0.5 * torch.mm(w.T, w) 59 | 60 | def grad(self, w): 61 | """ 62 | :param w: weight vector 63 | :return: weight update based on sign value, it helps in reducing the coefficient values from W vector 64 | """ 65 | return self.alpha * w 66 | 67 | class Regression: 68 | def __init__(self, learning_rate, epochs, regression_type='lasso'): 69 | """ 70 | :param learning_rate: constant step while updating weight 71 | :param epochs: Number of epochs the data is passed through the model 72 | Initalizing regularizer for Lasso Regression. 73 | """ 74 | self.lr = learning_rate 75 | self.epochs = epochs 76 | if regression_type == 'lasso': 77 | self.regularization = LassoRegularization(alpha=1.0) 78 | else: 79 | self.regularization = RidgeRegularization(alpha=2.0) 80 | 81 | def normalization(self, X): 82 | """ 83 | :param X: Input tensor 84 | :return: Normalized input using l2 norm. 85 | """ 86 | l2 = torch.norm(X, p=2, dim=-1) 87 | l2[l2 == 0] = 1 88 | return X / l2.unsqueeze(1) 89 | 90 | def polynomial_features(self, X, degree): 91 | """ 92 | It creates polynomial features from existing set of features. For instance, 93 | X_1, X_2, X_3 are available features, then polynomial features takes combinations of 94 | these features to create new feature by doing X_1*X_2, X_1*X_3, X_2*X3. 95 | 96 | combinations output: [(), (0,), (1,), (2,), (3,), (0, 0), (0, 1), (0, 2), (0, 3), 97 | (1, 1), (1, 2), (1, 3), (2, 2), (2, 3), (3, 3)] 98 | :param X: Input tensor (For Iris Dataset, (150, 4)) 99 | :param degree: Polynomial degree of 2, i.e we'll have product of two feature vector at max. 100 | :return: Output tensor (After adding polynomial features, the number of features increases to 15) 101 | """ 102 | n_samples, n_features = X.shape[0], X.shape[1] 103 | def index_combination(): 104 | combinations = [combinations_with_replacement(range(n_features), i) for i in range(0, degree+1)] 105 | flat_combinations = [item for sublists in combinations for item in sublists] 106 | return flat_combinations 107 | 108 | combinations = index_combination() 109 | n_output_features = len(combinations) 110 | X_new = torch.empty((n_samples, n_output_features)) 111 | 112 | for i, index_combs in enumerate(combinations): 113 | X_new[:, i] = torch.prod(X[:, index_combs], dim=1) 114 | 115 | X_new = X_new.type(torch.DoubleTensor) 116 | return X_new 117 | 118 | def weight_initialization(self, n_features): 119 | """ 120 | :param n_features: Number of features in the data 121 | :return: creating weight vector using uniform distribution. 
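        Concretely, each weight is drawn as w_j ~ Uniform(-1/sqrt(n_features), +1/sqrt(n_features)), a common
        fan-in based heuristic that keeps the initial predictions on a reasonable scale.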
122 | """ 123 | limit = 1 / torch.sqrt(torch.scalar_tensor(n_features)) 124 | #self.w = torch.FloatTensor((n_features,)).uniform(-limit, limit) 125 | self.w = torch.distributions.uniform.Uniform(-limit, limit).sample((n_features, 1)) 126 | self.w = self.w.type(torch.DoubleTensor) 127 | 128 | def fit(self, X, y): 129 | """ 130 | :param X: Input tensor 131 | :param y: ground truth tensor 132 | :return: updated weight vector for prediction 133 | """ 134 | self.training_error = {} 135 | self.weight_initialization(n_features=X.shape[1]) 136 | for epoch in range(1, self.epochs+1): 137 | y_pred = torch.mm(X, self.w) 138 | mse = torch.mean(0.5 * (y - y_pred)**2 + self.regularization(self.w)) 139 | self.training_error[epoch] = mse.item() 140 | grad_w = torch.mm(-(y - y_pred).T, X).T + self.regularization.grad(self.w) 141 | self.w -= self.lr * grad_w 142 | 143 | 144 | def predict(self, X): 145 | """ 146 | :param X: input tensor 147 | :return: predicted output using learned weight vector 148 | """ 149 | y_pred = torch.mm(X, self.w) 150 | return y_pred 151 | 152 | if __name__ == '__main__': 153 | boston = load_boston() 154 | torch.manual_seed(0) 155 | X = torch.tensor(boston.data, dtype=torch.double) 156 | y = torch.tensor(boston.target, dtype=torch.double).unsqueeze(1) 157 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2) 158 | regression = Regression(learning_rate=0.0001, epochs=3000, regression_type='lasso') 159 | regression.fit(regression.normalization(regression.polynomial_features(x_train, degree=1)), y_train) 160 | y_pred = regression.predict(regression.normalization(regression.polynomial_features(x_test, degree=1))) 161 | plt.figure(figsize=(6, 6)) 162 | sb.scatterplot(list(regression.training_error.keys()), list(regression.training_error.values())) 163 | plt.show() 164 | -------------------------------------------------------------------------------- /Day-11-Gaussian-Mixture-Model/gmm.py: -------------------------------------------------------------------------------- 1 | """ 2 | Blog post GMM: https://brilliant.org/wiki/gaussian-mixture-model/ 3 | """ 4 | import torch 5 | import math 6 | from sklearn.datasets import load_iris 7 | from sklearn.metrics import accuracy_score 8 | from sklearn.model_selection import train_test_split 9 | 10 | class GMM: 11 | def __init__(self, k, max_epochs=100, tolerance=1e-8): 12 | """ 13 | :param k: the number of clusters the algorithm will form. 14 | :param max_epochs: The number of iterations the algorithm will run for if it does 15 | not converge before that. 16 | :param tolerance: float 17 | If the difference of the results from one iteration to the next is 18 | smaller than this value we will say that the algorithm has converged. 19 | """ 20 | self.k = k 21 | self.parameters = [] 22 | self.max_epochs = max_epochs 23 | self.tolerance = tolerance 24 | self.responsibility = None 25 | self.responsibilities = [] 26 | self.sample_assignments = None 27 | 28 | def normalization(self, X): 29 | """ 30 | :param X: Input tensor 31 | :return: Normalized input using l2 norm. 
32 | """ 33 | l2 = torch.norm(X, p=2, dim=-1) 34 | l2[l2 == 0] = 1 35 | return X / l2.unsqueeze(1) 36 | 37 | def covariance_matrix(self, X): 38 | """ 39 | :param X: Input tensor 40 | :return: cavariance of input tensor 41 | """ 42 | centering_X = X - torch.mean(X, dim=0) 43 | cov = torch.mm(centering_X.T, centering_X) / (centering_X.shape[0] - 1) 44 | return cov 45 | 46 | def random_gaussian_initialization(self, X): 47 | """ 48 | Since we are using iris dataset, we know the no. of class is 3. 49 | We create three gaussian distribution representing each class with 50 | random sampling of data to find parameters like μ and 𝚺/N (covariance matrix) 51 | for each class 52 | :param X: input tensor 53 | :return: 3 randomly selected mean and covariance of X, each act as a separate cluster 54 | """ 55 | n_samples = X.shape[0] 56 | self.prior = (1 / self.k) * torch.ones(self.k) 57 | for cls in range(self.k): 58 | parameter = {} 59 | parameter['mean'] = X[torch.randperm(n_samples)[:1]] 60 | parameter['cov'] = self.covariance_matrix(X) 61 | self.parameters.append(parameter) 62 | 63 | def multivariate_gaussian_distribution(self, X, parameters): 64 | """ 65 | Checkout the equation from Multi-Dimensional Model from blog link posted above. 66 | We find the likelihood of each sample w.r.t to the parameters initialized above for each separate cluster. 67 | :param X: Input tensor 68 | :param parameters: mean, cov of the randomly initialized gaussian 69 | :return: Likelihood of each sample belonging to a cluster with random initialization of mean and cov. 70 | Since it is a multivariate problem we have covariance and not variance. 71 | """ 72 | n_features = X.shape[1] 73 | mean = parameters['mean'] 74 | cov = parameters['cov'] 75 | determinant = torch.det(cov) 76 | likelihoods = torch.zeros(X.shape[0]) 77 | for i, sample in enumerate(X): 78 | dim = torch.scalar_tensor(n_features, dtype=torch.float) 79 | coefficients = 1.0/ torch.sqrt(torch.pow((2.0 * math.pi), dim) * determinant) 80 | exponent = torch.exp( -0.5 * torch.mm(torch.mm((sample - mean) ,torch.pinverse(cov)) , (sample - mean).T)) 81 | likelihoods[i] = coefficients * exponent 82 | 83 | return likelihoods 84 | 85 | def get_likelihood(self, X): 86 | """ 87 | Previously, we have initialized 3 different mean and covariance in random_gaussian_initialization(). Now around 88 | each of these mean and cov, we see likelihood of the each sample using multivariate gaussian distribution. 89 | :param X: 90 | :return: Storing the likelihood of each sample belonging to a cluster with random initialization of mean and cov. 91 | Since it is a multivariate problem we have covariance and not variance. 92 | """ 93 | n_samples = X.shape[0] 94 | likelihoods_cls = torch.zeros((n_samples, self.k)) 95 | for cls in range(self.k): 96 | likelihoods_cls[:, cls] = self.multivariate_gaussian_distribution(X, self.parameters[cls]) 97 | 98 | return likelihoods_cls 99 | 100 | def expectation(self, X): 101 | """ 102 | Expectation Maximization Algorithm is used to find the optimized value of randomly initialized mean and cov. 103 | Expectation refers to probability. Here, It calculates the probabilities of X belonging to different cluster. 104 | :param X: input tensor 105 | :return: Max probability of each sample belonging to a particular class. 
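        In symbols, the responsibility computed below is
            r_ik = pi_k * N(x_i | mu_k, cov_k) / Σ_j pi_j * N(x_i | mu_j, cov_j),
        i.e. the posterior probability that sample x_i was generated by component k under the current parameters.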
106 | """ 107 | weighted_likelihood = self.get_likelihood(X) * self.prior 108 | sum_likelihood = torch.sum(weighted_likelihood, dim=1).unsqueeze(1) 109 | # Determine responsibility as P(X|y)*P(y)/P(X) 110 | # responsibility stores each sample's probability score corresponding to each class 111 | self.responsibility = weighted_likelihood /sum_likelihood 112 | # Assign samples to cluster that has largest probability 113 | self.sample_assignments = self.responsibility.argmax(dim=1) 114 | # Save value for convergence check 115 | self.responsibilities.append(torch.max(self.responsibility, dim=1)) 116 | 117 | def maximization(self, X): 118 | """ 119 | Iterate through clusters and updating mean and covariance. 120 | Finding updated mean and covariance using probability score of each sample w.r.t each class 121 | :param X: 122 | :return: Updated mean, covariance and priors 123 | """ 124 | for i in range(self.k): 125 | resp = self.responsibility[:, i].unsqueeze(1) 126 | mean = torch.sum(resp * X, dim=0) / torch.sum(resp) 127 | covariance = torch.mm((X - mean).T, (X - mean) * resp) / resp.sum() 128 | self.parameters[i]['mean'], self.parameters[i]['cov'] = mean.unsqueeze(0), covariance 129 | 130 | n_samples = X.shape[0] 131 | self.prior = self.responsibility.sum(dim=0) / n_samples 132 | 133 | def convergence(self, X): 134 | """Convergence if || likehood - last_likelihood || < tolerance """ 135 | if len(self.responsibilities) < 2: 136 | return False 137 | difference = torch.norm(self.responsibilities[-1].values - self.responsibilities[-2].values) 138 | return difference <= self.tolerance 139 | 140 | def predict(self, X): 141 | self.random_gaussian_initialization(X) 142 | 143 | for _ in range(self.max_epochs): 144 | self.expectation(X) 145 | self.maximization(X) 146 | break 147 | 148 | if self.convergence(X): 149 | break 150 | 151 | self.expectation(X) 152 | return self.sample_assignments 153 | 154 | if __name__ == '__main__': 155 | iris = load_iris() 156 | torch.manual_seed(0) 157 | X = torch.tensor(iris.data, dtype=torch.float) 158 | y = torch.tensor(iris.target) 159 | n_classes = len(torch.unique(y)) 160 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2) 161 | gmm = GMM(k=n_classes, max_epochs=2000) 162 | y_pred = gmm.predict(x_train) 163 | print(f'Accuracy Score: {accuracy_score(y_train, y_pred)}') 164 | 165 | -------------------------------------------------------------------------------- /Day-03-Decision-Tree/DecisionTree.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from sklearn.datasets import load_breast_cancer 3 | from sklearn.model_selection import train_test_split 4 | from sklearn.metrics import accuracy_score 5 | 6 | 7 | class Node: 8 | def __init__(self, gini, num_samples, num_samples_per_class, predicted_class): 9 | self.gini = gini 10 | self.num_samples = num_samples 11 | self.num_samples_per_class = num_samples_per_class 12 | self.predicted_class = predicted_class 13 | self.feature_index = 0 14 | self.threshold = 0 15 | self.left = None 16 | self.right = None 17 | 18 | class DecisionTree_CART: 19 | def __init__(self, max_depth=None): 20 | self.max_depth = max_depth 21 | 22 | def fit(self, X, y): 23 | """Build decision tree classifier 24 | :argument X: Input Tensor 25 | :argument y: ground truth Tensor 26 | :variable n_classes_: Number of Classes in target variable 27 | :variable n_features_: Number of features 28 | :variable tree_: Making decision tree based on X, y along with max_depth 29 | """ 30 | 
self.n_classes_ = len(y.unique()) # classes are assumed to go from 0 to n-1 31 | self.n_features_ = X.shape[1] 32 | self.tree_ = self._grow_tree(X, y) 33 | 34 | def _gini(self, y): 35 | """Compute Gini impurity of a non-empty node. 36 | Gini impurity is defined as Σ p(1-p) over all classes, with p the frequency of a 37 | class within the node. Since Σ p = 1, this is equivalent to 1 - Σ p^2. 38 | 39 | :var m: Sample Size 40 | """ 41 | m = y.shape[0] 42 | 43 | return 1.0 - sum((torch.sum(y == c).item() // m) ** 2 for c in range(self.n_classes_)) 44 | 45 | def _best_split(self, X, y): 46 | """Find the best split for a node. 47 | "Best" means that the average impurity of the two children, weighted by their 48 | population, is the smallest possible. Additionally it must be less than the 49 | impurity of the current node. 50 | To find the best split, we loop through all the features, and consider all the 51 | midpoints between adjacent training samples as possible thresholds. We compute 52 | the Gini impurity of the split generated by that particular feature/threshold 53 | pair, and return the pair with smallest impurity. 54 | Returns: 55 | best_idx: Index of the feature for best split, or None if no split is found. 56 | best_thr: Threshold to use for the split, or None if no split is found. 57 | """ 58 | # Need at least two elements to split a node. 59 | m = y.shape[0] 60 | if m <= 1: 61 | return None, None 62 | 63 | # Count of each class in the current node. 64 | num_parent = [torch.sum(y == c).item() for c in range(self.n_classes_)] 65 | print(f'num_parent {num_parent}') 66 | 67 | # Gini of current node. 68 | best_gini = 1.0 - sum((n // m) ** 2 for n in num_parent) 69 | best_idx, best_thr = None, None 70 | 71 | # Loop through all features. 72 | for idx in range(self.n_features_): 73 | # Sort data along selected feature. 74 | thresholds, classes = zip(*sorted(zip(X[:, idx], y))) 75 | 76 | # We could actually split the node according to each feature/threshold pair 77 | # and count the resulting population for each class in the children, but 78 | # instead we compute them in an iterative fashion, making this for loop 79 | # linear rather than quadratic. 80 | num_left = [0] * self.n_classes_ 81 | num_right = num_parent.copy() 82 | for i in range(1, m): # possible split positions 83 | c = classes[i - 1] 84 | num_left[c] += 1 85 | num_right[c] -= 1 86 | gini_left = 1.0 - sum( 87 | (num_left[x] / i) ** 2 for x in range(self.n_classes_) 88 | ) 89 | gini_right = 1.0 - sum( 90 | (num_right[x] // (m - i)) ** 2 for x in range(self.n_classes_) 91 | ) 92 | 93 | # The Gini impurity of a split is the weighted average of the Gini 94 | # impurity of the children. 95 | gini = (i * gini_left + (m - i) * gini_right) / m 96 | 97 | # The following condition is to make sure we don't try to split two 98 | # points with identical values for that feature, as it is impossible 99 | # (both have to end up on the same side of a split). 100 | if thresholds[i] == thresholds[i - 1]: 101 | continue 102 | 103 | if gini < best_gini: 104 | best_gini = gini 105 | best_idx = idx 106 | best_thr = (thresholds[i] + thresholds[i - 1]) / 2 # midpoint 107 | 108 | print("Best Index and Threshold",best_idx, best_thr) 109 | 110 | return best_idx, best_thr 111 | 112 | def _grow_tree(self, X, y, depth=0): 113 | """Build a decision tree by recursively finding the best split.""" 114 | # Population for each class in current node. The predicted class is the one with 115 | # largest population. 
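        # Example: with class counts [30, 10, 5] the node predicts class 0 and its Gini
        # impurity is 1 - (30/45)**2 - (10/45)**2 - (5/45)**2 ≈ 0.49.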
116 | num_samples_per_class = torch.tensor([torch.sum(y == i) for i in range(self.n_classes_)]) 117 | predicted_class = torch.argmax(num_samples_per_class) 118 | node = Node( 119 | gini=self._gini(y), 120 | num_samples=y.shape[0], 121 | num_samples_per_class=num_samples_per_class, 122 | predicted_class=predicted_class, 123 | ) 124 | 125 | # Split recursively until maximum depth is reached. 126 | if depth < self.max_depth: 127 | idx, thr = self._best_split(X, y) 128 | if idx is not None: 129 | indices_left = X[:, idx] < thr 130 | X_left, y_left = X[indices_left], y[indices_left] 131 | X_right, y_right = X[~indices_left], y[~indices_left] 132 | node.feature_index = idx 133 | node.threshold = thr 134 | node.left = self._grow_tree(X_left, y_left, depth + 1) 135 | node.right = self._grow_tree(X_right, y_right, depth + 1) 136 | return node 137 | 138 | def predict(self, X): 139 | return [self._predict(inputs) for inputs in X] 140 | 141 | def _predict(self, inputs): 142 | """Predict class for a single sample.""" 143 | node = self.tree_ 144 | while node.left: 145 | if inputs[node.feature_index] < node.threshold: 146 | node = node.left 147 | else: 148 | node = node.right 149 | return node.predicted_class 150 | 151 | if __name__ == "__main__": 152 | """ 153 | :variable X: Input tensor with 30 features 154 | :target y: Output tensor with 2 classes 155 | 156 | * Converting Numpy array into torch tensor. 157 | * Creating DecisionTree Object with max_depth 5. 158 | * Fit and predict with DecisionTree Object. 159 | """ 160 | breast_cancer = load_breast_cancer() 161 | X = breast_cancer['data'] 162 | y = breast_cancer['target'] 163 | X = torch.tensor(X) 164 | y = torch.tensor(y) 165 | 166 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2) 167 | classifier = DecisionTree_CART(max_depth=5) 168 | classifier.fit(x_train, y_train) 169 | y_predict = classifier.predict(x_test) 170 | 171 | print(f'Accuracy: {accuracy_score(y_test, y_predict)}') 172 | -------------------------------------------------------------------------------- /Day-16-Bayesian-Regression/BayesianRegression.py: -------------------------------------------------------------------------------- 1 | """ 2 | Checkout the below url to understand, how Bayesian regression differs from Linear Regression 3 | https://towardsdatascience.com/introduction-to-bayesian-linear-regression-e66e60791ea7 4 | https://dzone.com/articles/bayesian-learning-for-machine-learning-part-ii-lin 5 | """ 6 | import pandas as pd 7 | import torch 8 | from scipy.stats import chi2, multivariate_normal 9 | from sklearn.model_selection import train_test_split 10 | from itertools import combinations_with_replacement 11 | import matplotlib.pyplot as plt 12 | 13 | def mean_squared_error(y_true, y_pred): 14 | """ Returns the mean squared error between y_true and y_pred """ 15 | mse = torch.mean(torch.pow(y_true - y_pred, 2)) 16 | return mse 17 | 18 | def polynomial_features(X, degree): 19 | """ 20 | It creates polynomial features from existing set of features. For instance, 21 | X_1, X_2, X_3 are available features, then polynomial features takes combinations of 22 | these features to create new feature by doing X_1*X_2, X_1*X_3, X_2*X3. 23 | 24 | For Degree 2: 25 | combinations output: [(), (0,), (1,), (2,), (3,), (0, 0), (0, 1), (0, 2), (0, 3), 26 | (1, 1), (1, 2), (1, 3), (2, 2), (2, 3), (3, 3)] 27 | :param X: Input tensor (For Iris Dataset, (150, 4)) 28 | :param degree: Polynomial degree of 2, i.e we'll have product of two feature vector at max. 
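        With 4 input features and degree 2, this yields 1 bias term + 4 linear terms + 10 pairwise products (including squares) = 15 columns.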
29 | :return: Output tensor (After adding polynomial features, the number of features increases to 15) 30 | """ 31 | n_samples, n_features = X.shape[0], X.shape[1] 32 | def index_combination(): 33 | combinations = [combinations_with_replacement(range(n_features), i) for i in range(0, degree+1)] 34 | flat_combinations = [item for sublists in combinations for item in sublists] 35 | return flat_combinations 36 | 37 | combinations = index_combination() 38 | n_output_features = len(combinations) 39 | X_new = torch.empty((n_samples, n_output_features)) 40 | 41 | for i, index_combs in enumerate(combinations): 42 | X_new[:, i] = torch.prod(X[:, index_combs], dim=1) 43 | 44 | X_new = X_new.type(torch.DoubleTensor) 45 | return X_new 46 | 47 | 48 | class BayesianRegression: 49 | def __init__(self, n_draws, mu_0, omega_0, nu_0, sigma_sq_0, polynomial_degree=0, credible_interval=95): 50 | """ 51 | Bayesian regression model. If poly_degree is specified the features will 52 | be transformed to with a polynomial basis function, which allows for polynomial 53 | regression. Assumes Normal prior and likelihood for the weights and scaled inverse 54 | chi-squared prior and likelihood for the variance of the weights. 55 | 56 | :param n_draws: The number of simulated draws from the posterior of the parameters. 57 | :param mu_0: The mean values of the prior Normal distribution of the parameters. 58 | :param omega_0: The precision matrix of the prior Normal distribution of the parameters. 59 | :param nu_0: The degrees of freedom of the prior scaled inverse chi squared distribution. 60 | :param sigma_sq_0: The scale parameter of the prior scaled inverse chi squared distribution. 61 | :param polynomial_degree: The polynomial degree that the features should be transformed to. Allows 62 | for polynomial regression. 63 | :param credible_interval: The credible interval (ETI in this impl.). 95 => 95% credible interval of the posterior 64 | of the parameters. 65 | """ 66 | self.n_draws = n_draws 67 | self.polynomial_degree = polynomial_degree 68 | self.credible_interval = credible_interval 69 | 70 | # Prior parameters 71 | self.mu_0 = mu_0 72 | self.omega_0 = omega_0 73 | self.nu_0 = nu_0 74 | self.sigma_sq_0 = sigma_sq_0 75 | 76 | def scaled_inverse_chi_square(self, n, df, scale): 77 | """ 78 | Allows for simulation from the scaled inverse chi squared 79 | distribution. Assumes the variance is distributed according to 80 | this distribution. 81 | :param n: 82 | :param df: 83 | :param scale: 84 | :return: 85 | """ 86 | X = chi2.rvs(size=n, df=df) 87 | sigma_sq = df * scale / X 88 | return sigma_sq 89 | 90 | def fit(self, X, y): 91 | # For polynomial transformation 92 | if self.polynomial_degree: 93 | X = polynomial_features(X, degree=self.polynomial_degree) 94 | 95 | n_samples, n_features = X.shape[0], X.shape[1] 96 | X_X_T = torch.mm(X.T, X) 97 | 98 | # Least squares approximate of beta 99 | beta_hat = torch.mm(torch.mm(torch.pinverse(X_X_T), X.T), y) 100 | 101 | # The posterior parameters can be determined analytically since we assume 102 | # conjugate priors for the likelihoods. 
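        # With beta_hat the least-squares estimate, the closed-form updates below are:
        #   mu_n    = (X^T X + Omega_0)^{-1} (X^T X beta_hat + Omega_0 mu_0)
        #   Omega_n = X^T X + Omega_0
        #   nu_n    = nu_0 + n_samples
        #   nu_n * sigma_sq_n = nu_0 * sigma_sq_0 + y^T y + mu_0^T Omega_0 mu_0 - mu_n^T Omega_n mu_n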
103 | # Normal prior / likelihood => Normal posterior 104 | mu_n = torch.mm(torch.pinverse(X_X_T + self.omega_0), torch.mm(X_X_T, beta_hat) + torch.mm(self.omega_0, self.mu_0.unsqueeze(1))) 105 | omega_n = X_X_T + self.omega_0 106 | nu_n = self.nu_0 + n_samples 107 | 108 | # Scaled inverse chi-squared prior / likelihood => Scaled inverse chi-squared posterior 109 | sigma_sq_n = (1.0/nu_n) * (self.nu_0 * self.sigma_sq_0 + torch.mm(y.T, y) + torch.mm(torch.mm(self.mu_0.unsqueeze(1).T, self.omega_0), self.mu_0.unsqueeze(1)) - torch.mm(mu_n.T, torch.mm(omega_n, mu_n))) 110 | 111 | # Simulate parameter values for n_draws 112 | beta_draws = torch.empty((self.n_draws, n_features)) 113 | for i in range(self.n_draws): 114 | sigma_sq = self.scaled_inverse_chi_square(n=1, df=nu_n, scale=sigma_sq_n) 115 | beta = multivariate_normal.rvs(size=1, mean=mu_n[:,0], cov=sigma_sq * torch.pinverse(omega_n)) 116 | beta_draws[1, :] = torch.tensor(beta,dtype=torch.float) 117 | 118 | # Select the mean of the simulated variables as the ones used to make predictions 119 | self.w = torch.mean(beta_draws, dim=0, dtype=torch.double) 120 | 121 | # Lower and upper boundary of the credible interval 122 | l_eti = 0.50 - self.credible_interval / 2 123 | u_eti = 0.50 + self.credible_interval / 2 124 | self.eti = torch.tensor([[torch.quantile(beta_draws[:, i], q=l_eti), torch.quantile(beta_draws[:, i], q=u_eti)] for i in range(n_features)], dtype=torch.double) 125 | 126 | def predict(self, X, eti=False): 127 | if self.polynomial_degree: 128 | X = polynomial_features(X, degree=self.polynomial_degree) 129 | y_pred = torch.mm(X, self.w.unsqueeze(1)) 130 | # If the lower and upper boundaries for the 95% 131 | # equal tail interval should be returned 132 | if eti: 133 | lower_w = self.eti[:, 0] 134 | upper_w = self.eti[:, 1] 135 | 136 | y_lower_prediction = torch.mm(X, lower_w.unsqueeze(1)) 137 | y_upper_prediction = torch.mm(X, upper_w.unsqueeze(1)) 138 | 139 | return y_pred, y_lower_prediction, y_upper_prediction 140 | 141 | return y_pred 142 | 143 | if __name__ == '__main__': 144 | data = pd.read_csv('temp.txt', sep="\t") 145 | X = torch.tensor(data["time"].values).unsqueeze(0).T 146 | y = torch.tensor(data["temp"].values).unsqueeze(0).T 147 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.4) 148 | n_samples, n_features = X.shape[0], X.shape[1] 149 | mu_0 = torch.zeros(n_features, dtype=torch.double) 150 | omega_0 = torch.diag(torch.tensor([0.0001] * n_features, dtype=torch.double)) 151 | nu_0 = 1 152 | sigma_sq_0 = 100 153 | credible_interval = 0.40 154 | classifier = BayesianRegression(n_draws=2000, 155 | polynomial_degree=4, 156 | mu_0=mu_0, 157 | omega_0=omega_0, 158 | nu_0=nu_0, 159 | sigma_sq_0=sigma_sq_0, 160 | credible_interval=credible_interval) 161 | classifier.fit(x_train, y_train) 162 | y_pred = classifier.predict(x_test) 163 | mse = mean_squared_error(y_test, y_pred) 164 | y_pred_, y_lower_, y_upper_ = classifier.predict(X=X, eti=True) 165 | print("Mean Squared Error:", mse) 166 | # 167 | # Color map 168 | cmap = plt.get_cmap('viridis') 169 | 170 | # Plot the results 171 | m1 = plt.scatter(366 * x_train, y_train, color=cmap(0.9), s=10) 172 | m2 = plt.scatter(366 * x_test, y_test, color=cmap(0.5), s=10) 173 | p1 = plt.plot(366 * X, y_pred_, color="black", linewidth=2, label="Prediction") 174 | p2 = plt.plot(366 * X, y_lower_, color="gray", linewidth=2, label="{0}% Credible Interval".format(credible_interval)) 175 | p3 = plt.plot(366 * X, y_upper_, color="gray", linewidth=2) 176 | plt.axis((0, 
366, -20, 25))
177 |     plt.suptitle("Bayesian Regression")
178 |     plt.title("MSE: %.2f" % mse, fontsize=10)
179 |     plt.xlabel('Day')
180 |     plt.ylabel('Temperature in Celsius')
181 |     # plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
182 |     plt.legend(loc='lower right')
183 | 
184 |     plt.show()
185 | 
--------------------------------------------------------------------------------
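For reference, a minimal self-contained sketch of the posterior-simulation loop that BayesianRegression.fit performs, using the same Normal / scaled-inverse-chi-squared conjugate updates. The toy data (X, y, true_w), priors (mu_0, omega_0, nu_0, sigma_sq_0) and n_draws below are illustrative assumptions, not values from the repository; each simulated weight vector is written to row i of beta_draws before being averaged into the point estimate.

# Hypothetical toy setup; a sketch, not part of the repository files above.
import torch
from scipy.stats import chi2, multivariate_normal

torch.manual_seed(0)
n, d, n_draws = 50, 3, 200
X = torch.randn(n, d, dtype=torch.double)
true_w = torch.tensor([[1.0], [-2.0], [0.5]], dtype=torch.double)
y = X @ true_w + 0.1 * torch.randn(n, 1, dtype=torch.double)

# Priors (illustrative): vague Normal on the weights, scaled-inv-chi2 on the noise variance.
mu_0 = torch.zeros(d, 1, dtype=torch.double)
omega_0 = 1e-4 * torch.eye(d, dtype=torch.double)
nu_0, sigma_sq_0 = 1.0, 1.0

# Closed-form posterior parameters (same algebra as in BayesianRegression.fit).
XtX = X.T @ X
beta_hat = torch.linalg.solve(XtX, X.T @ y)
omega_n = XtX + omega_0
mu_n = torch.linalg.solve(omega_n, XtX @ beta_hat + omega_0 @ mu_0)
nu_n = nu_0 + n
sigma_sq_n = (nu_0 * sigma_sq_0
              + (y.T @ y + mu_0.T @ omega_0 @ mu_0 - mu_n.T @ omega_n @ mu_n).item()) / nu_n

# Simulate draws of (sigma_sq, beta) from the joint posterior.
beta_draws = torch.empty(n_draws, d, dtype=torch.double)
cov_base = torch.linalg.inv(omega_n).numpy()
for i in range(n_draws):
    sigma_sq = nu_n * sigma_sq_n / chi2.rvs(df=nu_n)            # scaled inverse chi-squared draw
    beta = multivariate_normal.rvs(mean=mu_n[:, 0].numpy(), cov=sigma_sq * cov_base)
    beta_draws[i, :] = torch.tensor(beta, dtype=torch.double)   # draw i goes into row i

w = beta_draws.mean(dim=0)   # posterior-mean weights used for prediction
print(w)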