├── mlwithpytorch.png
├── Day-30-Loss-Functions
│   └── loss.py
├── Day-26-Normalization
│   └── normalization.py
├── Day-09-PCA
│   └── pca.py
├── Day-01-Linear-Regression
│   └── LinearRegression.py
├── Day-17-K-Medoids
│   ├── utility.py
│   └── PAM.py
├── Day-23-Gradient-Descent
│   └── gd.py
├── Day-06-KNN
│   └── KNN.py
├── Day-19-ElasticNet
│   └── ElasticNetRegression.py
├── Day-12-LDA
│   ├── lda.py
│   └── NaiveBayes.py
├── README.md
├── Day-21-LatentDirichlet
│   └── LDA_TopicModeling.py
├── Day-28-Activations
│   ├── activation.py
│   └── MLP.py
├── Day-15-MultiClassLDA
│   └── multi-class-LDA.py
├── Day-02-Logistic-Regression
│   └── LogisticRegression.py
├── Day-22-AffinityPropagation
│   └── AffinityPropagation.py
├── Day-05-Naive-Bayes
│   └── NaiveBayes.py
├── Day-29-Optimizers
│   └── optimizer.py
├── Day-07-SVM
│   └── svm.py
├── Day-25-RANSAC
│   └── ransac.py
├── Day-24-Regularization
│   └── regularization.py
├── Day-08-tf-idf
│   └── tfidf.py
├── Day-14-DBSCAN
│   └── dbscan.py
├── Day-27-MLP
│   └── mlp.py
├── Day-13-Adaboost
│   └── adaboost.py
├── Day-20-SpectralClustering
│   └── spectralClustering.py
├── Day-04-KMeans-Clustering
│   └── KMeans.py
├── Day-18-TSNE
│   └── tsne.py
├── Day-10-Lasso-Ridge-Regression
│   └── Lasso_Ridge_Regression.py
├── Day-11-Gaussian-Mixture-Model
│   └── gmm.py
├── Day-03-Decision-Tree
│   └── DecisionTree.py
└── Day-16-Bayesian-Regression
    └── BayesianRegression.py
/mlwithpytorch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mayurji/MLWithPytorch/HEAD/mlwithpytorch.png
--------------------------------------------------------------------------------
/Day-30-Loss-Functions/loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | class MeanSquareLoss:
4 | def __init__(self): pass
5 |
6 | def loss(self, y, y_pred):
7 |         return torch.sum(torch.pow(y - y_pred, 2), dim=1) / y.shape[0]
8 |
9 | def gradient(self, y, y_pred):
10 | return -(y - y_pred)
11 |
12 | class CrossEntropy:
13 | def __init__(self): pass
14 |
15 | def loss(self, y, p):
16 | # Avoid division by zero
17 |         p = torch.clip(p, 1e-15, 1 - 1e-15)
18 | return - y * torch.log(p) - (1 - y) * torch.log(1 - p)
19 |
20 | def gradient(self, y, p):
21 | # Avoid division by zero
22 | p = torch.clip(p, 1e-15, 1 - 1e-15)
23 | return - (y / p) + (1 - y) / (1 - p)
24 |
25 | class MeanAbsoluteLoss:
26 | def __init__(self): pass
27 |
28 | def loss(self, y, y_pred):
29 | return torch.sum(torch.abs(y - y_pred), dim=1) / y.shape[0]
30 |
31 | def gradient(self, y, y_pred):
32 |         return -torch.sign(y - y_pred)  # subgradient of |y - y_pred| w.r.t. y_pred
33 |
34 | class HuberLoss:
35 | def __init__(self):pass
36 |
37 |     def loss(self, y, y_pred, delta):
38 |         abs_error = torch.abs(y - y_pred)
39 |         return torch.where(abs_error <= delta,
40 |                            0.5 * torch.pow(y - y_pred, 2),
41 |                            delta * abs_error - 0.5 * delta ** 2)
42 |
43 | class HingeLoss:
44 | def __init__(self):
45 | pass
46 |
47 | def loss(self, y, y_pred):
48 |         return torch.clamp(1 - y * y_pred, min=0)  # max(0, 1 - y * y_pred) with labels in {-1, +1}
49 |
50 | class KLDivergence:
51 | def __init__(self):
52 | pass
53 |
54 | def loss(self, y, y_pred):
55 | return torch.sum(y_pred * torch.log((y_pred / y)))
56 |
--------------------------------------------------------------------------------
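
A minimal usage sketch (not part of the repository) comparing the squared and Huber losses on a batch with one outlier; it assumes the snippet is run from Day-30-Loss-Functions so that loss.py is importable:

import torch
from loss import MeanSquareLoss, HuberLoss  # assumes Day-30-Loss-Functions is the working directory

y      = torch.tensor([[1.0, 2.0, 3.0, 100.0]])   # the last target is an outlier
y_pred = torch.tensor([[1.1, 1.9, 3.2,   4.0]])

# The squared loss is dominated by the outlier ...
print(MeanSquareLoss().loss(y, y_pred))
# ... while the Huber loss only grows linearly once |y - y_pred| exceeds delta.
print(HuberLoss().loss(y, y_pred, delta=1.0))
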
/Day-26-Normalization/normalization.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.neighbors import KNeighborsClassifier
3 | from sklearn.datasets import load_iris
4 | from sklearn.metrics import accuracy_score
5 | class Normalization:
6 | def __init__(self, X):
7 | self.X = X
8 |
9 | def z_score(self):
10 | mean = torch.mean(self.X, dim=0)
11 | return self.X.subtract(mean)/ torch.std(self.X, dim=0)
12 |
13 | def min_max(self):
14 | min = torch.min(self.X, dim=0)
15 | max = torch.max(self.X, dim=0)
16 | return self.X.subtract(min.values) / (max.values - min.values)
17 |
18 | def log_scaling(self):
19 | return torch.log(self.X)
20 |
21 |     def clipping(self, max, min):
22 |         """
23 |         Clip feature values into the range [min, max].
24 |         :param max: upper bound
25 |         :param min: lower bound
26 |         """
27 |         self.X = torch.where(self.X > max, torch.full_like(self.X, float(max)), self.X)
28 |         self.X = torch.where(self.X < min, torch.full_like(self.X, float(min)), self.X)
29 | 
30 |         return self.X
31 |
32 | if __name__ == '__main__':
33 | data = load_iris()
34 | X = torch.tensor(data.data)
35 | y = torch.tensor(data.target).unsqueeze(1)
36 | cls = KNeighborsClassifier()
37 | normalizer = Normalization(X)
38 | X_transform = normalizer.z_score()
39 | cls.fit(X, y)
40 | y_pred = cls.predict(X)
41 | print('Without Normalization',accuracy_score(y, y_pred))
42 | cls.fit(X_transform, y)
43 | y_pred = cls.predict(X_transform)
44 | print('Z-Score Normalization' ,accuracy_score(y, y_pred))
45 | X_transform = normalizer.min_max()
46 | cls.fit(X_transform, y)
47 | y_pred = cls.predict(X_transform)
48 | print('Min-Max Normalization' ,accuracy_score(y, y_pred))
49 | X_transform = normalizer.log_scaling()
50 | cls.fit(X_transform, y)
51 | y_pred = cls.predict(X_transform)
52 | print('Log Scaling', accuracy_score(y, y_pred))
53 |
--------------------------------------------------------------------------------
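
A small standalone check (not from the repository) of what the z-score and min-max transforms above do to a single feature column:

import torch

x = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
z_score = (x - torch.mean(x, dim=0)) / torch.std(x, dim=0)   # zero mean, unit (sample) std
min_max = (x - torch.min(x, dim=0).values) / (torch.max(x, dim=0).values - torch.min(x, dim=0).values)  # squashed into [0, 1]
print(z_score.flatten())   # tensor([-1.1619, -0.3873,  0.3873,  1.1619])
print(min_max.flatten())   # tensor([0.0000, 0.3333, 0.6667, 1.0000])
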
/Day-09-PCA/pca.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.datasets import load_iris
3 | import seaborn as sb
4 | import matplotlib.pyplot as plt
5 |
6 | class pca:
7 | def __init__(self, n_components):
8 | """
9 |         :param n_components: Number of principal components the data should be reduced to.
10 | """
11 | self.components = n_components
12 |
13 | def fit_transform(self, X):
14 | """
15 | * Centering our inputs with mean
16 | * Finding covariance matrix using centered tensor
17 | * Finding eigen value and eigen vector using torch.eig()
18 | * Sorting eigen values in descending order and finding index of high eigen values
19 | * Using sorted index, get the eigen vectors
20 |         * Transforming the input vectors with n columns into PCA components with reduced dimension
21 | :param X: Input tensor with n columns.
22 | :return: Output tensor with reduced principal components
23 | """
24 | centering_X = X - torch.mean(X, dim=0)
25 | covariance_matrix = torch.mm(centering_X.T, centering_X)/(centering_X.shape[0] - 1)
26 | eigen_values, eigen_vectors = torch.eig(covariance_matrix, eigenvectors=True)
27 | eigen_sorted_index = torch.argsort(eigen_values[:,0],descending=True)
28 | eigen_vectors_sorted = eigen_vectors[:,eigen_sorted_index]
29 | component_vector = eigen_vectors_sorted[:,0:self.components]
30 | transformed = torch.mm(component_vector.T, centering_X.T).T
31 | return transformed
32 |
33 | if __name__ == '__main__':
34 | data = load_iris()
35 | X = torch.tensor(data.data,dtype=torch.double)
36 | y = torch.tensor(data.target)
37 | pca = pca(n_components=2)
38 | pca_vector = pca.fit_transform(X)
39 | plt.figure(figsize=(6, 6))
40 | sb.scatterplot(pca_vector[:, 0], pca_vector[:, 1], hue=y, s=60, palette='icefire')
41 | plt.show()
42 |
--------------------------------------------------------------------------------
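
A standalone check (not from the repository) of the same steps the docstring lists, using torch.linalg.eigh, which applies here because the covariance matrix is symmetric:

import torch

torch.manual_seed(0)
X = torch.randn(100, 3, dtype=torch.double)

# Center, build the covariance matrix, eigendecompose, sort, project.
Xc = X - torch.mean(X, dim=0)
cov = torch.mm(Xc.T, Xc) / (Xc.shape[0] - 1)
eigen_values, eigen_vectors = torch.linalg.eigh(cov)
components = eigen_vectors[:, torch.argsort(eigen_values, descending=True)][:, :2]
print(torch.mm(Xc, components).shape)   # torch.Size([100, 2])
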
/Day-01-Linear-Regression/LinearRegression.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | class LinearRegression:
4 |
5 | def __init__(self):
6 | """
7 | :desc lr: Learning Rate
8 | :desc iteration: Number of iterations over complete data set
9 | """
10 |
11 | self.lr = 0.01
12 | self.iterations = 1000
13 |
14 | def y_pred(self, X, w):
15 | """
16 | :desc w: weight tensor
17 | :desc X: input tensor
18 | """
19 | return torch.mm(torch.transpose(w, 0, 1), X)
20 |
21 | def loss(self, ypred, y):
22 | """
23 | :desc c: cost function - to measure the loss between estimated vs ground truth
24 | """
25 | l = 1 / self.m * torch.sum(torch.pow(ypred - y, 2))
26 | return l
27 |
28 | def gradient_descent(self, w, X, y, ypred):
29 | """
30 | :desc dCdW: derivative of cost function
31 | :desc w_update: change in weight tensor after each iteration
32 | """
33 | dCdW = 2 / self.m * torch.mm(X, torch.transpose(ypred - y, 0, 1))
34 | w_update = w - self.lr * dCdW
35 | return w_update
36 |
37 | def run(self, X, y):
38 | """
39 | :type y: tensor object
40 | :type X: tensor object
41 | """
42 | bias = torch.ones((1, X.shape[1]))
43 | X = torch.cat((bias, X), dim=0)
44 | self.m = X.shape[1]
45 | self.n = X.shape[0]
46 | w = torch.zeros((self.n, 1))
47 |
48 | for iteration in range(1, self.iterations + 1):
49 | ypred = self.y_pred(X, w)
50 | cost = self.loss(ypred, y)
51 |
52 | if iteration % 100 == 0:
53 | print(f'Loss at iteration {iteration} is {cost}')
54 | w = self.gradient_descent(w, X, y, ypred)
55 |
56 | return w
57 |
58 |
59 | if __name__ == '__main__':
60 | """
61 | :desc X: random initialization of input tensor
62 | :desc y: random initialization of output tensor
63 | """
64 | X = torch.rand(1, 500)
65 | y = 2 * X + 3 + torch.randn(1, 500) * 0.1
66 | regression = LinearRegression()
67 | w = regression.run(X, y)
68 |
--------------------------------------------------------------------------------
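
A quick sanity check (not part of the repository): on the same synthetic data, the closed-form least-squares solution is roughly [3, 2] (intercept, slope), which the gradient-descent weights returned by run() should approach:

import torch

torch.manual_seed(0)
X = torch.rand(1, 500)
y = 2 * X + 3 + torch.randn(1, 500) * 0.1

# run() stacks a bias row on top of X, so the design matrix A is (2, 500) with samples as
# columns and the normal equations read (A A^T) w = A y^T.
A = torch.cat((torch.ones((1, X.shape[1])), X), dim=0)
w_closed_form = torch.linalg.solve(torch.mm(A, A.T), torch.mm(A, y.T))
print(w_closed_form.flatten())   # approximately tensor([3.0, 2.0])
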
/Day-17-K-Medoids/utility.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.metrics import accuracy_score
3 | import numpy as np
4 |
5 | class SquareLoss:
6 | def __init__(self):
7 | pass
8 |
9 | def loss(self, y, y_pred):
10 | return 0.5 * torch.pow((y - y_pred), 2)
11 |
12 | def gradient(self, y, y_pred):
13 | return -(y - y_pred)
14 |
15 | class CrossEntropy:
16 | def __init__(self):
17 | pass
18 |
19 | def loss(self, y, p):
20 | p = torch.clip(p, 1e-15, 1 - 1e-15)
21 | return - y * torch.log(p) - (1 - y) * torch.log(1 - p)
22 |
23 | def accuracy(self, y, p):
24 | return accuracy_score(torch.argmax(y, dim=1), torch.argmax(p, dim=1))
25 |
26 | def gradient(self, y, p):
27 | p = torch.clip(p, 1e-15, 1 - 1e-15)
28 | return -(y/p) + (1-y) / (1-p)
29 |
30 | def euclidean_distance(x1, x2):
31 | """
32 | :param x1: input tensor
33 | :param x2: input tensor
34 | :return: distance between tensors
35 | """
36 |
37 | return torch.cdist(x1.unsqueeze(0), x2.unsqueeze(0))
38 |
39 | def to_categorical(X, n_columns=None):
40 | if not n_columns:
41 | n_columns = torch.amax(X) + 1
42 | one_hot = torch.zeros((X.shape[0], n_columns))
43 |     one_hot[torch.arange(X.shape[0]), X] = 1
44 | return one_hot
45 |
46 | def mean_squared_error(y_true, y_pred):
47 | mse = torch.mean(torch.pow(y_true - y_pred, 2))
48 | return mse
49 |
50 | def divide_on_feature(X, feature_i, threshold):
51 |
52 | split_func = None
53 | if isinstance(threshold, int) or isinstance(threshold, float):
54 | split_func = lambda sample: sample[feature_i] >= threshold
55 | else:
56 | split_func = lambda sample: sample[feature_i] == threshold
57 |
58 |
59 | X_1 = torch.tensor([sample.numpy() for sample in X if split_func(sample)])
60 | X_2 = torch.tensor([sample.numpy() for sample in X if not split_func(sample)])
61 |
62 | return np.array([X_1.numpy(), X_2.numpy()], dtype='object')
63 |
64 | def calculate_variance(X):
65 | mean = torch.ones(X.shape) * torch.mean(X, dim=0)
66 | n_samples = X.shape[0]
67 | variance = (1/ n_samples) * torch.diag(torch.mm((X-mean).T, (X-mean)))
68 | return variance
69 |
--------------------------------------------------------------------------------
/Day-23-Gradient-Descent/gd.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from sklearn.datasets import load_boston
4 |
5 | class GradientDescent:
6 | def __init__(self, learning_rate=0.01, max_iterations=100):
7 | self.lr = learning_rate
8 | self.max_iterations = max_iterations
9 |
10 | def normalization(self, X):
11 | """
12 | :param X: Input tensor
13 | :return: Normalized input using l2 norm.
14 | """
15 | l2 = torch.norm(X, p=2, dim=-1)
16 | l2[l2 == 0] = 1
17 | return X / l2.unsqueeze(1)
18 |
19 |     def compute_error(self, b, m, X, y):
20 |         # Mean squared error of y against X @ m.T + b over all samples.
21 |         residual = y - (torch.mm(X, m.T) + b)
22 |         total_error = torch.sum(residual ** 2)
23 |         return total_error / float(X.shape[0])
24 |
25 |     def step(self, b_curr, m_curr, X, y, learning_rate):
26 |         # Full-batch gradients of the mean squared error with respect to the
27 |         # bias b and the slope vector m.
28 |         N = float(X.shape[0])
29 |         residual = y - (torch.mm(X, m_curr.T) + b_curr)
30 |         b_gradient = -(2 / N) * torch.sum(residual, dim=0)
31 |         m_gradient = -(2 / N) * torch.mm(X.T, residual).T
32 |
33 | new_b = b_curr - (learning_rate * b_gradient)
34 | new_m = m_curr - (learning_rate * m_gradient)
35 | return [new_b, new_m]
36 |
37 | def gradient_descent(self, X, y, start_b, start_m):
38 | b = start_b
39 | m = start_m
40 | for i in range(self.max_iterations):
41 | b, m = self.step(b_curr=b, m_curr=m, X=X, y=y, learning_rate=self.lr)
42 |
43 | return b, m
44 |
45 | if __name__ == '__main__':
46 | data = load_boston()
47 | X = torch.tensor(data.data)
48 | y = torch.tensor(data.target).unsqueeze(1)
49 | initial_b = 0.0
50 | initial_m = torch.zeros((X.shape[1], 1), dtype=torch.double).T
51 |     nn.init.normal_(initial_m)
52 | gd = GradientDescent(learning_rate=0.0001,max_iterations=100)
53 | gd.compute_error(X=gd.normalization(X), y=y, b=initial_b, m=initial_m)
54 | bias, slope = gd.gradient_descent(gd.normalization(X), y, start_b=initial_b, start_m=initial_m)
55 | X = gd.normalization(X)
56 | print('y: ', y[0].item())
57 | print('y_pred: ', (torch.mm(slope, X[0].unsqueeze(0).T)+bias).item())
58 |
59 |
--------------------------------------------------------------------------------
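
A tiny standalone illustration (not from the repository) of the same full-batch update on noiseless y = 2x + 3 data, showing the bias and slope converging:

import torch

torch.manual_seed(0)
X = torch.rand(100, 1)
y = 2 * X + 3
b, m = 0.0, torch.zeros(1, 1)
lr = 0.5

for _ in range(200):
    residual = y - (torch.mm(X, m.T) + b)
    b = b - lr * (-(2 / 100) * torch.sum(residual))
    m = m - lr * (-(2 / 100) * torch.mm(X.T, residual).T)

print(b.item(), m.item())   # approaches 3 and 2 respectively
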
/Day-06-KNN/KNN.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from scipy.stats import mode
3 | from sklearn.datasets import load_iris
4 | from sklearn.model_selection import train_test_split
5 | from sklearn.metrics import accuracy_score
6 |
7 | class KNN:
8 | def __init__(self, k, X):
9 | """
10 | :param k: Number of Neighbors
11 | """
12 | self.k = k
13 |
14 | def distance(self, point_1, point_2, default='euclidean', p=2):
15 | if default == 'euclidean':
16 | return torch.norm(point_1 - point_2, 2, 0)
17 | elif default == 'manhattan':
18 | return torch.sum(torch.abs(point_1 - point_2))
19 | elif default == "minkowski":
20 | return torch.pow(torch.sum(torch.abs(point_1 - point_2)**p), 1/p)
21 | else:
22 | raise ValueError("Unknown similarity distance type")
23 |
24 | def fit_predict(self, X, y, item):
25 | """
26 |         * Iterate through each data point (item/y_test) that needs to be classified
27 |         * Find the distance between every training data point and that data point (item/y_test)
28 |         using euclidean distance
29 |         * Sort the distances using argsort, which gives the indices of the nearest training points
30 |         * Find the majority label among the k training points closest to each data point of y_test.
31 |
32 |
33 | :param X: Input tensor
34 | :param y: Ground truth label
35 | :param item: tensors to be classified
36 | :return: predicted labels
37 | """
38 | y_predict = []
39 | for i in item:
40 | point_distances = []
41 | for ipt in range(X.shape[0]):
42 | distances = self.distance(X[ipt, :], i)
43 | point_distances.append(distances)
44 |
45 | point_distances = torch.tensor(point_distances)
46 | k_neighbors = torch.argsort(point_distances)[:self.k]
47 | y_label = y[k_neighbors]
48 | major_class = mode(y_label)
49 | major_class = major_class.mode[0]
50 | y_predict.append(major_class)
51 |
52 | return torch.tensor(y_predict)
53 |
54 | if __name__ == '__main__':
55 | iris = load_iris()
56 | X = torch.tensor(iris.data)
57 | y = torch.tensor(iris.target)
58 | torch.manual_seed(0)
59 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
60 | knn = KNN(k=5, X=x_train)
61 | y_pred = knn.fit_predict(x_train, y_train, x_test)
62 | print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
63 |
--------------------------------------------------------------------------------
/Day-19-ElasticNet/ElasticNetRegression.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.datasets import load_boston
3 | from sklearn.model_selection import train_test_split
4 |
5 | class ElasticNetRegression:
6 | def __init__(self, learning_rate, max_iterations, l1_penality, l2_penality):
7 | self.lr = learning_rate
8 | self.max_iterations = max_iterations
9 | self.l1_penality = l1_penality
10 | self.l2_penality = l2_penality
11 |
12 | def normalization(self, X):
13 | """
14 | :param X: Input tensor
15 | :return: Normalized input using l2 norm.
16 | """
17 | l2 = torch.norm(X, p=2, dim=-1)
18 | l2[l2 == 0] = 1
19 | return X / l2.unsqueeze(1)
20 |
21 | def fit(self, X, y):
22 | self.m, self.n = X.shape
23 | self.w = torch.zeros(self.n, dtype=torch.double).unsqueeze(1)
24 | self.b = 0.0
25 | self.X = X
26 | self.y = y
27 | for i in range(self.max_iterations):
28 | self.update_weights()
29 |
30 | return self
31 |
32 | def update_weights(self):
33 | y_pred = self.predict(self.X)
34 | dw = torch.zeros(self.n).unsqueeze(1)
35 | for j in range(self.n):
36 | if self.w[j] > 0:
37 | dw[j] = ( - (2* torch.mm(self.X[:, j].unsqueeze(0), (self.y - y_pred)) + self.l1_penality + 2 * self.l2_penality * self.w[j])) / self.m
38 | else:
39 | dw[j] = (- (2 * torch.mm(self.X[:, j].unsqueeze(0), (self.y - y_pred)) - self.l1_penality + 2 * self.l2_penality * self.w[j])) / self.m
40 |
41 | db = -2 * torch.sum(self.y - y_pred) / self.m
42 | self.w = self.w - self.lr * dw
43 | self.b = self.b - self.lr * db
44 | return self
45 |
46 | def predict(self, X):
47 | return torch.mm(X, self.w) + self.b
48 |
49 | if __name__ == '__main__':
50 | data = load_boston()
51 | regression = ElasticNetRegression(max_iterations=1000, learning_rate=0.001, l1_penality=500, l2_penality=1)
52 | X, y = regression.normalization(torch.tensor(data.data, dtype=torch.double)), torch.tensor(data.target).unsqueeze(1)
53 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
54 | regression.fit(x_train, y_train)
55 | Y_pred = regression.predict(x_test)
56 | print("Predicted values: ", torch.round(Y_pred[:3]))
57 | print("Real values: ", y_test[:3])
58 | print("Trained W: ", torch.round(regression.w[0]))
59 | print("Trained b: ", torch.round(regression.b))
60 |
61 |
--------------------------------------------------------------------------------
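
A standalone sketch (not from the repository) of the objective that the update rule above descends: the mean squared error plus an L1 and an L2 penalty on the weights:

import torch

def elastic_net_objective(X, y, w, b, l1, l2):
    residual = y - (torch.mm(X, w) + b)
    mse = torch.mean(residual ** 2)
    return mse + l1 * torch.sum(torch.abs(w)) + l2 * torch.sum(w ** 2)

X = torch.randn(10, 3, dtype=torch.double)
w_true = torch.tensor([[1.0], [0.0], [-2.0]], dtype=torch.double)
y = torch.mm(X, w_true)
# With the true weights the residual is zero, so only the penalties remain: 0.1*3 + 0.01*5 = 0.35.
print(elastic_net_objective(X, y, w_true, 0.0, l1=0.1, l2=0.01))
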
/Day-12-LDA/lda.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from NaiveBayes import NaiveBayes
3 | from sklearn.datasets import load_breast_cancer
4 | from sklearn.preprocessing import MinMaxScaler
5 | from sklearn.model_selection import train_test_split
6 | from sklearn.metrics import accuracy_score
7 |
8 | class LDA:
9 | def __init__(self):
10 | self.w = None
11 |
12 | def covariance_matrix(self, X):
13 | """
14 | :param X: Input tensor
15 |         :return: covariance of input tensor
16 | """
17 | centering_X = X - torch.mean(X, dim=0)
18 | cov = torch.mm(centering_X.T, centering_X) / (centering_X.shape[0] - 1)
19 | return cov
20 |
21 | def fit(self, X, y):
22 | """
23 | :param X: Input tensor
24 | :param y: output tensor
25 | :return: transformation vector - to convert high dimensional input space into lower dimensional
26 | subspace.
27 | X1, X2 are samples based on class. cov_1 and cov_2 measures how features of samples of each class are related.
28 |
29 | """
30 | X1 = X[y==0]
31 | X2 = X[y==1]
32 | cov_1 = self.covariance_matrix(X1)
33 | cov_2 = self.covariance_matrix(X2)
34 | cov_total = cov_1 + cov_2
35 | mean1 = torch.mean(X1, dim=0)
36 | mean2 = torch.mean(X2, dim=0)
37 | mean_diff = mean1 - mean2
38 |
39 | # Determine the vector which when X is projected onto it best separates the
40 | # data by class. w = (mean1 - mean2) / (cov1 + cov2)
41 | self.w = torch.mm(torch.pinverse(cov_total), mean_diff.unsqueeze(1))
42 |
43 | def transform(self, X, y):
44 | self.fit(X, y)
45 | X_transformed = torch.mm(X, self.w)
46 | return X_transformed
47 |
48 | def predict(self, X):
49 | y_pred = []
50 | for sample in X:
51 | h = torch.mm(sample.unsqueeze(0), self.w)
52 | y = 1 * (h < 0)
53 | y_pred.append(y)
54 |
55 | return y_pred
56 |
57 | if __name__ == '__main__':
58 | breast_cancer = load_breast_cancer()
59 | X = breast_cancer.data
60 | X_normalized = MinMaxScaler().fit_transform(X)
61 | X = torch.tensor(X_normalized)
62 | y = torch.tensor(breast_cancer.target)#.unsqueeze(1)
63 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
64 | lda = LDA()
65 | X_transformed = lda.transform(x_train, y_train)
66 | GNB = NaiveBayes(X_transformed, y_train)
67 | GNB.find_mu_and_sigma(X_transformed, y_train)
68 | X_test_transformed = lda.transform(x_test, y_test)
69 | y_pred = GNB.predict_probability(X_test_transformed)
70 | print(f'Accuracy Score: {accuracy_score(y_test, y_pred)}')
71 |
--------------------------------------------------------------------------------
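
A tiny standalone check (not from the repository) of the projection rule described in fit(), w = (cov1 + cov2)^-1 (mean1 - mean2), on two well-separated Gaussian blobs:

import torch

torch.manual_seed(0)
X1 = torch.randn(50, 2) + torch.tensor([2.0, 0.0])
X2 = torch.randn(50, 2) + torch.tensor([-2.0, 0.0])

def cov(X):
    Xc = X - torch.mean(X, dim=0)
    return torch.mm(Xc.T, Xc) / (Xc.shape[0] - 1)

mean_diff = (torch.mean(X1, dim=0) - torch.mean(X2, dim=0)).unsqueeze(1)
w = torch.linalg.solve(cov(X1) + cov(X2), mean_diff)
# The two classes end up on opposite sides along the learned direction.
print(torch.mm(X1, w).mean().item() > torch.mm(X2, w).mean().item())   # True
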
/README.md:
--------------------------------------------------------------------------------
1 | ## Machine Learning Using Pytorch
2 |
3 |
4 |
5 | The objective of the repository is to learn and build machine learning models using Pytorch.
6 |
7 | 
8 | 
9 | 
10 | 
11 |
12 | 
13 | 
14 | 
15 | 
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | 
25 |
26 | **List of Algorithms Covered**
27 |
28 | 📌 Day 1 - Linear Regression \
29 | 📌 Day 2 - Logistic Regression \
30 | 📌 Day 3 - Decision Tree \
31 | 📌 Day 4 - KMeans Clustering \
32 | 📌 Day 5 - Naive Bayes \
33 | 📌 Day 6 - K Nearest Neighbour (KNN) \
34 | 📌 Day 7 - Support Vector Machine \
35 | 📌 Day 8 - Tf-Idf Model \
36 | 📌 Day 9 - Principal Components Analysis \
37 | 📌 Day 10 - Lasso and Ridge Regression \
38 | 📌 Day 11 - Gaussian Mixture Model \
39 | 📌 Day 12 - Linear Discriminant Analysis \
40 | 📌 Day 13 - Adaboost Algorithm \
41 | 📌 Day 14 - DBScan Clustering \
42 | 📌 Day 15 - Multi-Class LDA \
43 | 📌 Day 16 - Bayesian Regression \
44 | 📌 Day 17 - K-Medoids \
45 | 📌 Day 18 - TSNE \
46 | 📌 Day 19 - ElasticNet Regression \
47 | 📌 Day 20 - Spectral Clustering \
48 | 📌 Day 21 - Latent Dirichlet \
49 | 📌 Day 22 - Affinity Propagation \
50 | 📌 Day 23 - Gradient Descent Algorithm \
51 | 📌 Day 24 - Regularization Techniques \
52 | 📌 Day 25 - RANSAC Algorithm \
53 | 📌 Day 26 - Normalizations \
54 | 📌 Day 27 - Multi-Layer Perceptron \
55 | 📌 Day 28 - Activations \
56 | 📌 Day 29 - Optimizers \
57 | 📌 Day 30 - Loss Functions
58 |
59 | ### Let me know if any corrections are needed. Feedback is welcome.
60 |
61 | ## References
62 |
63 | * Sklearn Library
64 | * ML-Glossary
65 | * ML From Scratch (Github)
66 |
--------------------------------------------------------------------------------
/Day-21-LatentDirichlet/LDA_TopicModeling.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | from sklearn.datasets import fetch_20newsgroups
5 | from sklearn.feature_extraction.text import CountVectorizer
6 |
7 | class LatentDirichlet:
8 | def __init__(self, D, V, T):
9 | self.D = len(D)
10 | self.V = len(V)
11 | self.T = T
12 | self.alpha = 1 / T
13 | self.beta = 1 / T
14 |
15 | def fit_transform(self, documents):
16 | z_d_n = [[0 for _ in range(len(d))] for d in documents]
17 | theta_d_z = torch.zeros((self.D, self.T))
18 | phi_z_w = torch.zeros((self.T, self.V))
19 | n_z = torch.zeros((self.T))
20 | n_d = torch.zeros((self.D))
21 |
22 | for d, doc in enumerate(documents):
23 | for n, w in enumerate(doc):
24 | z_d_n[d][n] = n % self.T
25 | z = z_d_n[d][n]
26 | theta_d_z[d][z] += 1
27 | phi_z_w[z, w] += 1
28 | n_z[z] += 1
29 | n_d[d] += 1
30 |
31 | for iter in range(10):
32 | for d, doc in enumerate(documents):
33 | for n,w in enumerate(doc):
34 | z = z_d_n[d][n]
35 | theta_d_z[d][z] -= 1
36 | phi_z_w[z, w] -= 1
37 | n_z[z] -= 1
38 | p_d_t = (theta_d_z[d] + self.alpha) / (n_d[d] - 1 + self.T * self.alpha)
39 | p_t_w = (phi_z_w[:, w] + self.beta) / (n_z + self.V * self.beta)
40 | p_z = p_d_t * p_t_w
41 | p_z /= torch.sum(p_z)
42 | new_z = torch.multinomial(p_z, 1)
43 | z_d_n[d][n] = new_z[0]
44 | theta_d_z[d][new_z] += 1
45 | phi_z_w[new_z, w] += 1
46 | n_z[new_z] += 1
47 |
48 | return theta_d_z, phi_z_w
49 |
50 | if __name__ == '__main__':
51 | n_samples = 10000
52 | documents = []
53 | data, _ = fetch_20newsgroups(shuffle=True, random_state=2,
54 | remove=('headers', 'footers', 'quotes'), return_X_y=True)
55 | data_samples = data[:n_samples]
56 | cnt_vectorizer = CountVectorizer(max_df=0.95, min_df=2,
57 | max_features=10000,
58 | stop_words='english')
59 | vectorizer = cnt_vectorizer.fit_transform(data_samples)
60 | vocabulary = cnt_vectorizer.vocabulary_
61 | for row in vectorizer.toarray():
62 | present_words = np.where(row != 0)[0].tolist()
63 | present_words_with_count = []
64 | for w_i in present_words:
65 | for count in range(row[w_i]):
66 | present_words_with_count.append(w_i)
67 | documents.append(present_words_with_count)
68 |
69 | LD = LatentDirichlet(D=documents, V=vocabulary, T=20)
70 | topic_distribution, word_distribution = LD.fit_transform(documents)
71 | i = 1
72 | plt.plot(topic_distribution[i] / sum(topic_distribution[i]));
73 |     plt.title(r"Topic distribution $\theta_i$ for document {}".format(i));
74 | plt.show()
75 |
--------------------------------------------------------------------------------
/Day-28-Activations/activation.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.datasets import load_digits
3 | from sklearn.model_selection import train_test_split
4 | from MLP import MultiLayerPerceptron, CrossEntropy, normalization, accuracy_score, to_categorical
5 |
6 | class Sigmoid:
7 | def __call__(self, X):
8 | return 1 / (1 + torch.exp(-X))
9 |
10 | def gradient(self, X):
11 | return self.__call__(X) * (1 - self.__call__(X))
12 |
13 | class Softmax:
14 | def __call__(self, X):
15 | e_x = torch.exp(X - torch.max(X, dim=-1, keepdim=True).values)
16 | return e_x / torch.sum(e_x, dim=1, keepdim=True)
17 |
18 | def gradient(self, X):
19 | p = self.__call__(X)
20 | return p * (1 - p)
21 |
22 | class TanH:
23 | def __call__(self, X):
24 | return 2 / (1 + torch.exp(-2 * X)) - 1
25 |
26 | def gradient(self,X):
27 | return 1 - torch.pow(self.__call__(X), 2)
28 |
29 | class Relu:
30 | def __call__(self, X):
31 | return torch.where(X>0.0, X, 0.0)
32 |
33 | def gradient(self, X):
34 | return torch.where(X >=0.0, 1.0, 0.0)
35 |
36 | class LeakyRelu:
37 | def __init__(self, alpha):
38 | self.alpha = alpha
39 |
40 | def __call__(self, X):
41 | return torch.where(X > 0.0, X, self.alpha * X)
42 |
43 | def gradient(self, X):
44 | return torch.where(X > 0.0, 1.0, self.alpha)
45 |
46 | class ELU:
47 | def __init__(self, alpha):
48 | self.alpha = alpha
49 |
50 | def __call__(self, X):
51 | return torch.where(X>=0.0, X, self.alpha * (torch.exp(X) - 1))
52 |
53 | def gradient(self, X):
54 | return torch.where(X >= 0.0, 1.0, self.__call__(X) + self.alpha)
55 |
56 | class SELU():
57 | def __init__(self):
58 | self.alpha = 1.6732632423543772848170429916717
59 | self.scale = 1.0507009873554804934193349852946
60 |
61 | def __call__(self, x):
62 | return self.scale * torch.where(x >= 0.0, x, self.alpha*(torch.exp(x)-1))
63 |
64 | def gradient(self, x):
65 | return self.scale * torch.where(x >= 0.0, 1.0, self.alpha * torch.exp(x))
66 |
67 | class SoftPlus():
68 | def __call__(self, x):
69 | return torch.log(1 + torch.exp(x))
70 |
71 | def gradient(self, x):
72 | return 1 / (1 + torch.exp(-x))
73 |
74 | if __name__ == '__main__':
75 | data = load_digits()
76 | X = normalization(torch.tensor(data.data, dtype=torch.double))
77 | y = torch.tensor(data.target)
78 |
79 | # Convert the nominal y values to binary
80 | y = to_categorical(y)
81 |
82 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1)
83 | # MLP
84 | clf = MultiLayerPerceptron(n_hidden=16,
85 | n_iterations=1000,
86 | learning_rate=0.01, activation_function_hidden_layer=Sigmoid(),
87 | activation_function_output_layer=Softmax())
88 |
89 | clf.fit(X_train, y_train)
90 | y_pred = torch.argmax(clf.predict(X_test), dim=1)
91 | y_test = torch.argmax(y_test, dim=1)
92 |
93 | accuracy = accuracy_score(y_test, y_pred)
94 | print("Accuracy:", accuracy)
95 |
--------------------------------------------------------------------------------
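
A standalone sketch (not from the repository) of the property that motivates LeakyRelu above: for negative inputs ReLU's gradient is exactly zero ("dying ReLU"), while LeakyReLU keeps a small slope alpha:

import torch

x = torch.tensor([-2.0, -0.5, 0.5, 2.0])
relu_grad  = (x > 0).float()
leaky_grad = torch.where(x > 0, torch.ones_like(x), torch.full_like(x, 0.1))   # alpha = 0.1
print(relu_grad)    # tensor([0., 0., 1., 1.])
print(leaky_grad)   # tensor([0.1000, 0.1000, 1.0000, 1.0000])
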
/Day-15-MultiClassLDA/multi-class-LDA.py:
--------------------------------------------------------------------------------
1 | """
2 | Checkout below url on Multi-Class LDA
3 | https://multivariatestatsjl.readthedocs.io/en/latest/mclda.html
4 | """
5 | import torch
6 | from sklearn.datasets import load_iris
7 | import matplotlib.pyplot as plt
8 |
9 | class MultiClassLDA:
10 | def __init__(self, solver='svd'):
11 | self.solver = solver
12 |
13 | def covariance_matrix(self, X):
14 | """
15 | :param X: Input tensor
16 |         :return: covariance of input tensor
17 | """
18 | centering_X = X - torch.mean(X, dim=0)
19 | cov = torch.mm(centering_X.T, centering_X) / (centering_X.shape[0] - 1)
20 | return cov
21 |
22 | def scatter_matrix(self, X, y):
23 | """
24 | :param X: Input tensor
25 | :param y: Output tensor
26 | :return: How features are related to each other in within-class distribution and between class distribution
27 | """
28 | n_features = X.shape[1]
29 | labels = y.unique()
30 |
31 |         # Within-Class Scatter Matrix
32 |         sw = torch.zeros((n_features, n_features), dtype=X.dtype)
33 |         for label in labels:
34 |             X_class = X[y==label]
35 |             sw += (X_class.shape[0] - 1) * self.covariance_matrix(X_class)
36 |
37 |         # Between-Class Scatter Matrix: spread of the class means around the overall mean
38 |         overall_mean = torch.mean(X, dim=0).unsqueeze(0)
39 |         sb = torch.zeros((n_features, n_features), dtype=X.dtype)
40 |         for label in labels:
41 |             X_class = X[y==label]
42 |             mean_class = torch.mean(X_class, dim=0).unsqueeze(0)
43 |
44 |             sb += (X_class.shape[0]) * torch.mm((mean_class - overall_mean).T, (mean_class - overall_mean))
45 |
46 |         return sw, sb
47 |
48 | def transform(self, X, y, n_components):
49 | """
50 |         Why the inverse? There is no concept of division for matrices, so multiplying by the inverse
51 |         matrix achieves what division would.
52 | :param X:
53 | :param y:
54 | :param n_components: Transforming from high dimension data to lower dimension n_components.
55 | :return: Transformed set of low dimensional X matrix
56 | """
57 | sw, sb = self.scatter_matrix(X, y)
58 | A = torch.mm(torch.pinverse(sw), sb)
59 | eigen_values, eigen_vectors = torch.eig(A, eigenvectors=True)
60 | eigen_sorted_index = torch.argsort(eigen_values[:, 0], descending=True)
61 | eigen_vectors_sorted = eigen_vectors[:, eigen_sorted_index]
62 | component_vector = eigen_vectors_sorted[:, 0:n_components]
63 | component_vector = component_vector.type(torch.DoubleTensor)
64 | transformed = torch.mm(X, component_vector)
65 | return transformed
66 |
67 | def plot_in_2d(self, X, y, title=None):
68 | """ Plot the dataset X and the corresponding labels y in 2D using the LDA
69 | transformation."""
70 | X_transformed = self.transform(X, y, n_components=2)
71 | x1 = X_transformed[:, 0]
72 | x2 = X_transformed[:, 1]
73 | plt.scatter(x1, x2, c=y)
74 | if title: plt.title(title)
75 | plt.show()
76 |
77 |
78 | if __name__ == '__main__':
79 | data = load_iris()
80 | X = torch.tensor(data.data, dtype=torch.double)
81 | y = torch.tensor(data.target)
82 | mclda = MultiClassLDA()
83 | mclda.plot_in_2d(X, y)
84 |
--------------------------------------------------------------------------------
/Day-02-Logistic-Regression/LogisticRegression.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.datasets import make_blobs
3 |
4 | class LogisticRegression:
5 | def __init__(self, X):
6 | """
7 | :param X: Input tensor
8 | :keyword lr: learning rate
9 | :keyword epochs: number of times the model iterates over complete dataset
10 | :keyword weights: parameters learned during training
11 | :keyword bias: parameter learned during training
12 | """
13 | self.lr = 0.1
14 | self.epochs = 1000
15 | self.m, self.n = X.shape
16 | self.weights = torch.zeros((self.n, 1), dtype=torch.double)
17 | self.bias = 0
18 |
19 | def sigmoid(self, z):
20 | """
21 | :param z: latent variable represents (wx + b)
22 | :return: squashes the real value between 0 and 1 representing probability score.
23 | """
24 | return 1 / (1 + torch.exp(-z))
25 |
26 | def loss(self, yhat):
27 | """
28 | :param yhat: Estimated y
29 |         :return: Log loss - when y=1 the (1 - y) term vanishes and only -log(yhat) contributes, and vice versa when y=0
30 | """
31 | return -(1 / self.m) * torch.sum(y * torch.log(yhat) + (1 - y) * torch.log(1 - yhat))
32 |
33 | def gradient(self, y_predict):
34 | """
35 | :param y_predict: Estimated y
36 | :return: gradient is calculated to find how much change is required in parameters to reduce the loss.
37 | """
38 | dw = 1 / self.m * torch.mm(X.T, (y_predict - y))
39 | db = 1 / self.m * torch.sum(y_predict - y)
40 | return dw, db
41 |
42 | def run(self, X, y):
43 | """
44 | :param X: Input tensor
45 | :param y: Output tensor
46 | :var y_predict: Predicted tensor
47 | :var cost: Difference between ground truth and predicted
48 | :var dw, db: Weight and bias update for weight tensor and bias scalar
49 | :return: updated weights and bias
50 | """
51 | for epoch in range(1, self.epochs + 1):
52 |
53 | y_predict = self.sigmoid(torch.mm(X, self.weights) + self.bias)
54 | cost = self.loss(y_predict)
55 | dw, db = self.gradient(y_predict)
56 |
57 | self.weights -= self.lr * dw
58 | self.bias -= self.lr * db
59 |
60 | if epoch % 100 == 0:
61 | print(f"Cost after iteration {epoch}: {cost}")
62 |
63 | return self.weights, self.bias
64 |
65 | def predict(self, X):
66 | """
67 | :param X: Input tensor
68 | :var y_predict_labels: Converts float value to int/bool true(1) or false(0)
69 | :return: outputs labels as 0 and 1
70 | """
71 | y_predict = self.sigmoid(torch.mm(X, self.weights) + self.bias)
72 | y_predict_labels = y_predict > 0.5
73 |
74 | return y_predict_labels
75 |
76 | if __name__ == '__main__':
77 | """
78 | :var manual_seed: for reproducing the results
79 | :desc unsqueeze: adds a dimension to the tensor at specified position.
80 | """
81 | torch.manual_seed(0)
82 | X, y = make_blobs(n_samples=1000, centers=2)
83 | X = torch.tensor(X)
84 | y = torch.tensor(y).unsqueeze(1)
85 | lr = LogisticRegression(X)
86 | w, b = lr.run(X, y)
87 | y_predict = lr.predict(X)
88 |
89 |     print(f"Accuracy: {torch.sum(y == y_predict) / X.shape[0]}")
90 |
--------------------------------------------------------------------------------
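
A standalone worked example (not from the repository) of the log loss used above: when y = 1 only -log(yhat) contributes, and when y = 0 only -log(1 - yhat):

import torch

yhat = torch.tensor([0.9, 0.2])          # predicted probabilities
y    = torch.tensor([1.0, 0.0])          # ground-truth labels
loss = -(y * torch.log(yhat) + (1 - y) * torch.log(1 - yhat))
print(loss)          # tensor([0.1054, 0.2231])
print(loss.mean())   # 0.1643, the averaged cost computed in loss()
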
/Day-22-AffinityPropagation/AffinityPropagation.py:
--------------------------------------------------------------------------------
1 | """
2 | Reading: https://en.wikipedia.org/wiki/Affinity_propagation
3 | Applicable: Human Face Clustering
4 | """
5 | import torch
6 | from scipy.spatial.distance import pdist,squareform
7 | from sklearn.datasets import load_breast_cancer
8 |
9 | class AffinityPropagation:
10 |     def __init__(self, similarity_matrix, max_iteration=200, num_iter=5, alpha=0.5, print_every=100):
11 |         """
12 |         :param similarity_matrix: pairwise similarity between data points
13 |         :param max_iteration: number of responsibility/availability updates to run
14 |         :param num_iter:
15 |         :param alpha: damping factor applied when updating r and a
16 |         :param print_every:
17 |         """
18 |         self.s = similarity_matrix
19 | self.max_iteration = max_iteration
20 | self.alpha = alpha
21 | self.print_every = print_every
22 | N, N = self.s.shape
23 | self.r = torch.zeros((N, N))
24 | self.a = torch.zeros((N, N))
25 |
26 | def step(self):
27 | """
28 |         r is the responsibility matrix: for each data point x_i, how well-suited x_k is as its
29 |         exemplar, relative to the other candidate exemplars.
30 |         a is the availability matrix: how appropriate it is for x_i to pick x_k as its exemplar,
31 |         taking into account the other data points that also keep x_k as their exemplar.
32 |         :return: updated responsibility and availability matrices
33 | """
34 | N, N = self.s.shape
35 | old_r = self.r
36 | old_a = self.a
37 | a_plus_s = self.a + self.s
38 |
39 | first_max = torch.max(a_plus_s, dim=1)
40 | first_max_indices = torch.argmax(a_plus_s, dim=1)
41 | first_max = torch.reshape(torch.repeat_interleave(first_max.values, N), (N, N))
42 | a_plus_s[range(N), first_max_indices] = float('-inf')
43 | second_max = torch.max(a_plus_s, dim=1).values
44 | # responsibility Update
45 | r = self.s - first_max
46 | r[range(N), first_max_indices] = self.s[range(N), first_max_indices] - second_max[range(N)]
47 | r = self.alpha * old_r + (1 - self.alpha) * r
48 | rp = torch.maximum(r, torch.scalar_tensor(0))
49 | m = rp.size(0)
50 | rp.as_strided([m], [m + 1]).copy_(torch.diag(r))
51 | a = torch.reshape(torch.repeat_interleave(torch.sum(rp, dim=0), N),(N, N)).T - rp
52 | da = torch.diag(a)
53 | a = torch.minimum(a, torch.scalar_tensor(0))
54 | k = a.size(0)
55 | a.as_strided([k], [k+1]).copy_(da)
56 |         # Availability Update
57 | a = self.alpha * old_a + (1 - self.alpha) * a
58 |
59 | return r, a
60 |
61 | def solve(self):
62 | for i in range(self.max_iteration):
63 | self.r, self.a = self.step()
64 |
65 | e = self.r + self.a
66 |
67 | N, N = e.shape
68 | I = torch.where(torch.diag(e) > 0)[0]
69 | K = len(I)
70 |
71 | c = self.s[:, I]
72 | c = torch.argmax(c, dim=1)
73 | c[I] = torch.arange(0, K)
74 | idx = I[c]
75 | exemplar_indices = I
76 | exemplar_assignment = idx
77 | return exemplar_indices, exemplar_assignment
78 |
79 | if __name__ == "__main__":
80 | """
81 | :param similarity_matrix
82 | It finds the similarity between data points.
83 |
84 | """
85 | data = load_breast_cancer()
86 | x = torch.tensor(data.data, dtype=torch.double)
87 |
88 | similarity_matrix = squareform(pdist(x, metric='euclidean'))
89 | similarity_matrix = torch.from_numpy(similarity_matrix)
90 | max_iteration = 3000
91 | affinity_prop = AffinityPropagation(similarity_matrix, max_iteration=max_iteration,
92 | alpha=0.5)
93 | indices, assignment = affinity_prop.solve()
94 |
--------------------------------------------------------------------------------
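
A standalone numeric illustration (not from the repository) of the responsibility update described in step(): r(i, k) = s(i, k) - max over k' != k of (a(i, k') + s(i, k')), shown here with the availabilities still at zero:

import torch

s = torch.tensor([[ 0.0, -5.0, -2.0],
                  [-5.0,  0.0, -1.0],
                  [-2.0, -1.0,  0.0]])
a = torch.zeros(3, 3)
a_plus_s = a + s

r = torch.empty_like(s)
for i in range(3):
    for k in range(3):
        others = torch.cat((a_plus_s[i, :k], a_plus_s[i, k + 1:]))
        r[i, k] = s[i, k] - torch.max(others)
print(r)   # e.g. r[0, 0] = 0 - (-2) = 2: point 0 is a far better exemplar for itself than any other point
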
/Day-12-LDA/NaiveBayes.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from sklearn.datasets import load_iris
4 | from sklearn.metrics import accuracy_score
5 | from sklearn.model_selection import train_test_split
6 |
7 | class NaiveBayes:
8 | def __init__(self, X, y):
9 | """
10 | :param X: input tensor
11 | :param y: target tensor
12 | :var total_samples: Number of Samples
13 | :var feature_count: Number of Features
14 | :var mu: mean
15 | :var sigma: variance
16 | :var e: epsilon
17 | :var n_classes: number of classes
18 |
19 | why e - epsilon ?
20 | # If the ratio of data variance between dimensions is too small, it
21 | # will cause numerical errors. To address this, we artificially
22 | # boost the variance by epsilon, a small fraction of the standard
23 | # deviation of the largest dimension.
24 | """
25 | self.total_samples, self.feature_count = X.shape[0], X.shape[1]
26 | self.mu = {}
27 | self.sigma = {}
28 | self.prior_probability_X = {}
29 | self.e = 1e-4
30 | self.n_classes = len(y.unique())
31 |
32 | def find_mu_and_sigma(self, X, y):
33 | """
34 | Bayes Theorem:
35 | P(Y|X) = P(X|Y) * P(Y) / P(X)
36 |
37 | :type mu: dict
38 | :type sigma: dict
39 | :type prior_probability: dict
40 | :describe mu: keys are class label and values are feature's mean values.
41 | :describe sigma: keys are class label and values are feature's variance values.
42 |         :describe prior probability of x: It calculates the prior probability of X for each class. P(X).
43 | :return:
44 | """
45 | for cls in range(self.n_classes):
46 | X_class = X[y==cls]
47 | self.mu[cls] = torch.mean(X_class, dim=0)
48 | self.sigma[cls] = torch.var(X_class, dim=0)
49 | self.prior_probability_X[cls] = X_class.shape[0] / X.shape[0]
50 |
51 | def gaussian_naive_bayes(self, X, mu, sigma):
52 | """
53 | :return: Multivariate normal(gaussian) distribution - Maximum Likelihood Estimation
54 | https://www.statlect.com/fundamentals-of-statistics/multivariate-normal-distribution-maximum-likelihood
55 |
56 | Log Likelihood Function = Constant - probability
57 | """
58 | constant = -self.feature_count / 2 * torch.log(2 * torch.tensor(np.pi)) - 0.5 * torch.sum(torch.log(sigma+self.e))
59 | probability = 0.5 * torch.sum(torch.pow(X-mu, 2) / (sigma + self.e), dim=1)
60 | return constant - probability
61 |
62 | def predict_probability(self, X):
63 | """
64 | Calculating probabilities for each sample input in X using prior probability
65 | and gaussian density function.
66 | torch.argmax: To find the class with max-probability.
67 |         Note: We calculate log probabilities, as in sklearn's predict_log_proba, which is why there is a + sign between
68 |         the prior probabilities and the likelihood (class probability).
69 |
70 | :return:
71 | """
72 | probabilities = torch.zeros((X.shape[0], self.n_classes))
73 | for cls in range(self.n_classes):
74 | class_probability = self.gaussian_naive_bayes(X, self.mu[cls], self.sigma[cls])
75 | probabilities[:, cls] = class_probability + torch.log(torch.scalar_tensor(self.prior_probability_X[cls]))
76 |
77 |
78 | return torch.argmax(probabilities, dim=1)
79 |
80 | if __name__ == '__main__':
81 | iris = load_iris()
82 | X = torch.tensor(iris.data)
83 | y = torch.tensor(iris.target)
84 | torch.manual_seed(0)
85 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
86 | GNB = NaiveBayes(x_train, y_train)
87 | GNB.find_mu_and_sigma(x_train, y_train)
88 | y_pred = GNB.predict_probability(x_test)
89 | print(f'Accuracy Score: {accuracy_score(y_test, y_pred)}')
90 |
--------------------------------------------------------------------------------
/Day-28-Activations/MLP.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.datasets import load_digits
3 | from sklearn.model_selection import train_test_split
4 | def accuracy_score(y, p):
5 | accuracy = torch.sum(y == p, dim=0) / len(y)
6 | return accuracy
7 |
8 | def to_categorical(X, n_col=None):
9 | if not n_col:
10 | n_col = torch.amax(X) + 1
11 |
12 | one_hot = torch.zeros((X.shape[0], n_col))
13 | one_hot[torch.arange(X.shape[0]), X] = 1
14 | return one_hot
15 |
16 | def normalization(X):
17 | """
18 | :param X: Input tensor
19 | :return: Normalized input using l2 norm.
20 | """
21 | l2 = torch.norm(X, p=2, dim=-1)
22 | l2[l2 == 0] = 1
23 | return X / l2.unsqueeze(1)
24 |
25 | class CrossEntropy:
26 | def __init__(self):
27 | pass
28 | def loss(self, y, p):
29 | p = torch.clip(p, 1e-15, 1-1e-15)
30 | return - y * torch.log(p) - (1 -y) * torch.log(1 - p)
31 |
32 | def accuracy_score(self, y, p):
33 | return accuracy_score(torch.argmax(y, dim=1), torch.argmax(p, dim=1))
34 |
35 | def gradient(self, y, p):
36 | p = torch.clip(p, 1e-15, 1 - 1e-15)
37 | return - (y / p) + (1 - y) / (1 -p)
38 | class MultiLayerPerceptron:
39 | def __init__(self, n_hidden, activation_function_hidden_layer, activation_function_output_layer, n_iterations=1000, learning_rate=0.001):
40 | self.n_hidden = n_hidden
41 | self.n_iterations = n_iterations
42 | self.learning_rate = learning_rate
43 | self.hidden_activation = activation_function_hidden_layer
44 | self.output_activation = activation_function_output_layer
45 | self.loss = CrossEntropy()
46 |
47 | def initalize_weight(self, X, y):
48 | n_samples, n_features = X.shape
49 | _, n_outputs = y.shape
50 | limit = 1 / torch.sqrt(torch.scalar_tensor(n_features))
51 | self.W = torch.DoubleTensor(n_features, self.n_hidden).uniform_(-limit, limit)
52 |
53 |         self.W0 = torch.zeros((1, self.n_hidden), dtype=torch.double)
54 | limit = 1 / torch.sqrt(torch.scalar_tensor(self.n_hidden))
55 | self.V = torch.DoubleTensor(self.n_hidden, n_outputs).uniform_(-limit, limit)
56 |         self.V0 = torch.zeros((1, n_outputs), dtype=torch.double)
57 |
58 | def fit(self, X, y):
59 | self.initalize_weight(X, y)
60 | for i in range(self.n_iterations):
61 | hidden_input = torch.mm(X, self.W) + self.W0
62 | hidden_output = self.hidden_activation(hidden_input)
63 |
64 | output_layer_input = torch.mm(hidden_output, self.V) + self.V0
65 | y_pred = self.output_activation(output_layer_input)
66 |
67 | grad_wrt_first_output = self.loss.gradient(y, y_pred) * self.output_activation.gradient(output_layer_input)
68 | grad_v = torch.mm(hidden_output.T, grad_wrt_first_output)
69 | grad_v0 = torch.sum(grad_wrt_first_output, dim=0, keepdim=True)
70 |
71 | grad_wrt_first_hidden = torch.mm(grad_wrt_first_output, self.V.T) * self.hidden_activation.gradient(hidden_input)
72 | grad_w = torch.mm(X.T, grad_wrt_first_hidden)
73 | grad_w0 = torch.sum(grad_wrt_first_hidden, dim=0, keepdim=True)
74 |
75 | # Update weights (by gradient descent)
76 | # Move against the gradient to minimize loss
77 | self.V -= self.learning_rate * grad_v
78 | self.V0 -= self.learning_rate * grad_v0
79 | self.W -= self.learning_rate * grad_w
80 | self.W0 -= self.learning_rate * grad_w0
81 |
82 | # Use the trained model to predict labels of X
83 |
84 | def predict(self, X):
85 | # Forward pass:
86 | hidden_input = torch.mm(X,self.W) + self.W0
87 | hidden_output = self.hidden_activation(hidden_input)
88 | output_layer_input = torch.mm(hidden_output, self.V) + self.V0
89 | y_pred = self.output_activation(output_layer_input)
90 | return y_pred
91 |
92 |
93 |
94 |
95 |
--------------------------------------------------------------------------------
/Day-05-Naive-Bayes/NaiveBayes.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from sklearn.datasets import load_iris
4 | from sklearn.metrics import accuracy_score
5 | from sklearn.model_selection import train_test_split
6 |
7 | class NaiveBayes:
8 | def __init__(self, X, y):
9 | """
10 | why e - epsilon ?
11 | # If the ratio of data variance between dimensions is too small, it
12 | # will cause numerical errors. To address this, we artificially
13 | # boost the variance by epsilon, a small fraction of the standard
14 | # deviation of the largest dimension.
15 |
16 | :param X: input tensor
17 | :param y: target tensor
18 | :var total_samples: Number of Samples
19 | :var feature_count: Number of Features
20 | :var mu: mean
21 | :var sigma: variance
22 | :var e: epsilon
23 | :var n_classes: number of classes
24 | """
25 | self.total_samples, self.feature_count = X.shape[0], X.shape[1]
26 | self.mu = {}
27 | self.sigma = {}
28 | self.prior_probability_X = {}
29 | self.e = 1e-4
30 | self.n_classes = len(y.unique())
31 |
32 | def find_mu_and_sigma(self, X, y):
33 | """
34 | Bayes Theorem:
35 | P(Y|X) = P(X|Y) * P(Y) / P(X)
36 |
37 | :type mu: dict
38 | :type sigma: dict
39 | :type prior_probability: dict
40 | :describe mu: keys are class label and values are feature's mean values.
41 | :describe sigma: keys are class label and values are feature's variance values.
42 |         :describe prior probability of x: It calculates the prior probability of X for each class. P(X).
43 | :return:
44 | """
45 | for cls in range(self.n_classes):
46 | X_class = X[y==cls]
47 | self.mu[cls] = torch.mean(X_class, dim=0)
48 | self.sigma[cls] = torch.var(X_class, dim=0)
49 | self.prior_probability_X[cls] = X_class.shape[0] / X.shape[0]
50 |
51 | def gaussian_naive_bayes(self, X, mu, sigma):
52 | """
53 | :return: Multivariate normal(gaussian) distribution - Maximum Likelihood Estimation
54 | https://www.statlect.com/fundamentals-of-statistics/multivariate-normal-distribution-maximum-likelihood
55 |
56 | Log Likelihood Function = Constant - probability
57 | """
58 | constant = - self.feature_count / 2 * torch.log(2 * torch.tensor(np.pi)) - 0.5 * torch.sum(torch.log(sigma+self.e))
59 | probability = 0.5 * torch.sum(torch.pow(X-mu, 2) / (sigma + self.e), dim=1)
60 | return constant - probability
61 |
62 | def predict_probability(self, X):
63 | """
64 | Calculating probabilities for each sample input in X using prior probability
65 | and gaussian density function.
66 |
67 | torch.argmax: To find the class with max-probability.
68 |
69 |         Note: We calculate log probabilities, as in sklearn's predict_log_proba, which is why there is a + sign between
70 |         the prior probabilities and the likelihood (class probability).
71 |
72 | :return:
73 | """
74 | probabilities = torch.zeros((X.shape[0], self.n_classes))
75 | for cls in range(self.n_classes):
76 | class_probability = self.gaussian_naive_bayes(X, self.mu[cls], self.sigma[cls])
77 | probabilities[:, cls] = class_probability + torch.log(torch.scalar_tensor(self.prior_probability_X[cls]))
78 |
79 |
80 | return torch.argmax(probabilities, dim=1)
81 |
82 | if __name__ == '__main__':
83 | iris = load_iris()
84 | X = torch.tensor(iris.data)
85 | y = torch.tensor(iris.target)
86 | torch.manual_seed(0)
87 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
88 | GNB = NaiveBayes(x_train, y_train)
89 | GNB.find_mu_and_sigma(x_train, y_train)
90 | y_pred = GNB.predict_probability(x_test)
91 | print(f'Accuracy Score: {accuracy_score(y_test, y_pred)}')
92 |
--------------------------------------------------------------------------------
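
A standalone worked example (not from the repository) of the per-class log-likelihood used above ("constant - probability"), for a single feature with mu = 0 and sigma = 1; the result is simply log N(x | 0, 1):

import torch
import numpy as np

x = torch.tensor([[0.0], [1.0], [2.0]])
mu, sigma, n_features = 0.0, 1.0, 1
constant = -n_features / 2 * torch.log(2 * torch.tensor(np.pi)) - 0.5 * np.log(sigma)
probability = 0.5 * torch.sum((x - mu) ** 2 / sigma, dim=1)
print(constant - probability)   # tensor([-0.9189, -1.4189, -2.9189])
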
/Day-29-Optimizers/optimizer.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | class StochasticGradientDescentWithMomentum:
4 | def __init__(self, learning_rate=0.001, momentum=0):
5 | self.lr = learning_rate
6 | self.momentum = momentum
7 | self.w_update = None
8 |
9 | def update(self, w, gradient_wrt_w):
10 | if self.w_update is None:
11 | self.w_update = torch.zeros(w.shape)
12 |
13 | self.w_update = self.momentum * self.w_update + (1 - self.momentum) * gradient_wrt_w
14 | return w - self.lr * self.w_update
15 |
16 | class NesterovAcceleratedGradient:
17 | def __init__(self, learning_rate=0.001, momentum=0.4):
18 | self.lr = learning_rate
19 | self.momentum = momentum
20 | self.w_update = torch.tensor([])
21 |
22 |     def update(self, w, gradient_function):
23 |         if not self.w_update.any():
24 |             self.w_update = torch.zeros(w.shape)
25 |
26 |         approx_future_gradient = torch.clip(gradient_function(w - self.momentum * self.w_update), -1, 1)
27 |
28 | self.w_update = self.momentum * self.w_update + self.lr * approx_future_gradient
29 | return w - self.w_update
30 |
31 | class Adagrad:
32 | def __init__(self, learning_rate=0.01):
33 | self.lr = learning_rate
34 | self.G = None
35 | self.eps = 1e-8
36 |
37 | def update(self, w, gradient_wrt_w):
38 | if self.G is None:
39 | self.G = torch.zeros(w.shape)
40 |
41 | self.G += torch.pow(gradient_wrt_w, 2)
42 | return w - self.lr * gradient_wrt_w / torch.sqrt(self.G + self.eps)
43 |
44 | class Adadelta:
45 | def __init__(self, rho=0.95, eps=1e-6):
46 | self.E_W_update = None
47 | self.E_gradient = None
48 | self.w_update = None
49 | self.eps = eps
50 | self.rho = rho
51 |
52 | def update(self, w, gradient_wrt_w):
53 | if self.w_update is None:
54 | self.w_update = torch.zeros(w.shape)
55 | self.E_gradient = torch.zeros(gradient_wrt_w.shape)
56 | self.E_W_update = torch.zeros(w.shape)
57 |
58 | self.E_gradient = self.rho * self.E_gradient + (1 - self.rho) * torch.pow(gradient_wrt_w, 2)
59 | RMS_Delta_W = torch.sqrt(self.E_W_update + self.eps)
60 | RMS_gradient = torch.sqrt(self.E_gradient + self.eps)
61 |
62 | adaptive_lr = RMS_Delta_W / RMS_gradient
63 | self.w_update = adaptive_lr * gradient_wrt_w
64 | self.E_W_update = self.rho * self.E_W_update + (1 - self.rho) * torch.pow(self.w_update, 2)
65 | return w - self.w_update
66 |
67 | class RMSprop:
68 | def __init__(self, learning_rate=0.01, rho=0.9):
69 | self.lr = learning_rate
70 | self.Eg = None
71 | self.eps = 1e-8
72 | self.rho = rho
73 |
74 | def update(self, w, gradient_wrt_w):
75 | if self.Eg is None:
76 | self.Eg = torch.zeros(gradient_wrt_w.shape)
77 |
78 | self.Eg = self.rho * self.Eg + (1 - self.rho) * torch.pow(gradient_wrt_w, 2)
79 | return w - self.lr * gradient_wrt_w / torch.sqrt(self.Eg + self.eps)
80 |
81 | class Adam:
82 | def __init__(self, learning_rate=0.001, b1=0.9, b2=0.999):
83 | self.lr = learning_rate
84 | self.eps = 1e-8
85 | self.m = None
86 | self.v = None
87 | self.b1 = b1
88 | self.b2 = b2
89 |
90 | def update(self, w, gradient_wrt_w):
91 | if self.m is None:
92 | self.m = torch.zeros(gradient_wrt_w.shape)
93 | self.v = torch.zeros(gradient_wrt_w.shape)
94 |
95 | self.m = self.b1 * self.m + (1 - self.b1) * gradient_wrt_w
96 | self.v = self.b2 * self.v + (1 - self.b2) * torch.pow(gradient_wrt_w, 2)
97 |
98 | m_hat = self.m / (1 - self.b1)
99 | v_hat = self.v / (1 - self.b2)
100 |
101 |         self.w_update = self.lr * m_hat / (torch.sqrt(v_hat) + self.eps)
102 |
103 | return w - self.w_update
104 |
--------------------------------------------------------------------------------
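
A minimal usage sketch (not part of the repository): each optimizer above exposes update(w, gradient_wrt_w), so minimizing f(w) = (w - 3)^2 with Adam looks like this (assumes Day-29-Optimizers is the working directory so optimizer.py is importable):

import torch
from optimizer import Adam

w = torch.tensor([0.0])
adam = Adam(learning_rate=0.1)
for _ in range(500):
    grad = 2 * (w - 3.0)       # gradient of (w - 3)^2
    w = adam.update(w, grad)
print(w)                        # close to tensor([3.])
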
/Day-07-SVM/svm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.preprocessing import MinMaxScaler
3 | from sklearn.model_selection import train_test_split
4 | from sklearn.datasets import load_breast_cancer
5 | from sklearn.metrics import accuracy_score
6 | from sklearn.utils import shuffle
7 |
8 | class SVM:
9 | def __init__(self, X, y, C=1.0):
10 | self.total_samples, self.features_count = X.shape[0], X.shape[1]
11 | self.n_classes = len(y.unique())
12 | self.learning_rate = 0.001
13 | self.C = C
14 |
15 | def loss(self, X, W, y):
16 | """
17 | C parameter tells the SVM optimization how much you want to avoid misclassifying each training
18 | example. For large values of C, the optimization will choose a smaller-margin hyperplane if that
19 | hyperplane does a better job of getting all the training points classified correctly. Conversely,
20 | a very small value of C will cause the optimizer to look for a larger-margin separating hyperplane,
21 | even if that hyperplane misclassifies more points. For very tiny values of C, you should get
22 | misclassified examples, often even if your training data is linearly separable.
23 |
24 | :param X:
25 | :param W:
26 | :param y:
27 | :return:
28 | """
29 | num_samples = X.shape[0]
30 | distances = 1 - y * (torch.mm(X, W.T))
31 |
32 | distances[distances < 0] = 0
33 |         hinge_loss = self.C * (torch.sum(distances) / num_samples)
34 | cost = 1 / 2 * torch.mm(W, W.T) + hinge_loss
35 | return cost
36 |
37 | def gradient_update(self, W, X, y):
38 | """
39 | :param W: Weight Matrix
40 | :param X: Input Tensor
41 | :param y: Ground truth tensor
42 | :return: change in weight
43 | """
44 | distance = 1 - (y * torch.mm(X, W.T))
45 | dw = torch.zeros((1, X.shape[1]),dtype=torch.double)
46 | for idx, dist in enumerate(distance):
47 | if max(0, dist) == 0:
48 | di = W
49 | else:
50 | di = W - (self.C * y[idx] * X[idx])
51 |
52 | dw += di
53 |
54 | dw = dw / len(y)
55 | return dw
56 |
57 | def fit(self, X, y, max_epochs):
58 | """
59 | :param X: Input Tensor
60 | :param y: Output tensor
61 | :param max_epochs: Number of epochs the complete dataset is passed through the model
62 | :return: learned weight of the svm model
63 | """
64 | weight = torch.randn((1, X.shape[1]), dtype=torch.double) * torch.sqrt(torch.scalar_tensor(1./X.shape[1]))
65 | cost_threshold = 0.0001
66 | previous_cost = float('inf')
67 | nth = 0
68 | for epoch in range(1, max_epochs+1):
69 | X, y = shuffle(X, y)
70 | for idx, x in enumerate(X):
71 | weight_update = self.gradient_update(weight, torch.tensor(x).unsqueeze(0), y[idx])
72 | weight = weight - (self.learning_rate * weight_update)
73 |
74 | if epoch % 100 == 0:
75 | cost = self.loss(X, weight, y)
76 | print(f'Loss at epoch {epoch}: {cost}')
77 | if abs(previous_cost - cost) < cost_threshold * previous_cost:
78 | return weight
79 | previous_cost = cost
80 | nth += 1
81 | return weight
82 |
83 | if __name__ == '__main__':
84 | num_epochs = 1000
85 | breast_cancer = load_breast_cancer()
86 | X = breast_cancer.data
87 | X_normalized = MinMaxScaler().fit_transform(X)
88 | X = torch.tensor(X_normalized)
89 | y = torch.tensor(breast_cancer.target).unsqueeze(1)
90 | bias = torch.ones((X.shape[0], 1))
91 | X = torch.cat((bias, X), dim=1)
92 | torch.manual_seed(0)
93 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
94 | svm = SVM(x_train, y_train)
95 | model_weights = svm.fit(x_train, y_train, max_epochs=num_epochs)
96 | y_pred = torch.sign(torch.mm(x_test, model_weights.T))
97 | print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
98 |
--------------------------------------------------------------------------------
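
A standalone illustration (not from the repository) of the hinge term behind the C trade-off described in loss(): correctly classified points beyond the margin cost nothing, while points inside the margin or on the wrong side are penalised linearly, scaled by C:

import torch

y      = torch.tensor([1.0, 1.0, -1.0])          # labels in {-1, +1}
scores = torch.tensor([2.0, 0.5,  0.5])          # w.x for three samples
hinge  = torch.clamp(1 - y * scores, min=0)
print(hinge)                                      # tensor([0.0000, 0.5000, 1.5000])
print(0.1 * torch.mean(hinge), 10 * torch.mean(hinge))   # a small C tolerates violations, a large C punishes them
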
/Day-25-RANSAC/ransac.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import math
3 | import random
4 | from sklearn.datasets import make_regression
5 | import matplotlib.pyplot as plt
6 | import pandas as pd
7 |
8 | class LeastSquareModel:
9 | def fit(self, A, Y):
10 | A_T = A.T
11 | A_T_A = torch.mm(A_T, A)
12 | A_T_Y = torch.mm(A_T, Y)
13 | model = torch.mm(torch.pinverse(A_T_A),A_T_Y)
14 |
15 | return model
16 |
17 | class RansacModel:
18 | def __init__(self, curve_fitting_model):
19 | self.curve_fitting_model = curve_fitting_model
20 |
21 | def fit(self, A, Y, num_sample, threshold):
22 | num_iterations = math.inf
23 | iterations_done = 0
24 | num_samples = 3
25 | max_inlier_count = 0
26 | best_model = None
27 | probability_outlier = torch.scalar_tensor(0.5, dtype=torch.double)
28 | desired_prob = torch.scalar_tensor(0.95, dtype=torch.double)
29 | total_data = torch.column_stack((A, Y))
30 | data_size = len(total_data)
31 |
32 | while num_iterations > iterations_done:
33 |
34 | random.shuffle(total_data)
35 | sample_data = total_data[:num_samples, :]
36 | estimated_model = self.curve_fitting_model.fit(sample_data[:, :-1], sample_data[:, -1:])
37 | y_cap = torch.mm(A, estimated_model)
38 | error = torch.abs(Y - y_cap.T)
39 | inlier_count = torch.count_nonzero(error < threshold)
40 |
41 | if inlier_count > max_inlier_count:
42 | max_inlier_count = inlier_count
43 | best_model = estimated_model
44 |
45 | probability_outlier = 1 - inlier_count/data_size
46 | #print('# inliers:', inlier_count)
47 | #print('# prob_outlier:', probability_outlier)
48 | num_iterations = torch.log(1 - desired_prob) / torch.log(1 - (1 - probability_outlier) ** num_sample)
49 | iterations_done = iterations_done + 1
50 |
51 | #print('# s:', iterations_done)
52 | #print('# n:', num_iterations)
53 | #print('# max_inlier_count: ', max_inlier_count)
54 |
55 | return best_model
56 |
57 | def fit_curve(X, y):
58 | x_square = torch.pow(X, 2)
59 |
60 | A = torch.stack((x_square, X, torch.ones(X.shape[0]).unsqueeze(1)),dim=1)
61 | A = A.squeeze(2)
62 | threshold = torch.std(y) / 5
63 | ls_model = LeastSquareModel()
64 | ls_model_estimate = ls_model.fit(A, y)
65 |
66 | ls_model_y = torch.mm(A, ls_model_estimate)
67 |
68 | ransac_model = RansacModel(ls_model)
69 | ransac_model_estimate = ransac_model.fit(A, y, 3, threshold)
70 | ransac_model_y = torch.mm(A, ransac_model_estimate)
71 |
72 | return ls_model_y, ransac_model_y
73 |
74 | if __name__ == '__main__':
75 | X1, y1 = make_regression(n_features=1, n_targets=1)
76 | X2, y2 = make_regression(n_features=1, n_targets=1)
77 |
78 | # X1, y1 = data1['x '], data1['y']
79 | # X2, y2 = data2['X'], data2['y']
80 | X1, y1 = torch.tensor(X1, dtype=torch.double), torch.tensor(y1, dtype=torch.double).unsqueeze(1)
81 | X2, y2 = torch.tensor(X2, dtype=torch.double), torch.tensor(y2, dtype=torch.double).unsqueeze(1)
82 | ls_model_y1, ransac_model_y1 = fit_curve(X1, y1)
83 | ls_model_y2, ransac_model_y2 = fit_curve(X2, y2)
84 |
85 | fig, (ax1, ax2) = plt.subplots(1, 2)
86 |
87 | ax1.set_title('Dataset-1')
88 | ax1.scatter(X1, y1, marker='o', color=(0, 1, 0), label='data points')
89 | ax1.plot(X1, ls_model_y1, color='red', label='Least square model')
90 | ax1.plot(X1, ransac_model_y1, color='blue', label='Ransac model')
91 | ax1.set(xlabel='x-axis', ylabel='y-axis')
92 | ax1.legend()
93 |
94 | ax2.set_title('Dataset-2')
95 | ax2.scatter(X2, y2, marker='o', color=(0, 1, 0), label='data points')
96 | ax2.plot(X2, ls_model_y2, color='red', label='Least square model')
97 | ax2.plot(X2, ransac_model_y2, color='blue', label='Ransac model')
98 | ax2.set(xlabel='x-axis', ylabel='y-axis')
99 | ax2.legend()
100 |
101 | plt.show()
102 |
--------------------------------------------------------------------------------
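The adaptive iteration count in RansacModel.fit above follows the standard RANSAC bound
N = log(1 - p) / log(1 - (1 - e)^s), where p is the desired success probability, e the (estimated) outlier
ratio and s the minimal sample size. A small sketch with assumed values (p = 0.95, e = 0.5, s = 3):

import math

p = 0.95   # desired probability of drawing at least one all-inlier minimal sample
e = 0.5    # assumed outlier ratio
s = 3      # points per minimal sample, as in the file

n_iterations = math.log(1 - p) / math.log(1 - (1 - e) ** s)
print(math.ceil(n_iterations))   # roughly 23 iterations for these assumptions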
/Day-24-Regularization/regularization.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.datasets import load_iris
3 |
4 | class Regularization:
5 | def __init__(self, X):
6 | self.X = X
7 |
8 | def dropout(self, drop_probability):
9 | """
10 | Dropout is a regularization technique for neural networks that drops a unit (along with connections) at
11 | training time with a specified probability P (a common value is P = 0.5). At test time, all units are present,
12 | but with weights scaled by p (i.e. w becomes pw).
13 | The idea is to prevent co-adaptation, where the neural network becomes too reliant on particular
14 | connections, as this could be symptomatic of overfitting. Intuitively, dropout can be thought of as creating
15 | an implicit ensemble of neural networks.
16 | :param drop_probability: float value between 0 to 1
17 | """
18 | if drop_probability < 1.0:
19 | keep_probability = 1 - drop_probability
20 | masker = torch.FloatTensor(self.X.shape).uniform_(0, 1)
21 | masked = masker < keep_probability
22 |
23 | if keep_probability > 0.0:
24 | scale = 1 / keep_probability
25 | else:
26 | scale = 0.0
27 |
28 | return masked * self.X * scale
29 |
30 | def L2_Regularization(self, y, W, lambda_value):
31 | """
32 | Weight Decay, or L2 Regularization, is a regularization technique applied to the weights of a neural network.
33 | We minimize a loss function comprising both the primary loss function and a penalty on the L2 norm of the
34 | weights:
35 | L_new(w) = L_original(w) + lambda * W^T * W
36 | where lambda is a value determining the strength of the penalty (encouraging smaller weights).
37 | Weight decay can be incorporated directly into the weight update rule, rather than just implicitly by defining
38 | it through the objective function. Often weight decay refers to the implementation where we specify it directly
39 | in the weight update rule (whereas L2 regularization is usually the implementation which is specified in the
40 | objective function).
41 | """
42 | Regularization_term = (lambda_value * torch.mm(W, W.T)).type(torch.DoubleTensor) / (2 * y.shape[0])
43 | output = torch.sum((y - torch.mm(self.X, W.T))**2, dim=0) + Regularization_term
44 | return output
45 |
46 | def L1_Regularization(self, y, W, lambda_value):
47 | """
48 | L1 Regularization is a regularization technique applied to the weights of a neural network. We minimize a loss
49 | function comprising both the primary loss function and a penalty on the L1 norm of the weights:
50 | L_new(w) = L_original(w) + lambda * ||W||_1
51 | where lambda is a value determining the strength of the penalty. In contrast to weight decay, L1 regularization promotes
52 | sparsity; i.e. some parameters have an optimal value of zero.
53 | """
54 | Regularization_term = torch.sum((lambda_value * torch.abs(W)).type(torch.DoubleTensor) / (2 * y.shape[0]),dim=1)
55 | output = torch.sum((y - torch.mm(self.X, W.T))**2, dim=0) + Regularization_term
56 | return output
57 |
58 |
59 | if __name__ == '__main__':
60 | """
61 | Dropout:
62 |
63 | A = torch.arange(20).reshape((5, 4))
64 | print(A)
65 | Regularizer = Regularization(X=A)
66 | print(Regularizer.dropout(drop_probability=0.5))
67 |
68 | L2 Regularization or Weight Decay:
69 |
70 | data = load_iris()
71 | X = torch.tensor(data.data, dtype=torch.double)
72 | y = torch.tensor(data.target).unsqueeze(1)
73 | W = torch.FloatTensor(X.shape[1]).uniform_(0, 1).unsqueeze(0).type(torch.DoubleTensor)
74 | Regularizer = Regularization(X)
75 | Regularizer.L2_Regularization(y=y, W=W, lambda_value=0.7)
76 |
77 | L1 Regularization:
78 |
79 | data = load_iris()
80 | X = torch.tensor(data.data, dtype=torch.double)
81 | y = torch.tensor(data.target).unsqueeze(1)
82 | W = torch.FloatTensor(X.shape[1]).uniform_(0, 1).unsqueeze(0).type(torch.DoubleTensor)
83 | Regularizer = Regularization(X)
84 | print(Regularizer.L1_Regularization(y=y, W=W, lambda_value=0.7))
85 | """
86 |
--------------------------------------------------------------------------------
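To see why dropout() in Day-24-Regularization/regularization.py rescales by 1 / keep_probability
(inverted dropout), here is a small sketch showing that, on average, the masked-and-rescaled activations
match the originals. The tensor shape and keep probability are arbitrary.

import torch

torch.manual_seed(0)
X = torch.ones(10000, 4)
keep_probability = 0.5

mask = (torch.rand_like(X) < keep_probability).float()
dropped = mask * X / keep_probability            # inverted dropout: surviving units are scaled up
print(X.mean().item(), dropped.mean().item())    # both close to 1.0 in expectation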
/Day-08-tf-idf/tfidf.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from nltk.tokenize import word_tokenize
3 |
4 | class TF_IDF:
5 | """
6 | TF - Term Frequency: count of specific word in document / total no. of words in document
7 | IDF - Inverse Document Frequency: Log ratio of (Total no. of documents / no. of documents containing words)
8 | """
9 | def text_processing(self, X):
10 | """
11 | text processing: We clean the text by removing special characters and lower-casing each word; each
12 | sentence is converted into a list of words, and we then collect the unique words across all the
13 | documents combined.
14 | :param X: List of documents
15 | :return: Unique words (Vocabulary), all documents [[d_1], [d_2], ..[d_n]]
16 | """
17 | documents = []
18 | vocabulary = []
19 | for document in X:
20 | document_words = [word.lower() for word in word_tokenize(document) if word.isalpha()]
21 | documents.append(document_words)
22 | for word in document_words:
23 | if word not in vocabulary:
24 | vocabulary.append(word)
25 |
26 | vocabulary = set(vocabulary)
27 | return vocabulary, documents
28 |
29 | def strtoint(self, vocabulary):
30 | """
31 | :param vocabulary: all unique words in the documents
32 | :return: mapping words to integer such as {'the': 1}
33 | """
34 | wordToInt = {}
35 | for i, vocab in enumerate(vocabulary):
36 | wordToInt[vocab] = i
37 |
38 | return wordToInt
39 |
40 | def vocab_frequency(self, vocabulary, documents):
41 | """
42 | :param vocabulary: all unique words in the documents
43 | :param documents: all the documents
44 | :return: Frequency of word in all the documents combined together
45 | """
46 | word_frequency = {}
47 | for word in vocabulary:
48 | word_frequency[word] = 0
49 | for document in documents:
50 | if word in document:
51 | word_frequency[word] += 1
52 |
53 | return word_frequency
54 |
55 | def tf(self, input_document, word):
56 | """
57 | Calculating term_frequency
58 | :param input_document: test document
59 | :param word: each word in the test document
60 | :return: tf value (refer the formula above)
61 | """
62 | num_words = len(input_document)
63 | word_frequency = len([token for token in input_document if token==word])
64 | return word_frequency/num_words
65 |
66 | def idf(self, word, word_frequency, documents):
67 | """
68 | :param word: words of the test input document
69 | :param word_frequency: word frequency w.r.t all the documents available.
70 | :param documents: all the documents
71 | :return: idf value
72 | """
73 | try:
74 | word_frequency = word_frequency[word] + 1
75 | except:
76 | word_frequency = 1
77 |
78 | return torch.log(torch.scalar_tensor(len(documents))/word_frequency)
79 |
80 | def fit_transform(self, document, vocabulary, wordToInt, word_frequency, documents):
81 | """
82 | :param document: test input document
83 | :param vocabulary: all unique words
84 | :param wordToInt: word to int mapping
85 | :param word_frequency: each word frequency throughout all the documents
86 | :param documents: all the documents
87 | :return: tf_idf vector for test input document
88 | """
89 | tfidf_vector = torch.zeros((len(vocabulary), ), dtype=torch.double)
90 | for word in document:
91 | tf = self.tf(document, word)
92 | idf = self.idf(word, word_frequency, documents)
93 | tfidf_values = tf * idf
94 | tfidf_vector[wordToInt[word]] = tfidf_values
95 |
96 | return tfidf_vector
97 |
98 | if __name__ == '__main__':
99 | vectors = []
100 | documents = ['Hi, how are you?',
101 | 'What are you doing?',
102 | 'what is your name?',
103 | 'who are you?']
104 |
105 | tfidf_vectorizer = TF_IDF()
106 | vocabulary, processed_documents = tfidf_vectorizer.text_processing(documents)
107 | wordToInt = tfidf_vectorizer.strtoint(vocabulary)
108 | vocab_frequency = tfidf_vectorizer.vocab_frequency(vocabulary, processed_documents)
109 | _, new_document = tfidf_vectorizer.text_processing([documents[0]])
110 | print(tfidf_vectorizer.fit_transform(new_document[0], vocabulary, wordToInt, vocab_frequency, documents))
111 |
--------------------------------------------------------------------------------
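A tiny worked example of the TF and IDF formulas from the TF_IDF docstring above, computed by hand for an
assumed 4-document corpus. The corpus and query word are illustrative only; natural log is used, matching
torch.log in the file, and the +1 mirrors the smoothing applied in idf().

import math

documents = [['hi', 'how', 'are', 'you'],
             ['what', 'are', 'you', 'doing'],
             ['what', 'is', 'your', 'name'],
             ['who', 'are', 'you']]
word, doc = 'are', documents[0]

tf = doc.count(word) / len(doc)                        # 1 / 4
docs_with_word = sum(word in d for d in documents)     # 3 documents contain 'are'
idf = math.log(len(documents) / (docs_with_word + 1))  # log(4 / 4) = 0 with the +1 smoothing
print(tf, idf, tf * idf)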
/Day-14-DBSCAN/dbscan.py:
--------------------------------------------------------------------------------
1 | """
2 | Check out this Density-Based Spatial Clustering (DBSCAN) blog:
3 | https://blog.dominodatalab.com/topology-and-density-based-clustering/
4 |
5 | - Compared to centroid-based clustering like k-means, density-based clustering works by
6 | identifying “dense” clusters of points, allowing it to learn clusters of arbitrary shape
7 | and identify outliers in the data.
8 | """
9 | import torch
10 | from sklearn.datasets import load_iris
11 | from sklearn.model_selection import train_test_split
12 | from sklearn.metrics import accuracy_score
13 | from sklearn import datasets
14 |
15 | class DBScan:
16 | def __init__(self, eps = 2.5, min_points=30):
17 | """
18 | eps - radius distance around which a cluster is considered.
19 | min_points - Number of points to be present inside the radius
20 | (check out density reachable or border points from blog to understand how cluster points are considered)
21 | """
22 | self.eps = eps
23 | self.minimum_points = min_points
24 |
25 | def euclidean_distance(self, x1, x2):
26 | """
27 | :param x1: input tensor
28 | :param x2: input tensor
29 | :return: distance between tensors
30 | """
31 | return torch.cdist(x1, x2)
32 |
33 | def direct_neighbours(self, sample):
34 | """
35 | :param sample: Sample whose neighbors needs to be identified
36 | :return: all the neighbors within eps distance
37 | """
38 | neighbors = []
39 | idxs = torch.arange(self.X.shape[0])
40 | for i, _sample in enumerate(self.X[idxs != sample]):
41 |
42 | distance = self.euclidean_distance(self.X[sample].unsqueeze(0), _sample.unsqueeze(0))
43 | if distance < self.eps:
44 | neighbors.append(i)
45 |
46 | return torch.tensor(neighbors)
47 |
48 | def density_neighbors(self, sample, neighbors):
49 | """
50 | Recursive method which expands the cluster until we have reached the border
51 | of the dense area (density determined by eps and min_samples)
52 |
53 | :param sample: Sample whose border points to be identified
54 | :param neighbors: samples and its neighbors within eps distance
55 | :return: It updates the number of points assigned to each cluster, by finding
56 | border points and its relative points. In a sense, it expands cluster.
57 | """
58 | cluster = [sample]
59 | for neighbor_i in neighbors:
60 | if not neighbor_i in self.visited_samples:
61 | self.visited_samples.append(neighbor_i)
62 | self.neighbors[neighbor_i] = self.direct_neighbours(neighbor_i)
63 |
64 | if len(self.neighbors[neighbor_i]) >= self.minimum_points:
65 | expanded_cluster = self.density_neighbors(
66 | neighbor_i, self.neighbors[neighbor_i])
67 | cluster = cluster + expanded_cluster
68 | else:
69 | cluster.append(neighbor_i)
70 |
71 | return cluster
72 |
73 | def get_cluster_label(self):
74 | """
75 | :return: assign cluster label based on expanded clusters
76 | """
77 | labels = torch.zeros(self.X.shape[0]).fill_(len(self.clusters))
78 | for cluster_i, cluster in enumerate(self.clusters):
79 | for sample_i in cluster:
80 | labels[sample_i] = cluster_i
81 |
82 | return labels
83 |
84 | def predict(self, X):
85 | """
86 | :param X: input tensor
87 | :return: predicted labels of samples depending on their distance from the clusters
88 | """
89 | self.X = X
90 | self.clusters = []
91 | self.visited_samples = []
92 | self.neighbors = {}
93 | n_samples = X.shape[0]
94 |
95 | for sample_i in range(n_samples):
96 | if sample_i in self.visited_samples:
97 | continue
98 | self.neighbors[sample_i] = self.direct_neighbours(sample_i)
99 | if len(self.neighbors[sample_i]) >= self.minimum_points:
100 | self.visited_samples.append(sample_i)
101 | new_cluster = self.density_neighbors(
102 | sample_i, self.neighbors[sample_i])
103 | self.clusters.append(new_cluster)
104 |
105 | cluster_labels = self.get_cluster_label()
106 | return cluster_labels
107 |
108 | if __name__ == '__main__':
109 | iris = load_iris()
110 | torch.manual_seed(0)
111 | X = torch.tensor(iris.data, dtype=torch.float)
112 | y = torch.tensor(iris.target)
113 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
114 | dbscan = DBScan(eps=0.25, min_points=20)
115 | ypred = dbscan.predict(x_train)
116 | print(f'Accuracy Score: {accuracy_score(y_train, ypred)}')
117 |
--------------------------------------------------------------------------------
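For intuition about the eps / min_points rule in the DBScan class above, here is a minimal sketch that
checks whether one point of a toy tensor is a core point (at least min_points neighbours within eps).
The data, eps and min_points values are made up for illustration.

import torch

X = torch.tensor([[0.0, 0.0], [0.1, 0.0], [0.0, 0.1], [5.0, 5.0]])
eps, min_points = 0.5, 2

sample = 0
dists = torch.cdist(X[sample].unsqueeze(0), X).squeeze(0)   # distances from point 0 to every point
neighbours = (dists < eps).sum() - 1                        # exclude the point itself
print(bool(neighbours >= min_points))                       # True: point 0 is a core point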
/Day-27-MLP/mlp.py:
--------------------------------------------------------------------------------
1 | """
2 | Multi-Layer Perceptron
3 | """
4 | import torch
5 | from sklearn.datasets import load_digits
6 | from sklearn.model_selection import train_test_split
7 |
8 | class Sigmoid:
9 | def __call__(self, X):
10 | return 1 / (1 + torch.exp(-X))
11 |
12 | def gradient(self, X):
13 | return self.__call__(X) * (1 - self.__call__(X))
14 |
15 | class Softmax:
16 | def __call__(self, X):
17 | e_x = torch.exp(X - torch.max(X, dim=-1, keepdim=True).values)
18 | return e_x / torch.sum(e_x, dim=1, keepdim=True)
19 |
20 | def gradient(self, X):
21 | p = self.__call__(X)
22 | return p * (1 - p)
23 |
24 | def accuracy_score(y, p):
25 | accuracy = torch.sum(y == p, dim=0) / len(y)
26 | return accuracy
27 |
28 | def to_categorical(X, n_col=None):
29 | if not n_col:
30 | n_col = torch.amax(X) + 1
31 |
32 | one_hot = torch.zeros((X.shape[0], n_col))
33 | one_hot[torch.arange(X.shape[0]), X] = 1
34 | return one_hot
35 |
36 | def normalization(X):
37 | """
38 | :param X: Input tensor
39 | :return: Normalized input using l2 norm.
40 | """
41 | l2 = torch.norm(X, p=2, dim=-1)
42 | l2[l2 == 0] = 1
43 | return X / l2.unsqueeze(1)
44 |
45 | class CrossEntropy:
46 | def __init__(self):
47 | pass
48 | def loss(self, y, p):
49 | p = torch.clip(p, 1e-15, 1-1e-15)
50 | return - y * torch.log(p) - (1 -y) * torch.log(1 - p)
51 |
52 | def accuracy_score(self, y, p):
53 | return accuracy_score(torch.argmax(y, dim=1), torch.argmax(p, dim=1))
54 |
55 | def gradient(self, y, p):
56 | p = torch.clip(p, 1e-15, 1 - 1e-15)
57 | return - (y / p) + (1 - y) / (1 -p)
58 |
59 | class MultiLayerPerceptron:
60 | def __init__(self, n_hidden, n_iterations=1000, learning_rate=0.001):
61 | self.n_hidden = n_hidden
62 | self.n_iterations = n_iterations
63 | self.learning_rate = learning_rate
64 | self.hidden_activation = Sigmoid()
65 | self.output_activation = Softmax()
66 | self.loss = CrossEntropy()
67 |
68 | def initialize_weight(self, X, y):
69 | n_samples, n_features = X.shape
70 | _, n_outputs = y.shape
71 | limit = 1 / torch.sqrt(torch.scalar_tensor(n_features))
72 | self.W = torch.DoubleTensor(n_features, self.n_hidden).uniform_(-limit, limit)
73 |
74 | self.W0 = torch.zeros((1, self.n_hidden))
75 | limit = 1 / torch.sqrt(torch.scalar_tensor(self.n_hidden))
76 | self.V = torch.DoubleTensor(self.n_hidden, n_outputs).uniform_(-limit, limit)
77 | self.V0 = torch.zeros((1, n_outputs))
78 |
79 | def fit(self, X, y):
80 | self.initialize_weight(X, y)
81 | for i in range(self.n_iterations):
82 | hidden_input = torch.mm(X, self.W) + self.W0
83 | hidden_output = self.hidden_activation(hidden_input)
84 |
85 | output_layer_input = torch.mm(hidden_output, self.V) + self.V0
86 | y_pred = self.output_activation(output_layer_input)
87 |
88 | grad_wrt_first_output = self.loss.gradient(y, y_pred) * self.output_activation.gradient(output_layer_input)
89 | grad_v = torch.mm(hidden_output.T, grad_wrt_first_output)
90 | grad_v0 = torch.sum(grad_wrt_first_output, dim=0, keepdim=True)
91 |
92 | grad_wrt_first_hidden = torch.mm(grad_wrt_first_output, self.V.T) * self.hidden_activation.gradient(hidden_input)
93 | grad_w = torch.mm(X.T, grad_wrt_first_hidden)
94 | grad_w0 = torch.sum(grad_wrt_first_hidden, dim=0, keepdim=True)
95 |
96 | # Update weights (by gradient descent)
97 | # Move against the gradient to minimize loss
98 | self.V -= self.learning_rate * grad_v
99 | self.V0 -= self.learning_rate * grad_v0
100 | self.W -= self.learning_rate * grad_w
101 | self.W0 -= self.learning_rate * grad_w0
102 |
103 | # Use the trained model to predict labels of X
104 |
105 | def predict(self, X):
106 | # Forward pass:
107 | hidden_input = torch.mm(X,self.W) + self.W0
108 | hidden_output = self.hidden_activation(hidden_input)
109 | output_layer_input = torch.mm(hidden_output, self.V) + self.V0
110 | y_pred = self.output_activation(output_layer_input)
111 | return y_pred
112 |
113 |
114 | if __name__ == '__main__':
115 | data = load_digits()
116 | X = normalization(torch.tensor(data.data, dtype=torch.double))
117 | y = torch.tensor(data.target)
118 |
119 | # Convert the nominal y values to binary
120 | y = to_categorical(y)
121 |
122 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1)
123 | # MLP
124 | clf = MultiLayerPerceptron(n_hidden=16,
125 | n_iterations=1000,
126 | learning_rate=0.01)
127 |
128 | clf.fit(X_train, y_train)
129 | y_pred = torch.argmax(clf.predict(X_test), dim=1)
130 | y_test = torch.argmax(y_test, dim=1)
131 |
132 | accuracy = accuracy_score(y_test, y_pred)
133 | print("Accuracy:", accuracy)
134 |
135 |
136 |
--------------------------------------------------------------------------------
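A quick numerical check of the Sigmoid.gradient formula used for backpropagation in Day-27-MLP/mlp.py,
sigma'(x) = sigma(x) * (1 - sigma(x)), compared against a central finite difference. The test points are
arbitrary.

import torch

def sigmoid(x):
    return 1 / (1 + torch.exp(-x))

x = torch.tensor([-2.0, 0.0, 1.5])
analytic = sigmoid(x) * (1 - sigmoid(x))

h = 1e-4
numeric = (sigmoid(x + h) - sigmoid(x - h)) / (2 * h)   # central difference approximation
print(torch.allclose(analytic, numeric, atol=1e-6))      # True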
/Day-13-Adaboost/adaboost.py:
--------------------------------------------------------------------------------
1 | """
2 | Adaboost Algorithm Blog post:
3 | https://www.mygreatlearning.com/blog/adaboost-algorithm/
4 | """
5 | import torch
6 | from sklearn.datasets import load_breast_cancer
7 | from sklearn.model_selection import train_test_split
8 | from sklearn.metrics import accuracy_score
9 |
10 | class stump:
11 | "Each Stump is a weak classifier and combination of them are referred as Boosting Mechanism"
12 | def __init__(self):
13 | """
14 | * Polarity is used to classify sample as either 1 or -1
15 | * feature index is for identifying node for separating classes
16 | * features are compared against threshold value
17 | * Alpha value indicates the classifier accuracy
18 | """
19 | self.polarity = 1
20 | self.feature_index = None
21 | self.threshold = None
22 | self.alpha = None
23 |
24 | class Adaboost:
25 | def __init__(self, num_classifiers):
26 | """
27 | :param num_classifiers: Number of weak classifiers
28 | """
29 | self.num_classifiers = num_classifiers
30 |
31 | def fit(self, X, y):
32 | """
33 | :param X: Input tensor
34 | :param y: output tensor
35 | :return: Creates a list of weak classifier with set of properties as
36 | mentioned in stump class.
37 | * Initialize weights to 1/N, N is number of samples
38 | * Iterate through different weak classifiers
39 | * Minimum error given for using a certain feature value threshold for predicting sample label
40 | * Iterate through each feature and its unique values to find the threshold value
41 | * Label samples with value less than threshold as -1
42 | * Error, Sum of weights of misclassified samples
43 | * If the error is over 50% we flip the polarity so that samples that were classified as -1 are
44 | classified as 1, and vice versa. E.g. error = 0.8 => (1 - error) = 0.2
45 | * If this threshold resulted in the smallest error we save the configuration
46 | * Calculate the alpha which is used to update the sample weights,
47 | Alpha is also an approximation of this classifier's proficiency
48 | * set all predictions to '1' initially
49 | * The indexes where the sample values are below threshold, label them as -1
50 | * Updated weights and normalize to one
51 | * save each weak classifier
52 | """
53 | n_samples, n_features = X.shape[0], X.shape[1]
54 | weight = torch.zeros(n_samples).fill_(1/n_samples)
55 | self.clfs = []
56 | for _ in range(self.num_classifiers):
57 | clf = stump()
58 | minimum_error = float('inf')
59 | for feature_i in range(n_features):
60 | feature_values = X[:, feature_i].unsqueeze(1)
61 | unique_values = feature_values.unique()
62 | for threshold in unique_values:
63 | p = 1
64 | prediction = torch.ones(y.shape)
65 | prediction[X[:, feature_i] < threshold] = -1
66 | error = torch.sum(weight[y != prediction])
67 | if error > 0.5:
68 | error = 1 - error
69 | p = -1
70 |
71 | if error < minimum_error:
72 | clf.polarity = p
73 | clf.threshold = threshold
74 | clf.feature_index = feature_i
75 | minimum_error = error
76 |
77 | clf.alpha = 0.5 * torch.log((1.0 - minimum_error) / (minimum_error + 1e-10))
78 | predictions = torch.ones(y.shape)
79 | negative_idx = (clf.polarity * X[:, clf.feature_index] < clf.polarity * clf.threshold)
80 | predictions[negative_idx] = -1
81 |
82 | weight *= torch.exp(-clf.alpha * y * predictions)
83 | weight /= torch.sum(weight)
84 |
85 | self.clfs.append(clf)
86 |
87 | def predict(self, X):
88 | """
89 | same process as mentioned above.
90 | :param X:
91 | :return: predicted estimate of ground truth.
92 | """
93 | n_samples = X.shape[0]
94 | y_pred = torch.zeros((n_samples, 1))
95 | for clf in self.clfs:
96 | predictions = torch.ones(y_pred.shape)
97 | negative_idx = (clf.polarity * X[:, clf.feature_index] < clf.polarity * clf.threshold)
98 | predictions[negative_idx] = -1
99 | y_pred += clf.alpha * predictions
100 |
101 | print(y_pred)
102 | y_pred = torch.sign(y_pred).flatten()
103 | print(y_pred)
104 | return y_pred
105 |
106 | if __name__ == '__main__':
107 | breast_cancer = load_breast_cancer()
108 | torch.manual_seed(0)
109 | X = torch.tensor(breast_cancer.data, dtype=torch.float)
110 | y = torch.tensor(breast_cancer.target) * 2 - 1  # map labels {0, 1} -> {-1, +1} as AdaBoost expects
111 | n_classes = len(torch.unique(y))
112 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
113 | clf = Adaboost(num_classifiers=20)
114 | clf.fit(x_train, y_train)
115 | y_pred = clf.predict(x_test)
116 |
117 | accuracy = accuracy_score(y_test, y_pred)
118 | print ("Accuracy:", accuracy)
119 |
--------------------------------------------------------------------------------
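The classifier weight in Adaboost.fit above follows the usual AdaBoost rule alpha = 1/2 * ln((1 - error) / error)
(with a small epsilon for numerical safety). As a sanity check with assumed error values:

import math

for error in (0.1, 0.3, 0.5):
    alpha = 0.5 * math.log((1 - error) / (error + 1e-10))
    print(error, round(alpha, 3))
# error 0.1 -> alpha ~ 1.10 (strong vote), error 0.3 -> ~ 0.42, error 0.5 -> ~ 0 (no better than chance)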
/Day-20-SpectralClustering/spectralClustering.py:
--------------------------------------------------------------------------------
1 | """
2 | Reference: https://en.wikipedia.org/wiki/Spectral_clustering
3 | Blog Post: https://towardsdatascience.com/spectral-clustering-aba2640c0d5b
4 | """
5 | import torch
6 | from sklearn.datasets import make_moons
7 | import matplotlib.pyplot as plt
8 | from scipy.spatial.distance import pdist, squareform
9 | from sklearn.cluster import KMeans
10 |
11 | def SpectralClustering(X, K=8, adj=True, metric='euclidean', sim_graph='fully_connect', sigma=1, knn=10, epsilon=0.5, normalized=1):
12 | """
13 | :param X: Input tensor
14 | :param K: cluster to look out for using KMeans
15 | :param adj: whether X is already an adjacency/distance matrix
16 | :param metric: distance metric used to build the adjacency matrix when adj is False
17 | :param sim_graph: Technique to create edges between nodes in the graph.
18 | :param sigma: Parameter for the RBF kernel
19 | :param knn: number of nearest neighbors to connect with edges
20 | :param epsilon: Threshold distance for creating edges in the eps_neighbor graph
21 | :param normalized: 1 for the random-walk normalized Laplacian, 2 for the symmetric normalized one, otherwise unnormalized
22 | :return: fitted sklearn KMeans object on the spectral embedding
23 | """
24 |
25 | # To convert the adjacency matrix into a connected graph, we can use a technique like KNN.
26 |
27 | if not adj:
28 | adjacency_matrix = squareform(pdist(X, metric=metric))
29 | else:
30 | adjacency_matrix = X
31 |
32 | if sim_graph == 'fully_connect':
33 | adjacency_matrix = torch.from_numpy(adjacency_matrix)
34 | w = torch.exp(-adjacency_matrix/ (2 * sigma))
35 |
36 | elif sim_graph =='eps_neighbor':
37 | adjacency_matrix = torch.from_numpy(adjacency_matrix)
38 | w = (adjacency_matrix <= epsilon).type(torch.DoubleTensor)
39 | elif sim_graph == 'knn':
40 | adjacency_matrix = torch.from_numpy(adjacency_matrix)
41 | w = torch.zeros(adjacency_matrix.shape)
42 | adjacency_sort = torch.argsort(adjacency_matrix, dim=1)
43 | for i in range(adjacency_sort.shape[0]):
44 | w[i, adjacency_sort[i, :][:(knn+1)]] = 1
45 | elif sim_graph == 'mutual_knn':
46 | adjacency_matrix = torch.from_numpy(adjacency_matrix)
47 | w1 = torch.zeros(adjacency_matrix.shape)
48 | adjacency_sort = torch.argsort(adjacency_matrix, dim=1)
49 | for i in range(adjacency_matrix.shape[0]):
50 | for j in adjacency_sort[i, :][:(knn+1)]:
51 | if i==j:
52 | w1[i, i] = 1
53 | elif w1[i, j] == 0 and w1[j, i]==0:
54 | w1[i, j] = 0.5
55 | else:
56 | w1[i, j] = w1[j, i] = 1
57 | w = (w1 > 0.5).type(torch.DoubleTensor)  # keep only mutual (weight 1) edges
58 | else:
59 | raise ValueError("The 'sim_graph' argument should be one of the strings, 'fully_connect', 'eps_neighbor', 'knn', or 'mutual_knn'!")
60 |
61 | #Degree Matrix
62 | D = torch.diag(torch.sum(w, dim=1))
63 |
64 | #Graph Laplacian
65 | L = D - w
66 |
67 | # Finding eigen Value of Graph Laplacian Matrix,
68 | """
69 | In the referenced blog's example, the eigenvalues of the Laplacian indicated that there were four clusters.
70 | The vectors associated with those eigenvalues contain information on how to segment the nodes.
71 | """
72 | if normalized == 1:
73 | D_INV = torch.diag(1/torch.diag(D))
74 | lambdas, V = torch.eig(torch.mm(D_INV, L), eigenvectors=True)
75 | ind = torch.argsort(torch.norm(torch.reshape(lambdas[:,0], (1, len(lambdas))), dim=0))
76 | V_K = V[:, ind[:K]]
77 |
78 | elif normalized == 2:
79 | D_INV_SQRT = torch.diag(1/torch.sqrt(torch.diag(D)))
80 | lambdas, V = torch.eig(torch.matmul(torch.matmul(D_INV_SQRT, L), D_INV_SQRT), eigenvectors=True)
81 | ind = torch.argsort(torch.norm(torch.reshape(lambdas[:,0], (1, len(lambdas))), dim=0))
82 | V_K = torch.real(V[:, ind[:K]])
83 | if any(V_K.sum(dim=1) == 0):
84 | raise ValueError("Can't normalize the matrix with the first K eigenvectors as columns! Perhaps the \
85 | number of clusters K or the number of neighbors in k-NN is too small.")
86 | V_K = V_K/torch.reshape(torch.norm(V_K, dim=1), (V_K.shape[0], 1))
87 | else:
88 | lambdas, V = torch.eig(L, eigenvectors=True)
89 | ind = torch.argsort(torch.norm(torch.reshape(lambdas[:,0], (1, len(lambdas))), dim=0))
90 | V_K = torch.real(V[:, ind[:K]])
91 |
92 | # KMeans is used for assigning the labels to the clusters.
93 | kmeans = KMeans(n_clusters=K, init='k-means++', random_state=0).fit(V_K)
94 | return kmeans
95 |
96 | if __name__ == '__main__':
97 | moon_data, moon_labels = make_moons(100, noise=0.05)
98 | moon_data = torch.tensor(moon_data)
99 | moon_labels = torch.tensor(moon_labels)
100 | # Compute the adjacency matrix, Similarity Matrix.
101 | Adj_mat = squareform(pdist(moon_data, metric='euclidean', p=2))
102 | # Spectral clustering...
103 | spec_re1 = SpectralClustering(Adj_mat, K=2, sim_graph='fully_connect', sigma=0.01, normalized=1)
104 | spec_re2 = SpectralClustering(Adj_mat, K=2, sim_graph='knn', knn=10, normalized=1)
105 |
106 | # Often need to change figsize when doing subplots
107 | plt.figure(figsize=(8, 4))
108 | plt.subplot(1, 2, 1)
109 | plt.scatter(x=moon_data[:, 0], y=moon_data[:, 1], c=spec_re1.labels_, s=2)
110 | plt.colorbar()
111 | plt.title('Fully connected graph with RBF kernel ($\sigma=0.01$)')
112 |
113 | plt.subplot(1, 2, 2)
114 | plt.scatter(x=moon_data[:, 0], y=moon_data[:, 1], c=spec_re2.labels_, s=2)
115 | plt.colorbar()
116 | plt.title('$k$-Nearest Neighbor graphs ($k=10$)')
117 |
118 | plt.suptitle('Spectral Clustering', y=-0.01)
119 |
120 | # Automatically adjust padding between subplots
121 | plt.tight_layout()
122 | plt.show()
123 |
124 |
125 |
126 |
--------------------------------------------------------------------------------
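To make the 'Graph Laplacian' step above concrete, here is a small sketch on a hypothetical 4-node graph
made of two disconnected pairs: the number of (near-)zero eigenvalues of L = D - W equals the number of
connected components, which is why the smallest eigenvectors reveal the clusters. This uses
torch.linalg.eigvalsh (available in recent PyTorch versions) rather than the older torch.eig API used in
the file.

import torch

# adjacency of two disconnected edges: {0, 1} and {2, 3}
W = torch.tensor([[0., 1., 0., 0.],
                  [1., 0., 0., 0.],
                  [0., 0., 0., 1.],
                  [0., 0., 1., 0.]], dtype=torch.double)
D = torch.diag(W.sum(dim=1))   # degree matrix
L = D - W                      # unnormalized graph Laplacian

print(torch.linalg.eigvalsh(L))   # two eigenvalues ~0 -> two components / clusters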
/Day-17-K-Medoids/PAM.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from utility import euclidean_distance
3 | from sklearn.datasets import load_iris
4 | from sklearn.metrics import accuracy_score
5 | from sklearn.model_selection import train_test_split
6 | """
7 | K-Medoids, also known as Partitioning Around Medoids (PAM).
8 | """
9 | class PAM:
10 | def __init__(self, k=2):
11 | """
12 | :param k: Number of clusters to be formed using Medoids
13 | """
14 | self.k = k
15 |
16 | def random_medoids(self, X):
17 | """
18 | Similar to KMeans, selecting random samples from the dataset as medoids
19 | :param X: Input tensor
20 | :return: For iris dataset, three medoids are selected.
21 | """
22 | n_samples, n_features = X.shape[0], X.shape[1]
23 | medoids = torch.zeros((self.k, n_features))
24 | for i in range(self.k):
25 | idx = torch.randperm(len(X))[1]
26 | medoid = X[idx]
27 | medoids[i] = medoid
28 |
29 | return medoids
30 |
31 | def closest_medoid(self, sample, medoids):
32 | """
33 | Calculate distance between each sample and every medoids
34 | :param sample: Data point
35 | :param medoids: Similar to centroid in KMeans.
36 | :return: Assigning a medoid to each sample
37 | """
38 | closest_i = None
39 | closest_distance = float('inf')
40 | for i, medoid in enumerate(medoids):
41 | distance = euclidean_distance(sample, medoid)
42 | if distance < closest_distance:
43 | closest_i = i
44 | closest_distance = distance
45 | return closest_i
46 |
47 | def create_clusters(self, X, medoids):
48 | """
49 | Creating clusters after assigning samples to each medoid
50 | :return:
51 | """
52 | clusters = [[] for _ in range(self.k)]
53 | for sample_i, sample in enumerate(X):
54 | medoid_i = self.closest_medoid(sample, medoids)
55 | clusters[medoid_i].append(sample_i)
56 |
57 | return clusters
58 |
59 | def calculate_cost(self, X, clusters, medoids):
60 | """
61 | Total distance between samples and their medoid
62 | :param clusters: Three medoids with samples assigned to each of them
63 | :return: Total distance as mentioned above
64 | """
65 | cost = 0
66 | for i, cluster in enumerate(clusters):
67 | medoid = medoids[i]
68 | for sample_i in cluster:
69 | cost += euclidean_distance(X[sample_i], medoid)
70 |
71 | return cost
72 |
73 | def get_non_medoids(self, X, medoids):
74 | """
75 | Medoids are points in the cluster that act as references for all other points (non-medoids)
76 | when computing distances between them.
77 | :return: all the data point which are not medoids.
78 | """
79 | non_medoids = []
80 | for sample in X:
81 | if not sample in medoids:
82 | non_medoids.append(sample)
83 |
84 | return non_medoids
85 |
86 | def get_cluster_label(self, clusters, X):
87 | """
88 | Assigning each sample as index to a medoid.
89 | """
90 | y_pred = torch.zeros(X.shape[0])
91 | for cluster_i in range(len(clusters)):
92 | cluster = clusters[cluster_i]
93 | for sample_i in cluster:
94 | y_pred[sample_i] = cluster_i
95 |
96 | return y_pred
97 |
98 | def predict(self, X):
99 | """
100 | Do Partitioning Around Medoids and return the cluster labels
101 | * First, randomly select medoids
102 | * Create cluster based on medoids selected and samples
103 | * Cost(distance) of the existing cluster and the samples in it.
104 | * Iterate, until we find the least cost with best medoids.
105 | * Find all non-medoids
106 | :return: Predicting medoid for test sample or a data point.
107 | """
108 |
109 | medoids = self.random_medoids(X)
110 | clusters = self.create_clusters(X, medoids)
111 | cost = self.calculate_cost(X, clusters, medoids)
112 |
113 | while True:
114 | best_medoids = medoids
115 | lowest_cost = cost
116 | for medoid in medoids:
117 | non_medoids = self.get_non_medoids(X, medoids)
118 | # Calculate the cost when swapping medoid and samples
119 | for sample in non_medoids:
120 | # Swap sample with the medoid
121 | new_medoids = medoids.clone()
122 | new_medoids[(medoids == medoid).all(dim=1)] = sample  # replace the swapped medoid row
123 | # Assign samples to new medoids
124 | new_clusters = self.create_clusters(X, new_medoids)
125 | # Calculate the cost with the new set of medoids
126 | new_cost = self.calculate_cost(X, new_clusters, new_medoids)
127 | # If the swap gives us a lower cost we save the medoids and cost
128 | if new_cost < lowest_cost:
129 | lowest_cost = new_cost
130 | best_medoids = new_medoids
131 | # If there was a swap that resulted in a lower cost we save the
132 | # resulting medoids from the best swap and the new cost
133 | if lowest_cost < cost:
134 | cost = lowest_cost
135 | medoids = best_medoids
136 | else:
137 | break
138 |
139 | final_clusters = self.create_clusters(X, medoids)
140 | # Return the samples cluster indices as labels
141 | return self.get_cluster_label(final_clusters, X)
142 |
143 |
144 | if __name__ == '__main__':
145 | data = load_iris()
146 | X = data.data
147 | y = data.target
148 | # Cluster the data using K-Medoids
149 | X = torch.tensor(X, dtype=torch.float)
150 | y = torch.tensor(y)
151 | clf = PAM(k=3)
152 | y_pred = clf.predict(X)
153 | print(accuracy_score(y_pred, y))
154 |
155 |
156 |
--------------------------------------------------------------------------------
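A minimal illustration of the swap criterion in PAM.predict above: the configuration cost is the summed
distance of every point to its medoid, and a swap is accepted only when it lowers that sum. The 1-D toy
points are made up for illustration.

import torch

points = torch.tensor([0.0, 1.0, 2.0, 10.0])

def cost(medoid):
    return torch.abs(points - medoid).sum()

print(cost(points[1]).item())   # medoid = 1.0  -> cost 11.0
print(cost(points[3]).item())   # medoid = 10.0 -> cost 27.0, so this swap would be rejected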
/Day-04-KMeans-Clustering/KMeans.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import scipy
3 | import numpy as np
4 | from sklearn.datasets import load_iris
5 | from sklearn.model_selection import train_test_split
6 | from sklearn.metrics import accuracy_score
7 |
8 |
9 | class KMeans:
10 | def __init__(self, X, k, iterations):
11 | """
12 | :param X: input tensor
13 | :param k: Number of clusters
14 | :variable samples: Number of samples
15 | :variable features: Number of features
16 | """
17 | self.k = k
18 | self.max_iterations = iterations
19 | self.samples = X.shape[0]
20 | self.features = X.shape[1]
21 | self.KMeans_Centroids = []
22 |
23 | # def initialize_centroid(self, X):
24 | # return X[torch.randint(X.shape[0], (self.k,))]
25 |
26 | def initialize_centroid(self, X, K):
27 | """
28 | Initialization Technique is KMeans++. Thanks to stackoverflow.
29 | https://stackoverflow.com/questions/5466323/how-could-one-implement-the-k-means-algorithm
30 | :param X: Input Tensor
31 | :param K: Number of clusters to build
32 | :return: Selection of three centroid vector from X
33 | """
34 | I = [0]
35 | C = [X[0]]
36 | for k in range(1, K):
37 | D2 = np.array([min([np.inner(c - x, c - x) for c in C]) for x in X])
38 | probs = D2 / D2.sum()
39 | cumprobs = probs.cumsum()
40 | r = torch.rand(1).item()
41 |
42 | for j, p in enumerate(cumprobs):
43 | if r < p:
44 | i = j
45 | break
46 | I.append(i)
47 | return X[I]
48 |
49 | def distance(self, sample, centroid, dim=None, default="euclidean"):
50 | if default == "euclidean":
51 | return torch.norm(sample - centroid, 2, 0)
52 | elif default == "manhattan":
53 | return torch.sum(torch.abs(sample - centroid))
54 | elif default == "cosine":
55 | return torch.sum(sample * centroid) / (torch.norm(sample) * torch.norm(centroid))
56 | else:
57 | raise ValueError("Unknown similarity distance type")
58 |
59 | def closest_centroid(self, sample, centroids):
60 | """
61 | :param sample: sample whose distance from centroid is to be measured
62 | :param centroids: all the centroids of all the clusters
63 | :return: centroid's index is passed for each sample
64 | """
65 | closest = None
66 | min_distance = float('inf')
67 | for idx, centroid in enumerate(centroids):
68 | distance = self.distance(sample, centroid)
69 | if distance < min_distance:
70 | closest = idx
71 | min_distance = distance
72 |
73 | return closest
74 |
75 | def create_clusters(self, centroids, X):
76 | """
77 | :param centroids: Centroids of all clusters
78 | :param X: Input tensor
79 | :return: Assigning each sample to a cluster.
80 | """
81 | n_samples = X.shape[0]
82 | k_clusters = [[] for _ in range(self.k)]
83 | for idx, sample in enumerate(X):
84 | centroid_index = self.closest_centroid(sample, centroids)
85 | k_clusters[centroid_index].append(idx)
86 |
87 | return k_clusters
88 |
89 | def update_centroids(self, clusters, X):
90 | """
91 | :return: Updating centroids after each iteration.
92 | """
93 | centroids = torch.zeros((self.k, self.features))
94 | for idx, cluster in enumerate(clusters):
95 | centroid = torch.mean(X[cluster], dim=0)
96 | centroids[idx] = centroid
97 |
98 | return centroids
99 |
100 | def label_clusters(self, clusters, X):
101 | """
102 | Labeling the samples with index of clusters
103 | :return: labeled samples
104 | """
105 | y_pred = torch.zeros(X.shape[0])
106 | for idx, cluster in enumerate(clusters):
107 | for sample_idx in cluster:
108 | y_pred[sample_idx] = idx
109 |
110 | return y_pred
111 |
112 | def fit(self, X):
113 | """
114 | Initialize centroids using KMeans++, then find the distance between each sample and the initial centroids and assign
115 | cluster labels based on the minimum distance. Repeat this process for max_iterations, updating the
116 | centroids each time from the newly assigned clusters. Convergence happens when the difference between the
117 | previous and updated centroids is zero.
118 | :return: updated centroids of the cluster after max_iterations.
119 | """
120 | centroids = self.initialize_centroid(X, self.k)
121 | for _ in range(self.max_iterations):
122 | clusters = self.create_clusters(centroids, X)
123 | previous_centroids = centroids
124 | centroids = self.update_centroids(clusters, X)
125 | difference = centroids - previous_centroids
126 |
127 | # print(difference)
128 | if not difference.numpy().any():
129 | break
130 |
131 | self.KMeans_Centroids = centroids
132 | return centroids
133 |
134 | def predict(self, X):
135 | """
136 | :return: label/cluster number for each input sample is returned
137 | """
138 | if not self.KMeans_Centroids.numpy().any():
139 | raise Exception("No Centroids Found. Run KMeans fit")
140 |
141 | clusters = self.create_clusters(self.KMeans_Centroids, X)
142 | labels = self.label_clusters(clusters, X)
143 |
144 | return labels
145 |
146 |
147 | if __name__ == '__main__':
148 | iris = load_iris()
149 | torch.manual_seed(0)
150 | X = torch.tensor(iris.data, dtype=torch.float)
151 | y = torch.tensor(iris.target)
152 | n_classes = len(torch.unique(y))
153 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
154 | kmeans = KMeans(x_train, k=n_classes, iterations=300)
155 | kmeans.fit(x_train)
156 | ypred = kmeans.predict(x_test)
157 | print(f'Accuracy Score: {accuracy_score(y_test, ypred)}')
158 |
--------------------------------------------------------------------------------
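The k-means++ seeding in initialize_centroid above picks each new centroid with probability proportional
to its squared distance from the nearest centroid chosen so far (D^2 weighting). A small, self-contained
sketch of that single step on an arbitrary toy tensor:

import torch

torch.manual_seed(0)
X = torch.tensor([[0.0, 0.0], [0.2, 0.1], [5.0, 5.0], [5.1, 4.9]])
centroids = [X[0]]   # first centroid: an arbitrary starting point

d2 = torch.stack([((X - c) ** 2).sum(dim=1) for c in centroids]).min(dim=0).values
probs = d2 / d2.sum()                     # D^2 weighting
next_idx = torch.multinomial(probs, 1).item()
print(probs, next_idx)                    # far-away points are much more likely to be chosen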
/Day-18-TSNE/tsne.py:
--------------------------------------------------------------------------------
1 | """
2 | Reference: https://towardsdatascience.com/t-sne-clearly-explained-d84c537f53a
3 | Playground: https://distill.pub/2016/misread-tsne/
4 | Wiki: https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding
5 | """
6 | import torch
7 | import logging
8 | from sklearn.datasets import load_iris, load_digits, load_diabetes
9 | class TSNE:
10 | """
11 | The goal is to take a set of points in a high-dimensional space and find a faithful representation of those
12 | points in a lower-dimensional space, typically the 2D plane. The algorithm is non-linear and adapts to the
13 | underlying data, performing different transformations on different regions. Those differences can be a major
14 | source of confusion.
15 | """
16 | def __init__(self, n_components=2, preplexity=5.0, max_iter=1, learning_rate=200):
17 | """
18 | :param n_components:
19 | :param preplexity: how to balance attention between local and global aspects of your data. The parameter is,
20 | in a sense, a guess about the number of close neighbors each point has. Typical values are between 5 and 50.
21 | With a small value of perplexity, local groups are formed; as perplexity increases, more global structure
22 | emerges. Perplexity is more or less a target number of neighbors for our central point.
23 | :param max_iter: Iterations to stabilize the results and converge.
24 | :param learning_rate:
25 | """
26 | self.max_iter = max_iter
27 | self.preplexity = preplexity
28 | self.n_components = n_components
29 | self.initial_momentum = 0.5
30 | self.final_momentum = 0.8
31 | self.min_gain = 0.01
32 | self.lr = learning_rate
33 | self.tol = 1e-5
34 | self.preplexity_tries = 50
35 |
36 | def l2_distance(self, X):
37 | """
38 | :return: Distance between two vectors
39 | """
40 | sum_X = torch.sum(X * X, dim=1)
41 | return (-2* torch.mm(X, X.T) + sum_X).T + sum_X
42 |
43 | def get_pairwise_affinities(self, X):
44 | """
45 | :param X: High dimensional input
46 | :return: a (Gaussian) probability distribution over pairs of high-dimensional objects in such a way that similar
47 | objects are assigned a higher probability while dissimilar points are assigned a lower probability. To find
48 | variance for this distribution we use Binary search. The variance is calculated between fixed preplexity given
49 | by the user.
50 | """
51 | affines = torch.zeros((self.n_samples, self.n_samples), dtype=torch.float32)
52 | target_entropy = torch.log(torch.scalar_tensor(self.preplexity))
53 | distance = self.l2_distance(X)
54 | for i in range(self.n_samples):
55 | affines[i, :] = self.binary_search(distance[i], target_entropy)
56 |
57 | #affines = torch.diagonal(affines).fill_(1.0e-12)
58 | affines[torch.eye(affines.shape[0]).byte()] = 1.0e-12
59 | affines = affines.clip(min=1e-100)
60 | affines = (affines + affines.T)/(2*self.n_samples)
61 | return affines
62 |
63 | def q_distribution(self, D):
64 | """
65 | A Student t-distribution is learnt in the lower-dimensional space of shape (n_samples, n_components)
66 | (2 or 3 dimensions), and, similar to the method 'get_pairwise_affinities' above, we find the probability of the
67 | data points, with high probability for closer points and lower probability for dissimilar points.
68 | """
69 | Q = 1.0 / (1.0 + D)
70 | Q[torch.eye(Q.shape[0]).byte()] = 0.0
71 | Q = Q.clip(min=1e-100)
72 | return Q
73 |
74 | def binary_search(self, dist, target_entropy):
75 | """
76 | SNE performs a binary search for the value of sigma that produces probability distribution with a fixed
77 | perplexity that is specified by the user.
78 | """
79 | precision_minimum = 0
80 | precision_maximum = 1.0e15
81 | precision = 1.0e5
82 |
83 | for _ in range(self.preplexity_tries):
84 | denominator = torch.sum(torch.exp(-dist[dist > 0.0] / precision))
85 | beta = torch.exp(-dist / precision) / denominator
86 |
87 | g_beta = beta[beta > 0.0]
88 | # Shannon Entropy
89 | entropy = -torch.sum(g_beta * torch.log2(g_beta))
90 | error = entropy - target_entropy
91 |
92 | if error > 0:
93 | precision_maximum = precision
94 | precision = (precision + precision_minimum) / 2.0
95 | else:
96 | precision_minimum = precision
97 | precision = (precision + precision_maximum) / 2.0
98 |
99 | if torch.abs(error) < self.tol:
100 | break
101 |
102 | return beta
103 |
104 | def fit_transform(self, X):
105 | self.n_samples, self.n_features = X.shape[0], X.shape[1]
106 | Y = torch.randn(self.n_samples, self.n_components)
107 | velocity = torch.zeros_like(Y)
108 | gains = torch.ones_like(Y)
109 | P = self.get_pairwise_affinities(X)
110 |
111 | iter_num = 0
112 | while iter_num < self.max_iter:
113 | iter_num += 1
114 | D = self.l2_distance(Y)
115 | Q = self.q_distribution(D)
116 | Q_n = Q /torch.sum(Q)
117 |
118 | pmul = 4.0 if iter_num < 100 else 1.0
119 | momentum = 0.5 if iter_num < 20 else 0.8
120 |
121 | grads = torch.zeros(Y.shape)
122 | for i in range(self.n_samples):
123 | """
124 | Gradient step that pulls the estimated Q distribution toward the true P distribution.
125 | """
126 | grad = 4 * torch.mm(((pmul * P[i] - Q_n[i]) * Q[i]).unsqueeze(0), Y[i] -Y)
127 | grads[i] = grad
128 |
129 | gains = (gains + 0.2) * ((grads > 0) != (velocity > 0)) + (gains * 0.8) * ((grads > 0) == (velocity > 0))
130 | gains = gains.clip(min=self.min_gain)
131 |
132 | velocity = momentum * velocity - self.lr * (gains * grads)
133 | Y += velocity
134 | Y = Y - torch.mean(Y, 0)
135 | error = torch.sum(P * torch.log(P/Q_n))
136 | print("Iteration %s, error %s" % (iter_num, error))
137 | return Y
138 |
139 | if __name__ == '__main__':
140 | data = load_diabetes()
141 | torch.manual_seed(42)
142 | X = torch.tensor(data.data, dtype=torch.double)
143 | print(max(X[1,:]))
144 | y = torch.tensor(data.target)
145 | print(y.shape)
146 | tsne = TSNE(n_components=2)
147 | tsne.fit_transform(X)
148 |
--------------------------------------------------------------------------------
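The binary search in TSNE.binary_search above targets a neighbour distribution whose Shannon entropy
matches the requested perplexity, i.e. perplexity = 2^H(P_i) when entropy is measured in bits. A short
sketch with an assumed neighbour distribution:

import torch

p = torch.tensor([0.5, 0.25, 0.125, 0.125])    # hypothetical neighbour probabilities for one point
entropy_bits = -(p * torch.log2(p)).sum()
perplexity = 2 ** entropy_bits
print(entropy_bits.item(), perplexity.item())  # H = 1.75 bits -> perplexity = 2**1.75 ~ 3.36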
/Day-10-Lasso-Ridge-Regression/Lasso_Ridge_Regression.py:
--------------------------------------------------------------------------------
1 | """
2 | Reference: https://github.com/eriklindernoren/ML-From-Scratch
3 | This GitHub repository implements high-quality code, similar to what you see in official libraries like sklearn.
4 | It is a great reference to kickstart your journey in ML programming.
5 | """
6 | import torch
7 | from sklearn.datasets import load_boston
8 | from itertools import combinations_with_replacement
9 | from sklearn.metrics import accuracy_score
10 | from sklearn.model_selection import train_test_split
11 | import seaborn as sb
12 | import matplotlib.pyplot as plt
13 |
14 | class LassoRegularization:
15 | def __init__(self, alpha):
16 | """
17 | :param alpha:
18 | * When 0, lasso regression turns into Linear Regression
19 | * As it increases towards infinity, it drives feature coefficients to exactly zero.
20 | * Try out different values to find the optimal one.
21 | """
22 | self.alpha = alpha
23 |
24 | def __call__(self, w):
25 | """
26 | :param w: Weight vector
27 | :return: Penalization value for MSE
28 | """
29 | return self.alpha * torch.norm(w, p=1)
30 |
31 | def grad(self, w):
32 | """
33 | :param w: weight vector
34 | :return: weight update based on sign value, it helps in removing coefficients from W vector
35 | torch.sign:
36 | a
37 | tensor([ 0.7000, -1.2000, 0.0000, 2.3000])
38 | torch.sign(a)
39 | tensor([ 1., -1., 0., 1.])
40 | """
41 | return self.alpha * torch.sign(w)
42 |
43 | class RidgeRegularization:
44 | def __init__(self, alpha):
45 | """
46 | :param alpha:
47 | * When 0, ridge regression turns into Linear Regression
48 | * As it increases towards infinity, it shrinks the feature coefficients towards (but not exactly to) zero.
49 | * Try out different values to find the optimal one.
50 | """
51 | self.alpha = alpha
52 |
53 | def __call__(self, w):
54 | """
55 | :param w: Weight vector
56 | :return: Penalization value for MSE
57 | """
58 | return self.alpha * 0.5 * torch.mm(w.T, w)
59 |
60 | def grad(self, w):
61 | """
62 | :param w: weight vector
63 | :return: weight update proportional to w; it helps in shrinking the coefficient values in the W vector
64 | """
65 | return self.alpha * w
66 |
67 | class Regression:
68 | def __init__(self, learning_rate, epochs, regression_type='lasso'):
69 | """
70 | :param learning_rate: constant step while updating weight
71 | :param epochs: Number of epochs the data is passed through the model
72 | Initializing the regularizer for Lasso or Ridge Regression.
73 | """
74 | self.lr = learning_rate
75 | self.epochs = epochs
76 | if regression_type == 'lasso':
77 | self.regularization = LassoRegularization(alpha=1.0)
78 | else:
79 | self.regularization = RidgeRegularization(alpha=2.0)
80 |
81 | def normalization(self, X):
82 | """
83 | :param X: Input tensor
84 | :return: Normalized input using l2 norm.
85 | """
86 | l2 = torch.norm(X, p=2, dim=-1)
87 | l2[l2 == 0] = 1
88 | return X / l2.unsqueeze(1)
89 |
90 | def polynomial_features(self, X, degree):
91 | """
92 | It creates polynomial features from existing set of features. For instance,
93 | X_1, X_2, X_3 are available features; polynomial features then takes combinations of
94 | these features to create new features such as X_1*X_2, X_1*X_3, X_2*X_3.
95 |
96 | combinations output: [(), (0,), (1,), (2,), (3,), (0, 0), (0, 1), (0, 2), (0, 3),
97 | (1, 1), (1, 2), (1, 3), (2, 2), (2, 3), (3, 3)]
98 | :param X: Input tensor (For Iris Dataset, (150, 4))
99 | :param degree: Polynomial degree of 2, i.e we'll have product of two feature vector at max.
100 | :return: Output tensor (After adding polynomial features, the number of features increases to 15)
101 | """
102 | n_samples, n_features = X.shape[0], X.shape[1]
103 | def index_combination():
104 | combinations = [combinations_with_replacement(range(n_features), i) for i in range(0, degree+1)]
105 | flat_combinations = [item for sublists in combinations for item in sublists]
106 | return flat_combinations
107 |
108 | combinations = index_combination()
109 | n_output_features = len(combinations)
110 | X_new = torch.empty((n_samples, n_output_features))
111 |
112 | for i, index_combs in enumerate(combinations):
113 | X_new[:, i] = torch.prod(X[:, index_combs], dim=1)
114 |
115 | X_new = X_new.type(torch.DoubleTensor)
116 | return X_new
117 |
118 | def weight_initialization(self, n_features):
119 | """
120 | :param n_features: Number of features in the data
121 | :return: creating weight vector using uniform distribution.
122 | """
123 | limit = 1 / torch.sqrt(torch.scalar_tensor(n_features))
124 | #self.w = torch.FloatTensor((n_features,)).uniform(-limit, limit)
125 | self.w = torch.distributions.uniform.Uniform(-limit, limit).sample((n_features, 1))
126 | self.w = self.w.type(torch.DoubleTensor)
127 |
128 | def fit(self, X, y):
129 | """
130 | :param X: Input tensor
131 | :param y: ground truth tensor
132 | :return: updated weight vector for prediction
133 | """
134 | self.training_error = {}
135 | self.weight_initialization(n_features=X.shape[1])
136 | for epoch in range(1, self.epochs+1):
137 | y_pred = torch.mm(X, self.w)
138 | mse = torch.mean(0.5 * (y - y_pred)**2 + self.regularization(self.w))
139 | self.training_error[epoch] = mse.item()
140 | grad_w = torch.mm(-(y - y_pred).T, X).T + self.regularization.grad(self.w)
141 | self.w -= self.lr * grad_w
142 |
143 |
144 | def predict(self, X):
145 | """
146 | :param X: input tensor
147 | :return: predicted output using learned weight vector
148 | """
149 | y_pred = torch.mm(X, self.w)
150 | return y_pred
151 |
152 | if __name__ == '__main__':
153 | boston = load_boston()
154 | torch.manual_seed(0)
155 | X = torch.tensor(boston.data, dtype=torch.double)
156 | y = torch.tensor(boston.target, dtype=torch.double).unsqueeze(1)
157 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
158 | regression = Regression(learning_rate=0.0001, epochs=3000, regression_type='lasso')
159 | regression.fit(regression.normalization(regression.polynomial_features(x_train, degree=1)), y_train)
160 | y_pred = regression.predict(regression.normalization(regression.polynomial_features(x_test, degree=1)))
161 | plt.figure(figsize=(6, 6))
162 | sb.scatterplot(list(regression.training_error.keys()), list(regression.training_error.values()))
163 | plt.show()
164 |
--------------------------------------------------------------------------------
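The two grad() methods above differ only in the penalty derivative: lasso adds alpha * sign(w), a
constant-magnitude push that can drive small weights exactly to zero, while ridge adds alpha * w, a push
proportional to the weight itself. A tiny comparison on an arbitrary weight vector:

import torch

w = torch.tensor([[0.7], [-1.2], [0.0], [2.3]])
alpha = 0.1

lasso_grad = alpha * torch.sign(w)   # same magnitude for every non-zero weight
ridge_grad = alpha * w               # scales with the weight
print(lasso_grad.flatten(), ridge_grad.flatten())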
/Day-11-Gaussian-Mixture-Model/gmm.py:
--------------------------------------------------------------------------------
1 | """
2 | Blog post GMM: https://brilliant.org/wiki/gaussian-mixture-model/
3 | """
4 | import torch
5 | import math
6 | from sklearn.datasets import load_iris
7 | from sklearn.metrics import accuracy_score
8 | from sklearn.model_selection import train_test_split
9 |
10 | class GMM:
11 | def __init__(self, k, max_epochs=100, tolerance=1e-8):
12 | """
13 | :param k: the number of clusters the algorithm will form.
14 | :param max_epochs: The number of iterations the algorithm will run for if it does
15 | not converge before that.
16 | :param tolerance: float
17 | If the difference of the results from one iteration to the next is
18 | smaller than this value we will say that the algorithm has converged.
19 | """
20 | self.k = k
21 | self.parameters = []
22 | self.max_epochs = max_epochs
23 | self.tolerance = tolerance
24 | self.responsibility = None
25 | self.responsibilities = []
26 | self.sample_assignments = None
27 |
28 | def normalization(self, X):
29 | """
30 | :param X: Input tensor
31 | :return: Normalized input using l2 norm.
32 | """
33 | l2 = torch.norm(X, p=2, dim=-1)
34 | l2[l2 == 0] = 1
35 | return X / l2.unsqueeze(1)
36 |
37 | def covariance_matrix(self, X):
38 | """
39 | :param X: Input tensor
40 | :return: covariance of the input tensor
41 | """
42 | centering_X = X - torch.mean(X, dim=0)
43 | cov = torch.mm(centering_X.T, centering_X) / (centering_X.shape[0] - 1)
44 | return cov
45 |
46 | def random_gaussian_initialization(self, X):
47 | """
48 | Since we are using the iris dataset, we know the number of classes is 3.
49 | We create three gaussian distribution representing each class with
50 | random sampling of data to find parameters like μ and 𝚺/N (covariance matrix)
51 | for each class
52 | :param X: input tensor
53 | :return: 3 randomly selected mean and covariance of X, each act as a separate cluster
54 | """
55 | n_samples = X.shape[0]
56 | self.prior = (1 / self.k) * torch.ones(self.k)
57 | for cls in range(self.k):
58 | parameter = {}
59 | parameter['mean'] = X[torch.randperm(n_samples)[:1]]
60 | parameter['cov'] = self.covariance_matrix(X)
61 | self.parameters.append(parameter)
62 |
63 | def multivariate_gaussian_distribution(self, X, parameters):
64 | """
65 | Checkout the equation from Multi-Dimensional Model from blog link posted above.
66 | We find the likelihood of each sample w.r.t to the parameters initialized above for each separate cluster.
67 | :param X: Input tensor
68 | :param parameters: mean, cov of the randomly initialized gaussian
69 | :return: Likelihood of each sample belonging to a cluster with random initialization of mean and cov.
70 | Since it is a multivariate problem we have covariance and not variance.
71 | """
72 | n_features = X.shape[1]
73 | mean = parameters['mean']
74 | cov = parameters['cov']
75 | determinant = torch.det(cov)
76 | likelihoods = torch.zeros(X.shape[0])
77 | for i, sample in enumerate(X):
78 | dim = torch.scalar_tensor(n_features, dtype=torch.float)
79 | coefficients = 1.0/ torch.sqrt(torch.pow((2.0 * math.pi), dim) * determinant)
80 | exponent = torch.exp( -0.5 * torch.mm(torch.mm((sample - mean) ,torch.pinverse(cov)) , (sample - mean).T))
81 | likelihoods[i] = coefficients * exponent
82 |
83 | return likelihoods
84 |
85 | def get_likelihood(self, X):
86 | """
87 | Previously, we initialized 3 different means and covariances in random_gaussian_initialization(). Now, around
88 | each of these means and covariances, we compute the likelihood of each sample using the multivariate gaussian distribution.
89 | :param X:
90 | :return: Storing the likelihood of each sample belonging to a cluster with random initialization of mean and cov.
91 | Since it is a multivariate problem we have covariance and not variance.
92 | """
93 | n_samples = X.shape[0]
94 | likelihoods_cls = torch.zeros((n_samples, self.k))
95 | for cls in range(self.k):
96 | likelihoods_cls[:, cls] = self.multivariate_gaussian_distribution(X, self.parameters[cls])
97 |
98 | return likelihoods_cls
99 |
100 | def expectation(self, X):
101 | """
102 |         The Expectation-Maximization algorithm is used to optimize the randomly initialized means and covariances.
103 |         The expectation step computes, for each sample in X, the probability (responsibility) of belonging to each cluster.
104 |         :param X: input tensor
105 |         :return: None. Stores the responsibilities and assigns each sample to the cluster with the highest probability.
106 | """
107 | weighted_likelihood = self.get_likelihood(X) * self.prior
108 | sum_likelihood = torch.sum(weighted_likelihood, dim=1).unsqueeze(1)
109 | # Determine responsibility as P(X|y)*P(y)/P(X)
110 | # responsibility stores each sample's probability score corresponding to each class
111 |         self.responsibility = weighted_likelihood / sum_likelihood
112 | # Assign samples to cluster that has largest probability
113 | self.sample_assignments = self.responsibility.argmax(dim=1)
114 | # Save value for convergence check
115 | self.responsibilities.append(torch.max(self.responsibility, dim=1))
116 |
117 | def maximization(self, X):
118 | """
119 |         Iterate through the clusters, updating each mean and covariance.
120 |         The updated mean and covariance are weighted by each sample's responsibility (probability score) for the cluster.
121 |         :param X: input tensor
122 |         :return: None. Updates the means, covariances and priors in place.
123 | """
124 | for i in range(self.k):
125 | resp = self.responsibility[:, i].unsqueeze(1)
126 | mean = torch.sum(resp * X, dim=0) / torch.sum(resp)
127 | covariance = torch.mm((X - mean).T, (X - mean) * resp) / resp.sum()
128 | self.parameters[i]['mean'], self.parameters[i]['cov'] = mean.unsqueeze(0), covariance
129 |
130 | n_samples = X.shape[0]
131 | self.prior = self.responsibility.sum(dim=0) / n_samples
132 |
133 | def convergence(self, X):
134 |         """Converged if || likelihood - last_likelihood || < tolerance."""
135 | if len(self.responsibilities) < 2:
136 | return False
137 | difference = torch.norm(self.responsibilities[-1].values - self.responsibilities[-2].values)
138 | return difference <= self.tolerance
139 |
140 | def predict(self, X):
141 | self.random_gaussian_initialization(X)
142 |
143 | for _ in range(self.max_epochs):
144 | self.expectation(X)
145 | self.maximization(X)
146 | 
147 |             if self.convergence(X):
148 |                 break
149 | 
150 |
151 | self.expectation(X)
152 | return self.sample_assignments
153 |
154 | if __name__ == '__main__':
155 | iris = load_iris()
156 | torch.manual_seed(0)
157 | X = torch.tensor(iris.data, dtype=torch.float)
158 | y = torch.tensor(iris.target)
159 | n_classes = len(torch.unique(y))
160 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
161 | gmm = GMM(k=n_classes, max_epochs=2000)
162 | y_pred = gmm.predict(x_train)
163 | print(f'Accuracy Score: {accuracy_score(y_train, y_pred)}')
164 |
165 |
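166 | # Note (a suggestion, not part of the original script): GMM returns cluster indices,
167 | # which are not aligned with the iris class labels, so accuracy_score on raw
168 | # sample_assignments can look low even when the clustering is good. A
169 | # label-permutation-invariant metric is a safer sanity check, e.g.:
170 | #
171 | #     from sklearn.metrics import adjusted_rand_score
172 | #     print(f'ARI: {adjusted_rand_score(y_train, y_pred)}')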
--------------------------------------------------------------------------------
/Day-03-Decision-Tree/DecisionTree.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from sklearn.datasets import load_breast_cancer
3 | from sklearn.model_selection import train_test_split
4 | from sklearn.metrics import accuracy_score
5 |
6 |
7 | class Node:
8 | def __init__(self, gini, num_samples, num_samples_per_class, predicted_class):
9 | self.gini = gini
10 | self.num_samples = num_samples
11 | self.num_samples_per_class = num_samples_per_class
12 | self.predicted_class = predicted_class
13 | self.feature_index = 0
14 | self.threshold = 0
15 | self.left = None
16 | self.right = None
17 |
18 | class DecisionTree_CART:
19 | def __init__(self, max_depth=None):
20 | self.max_depth = max_depth
21 |
22 | def fit(self, X, y):
23 | """Build decision tree classifier
24 | :argument X: Input Tensor
25 | :argument y: ground truth Tensor
26 | :variable n_classes_: Number of Classes in target variable
27 | :variable n_features_: Number of features
28 | :variable tree_: Making decision tree based on X, y along with max_depth
29 | """
30 | self.n_classes_ = len(y.unique()) # classes are assumed to go from 0 to n-1
31 | self.n_features_ = X.shape[1]
32 | self.tree_ = self._grow_tree(X, y)
33 |
34 | def _gini(self, y):
35 | """Compute Gini impurity of a non-empty node.
36 | Gini impurity is defined as Σ p(1-p) over all classes, with p the frequency of a
37 | class within the node. Since Σ p = 1, this is equivalent to 1 - Σ p^2.
38 |
39 | :var m: Sample Size
40 | """
41 | m = y.shape[0]
42 |
43 |         return 1.0 - sum((torch.sum(y == c).item() / m) ** 2 for c in range(self.n_classes_))
44 |
45 | def _best_split(self, X, y):
46 | """Find the best split for a node.
47 | "Best" means that the average impurity of the two children, weighted by their
48 | population, is the smallest possible. Additionally it must be less than the
49 | impurity of the current node.
50 | To find the best split, we loop through all the features, and consider all the
51 | midpoints between adjacent training samples as possible thresholds. We compute
52 | the Gini impurity of the split generated by that particular feature/threshold
53 | pair, and return the pair with smallest impurity.
54 | Returns:
55 | best_idx: Index of the feature for best split, or None if no split is found.
56 | best_thr: Threshold to use for the split, or None if no split is found.
57 | """
58 | # Need at least two elements to split a node.
59 | m = y.shape[0]
60 | if m <= 1:
61 | return None, None
62 |
63 | # Count of each class in the current node.
64 | num_parent = [torch.sum(y == c).item() for c in range(self.n_classes_)]
65 | print(f'num_parent {num_parent}')
66 |
67 | # Gini of current node.
68 |         best_gini = 1.0 - sum((n / m) ** 2 for n in num_parent)
69 | best_idx, best_thr = None, None
70 |
71 | # Loop through all features.
72 | for idx in range(self.n_features_):
73 | # Sort data along selected feature.
74 | thresholds, classes = zip(*sorted(zip(X[:, idx], y)))
75 |
76 | # We could actually split the node according to each feature/threshold pair
77 | # and count the resulting population for each class in the children, but
78 | # instead we compute them in an iterative fashion, making this for loop
79 | # linear rather than quadratic.
80 | num_left = [0] * self.n_classes_
81 | num_right = num_parent.copy()
82 | for i in range(1, m): # possible split positions
83 | c = classes[i - 1]
84 | num_left[c] += 1
85 | num_right[c] -= 1
86 | gini_left = 1.0 - sum(
87 | (num_left[x] / i) ** 2 for x in range(self.n_classes_)
88 | )
89 | gini_right = 1.0 - sum(
90 |                     (num_right[x] / (m - i)) ** 2 for x in range(self.n_classes_)
91 | )
92 |
93 | # The Gini impurity of a split is the weighted average of the Gini
94 | # impurity of the children.
95 | gini = (i * gini_left + (m - i) * gini_right) / m
96 |
97 | # The following condition is to make sure we don't try to split two
98 | # points with identical values for that feature, as it is impossible
99 | # (both have to end up on the same side of a split).
100 | if thresholds[i] == thresholds[i - 1]:
101 | continue
102 |
103 | if gini < best_gini:
104 | best_gini = gini
105 | best_idx = idx
106 | best_thr = (thresholds[i] + thresholds[i - 1]) / 2 # midpoint
107 |
108 | print("Best Index and Threshold",best_idx, best_thr)
109 |
110 | return best_idx, best_thr
111 |
112 | def _grow_tree(self, X, y, depth=0):
113 | """Build a decision tree by recursively finding the best split."""
114 | # Population for each class in current node. The predicted class is the one with
115 | # largest population.
116 | num_samples_per_class = torch.tensor([torch.sum(y == i) for i in range(self.n_classes_)])
117 | predicted_class = torch.argmax(num_samples_per_class)
118 | node = Node(
119 | gini=self._gini(y),
120 | num_samples=y.shape[0],
121 | num_samples_per_class=num_samples_per_class,
122 | predicted_class=predicted_class,
123 | )
124 |
125 | # Split recursively until maximum depth is reached.
126 |         if self.max_depth is None or depth < self.max_depth:
127 | idx, thr = self._best_split(X, y)
128 | if idx is not None:
129 | indices_left = X[:, idx] < thr
130 | X_left, y_left = X[indices_left], y[indices_left]
131 | X_right, y_right = X[~indices_left], y[~indices_left]
132 | node.feature_index = idx
133 | node.threshold = thr
134 | node.left = self._grow_tree(X_left, y_left, depth + 1)
135 | node.right = self._grow_tree(X_right, y_right, depth + 1)
136 | return node
137 |
138 | def predict(self, X):
139 | return [self._predict(inputs) for inputs in X]
140 |
141 | def _predict(self, inputs):
142 | """Predict class for a single sample."""
143 | node = self.tree_
144 | while node.left:
145 | if inputs[node.feature_index] < node.threshold:
146 | node = node.left
147 | else:
148 | node = node.right
149 | return node.predicted_class
150 |
151 | if __name__ == "__main__":
152 | """
153 | :variable X: Input tensor with 30 features
154 | :target y: Output tensor with 2 classes
155 |
156 | * Converting Numpy array into torch tensor.
157 | * Creating DecisionTree Object with max_depth 5.
158 | * Fit and predict with DecisionTree Object.
159 | """
160 | breast_cancer = load_breast_cancer()
161 | X = breast_cancer['data']
162 | y = breast_cancer['target']
163 | X = torch.tensor(X)
164 | y = torch.tensor(y)
165 |
166 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
167 | classifier = DecisionTree_CART(max_depth=5)
168 | classifier.fit(x_train, y_train)
169 | y_predict = classifier.predict(x_test)
170 |
171 | print(f'Accuracy: {accuracy_score(y_test, y_predict)}')
172 |
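173 | # Worked example (illustration only, not part of the original file): a node holding
174 | # 10 samples with class counts [2, 8] has class frequencies p = [0.2, 0.8], so
175 | #
176 | #     Gini = 1 - (0.2 ** 2 + 0.8 ** 2) = 1 - 0.68 = 0.32
177 | #
178 | # which is what _gini returns once the class counts are divided with true division (/)
179 | # rather than floor division (//).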
--------------------------------------------------------------------------------
/Day-16-Bayesian-Regression/BayesianRegression.py:
--------------------------------------------------------------------------------
1 | """
2 | Checkout the below url to understand, how Bayesian regression differs from Linear Regression
3 | https://towardsdatascience.com/introduction-to-bayesian-linear-regression-e66e60791ea7
4 | https://dzone.com/articles/bayesian-learning-for-machine-learning-part-ii-lin
5 | """
6 | import pandas as pd
7 | import torch
8 | from scipy.stats import chi2, multivariate_normal
9 | from sklearn.model_selection import train_test_split
10 | from itertools import combinations_with_replacement
11 | import matplotlib.pyplot as plt
12 |
13 | def mean_squared_error(y_true, y_pred):
14 | """ Returns the mean squared error between y_true and y_pred """
15 | mse = torch.mean(torch.pow(y_true - y_pred, 2))
16 | return mse
17 |
18 | def polynomial_features(X, degree):
19 | """
20 |     Creates polynomial features from the existing set of features. For instance,
21 |     if X_1, X_2, X_3 are the available features, polynomial features are formed from combinations
22 |     of these features, e.g. X_1*X_2, X_1*X_3, X_2*X_3.
23 | 
24 |     For degree 2:
25 |     combinations output: [(), (0,), (1,), (2,), (3,), (0, 0), (0, 1), (0, 2), (0, 3),
26 |     (1, 1), (1, 2), (1, 3), (2, 2), (2, 3), (3, 3)]
27 |     :param X: Input tensor (e.g. for the Iris dataset, shape (150, 4))
28 |     :param degree: Polynomial degree, e.g. 2 means a product of at most two feature vectors.
29 |     :return: Output tensor (for the 4-feature example above, the number of features increases to 15)
30 | """
31 | n_samples, n_features = X.shape[0], X.shape[1]
32 | def index_combination():
33 | combinations = [combinations_with_replacement(range(n_features), i) for i in range(0, degree+1)]
34 | flat_combinations = [item for sublists in combinations for item in sublists]
35 | return flat_combinations
36 |
37 | combinations = index_combination()
38 | n_output_features = len(combinations)
39 | X_new = torch.empty((n_samples, n_output_features))
40 |
41 | for i, index_combs in enumerate(combinations):
42 | X_new[:, i] = torch.prod(X[:, index_combs], dim=1)
43 |
44 | X_new = X_new.type(torch.DoubleTensor)
45 | return X_new
46 |
47 |
48 | class BayesianRegression:
49 | def __init__(self, n_draws, mu_0, omega_0, nu_0, sigma_sq_0, polynomial_degree=0, credible_interval=95):
50 | """
51 |         Bayesian regression model. If polynomial_degree is specified, the features are
52 |         transformed with a polynomial basis function, which allows for polynomial
53 |         regression. Assumes a Normal prior and likelihood for the weights and a scaled inverse
54 | chi-squared prior and likelihood for the variance of the weights.
55 |
56 | :param n_draws: The number of simulated draws from the posterior of the parameters.
57 | :param mu_0: The mean values of the prior Normal distribution of the parameters.
58 | :param omega_0: The precision matrix of the prior Normal distribution of the parameters.
59 | :param nu_0: The degrees of freedom of the prior scaled inverse chi squared distribution.
60 | :param sigma_sq_0: The scale parameter of the prior scaled inverse chi squared distribution.
61 | :param polynomial_degree: The polynomial degree that the features should be transformed to. Allows
62 | for polynomial regression.
63 |         :param credible_interval: The credible interval (ETI in this impl.), given as a fraction: 0.95 => 95% credible interval
64 |         of the posterior of the parameters.
65 | """
66 | self.n_draws = n_draws
67 | self.polynomial_degree = polynomial_degree
68 | self.credible_interval = credible_interval
69 |
70 | # Prior parameters
71 | self.mu_0 = mu_0
72 | self.omega_0 = omega_0
73 | self.nu_0 = nu_0
74 | self.sigma_sq_0 = sigma_sq_0
75 |
76 | def scaled_inverse_chi_square(self, n, df, scale):
77 | """
78 | Allows for simulation from the scaled inverse chi squared
79 | distribution. Assumes the variance is distributed according to
80 | this distribution.
81 |         :param n: number of draws to simulate
82 |         :param df: degrees of freedom
83 |         :param scale: scale parameter of the distribution
84 |         :return: simulated draws of the variance sigma^2
85 | """
86 | X = chi2.rvs(size=n, df=df)
87 | sigma_sq = df * scale / X
88 | return sigma_sq
89 |
90 | def fit(self, X, y):
91 | # For polynomial transformation
92 | if self.polynomial_degree:
93 | X = polynomial_features(X, degree=self.polynomial_degree)
94 |
95 | n_samples, n_features = X.shape[0], X.shape[1]
96 | X_X_T = torch.mm(X.T, X)
97 |
98 |         # Least squares approximation of beta
99 | beta_hat = torch.mm(torch.mm(torch.pinverse(X_X_T), X.T), y)
100 |
101 | # The posterior parameters can be determined analytically since we assume
102 | # conjugate priors for the likelihoods.
103 | # Normal prior / likelihood => Normal posterior
104 | mu_n = torch.mm(torch.pinverse(X_X_T + self.omega_0), torch.mm(X_X_T, beta_hat) + torch.mm(self.omega_0, self.mu_0.unsqueeze(1)))
105 | omega_n = X_X_T + self.omega_0
106 | nu_n = self.nu_0 + n_samples
107 |
108 | # Scaled inverse chi-squared prior / likelihood => Scaled inverse chi-squared posterior
109 | sigma_sq_n = (1.0/nu_n) * (self.nu_0 * self.sigma_sq_0 + torch.mm(y.T, y) + torch.mm(torch.mm(self.mu_0.unsqueeze(1).T, self.omega_0), self.mu_0.unsqueeze(1)) - torch.mm(mu_n.T, torch.mm(omega_n, mu_n)))
110 |
111 | # Simulate parameter values for n_draws
112 | beta_draws = torch.empty((self.n_draws, n_features))
113 | for i in range(self.n_draws):
114 | sigma_sq = self.scaled_inverse_chi_square(n=1, df=nu_n, scale=sigma_sq_n)
115 | beta = multivariate_normal.rvs(size=1, mean=mu_n[:,0], cov=sigma_sq * torch.pinverse(omega_n))
116 |             beta_draws[i, :] = torch.tensor(beta, dtype=torch.float)
117 |
118 | # Select the mean of the simulated variables as the ones used to make predictions
119 | self.w = torch.mean(beta_draws, dim=0, dtype=torch.double)
120 |
121 | # Lower and upper boundary of the credible interval
122 | l_eti = 0.50 - self.credible_interval / 2
123 | u_eti = 0.50 + self.credible_interval / 2
124 | self.eti = torch.tensor([[torch.quantile(beta_draws[:, i], q=l_eti), torch.quantile(beta_draws[:, i], q=u_eti)] for i in range(n_features)], dtype=torch.double)
125 |
126 | def predict(self, X, eti=False):
127 | if self.polynomial_degree:
128 | X = polynomial_features(X, degree=self.polynomial_degree)
129 | y_pred = torch.mm(X, self.w.unsqueeze(1))
130 |         # If the lower and upper boundaries of the credible
131 |         # (equal-tail) interval should be returned
132 | if eti:
133 | lower_w = self.eti[:, 0]
134 | upper_w = self.eti[:, 1]
135 |
136 | y_lower_prediction = torch.mm(X, lower_w.unsqueeze(1))
137 | y_upper_prediction = torch.mm(X, upper_w.unsqueeze(1))
138 |
139 | return y_pred, y_lower_prediction, y_upper_prediction
140 |
141 | return y_pred
142 |
143 | if __name__ == '__main__':
144 | data = pd.read_csv('temp.txt', sep="\t")
145 | X = torch.tensor(data["time"].values).unsqueeze(0).T
146 | y = torch.tensor(data["temp"].values).unsqueeze(0).T
147 | x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
148 | n_samples, n_features = X.shape[0], X.shape[1]
149 | mu_0 = torch.zeros(n_features, dtype=torch.double)
150 | omega_0 = torch.diag(torch.tensor([0.0001] * n_features, dtype=torch.double))
151 | nu_0 = 1
152 | sigma_sq_0 = 100
153 | credible_interval = 0.40
154 | classifier = BayesianRegression(n_draws=2000,
155 | polynomial_degree=4,
156 | mu_0=mu_0,
157 | omega_0=omega_0,
158 | nu_0=nu_0,
159 | sigma_sq_0=sigma_sq_0,
160 | credible_interval=credible_interval)
161 | classifier.fit(x_train, y_train)
162 | y_pred = classifier.predict(x_test)
163 | mse = mean_squared_error(y_test, y_pred)
164 | y_pred_, y_lower_, y_upper_ = classifier.predict(X=X, eti=True)
165 | print("Mean Squared Error:", mse)
166 | #
167 | # Color map
168 | cmap = plt.get_cmap('viridis')
169 |
170 | # Plot the results
171 | m1 = plt.scatter(366 * x_train, y_train, color=cmap(0.9), s=10)
172 | m2 = plt.scatter(366 * x_test, y_test, color=cmap(0.5), s=10)
173 | p1 = plt.plot(366 * X, y_pred_, color="black", linewidth=2, label="Prediction")
174 |     p2 = plt.plot(366 * X, y_lower_, color="gray", linewidth=2, label="{0:.0f}% Credible Interval".format(100 * credible_interval))
175 | p3 = plt.plot(366 * X, y_upper_, color="gray", linewidth=2)
176 | plt.axis((0, 366, -20, 25))
177 | plt.suptitle("Bayesian Regression")
178 | plt.title("MSE: %.2f" % mse, fontsize=10)
179 | plt.xlabel('Day')
180 |     plt.ylabel('Temperature in Celsius')
181 |     # plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
182 |     plt.legend(loc='lower right')
183 | 
184 |
185 | plt.show()
186 |
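187 | # For reference (restating what fit() computes above, not new behaviour): with conjugate
188 | # priors the posterior parameters have closed forms,
189 | #
190 | #     mu_n      = inv(X^T X + omega_0) (X^T X beta_hat + omega_0 mu_0)
191 | #     omega_n   = X^T X + omega_0
192 | #     nu_n      = nu_0 + n_samples
193 | #     sigma^2_n = (1 / nu_n) * (nu_0 * sigma^2_0 + y^T y + mu_0^T omega_0 mu_0 - mu_n^T omega_n mu_n)
194 | #
195 | # The script also assumes 'temp.txt' is a tab-separated file with "time" and "temp" columns;
196 | # any file in that shape can be substituted.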
--------------------------------------------------------------------------------