├── .gitignore ├── .DS_Store ├── Naive Bayes ├── Naive Bayes.jpg ├── README.md └── naive_bayes.py ├── Hidden Markov Model ├── .DS_Store ├── assets │ ├── model.png │ ├── state.png │ ├── initial.png │ └── observation.png ├── outputs │ ├── HMM.dot.png │ └── HMM.dot ├── main.py └── README.md ├── Decision Tree ├── Decision_Tree.jpg ├── Decision_Tree.png └── README.md ├── Multilayer Perceptron ├── multi_layer_perceptron.jpg ├── multi_layer_perceptron2.jpg └── README.md ├── Perceptron ├── __pycache__ │ └── perceptron_training.cpython-38.pyc ├── perceptron_test.py ├── perceptron_training.py └── README.md ├── Apriori ├── GroceryStoreDataSet.csv ├── README.md └── apriori.py ├── Elastic Net ├── Salary_Data.csv ├── README.md └── Elastic_Net_Regression.py ├── Random Forest ├── randomForestTest.py ├── README.md └── randomForest.py ├── LICENSE ├── Principal Component Analaysis ├── PCA.py └── README.md ├── K Nearest Neighbors ├── k-nearest neighbors (KNN).py └── README.md ├── Spectral Clustering ├── README.md └── spectral_clustering.py ├── Ridge Regression ├── Ridge Regression- Base.py └── README.md ├── Multiple Linear Regression ├── README.md └── multiple_linear_regression_implementation.py ├── Hierarchical Clustering ├── implementation.py └── README.md ├── DBSCAN ├── dbscan.py └── README.md ├── BIRCH Clustering └── README.md ├── Lasso Regression ├── Lasso_Regression.py └── README.md ├── stochastic gradient descent ├── stochastic_gradient_descent_algo.py └── README.md ├── FP-Growth └── README.md ├── Lowess Regression ├── README.md └── lowessregression.py ├── Mini Batch K-means Clustering └── README.md ├── CONTRIBUTING.md ├── K-Means ├── kmeans.py └── README.md ├── Neural Network ├── neural_network.py └── README.md ├── Linear Regression ├── Linear_Regression.py └── README.md ├── Preprocessing ├── standard_scaler.py └── min_max_scaler.py ├── Markov's Chain ├── Markov's-Chain.py ├── Readme.md └── Trump-Speech.txt ├── CODE_OF_CONDUCT.md ├── Genetic Algorithm ├── genetic_algorithm.py └── README.md ├── Gaussian Mixture Model └── GaussianMixtureModel.py ├── Adaboost └── Iris.csv ├── Bayesian Regression ├── bayessian_regression.py └── README.md ├── Logistic Regression ├── Logistic_Regression_base.py └── README.md ├── Support Vector Machine └── SVM_Linear_Kernal_&_documentation.py ├── README.md └── XGBoost └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | testing_file.py 2 | *.pyc 3 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Algo-Phantoms/Algo-ScriptML/HEAD/.DS_Store -------------------------------------------------------------------------------- /Naive Bayes/Naive Bayes.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Algo-Phantoms/Algo-ScriptML/HEAD/Naive Bayes/Naive Bayes.jpg -------------------------------------------------------------------------------- /Hidden Markov Model/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Algo-Phantoms/Algo-ScriptML/HEAD/Hidden Markov Model/.DS_Store -------------------------------------------------------------------------------- /Decision Tree/Decision_Tree.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Algo-Phantoms/Algo-ScriptML/HEAD/Decision Tree/Decision_Tree.jpg -------------------------------------------------------------------------------- /Decision Tree/Decision_Tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Algo-Phantoms/Algo-ScriptML/HEAD/Decision Tree/Decision_Tree.png -------------------------------------------------------------------------------- /Hidden Markov Model/assets/model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Algo-Phantoms/Algo-ScriptML/HEAD/Hidden Markov Model/assets/model.png -------------------------------------------------------------------------------- /Hidden Markov Model/assets/state.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Algo-Phantoms/Algo-ScriptML/HEAD/Hidden Markov Model/assets/state.png -------------------------------------------------------------------------------- /Hidden Markov Model/assets/initial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Algo-Phantoms/Algo-ScriptML/HEAD/Hidden Markov Model/assets/initial.png -------------------------------------------------------------------------------- /Hidden Markov Model/outputs/HMM.dot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Algo-Phantoms/Algo-ScriptML/HEAD/Hidden Markov Model/outputs/HMM.dot.png -------------------------------------------------------------------------------- /Hidden Markov Model/assets/observation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Algo-Phantoms/Algo-ScriptML/HEAD/Hidden Markov Model/assets/observation.png -------------------------------------------------------------------------------- /Multilayer Perceptron/multi_layer_perceptron.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Algo-Phantoms/Algo-ScriptML/HEAD/Multilayer Perceptron/multi_layer_perceptron.jpg -------------------------------------------------------------------------------- /Multilayer Perceptron/multi_layer_perceptron2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Algo-Phantoms/Algo-ScriptML/HEAD/Multilayer Perceptron/multi_layer_perceptron2.jpg -------------------------------------------------------------------------------- /Perceptron/__pycache__/perceptron_training.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Algo-Phantoms/Algo-ScriptML/HEAD/Perceptron/__pycache__/perceptron_training.cpython-38.pyc -------------------------------------------------------------------------------- /Apriori/GroceryStoreDataSet.csv: -------------------------------------------------------------------------------- 1 | "MILK,BREAD,BISCUIT" 2 | "BREAD,MILK,BISCUIT,CORNFLAKES" 3 | "BREAD,TEA,BOURNVITA" 4 | "JAM,MAGGI,BREAD,MILK" 5 | "MAGGI,TEA,BISCUIT" 6 | "BREAD,TEA,BOURNVITA" 7 | "MAGGI,TEA,CORNFLAKES" 8 | "MAGGI,BREAD,TEA,BISCUIT" 9 | "JAM,MAGGI,BREAD,TEA" 10 | "BREAD,MILK" 11 | "COFFEE,COCK,BISCUIT,CORNFLAKES" 12 | "COFFEE,COCK,BISCUIT,CORNFLAKES" 13 | "COFFEE,SUGER,BOURNVITA" 14 | "BREAD,COFFEE,COCK" 15 | 
"BREAD,SUGER,BISCUIT" 16 | "COFFEE,SUGER,CORNFLAKES" 17 | "BREAD,SUGER,BOURNVITA" 18 | "BREAD,COFFEE,SUGER" 19 | "BREAD,COFFEE,SUGER" 20 | "TEA,MILK,COFFEE,CORNFLAKES" 21 | -------------------------------------------------------------------------------- /Elastic Net/Salary_Data.csv: -------------------------------------------------------------------------------- 1 | YearsExperience,Salary 2 | 1.1,39343.00 3 | 1.3,46205.00 4 | 1.5,37731.00 5 | 2.0,43525.00 6 | 2.2,39891.00 7 | 2.9,56642.00 8 | 3.0,60150.00 9 | 3.2,54445.00 10 | 3.2,64445.00 11 | 3.7,57189.00 12 | 3.9,63218.00 13 | 4.0,55794.00 14 | 4.0,56957.00 15 | 4.1,57081.00 16 | 4.5,61111.00 17 | 4.9,67938.00 18 | 5.1,66029.00 19 | 5.3,83088.00 20 | 5.9,81363.00 21 | 6.0,93940.00 22 | 6.8,91738.00 23 | 7.1,98273.00 24 | 7.9,101302.00 25 | 8.2,113812.00 26 | 8.7,109431.00 27 | 9.0,105582.00 28 | 9.5,116969.00 29 | 9.6,112635.00 30 | 10.3,122391.00 31 | 10.5,121872.00 32 | -------------------------------------------------------------------------------- /Hidden Markov Model/outputs/HMM.dot: -------------------------------------------------------------------------------- 1 | digraph { 2 | Rainy; 3 | Sunny; 4 | Sad; 5 | Happy; 6 | Rainy -> Rainy [color=blue, key=0, label="0.5", weight="0.5"]; 7 | Rainy -> Sunny [color=blue, key=0, label="0.5", weight="0.5"]; 8 | Rainy -> Sad [color=red, key=0, label="0.8", weight="0.8"]; 9 | Rainy -> Happy [color=red, key=0, label="0.2", weight="0.2"]; 10 | Sunny -> Rainy [color=blue, key=0, label="0.3", weight="0.3"]; 11 | Sunny -> Sunny [color=blue, key=0, label="0.7", weight="0.7"]; 12 | Sunny -> Sad [color=red, key=0, label="0.4", weight="0.4"]; 13 | Sunny -> Happy [color=red, key=0, label="0.6", weight="0.6"]; 14 | } 15 | -------------------------------------------------------------------------------- /Random Forest/randomForestTest.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn import datasets 3 | from sklearn.model_selection import train_test_split 4 | 5 | from randomForest import randomForest 6 | 7 | def accuracy(y_true, y_pred): 8 | accuracy = np.sum(y_true == y_pred) / len(y_true) 9 | return accuracy 10 | 11 | data = datasets.load_breast_cancer() 12 | X = data.data 13 | y = data.target 14 | 15 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234) 16 | 17 | clf = randomForest(n_trees=3, max_depth=10) 18 | 19 | clf.fit(X_train, y_train) 20 | y_pred = clf.predict(X_test) 21 | acc = accuracy(y_test, y_pred) 22 | 23 | print ("Accuracy:", acc) -------------------------------------------------------------------------------- /Hidden Markov Model/main.py: -------------------------------------------------------------------------------- 1 | import hmm 2 | 3 | # Hidden 4 | hidden_states = ["Rainy", "Sunny"] 5 | transition_matrix = [[0.5, 0.5], [0.3, 0.7]] 6 | 7 | # Observable 8 | observable_states = ["Sad", "Happy"] 9 | emission_matrix = [[0.8, 0.2], [0.4, 0.6]] 10 | 11 | # Inputs 12 | input_seq = [0, 0, 1] 13 | 14 | model = hmm.HiddenMarkovModel( 15 | observable_states, hidden_states, transition_matrix, emission_matrix 16 | ) 17 | 18 | model.print_model_info() 19 | model.visualize_model() 20 | 21 | alpha, a_probs = model.forward(input_seq) 22 | hmm.print_forward_result(alpha, a_probs) 23 | 24 | beta, b_probs = model.backward(input_seq) 25 | hmm.print_backward_result(beta, b_probs) 26 | 27 | path, delta, phi = model.viterbi(input_seq) 28 | hmm.print_viterbi_result(input_seq, observable_states, 
hidden_states, path, delta, phi) -------------------------------------------------------------------------------- /Perceptron/perceptron_test.py: -------------------------------------------------------------------------------- 1 | # Perceptron 2 | # Maths behind Perceptron Training 3 | 4 | import numpy as np 5 | from sklearn.model_selection import train_test_split 6 | import matplotlib.pyplot as plt 7 | from sklearn.datasets import make_blobs 8 | from perceptron_training import Perceptron 9 | 10 | # ------- Generating the dataset using make_blobs ------- 11 | X,Y = make_blobs(n_samples=800, centers=2, n_features=2, random_state=2) 12 | plt.style.use("seaborn") 13 | plt.scatter(X[:,0],X[:,1],c=Y,cmap = plt.cm.Accent) 14 | plt.show() 15 | 16 | # -------- Splitting train and test --------- 17 | Xtrain, Xtest, Ytrain, Ytest = train_test_split(X,Y, test_size=0.3,random_state = 101) 18 | 19 | # -------- Predicting using Perceptron class -------- 20 | p = Perceptron() 21 | p.fit(Xtrain, Ytrain) 22 | pred = p.predict(Xtest) 23 | 24 | print(p.accuracy(Ytest,pred)) 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Algo Phantoms 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
--------------------------------------------------------------------------------
/Principal Component Analaysis/PCA.py:
--------------------------------------------------------------------------------
import numpy as np

class PCA:
    """
    PCA: a mathematical technique used for dimensionality reduction.

    Attributes:
        array (list): A matrix of elements
    """

    def __init__(self, array):
        self.arr = array

    def calculate(self):
        self.arr = np.array(self.arr)
        # Calculate the column means
        arr_mean = np.mean(self.arr.T, axis=1)
        # Centre the columns by subtracting the column mean
        arr_scale = self.arr - arr_mean
        # Calculate the covariance matrix of the centred data
        arr_cov = np.cov(arr_scale.T)
        # Get the eigenvalues and eigenvectors
        values, vectors = np.linalg.eig(arr_cov)
        # Project the data onto the principal components
        P = vectors.T.dot(arr_scale.T)
        return P.T


"""
Test case

arr = [
    [1, 2],
    [3, 4],
    [5, 6]
]

pca = PCA(arr)
print('Principal Component Analysis of the given array\n')
print(pca.calculate())
"""

"""
Solution

Principal Component Analysis of the given array

[[-2.82842712  0.        ]
 [ 0.          0.        ]
 [ 2.82842712  0.        ]]
"""

--------------------------------------------------------------------------------
/Hidden Markov Model/README.md:
--------------------------------------------------------------------------------
# Hidden Markov Model

## What is a Hidden Markov Model?

A Hidden Markov Model (HMM) is a statistical Markov model in which the system being modeled is assumed to be a Markov process with hidden (unobserved) states.

An HMM allows us to talk about both observed events (like the words we see in the input) and hidden events (like Part-Of-Speech tags).

An HMM is specified by the following components:

![Markov Model Parameters](assets/model.png)

**State Transition Probabilities** are the probabilities of moving from state i to state j.

![State Transition Probability](assets/state.png)

**Observation Probability Matrix**, also called the emission probabilities, expresses the probability of an observation O_t being generated from a state i.

![Observation Probability Matrix](assets/observation.png)

**Initial State Distribution** $\pi_i$ is the probability that the Markov chain will start in state i. A state j with $\pi_j = 0$ cannot be an initial state.
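To make these components concrete, here is a small illustrative NumPy sketch (independent of the repository's `hmm` module) that writes down the Rainy/Sunny example used in ```main.py``` and evaluates an observation sequence with the forward algorithm; the uniform initial distribution is an assumption made only for this sketch:

```python
import numpy as np

# Hidden states: Rainy, Sunny; observations: Sad, Happy
A  = np.array([[0.5, 0.5],    # state transition probabilities a_ij
               [0.3, 0.7]])
B  = np.array([[0.8, 0.2],    # emission probabilities: P(Sad | state), P(Happy | state)
               [0.4, 0.6]])
pi = np.array([0.5, 0.5])     # initial state distribution (assumed uniform here)

obs = [0, 0, 1]               # observation sequence: Sad, Sad, Happy

# Forward algorithm: alpha[i] = P(o_1..o_t, state_t = i)
alpha = pi * B[:, obs[0]]
for o in obs[1:]:
    alpha = (alpha @ A) * B[:, o]

print("P(observation sequence) =", alpha.sum())
```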
Hence, the entire Hidden Markov Model can be described as:

![Initial State Distribution](assets/initial.png)

## Example

For the example in ```main.py``` the Hidden Markov Model is as follows:

![Output Image](outputs/HMM.dot.png)

--------------------------------------------------------------------------------
/K Nearest Neighbors/k-nearest neighbors (KNN).py:
--------------------------------------------------------------------------------
# %% [code]
import pandas as pd
import numpy as np

def dist(x1, x2):
    return np.sqrt(sum((x1 - x2)**2))  # Euclidean distance between two points

# main algorithm
def knn(X, Y, queryPoint, k=5):

    vals = []  # list collecting (distance, label) pairs
    m = X.shape[0]

    for i in range(m):
        d = dist(queryPoint, X[i])
        vals.append((d, Y[i]))  # appending all distances

    # sorting the list by distance
    vals = sorted(vals)
    # choose the first k distances
    vals = vals[:k]

    vals = np.array(vals)

    new_vals = np.unique(vals[:, 1], return_counts=True)

    index = new_vals[1].argmax()
    pred = new_vals[0][index]

    return pred


## For testing purposes
'''
## Importing libraries

import sklearn.datasets
import matplotlib.pyplot as plt

## creating dataset

x, y = sklearn.datasets.make_classification(n_samples=1000, n_classes=2,
        n_clusters_per_class=1, n_features=2, n_informative=2, n_redundant=0, n_repeated=0)


## Visualization

query_p = np.array([0.5, 0.5])
plt.scatter(query_p[0], query_p[1], c='r')  ## plot the query point
plt.scatter(x[:, 0], x[:, 1], c=y)
plt.show()


## testing the algorithm

result = knn(x, y, query_p)  ### query point ==> x = 0.5, y = 0.5
print(result)
'''

--------------------------------------------------------------------------------
/Spectral Clustering/README.md:
--------------------------------------------------------------------------------
# SPECTRAL CLUSTERING

## Introduction

Spectral Clustering treats each data point as a graph node and thus transforms the clustering problem into a graph-partitioning problem. A typical implementation consists of three fundamental steps:

1. Pre-processing

▪ Construct a matrix representation of the graph.

2. Decomposition

▪ Compute eigenvalues and eigenvectors of the matrix.

▪ Map each point to a lower-dimensional representation based on one or more eigenvectors.

3. Grouping

▪ Assign points to two or more clusters, based on the new representation.

Clustering techniques like K-Means assume that the points assigned to a cluster are spherical about the cluster centre. This is a strong assumption and may not always hold. In such cases, Spectral Clustering helps create more accurate clusters: thanks to the dimensionality reduction, it can correctly group observations that belong to the same cluster even when they lie farther apart than observations in other clusters.

## Advantages

▪ Elegant and well-founded mathematically.

▪ Works quite well when relations are approximately transitive (like similarity).

## Disadvantages

▪ Very noisy datasets cause problems; performance can drop suddenly from good to terrible.

▪ Expensive for very large datasets.
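## Example

The three steps above can be illustrated with a short, self-contained sketch using NumPy and scikit-learn (independent of `spectral_clustering.py` in this folder; the dataset and the RBF width `gamma` are arbitrary choices for demonstration):

```python
import numpy as np
from sklearn.datasets import make_moons
from sklearn.cluster import KMeans

X, _ = make_moons(n_samples=300, noise=0.05, random_state=0)

# 1. Pre-processing: build a similarity (affinity) matrix of the graph
gamma = 10.0
sq_dists = np.sum((X[:, None, :] - X[None, :, :]) ** 2, axis=-1)
W = np.exp(-gamma * sq_dists)          # RBF similarity between every pair of points
np.fill_diagonal(W, 0)

# 2. Decomposition: eigenvectors of the normalized graph Laplacian
d = W.sum(axis=1)
D_inv_sqrt = np.diag(1.0 / np.sqrt(d))
L_sym = np.eye(len(X)) - D_inv_sqrt @ W @ D_inv_sqrt
eigvals, eigvecs = np.linalg.eigh(L_sym)
U = eigvecs[:, :2]                     # map each point to the 2 smallest eigenvectors

# 3. Grouping: run K-Means on the rows of the low-dimensional embedding
U = U / np.linalg.norm(U, axis=1, keepdims=True)
labels = KMeans(n_clusters=2, n_init=10, random_state=0).fit_predict(U)
print(labels[:20])
```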
## References

▪ https://www.absolutdata.com/learn-analytics-whitepapers-webinars/spectral-clustering/

▪ https://www.geeksforgeeks.org/ml-spectral-clustering/

▪ http://cobweb.cs.uga.edu/~squinn/mmd_s15/lectures/lecture10_feb4.pdf

--------------------------------------------------------------------------------
/Perceptron/perceptron_training.py:
--------------------------------------------------------------------------------
# Perceptron
# Maths behind Perceptron Training
# -------- MODEL AND HELPER FUNCTIONS ---------
# The sigmoid function is an activation function (denoted sigma(z)) whose output lies in the range 0 to 1:
# 0 corresponds to a highly negative input and 1 to a highly positive input.
# It is useful when one is interested in a probability mapping rather than the precise value of the input.
# Note: this implementation uses a unit step activation (_unit_step_func below), which outputs 1 for a
# non-negative weighted input and 0 otherwise.

import numpy as np

class Perceptron:

    def __init__(self, learning_rate=0.01, n_iters=500):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.activation_func = self._unit_step_func
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0
        y_ = np.array([1 if i > 0 else 0 for i in y])
        for _ in range(self.n_iters):
            for idx, x_i in enumerate(X):
                linear_output = np.dot(x_i, self.weights) + self.bias
                y_predicted = self.activation_func(linear_output)
                update = self.lr * (y_[idx] - y_predicted)

                self.weights += update * x_i
                self.bias += update

    def predict(self, X):
        linear_output = np.dot(X, self.weights) + self.bias
        y_predicted = self.activation_func(linear_output)
        return y_predicted

    def _unit_step_func(self, x):
        return np.where(x >= 0, 1, 0)

    def accuracy(self, y_true, y_pred):  # instance method, called as p.accuracy(...) in perceptron_test.py
        accuracy = np.sum(y_true == y_pred) / len(y_true)
        return accuracy

--------------------------------------------------------------------------------
/Ridge Regression/Ridge Regression- Base.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np


# In[1]:


class Ridge_Regression():  # defining a class named Ridge_Regression

    def __init__(self, iteration, lam, alpha):  # the __init__ method builds the constructor and initialises the parameters

        self.iteration = iteration  # number of iterations
        self.lam = lam              # value for lambda (regularisation strength)
        self.alpha = alpha          # alpha, the learning rate

    def fit(self, x, y):

        self.m = x.shape[0]  # getting the no. of data points

        # initialising weights on the basis of the number of input parameters

        self.w = np.zeros((x.shape[1], 1))
        self.b = 0
        self.x = x
        self.y = y.reshape(-1, 1)  # ensure y is a column vector so the residuals broadcast correctly

        for i in range(self.iteration):

            yi = np.dot(x, self.w) + self.b  # calculating the predicted values

            residuals = self.y - yi  # calculating the residuals

            # calculating gradients

            gradient_w = (-2 * np.dot(x.T, residuals) + 2 * self.w * self.lam) / self.m

            gradient_b = -2 * np.sum(residuals) / self.m

            # updating weights

            self.w = self.w - self.alpha * gradient_w
            self.b = self.b - self.alpha * gradient_b

        return self

    def predict(self, x):

        return np.dot(x, self.w) + self.b


# In[ ]:
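# Illustrative usage (a sketch in the style of the test case in PCA.py; the synthetic
# data and the hyperparameter values below are arbitrary assumptions, not part of the
# original script).
"""
Test case

np.random.seed(0)
x = np.random.rand(100, 3)                        # 100 samples, 3 features
true_w = np.array([[2.0], [-1.0], [0.5]])
y = x.dot(true_w) + 0.1 * np.random.randn(100, 1)

model = Ridge_Regression(iteration=1000, lam=0.1, alpha=0.05)
model.fit(x, y)
print(model.predict(x[:5]))                       # predictions for the first five rows
"""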
--------------------------------------------------------------------------------
/Decision Tree/README.md:
--------------------------------------------------------------------------------
# DECISION TREE

## Introduction
Decision Tree is a supervised learning algorithm that can perform both classification and regression tasks. The goal of using a Decision Tree is to build a training model that can predict the class or value of the target variable based on decision rules inferred from the training data.

![plot1](./Decision_Tree.jpg)

In a decision tree, a node represents an attribute, each branch represents a decision rule and each leaf represents an outcome.

## Steps Involved in Building a Decision Tree

1. Splitting - *Partitioning the dataset based on various factors*.
2. Pruning - *Removing branches that use attributes of low importance*.
3. Tree Selection - *Finding the tree that fits the data well based on the cross-validated error*.

![plot2](./Decision_Tree.png)

## Some Algorithms used in Decision Trees

- Classification and Regression Trees (CART), which uses the **Gini Index** as its metric.
- Iterative Dichotomiser 3 (ID3), which uses **Entropy** and **Information Gain** as its metrics.

## Advantages

- Easy interpretation
- No normalization needed
- Requires little data preprocessing
- Fast for inference


## Disadvantages

- Tends to overfit.
- Training is relatively expensive.
- A small change in the data can cause instability.

## References

- https://towardsdatascience.com/decision-trees-in-machine-learning-641b9c4e8052
- https://machinelearningmastery.com/implement-decision-tree-algorithm-scratch-python/
- https://www.youtube.com/watch?v=PHxYNGo8NcI&t=546s
- https://www.youtube.com/watch?v=wr9gUr-eWdA

--------------------------------------------------------------------------------
/Multiple Linear Regression/README.md:
--------------------------------------------------------------------------------
# 📈 MULTIPLE LINEAR REGRESSION

## Introduction

In Multiple Linear Regression, the target variable (Y) is a linear combination of multiple predictor variables x1, x2, x3, ..., xn. It is an extension of Simple Linear Regression as it takes more than one predictor variable to predict the response variable.

The equation for multiple linear regression:
Y = b0 + b1x1 + b2x2 + b3x3 + ... + bnxn

Where,

Y = Output/Response variable

b0, b1, b2, b3, ..., bn = Coefficients of the model

x1, x2, x3, x4, ..., xn = Independent/feature variables

![](https://cdn-images-1.medium.com/max/800/1*r3aOsJoXHX7uC2nxn2lygQ.png)
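For example, given a matrix of predictor variables, the coefficients b0, ..., bn can be estimated with ordinary least squares. A minimal NumPy sketch (the synthetic data below is purely illustrative and not part of this repository's implementation):

```python
import numpy as np

np.random.seed(1)
n = 200
X = np.random.rand(n, 3)                        # three predictor variables x1, x2, x3
y = 4.0 + 2.0*X[:, 0] - 1.5*X[:, 1] + 0.5*X[:, 2] + 0.1*np.random.randn(n)

Xb = np.c_[np.ones(n), X]                       # prepend a column of ones for the intercept b0
coeffs, *_ = np.linalg.lstsq(Xb, y, rcond=None)

b0, b1, b2, b3 = coeffs
print(b0, b1, b2, b3)                           # close to 4.0, 2.0, -1.5, 0.5
```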
## Assumptions for Multiple Linear Regression

1. A linear relationship must exist between the target and predictor variables.

2. The regression residuals must be normally distributed.

3. The algorithm assumes little or no multicollinearity in the data.

## Advantages

▪ Multiple Linear Regression is simple to implement, and the output coefficients are easy to interpret.

▪ Although Linear Regression is susceptible to over-fitting, this can be mitigated using dimensionality reduction techniques, regularization (L1 and L2) and cross-validation.

## Disadvantages

▪ Outliers can have a large effect on the regression, and the boundaries produced by this technique are linear.

▪ Linear Regression is not a complete description of relationships among variables.

## References

▪ https://www.javatpoint.com/multiple-linear-regression-in-machine-learning

▪ https://www.geeksforgeeks.org/ml-advantages-and-disadvantages-of-linear-regression/

--------------------------------------------------------------------------------
/Hierarchical Clustering/implementation.py:
--------------------------------------------------------------------------------
## Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


## Importing the dataset
dataset = pd.read_csv('Mall_Customers.csv')
X = dataset.iloc[:, [3, 4]].values


## Dataset information (Pandas Profiling)
import pandas_profiling as pp
import warnings
warnings.filterwarnings('ignore')
# %matplotlib inline  (IPython magic; only valid when run inside a notebook)
pp.ProfileReport(dataset, title = 'Pandas Profiling report of dataset')


## Using the dendrogram to find the optimal number of clusters
import scipy.cluster.hierarchy as sch
dendrogram = sch.dendrogram(sch.linkage(X, method = 'ward'))
plt.title('Dendrogram')
plt.xlabel('Customers')
plt.ylabel('Euclidean distances')
plt.show()


## Training the Hierarchical Clustering model on the dataset
from sklearn.cluster import AgglomerativeClustering
hc = AgglomerativeClustering(n_clusters = 5, affinity = 'euclidean', linkage = 'ward')
y_hc = hc.fit_predict(X)


## Visualising the Hierarchical clusters
plt.scatter(X[y_hc == 0, 0], X[y_hc == 0, 1], s = 50, c = 'red', label = 'Cluster 1')
plt.scatter(X[y_hc == 1, 0], X[y_hc == 1, 1], s = 50, c = 'blue', label = 'Cluster 2')
plt.scatter(X[y_hc == 2, 0], X[y_hc == 2, 1], s = 50, c = 'green', label = 'Cluster 3')
plt.scatter(X[y_hc == 3, 0], X[y_hc == 3, 1], s = 50, c = 'cyan', label = 'Cluster 4')
plt.scatter(X[y_hc == 4, 0], X[y_hc == 4, 1], s = 50, c = 'magenta', label = 'Cluster 5')
# (AgglomerativeClustering does not expose cluster centres, so there are no centroids to plot)
plt.title('Clusters of customers')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.legend()
plt.show()

--------------------------------------------------------------------------------
/DBSCAN/dbscan.py:
--------------------------------------------------------------------------------
# importing libraries
from sklearn.datasets import make_blobs
import numpy as np
import matplotlib.pyplot as plt

class cluster:

    x, _ = make_blobs(n_samples=500, n_features=2, centers=4, random_state=19)
    eps = 4
    minpts = 5
    D = x

    def update_labels(x, pt, eps, labels, cluster_val):
        neighbors = []
        label_index = []
        for i in range(0, x.shape[0]):

            if np.linalg.norm(x[pt]-x[i])