├── .gitattributes ├── logo.png ├── requirements.txt ├── datasets ├── voicegender.zip ├── iris.data ├── iris_data.csv └── diabetes_data.csv ├── Logistic Regression └── README.md ├── Apriori algorithm ├── README.md └── apriori_algorithm.py ├── Naive Bayes ├── README.md └── Naive Bayes.ipynb ├── Decision Trees └── README.md ├── K-Nearest Neigbors ├── README.md ├── KNN-Classifier.ipynb └── KNN_weighted_classification.ipynb ├── .gitignore ├── Linear Regression └── README.md ├── Principal Component Analysis ├── README.md └── dimensionality reduction.ipynb ├── Random Forest ├── decision_tree.py └── random_forest.ipynb ├── K-means └── Spiral.txt └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/veb-101/Machine-Learning-Algorithms/HEAD/logo.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pip 2 | numpy 3 | pandas 4 | scipy 5 | scikit-learn 6 | matplotlib 7 | seaborn 8 | -------------------------------------------------------------------------------- /datasets/voicegender.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/veb-101/Machine-Learning-Algorithms/HEAD/datasets/voicegender.zip -------------------------------------------------------------------------------- /Logistic Regression/README.md: -------------------------------------------------------------------------------- 1 | ## Logistic Regression 2 | _______________ 3 | 4 | * Articles Used 5 | * [x] [Logistic Regression - 
YouTube](https://www.youtube.com/playlist?list=PLblh5JKOoLUKxzEP5HA2d-Li7IJkHfXSe) 6 | * [x] [TLM | Logistic Regression](https://www.thelearningmachine.ai/logistic) 7 | * [x] [Logistic regression - Wikipedia](https://en.wikipedia.org/wiki/Logistic_regression) 8 | * [x] [Maximum likelihood and gradient descent demonstration – Zlatan Kremonic](https://zlatankr.github.io/posts/2017/03/06/mle-gradient-descent) 9 | * [x] [An Introduction to Logistic Regression - Towards Data Science](https://towardsdatascience.com/an-introduction-to-logistic-regression-8136ad65da2e) 10 | * [x] [A Gentle Introduction to Logistic Regression With Maximum Likelihood Estimation](https://machinelearningmastery.com/logistic-regression-with-maximum-likelihood-estimation/) 11 | * [x] [Logistic model - Maximum likelihood](https://www.statlect.com/fundamentals-of-statistics/logistic-model-maximum-likelihood) 12 | -------------------------------------------------------------------------------- /Apriori algorithm/README.md: -------------------------------------------------------------------------------- 1 | # Apriori Algorithm (Association Rule Mining) 2 | 3 | -------- 4 | 5 | 6 | * Articles/videos used: 7 | * [x] [Apriori Algorithm : Know How to Find Frequent Itemsets | Edureka](https://www.edureka.co/blog/apriori-algorithm/) 8 | * [x] [Apriori Algorithm Explained | Association Rule Mining | Finding Frequent Itemset | Edureka - YouTube](https://www.youtube.com/watch?v=guVvtZ7ZClw) 9 | * [x] [Apriori Algorithm (Associated Learning) - Fun and Easy Machine Learning - YouTube](https://www.youtube.com/watch?v=WGlMlS_Yydk) 10 | * [x] [Apriori Algorithm - GeeksforGeeks](https://www.geeksforgeeks.org/apriori-algorithm/?ref=lbp) 11 | * [x] [Apriori Algorithm - GeeksforGeeks](https://www.geeksforgeeks.org/apriori-algorithm/) 12 | * [x] [Implementing Apriori algorithm in Python - GeeksforGeeks](https://www.geeksforgeeks.org/implementing-apriori-algorithm-in-python/) 13 | * [x] [Association Rule Mining via 
Apriori Algorithm in Python](https://stackabuse.com/association-rule-mining-via-apriori-algorithm-in-python/) 14 | * [ ] [Apriori Algorithm in Python - CodeSpeedy](https://www.codespeedy.com/apriori-algorithm-in-python/) 15 | * [ ] [Data Science Apriori Algorithm in Python- Market Basket Analysis - Intellipaat](https://intellipaat.com/blog/data-science-apriori-algorithm/) 16 | * [ ] [Apriori Algorithm from Scratch - Python](http://www.vucreations.com/articles/apriori-algorithm-from-scratch-Python.html) 17 | 18 | -------------------------------------------------------------------------------- /Naive Bayes/README.md: -------------------------------------------------------------------------------- 1 | # Gaussian Naive Bayes 2 | 3 | --- 4 | 5 | - Articles used 6 | - [x] [3blue1brown- Bayes theorem, and making probability intuitive](https://www.youtube.com/watch?v=HZGCoVF3YvM) 7 | - [x] [3blue1brown- The quick proof of Bayes' theorem](https://www.youtube.com/watch?v=U_85TaXbeIo) 8 | - [x] [Luis Serrano - Naive Bayes classifier: A friendly approach](https://www.youtube.com/watch?v=Q8l0Vip5YUw) 9 | - [x] [Andrew Ng Naive Bayes Generative Learning Algorithms](https://www.youtube.com/watch?v=z5UQyCESW64) 10 | - [x] [Andrew Ng Naive Bayes Text Classification](https://www.youtube.com/watch?v=NFd0ZQk5bR4) 11 | - [x] [Brandon Rohrer - How Bayes Theorem works](https://www.youtube.com/watch?v=5NMxiOGL39M) 12 | - [x] [Naive Bayes Classifiers](https://www.geeksforgeeks.org/naive-bayes-classifiers) 13 | - [x] [In Depth: Naive Bayes Classification](https://jakevdp.github.io/PythonDataScienceHandbook/05.05-naive-bayes.html) 14 | - [x] [Naive Bayes Classifier in Python | Naive Bayes Algorithm | Machine Learning Algorithm | Edureka](https://www.youtube.com/watch?v=vz_xuxYS2PM&t=11s) 15 | - [x] [How to Develop a Naive Bayes Classifier from Scratch in Python](https://machinelearningmastery.com/classification-as-conditional-probability-and-the-naive-bayes-algorithm//) 16 | - [x] [Naive 
Bayes Classifier From Scratch](https://chrisalbon.com/machine_learning/naive_bayes/naive_bayes_classifier_from_scratch/) 17 | - [x] [kDnuggets - Naive Bayes from Scratch using Python only – No Fancy Frameworks](https://www.kdnuggets.com/2018/10/naive-bayes-from-scratch-python.html) 18 | -------------------------------------------------------------------------------- /Decision Trees/README.md: -------------------------------------------------------------------------------- 1 | ## Decision Tree Classifier 2 | _______________ 3 | 4 | * Articles/videos read and used 5 | * [x] [StatQuest: Decision Trees](https://www.youtube.com/watch?v=7VeUPuFGJHk&list=PLblh5JKOoLUICTaGLRoHQDuF_7q2GfuJF&index=34) 6 | * [x] [Decision tree learning](https://en.wikipedia.org/wiki/Decision_tree_learning) 7 | * [x] [Decision Tree Algorithm | Decision Tree in Python | Machine Learning Algorithms | Edureka](https://www.youtube.com/watch?v=qDcl-FRnwSU) 8 | * [x] [Classification And Regression Trees for Machine Learning](https://machinelearningmastery.com/classification-and-regression-trees-for-machine-learning/) 9 | * [x] [How To Implement The Decision Tree Algorithm From Scratch In Python](https://machinelearningmastery.com/implement-decision-tree-algorithm-scratch-python/) 10 | * [x] [Clas - 5 Data Science Training | Decision Tree Classifier Explained | Edureka](https://www.youtube.com/watch?v=v3tsrs1wpi4) 11 | * [x] [Understanding Decision Trees for Classification in Python](https://www.kdnuggets.com/2019/08/understanding-decision-trees-classification-python.html) 12 | * [x] [A Simple Explanation of Information Gain and Entropy](https://victorzhou.com/blog/information-gain/) 13 | * [x] [A Simple Explanation of Gini Impurity](https://victorzhou.com/blog/gini-impurity/) 14 | * [x] [In-Depth: Decision Trees and Random Forests](https://jakevdp.github.io/PythonDataScienceHandbook/05.08-random-forests.html) 15 | * [x] [The Simple Math behind 3 Decision Tree Splitting 
criterions](https://towardsdatascience.com/the-simple-math-behind-3-decision-tree-splitting-criterions-85d4de2a75fe) 16 | -------------------------------------------------------------------------------- /K-Nearest Neigbors/README.md: -------------------------------------------------------------------------------- 1 | ### k-Nearest Neighbors README 2 | -------------------- 3 | 4 | * Articles used: 5 | 1. [A Detailed Introduction to K-Nearest Neighbor (KNN) Algorithm](https://saravananthirumuruganathan.wordpress.com/2010/05/17/a-detailed-introduction-to-k-nearest-neighbor-knn-algorithm/) 6 | 2. [A Complete Guide to K-Nearest-Neighbors with Applications in Python and R](https://kevinzakka.github.io/2016/07/13/k-nearest-neighbor/) 7 | 3. [Tutorial To Implement k-Nearest Neighbors in Python From Scratch](https://machinelearningmastery.com/tutorial-to-implement-k-nearest-neighbors-in-python-from-scratch/) 8 | 4. [A Practical Introduction to K-Nearest Neighbors Algorithm for Regression](https://www.analyticsvidhya.com/blog/2018/08/k-nearest-neighbor-introduction-regression-python/) 9 | 5. [Introduction to k-Nearest Neighbors: A powerful Machine Learning Algorithm](https://www.analyticsvidhya.com/blog/2018/03/introduction-k-neighbours-algorithm-clustering/) 10 | 6. [Wikipedia - k-nearest neighbors algorithm](https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm) 11 | 7. [Building a k-Nearest-Neighbors (k-NN) Model with Scikit-learn](https://towardsdatascience.com/building-a-k-nearest-neighbors-k-nn-model-with-scikit-learn-51209555453a) 12 | 8. [Understanding KNN(K-nearest neighbor) with example](https://kraj3.com.np/blog/2019/06/understanding-knnk-nearest-neighbor-with-example/) 13 | 9. [Dataset Extraction and analysis - Regression](https://towardsdatascience.com/linear-regression-on-boston-housing-dataset-f409b7e4a155) 14 | 10. 
[Weighted K-NN](https://www.geeksforgeeks.org/weighted-k-nn/) 15 | 16 | * Bias - Variance in K-NN 17 | * [Why does the variance decreases in KNN algorithm when we increase the K?](https://www.quora.com/Why-does-the-variance-decreases-in-KNN-algorithm-when-we-increase-the-K#) 18 | * [KNN: 1-nearest neighbor](https://stats.stackexchange.com/questions/151756/knn-1-nearest-neighbor/151770) 19 | * [Day 3 — K-Nearest Neighbors and Bias–Variance Tradeoff](https://medium.com/30-days-of-machine-learning/day-3-k-nearest-neighbors-and-bias-variance-tradeoff-75f84d515bdb) 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # IPython 79 | profile_default/ 80 | ipython_config.py 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # pipenv 86 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 87 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 88 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 89 | # install all needed dependencies. 
90 | #Pipfile.lock 91 | 92 | # celery beat schedule file 93 | celerybeat-schedule 94 | 95 | # SageMath parsed files 96 | *.sage.py 97 | 98 | # Environments 99 | .env 100 | .venv 101 | env/ 102 | venv/ 103 | ENV/ 104 | env.bak/ 105 | venv.bak/ 106 | 107 | # Spyder project settings 108 | .spyderproject 109 | .spyproject 110 | 111 | # Rope project settings 112 | .ropeproject 113 | 114 | # mkdocs documentation 115 | /site 116 | 117 | # mypy 118 | .mypy_cache/ 119 | .dmypy.json 120 | dmypy.json 121 | 122 | # Pyre type checker 123 | .pyre/ 124 | Decision Trees/.vscode/settings.json 125 | .vscode/settings.json 126 | -------------------------------------------------------------------------------- /Linear Regression/README.md: -------------------------------------------------------------------------------- 1 | - Linear Regression Articles and Videos 2 | 3 | - [x] [A BEGINNERS GUIDE TO REGRESSION TECHNIQUES](https://analyticsindiamag.com/a-beginners-guide-to-regression-techniques/) 4 | - [x] [Linear Regression Algorithm | Linear Regression in Python | Machine Learning Algorithm | Edureka - YouTube](https://www.youtube.com/watch?v=E5RjzSK0fvY) 5 | - [x] [In Depth: Linear Regression | Python Data Science Handbook](https://jakevdp.github.io/PythonDataScienceHandbook/05.06-linear-regression.html) 6 | - [x] [Linear Models - YouTube](https://www.youtube.com/playlist?list=PLblh5JKOoLUIzaEkCLIUxQFjPIlapw8nU) 7 | - [x] [Statistics 101: Linear Regression, The Very Basics - YouTube](https://www.youtube.com/watch?v=ZkjP5RJLQF4&list=PLIeGtxpvyG-LoKUpV0fSY8BGKIMIdmfCi&index=1) 8 | - [x] [How to Implement Linear Regression From Scratch in Python](https://machinelearningmastery.com/implement-linear-regression-stochastic-gradient-descent-scratch-python/) 9 | - [x] [Linear Regression using Python - Towards Data Science](https://towardsdatascience.com/linear-regression-using-python-b136c91bf0a2) 10 | - [x] [Mathematical explanation for Linear Regression working - 
GeeksforGeeks](https://www.geeksforgeeks.org/mathematical-explanation-for-linear-regression-working/) 11 | - [x] [Gradient Descent in Linear Regression - GeeksforGeeks](https://www.geeksforgeeks.org/gradient-descent-in-linear-regression/) 12 | - [x] [ML | Normal Equation in Linear Regression - GeeksforGeeks](https://www.geeksforgeeks.org/ml-normal-equation-in-linear-regression/) 13 | - [x] [Univariate Linear Regression in Python - GeeksforGeeks](https://www.geeksforgeeks.org/univariate-linear-regression-in-python/) 14 | - [x] [How to do Linear Regression and Logistic Regression in Machine Learning?](https://mlfromscratch.com/machine-learning-introduction-8-linear-regression-and-logistic-regression/#/) 15 | - [x] [Linear Regression (Python Implementation) - GeeksforGeeks](https://www.geeksforgeeks.org/linear-regression-python-implementation/) 16 | - [x] [ML | Multiple Linear Regression using Python - GeeksforGeeks](https://www.geeksforgeeks.org/ml-multiple-linear-regression-using-python/) 17 | - [x] [A Complete Tutorial on Ridge and Lasso Regression in Python](https://www.analyticsvidhya.com/blog/2016/01/complete-tutorial-ridge-lasso-regression-python/) 18 | - [x] [Python/linear_regression.py at master · TheAlgorithms/Python](https://github.com/TheAlgorithms/Python/blob/master/machine_learning/linear_regression.py) 19 | - [x] [Python | Linear Regression using sklearn - GeeksforGeeks](https://www.geeksforgeeks.org/python-linear-regression-using-sklearn/) 20 | - [ ] [ML | Locally weighted Linear Regression - GeeksforGeeks](https://www.geeksforgeeks.org/ml-locally-weighted-linear-regression/) 21 | - [x] [Statistics PL15 - Multiple Regression - YouTube](https://www.youtube.com/playlist?list=PLIeGtxpvyG-IqjoU8IiF0Yu1WtxNq_4z-) 22 | - [ ] [Statistics PL18 - Nonlinear Regression - YouTube](https://www.youtube.com/playlist?list=PLIeGtxpvyG-KE0M1r5cjbC_7Q_dVlKVq4) 23 | - [ ] [Isotonic Regression is THE Coolest Machine-Learning Model You Might Not Have Heard 
Of](https://towardsdatascience.com/isotonic-regression-is-the-coolest-machine-learning-model-you-might-not-have-heard-of-3ce14afc6d1e) 24 | 25 | - For 3-D plot: 26 | - [Multiple linear regression with Python, numpy, matplotlib, plot in 3d](https://www.aiproblog.com/index.php/forums/topic/multiple-linear-regression-with-python-numpy-matplotlib-plot-in-3d/) 27 | -------------------------------------------------------------------------------- /Principal Component Analysis/README.md: -------------------------------------------------------------------------------- 1 | # Principal Component Analysis 2 | 3 | --- 4 | **Articles Used** 5 | 6 | * PCA 7 | * [x] [Principal Component Analysis](https://sebastianraschka.com/Articles/2015_pca_in_3_steps.html#preparing-the-iris-dataset) 8 | * [x] [Implementing a Principal Component Analysis (PCA)](https://sebastianraschka.com/Articles/2014_pca_step_by_step.html#4-computing-eigenvectors-and-corresponding-eigenvalues) 9 | * [x] [Kernel tricks and nonlinear dimensionality reduction via RBF kernel PCA](https://sebastianraschka.com/Articles/2014_kernel_pca.html) 10 | * [x] [The Mathematics Behind Principal Component Analysis](https://towardsdatascience.com/the-mathematics-behind-principal-component-analysis-fff2d7f4b643) 11 | * [x] [A tutorial on Principal Components Analysis](http://www.cs.otago.ac.nz/cosc453/student_tutorials/principal_components.pdf) 12 | * [x] [Principal Component Analysis - Youtube](https://www.youtube.com/playlist?list=PLBv09BD7ez_5_yapAg86Od6JeeypkS4YM) 13 | * [x] [Dimensionality Reduction For Dummies — Part 1: Intuition](https://towardsdatascience.com/https-medium-com-abdullatif-h-dimensionality-reduction-for-dummies-part-1-a8c9ec7b7e79) 14 | * [x] [Data Analysis 6: Principal Component Analysis (PCA) - Computerphile](https://www.youtube.com/watch?v=TJdH6rPA-TI) 15 | * [x] [Visual Explanation of Principal Component Analysis, Covariance, SVD](https://www.youtube.com/watch?v=5HNr_j6LmPc) 16 | * [x] [luis serrano 
pca](https://www.youtube.com/watch?v=g-Hb26agBFg) 17 | * [ ] [Dimensionality reduction and PCA](https://www.youtube.com/playlist?list=PLBv09BD7ez_4InDh85LM_43Bsw0cFDHdN) 18 | * [x] [What is an intuitive explanation for PCA? - Quora](https://www.quora.com/What-is-an-intuitive-explanation-for-PCA) 19 | * [x] [What is an intuitive explanation of the relation between PCA and SVD?](https://www.quora.com/What-is-an-intuitive-explanation-of-the-relation-between-PCA-and-SVD) 20 | * [x] [Why don't people use SVD in PCA rather than eigen value decomposition? - Quora](https://www.quora.com/Why-dont-people-use-SVD-in-PCA-rather-than-eigen-value-decomposition) 21 | * [x] [In Depth: Principal Component Analysis](https://jakevdp.github.io/PythonDataScienceHandbook/05.09-principal-component-analysis.html) 22 | 23 | * SVD 24 | * [x] [Gilbert Strang - SVD](https://www.youtube.com/watch?v=rYz83XPxiZo) 25 | * [x] [You Don’t Know SVD (Singular Value Decomposition)](https://towardsdatascience.com/svd-8c2f72e264f) 26 | * [x] [(114) A geometrical interpretation of the SVD - YouTube](https://www.youtube.com/watch?v=NsNNI_-JPUY) 27 | * [ ] [SVD playlist](https://www.youtube.com/playlist?list=PLMrJAkhIeNNSVjnsviglFoY2nXildDCcv) 28 | * [ ] [Gilbert Strang - Computing Eigenvalues and Singular Values](https://www.youtube.com/watch?v=d32WV1rKoVk) 29 | * [x] [Gilbert Strang - Singular Value Decomposition](https://www.youtube.com/watch?v=mBcLRGuAFUk) 30 | * [x] [Computing the SVD](https://www.youtube.com/watch?v=cOUTpqlX-Xs&t=22s) 31 | * [x] [Lecture 47 — Singular Value Decomposition | Stanford University](https://www.youtube.com/watch?v=P5mlg91as1c) 32 | * [x] [How to Calculate the Singular-Value Decomposition (SVD) from Scratch with Python](https://machinelearningmastery.com/singular-value-decomposition-for-machine-learning/) 33 | * [x] [What is an intuitive explanation of singular value decomposition (SVD)? 
from collections import Counter

import numpy as np


def giniImpurity(y):
    """Return the Gini impurity of a label array.

    The impurity is ``sum(p_k * (1 - p_k))`` over the relative frequency
    ``p_k`` of every label value, so it is 0 for a pure array.

    Args:
        y: 1-D array of non-negative integer class labels (a requirement of
            ``np.bincount``). Must be non-empty, otherwise the frequency
            computation divides by zero.

    Returns:
        The impurity as a NumPy float.
    """
    hist = np.bincount(y)
    ps = hist / len(y)
    return np.sum(ps * (1 - ps))


class Node:
    """A single node of the decision tree.

    Internal nodes carry ``feature``, ``threshold``, ``left`` and ``right``;
    leaf nodes carry only ``value`` (the predicted label).
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, *, value=None):
        self.feature = feature      # column index tested at this node
        self.threshold = threshold  # samples with x[feature] <= threshold go left
        self.left = left            # child Node for the "<=" branch
        self.right = right          # child Node for the ">" branch
        self.value = value          # predicted label; set only on leaves

    def is_leaf_node(self):
        """Return True when this node stores a prediction (``value`` is set)."""
        return self.value is not None


class DecisionTree:
    """Classification tree grown greedily by maximising the Gini gain.

    Args:
        min_samples_split: Nodes with fewer samples than this become leaves.
        max_depth: Nodes at this depth (root is depth 0) become leaves.
        n_feats: Number of features sampled (without replacement) at every
            node; ``None`` or 0 means "use all features". Intended for use by
            a random forest.
    """

    def __init__(self, min_samples_split=5, max_depth=100, n_feats=None):
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth
        self.n_feats = n_feats  # for random forest, choosing a subset of features
        self.root = None

    def fit(self, X, y):
        """Grow the tree on the training data and store it in ``self.root``.

        Args:
            X: 2-D NumPy array of shape (n_samples, n_features).
            y: 1-D NumPy array of non-negative integer labels, one per row of X.
        """
        # Clamp the per-node feature sample size to the number of columns.
        self.n_feats = X.shape[1] if not self.n_feats else min(
            self.n_feats, X.shape[1])
        self.root = self._grow_tree(X, y)

    def _grow_tree(self, X, y, depth=0):
        """Recursively build and return the subtree for the samples (X, y)."""
        n_samples, n_features = X.shape
        n_labels = len(np.unique(y))

        # stopping criteria
        if (depth >= self.max_depth or n_labels == 1
                or n_samples < self.min_samples_split):
            return Node(value=self._most_common_label(y))

        feat_idxs = np.random.choice(n_features, self.n_feats, replace=False)

        # greedily select the best split according to the gini gain
        best_feat, best_thresh = self._best_criteria(X, y, feat_idxs)
        left_idxs, right_idxs = self._split(X[:, best_feat], best_thresh)

        # When every sampled feature is constant on this node, the only
        # available "split" sends all samples to one side. Recursing would
        # repeat the same work down to max_depth and then fail on the empty
        # side, so stop here and emit a majority-vote leaf instead.
        if len(left_idxs) == 0 or len(right_idxs) == 0:
            return Node(value=self._most_common_label(y))

        # grow the children that result from the split
        left = self._grow_tree(X[left_idxs, :], y[left_idxs], depth + 1)
        right = self._grow_tree(X[right_idxs, :], y[right_idxs], depth + 1)
        return Node(best_feat, best_thresh, left, right)

    def _best_criteria(self, X, y, feat_idxs):
        """Return ``(feature_index, threshold)`` with the highest Gini gain.

        Every unique value of every candidate column is tried as a threshold;
        ties keep the first candidate encountered.
        """
        best_gain = -1
        split_idx, split_thresh = None, None
        for feat_idx in feat_idxs:
            X_column = X[:, feat_idx]
            for threshold in np.unique(X_column):
                gain = self._gini_gain(y, X_column, threshold)
                if gain > best_gain:
                    best_gain = gain
                    split_idx = feat_idx
                    split_thresh = threshold

        return split_idx, split_thresh

    def _gini_gain(self, y, X_column, split_thresh):
        """Return the impurity reduction from splitting at ``split_thresh``.

        Returns 0 when the split would leave one side empty.
        """
        parent_gini_impurity = giniImpurity(y)

        left_idxs, right_idxs = self._split(X_column, split_thresh)
        if len(left_idxs) == 0 or len(right_idxs) == 0:
            return 0

        # size-weighted average impurity of the two children
        n = len(y)
        n_l, n_r = len(left_idxs), len(right_idxs)
        g_l, g_r = giniImpurity(y[left_idxs]), giniImpurity(y[right_idxs])
        child_gini_impurity = (n_l / n) * g_l + (n_r / n) * g_r

        # gini gain is difference in loss before vs. after split
        return parent_gini_impurity - child_gini_impurity

    def _split(self, X_column, split_thresh):
        """Return row indices with value ``<= split_thresh`` and ``> split_thresh``."""
        left_idxs = np.argwhere(X_column <= split_thresh).flatten()
        right_idxs = np.argwhere(X_column > split_thresh).flatten()
        return left_idxs, right_idxs

    def _most_common_label(self, y):
        """Return the most frequent label in ``y`` (``y`` must be non-empty)."""
        counter = Counter(y)
        return counter.most_common(1)[0][0]

    def predict(self, X):
        """Return an array with the predicted label for every row of ``X``."""
        return np.array([self._traverse_tree(x, self.root) for x in X])

    def _traverse_tree(self, x, node):
        """Walk from ``node`` to a leaf for sample ``x`` and return its label."""
        if node.is_leaf_node():
            return node.value

        if x[node.feature] <= node.threshold:
            return self._traverse_tree(x, node.left)
        return self._traverse_tree(x, node.right)
| 5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 
81 | 5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor 101 | 6.3,3.3,6.0,2.5,Iris-virginica 102 | 5.8,2.7,5.1,1.9,Iris-virginica 103 | 7.1,3.0,5.9,2.1,Iris-virginica 104 | 6.3,2.9,5.6,1.8,Iris-virginica 105 | 6.5,3.0,5.8,2.2,Iris-virginica 106 | 7.6,3.0,6.6,2.1,Iris-virginica 107 | 4.9,2.5,4.5,1.7,Iris-virginica 108 | 7.3,2.9,6.3,1.8,Iris-virginica 109 | 6.7,2.5,5.8,1.8,Iris-virginica 110 | 7.2,3.6,6.1,2.5,Iris-virginica 111 | 6.5,3.2,5.1,2.0,Iris-virginica 112 | 6.4,2.7,5.3,1.9,Iris-virginica 113 | 6.8,3.0,5.5,2.1,Iris-virginica 114 | 5.7,2.5,5.0,2.0,Iris-virginica 115 | 5.8,2.8,5.1,2.4,Iris-virginica 116 | 6.4,3.2,5.3,2.3,Iris-virginica 117 | 6.5,3.0,5.5,1.8,Iris-virginica 118 | 7.7,3.8,6.7,2.2,Iris-virginica 119 | 7.7,2.6,6.9,2.3,Iris-virginica 120 | 6.0,2.2,5.0,1.5,Iris-virginica 121 | 6.9,3.2,5.7,2.3,Iris-virginica 122 | 5.6,2.8,4.9,2.0,Iris-virginica 123 | 7.7,2.8,6.7,2.0,Iris-virginica 124 | 6.3,2.7,4.9,1.8,Iris-virginica 125 | 6.7,3.3,5.7,2.1,Iris-virginica 126 | 7.2,3.2,6.0,1.8,Iris-virginica 127 | 6.2,2.8,4.8,1.8,Iris-virginica 128 | 6.1,3.0,4.9,1.8,Iris-virginica 129 | 6.4,2.8,5.6,2.1,Iris-virginica 130 | 7.2,3.0,5.8,1.6,Iris-virginica 131 | 7.4,2.8,6.1,1.9,Iris-virginica 132 | 7.9,3.8,6.4,2.0,Iris-virginica 133 | 6.4,2.8,5.6,2.2,Iris-virginica 134 | 6.3,2.8,5.1,1.5,Iris-virginica 
135 | 6.1,2.6,5.6,1.4,Iris-virginica 136 | 7.7,3.0,6.1,2.3,Iris-virginica 137 | 6.3,3.4,5.6,2.4,Iris-virginica 138 | 6.4,3.1,5.5,1.8,Iris-virginica 139 | 6.0,3.0,4.8,1.8,Iris-virginica 140 | 6.9,3.1,5.4,2.1,Iris-virginica 141 | 6.7,3.1,5.6,2.4,Iris-virginica 142 | 6.9,3.1,5.1,2.3,Iris-virginica 143 | 5.8,2.7,5.1,1.9,Iris-virginica 144 | 6.8,3.2,5.9,2.3,Iris-virginica 145 | 6.7,3.3,5.7,2.5,Iris-virginica 146 | 6.7,3.0,5.2,2.3,Iris-virginica 147 | 6.3,2.5,5.0,1.9,Iris-virginica 148 | 6.5,3.0,5.2,2.0,Iris-virginica 149 | 6.2,3.4,5.4,2.3,Iris-virginica 150 | 5.9,3.0,5.1,1.8,Iris-virginica 151 | 152 | -------------------------------------------------------------------------------- /datasets/iris_data.csv: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 4.7,3.2,1.3,0.2,Iris-setosa 4 | 4.6,3.1,1.5,0.2,Iris-setosa 5 | 5.0,3.6,1.4,0.2,Iris-setosa 6 | 5.4,3.9,1.7,0.4,Iris-setosa 7 | 4.6,3.4,1.4,0.3,Iris-setosa 8 | 5.0,3.4,1.5,0.2,Iris-setosa 9 | 4.4,2.9,1.4,0.2,Iris-setosa 10 | 4.9,3.1,1.5,0.1,Iris-setosa 11 | 5.4,3.7,1.5,0.2,Iris-setosa 12 | 4.8,3.4,1.6,0.2,Iris-setosa 13 | 4.8,3.0,1.4,0.1,Iris-setosa 14 | 4.3,3.0,1.1,0.1,Iris-setosa 15 | 5.8,4.0,1.2,0.2,Iris-setosa 16 | 5.7,4.4,1.5,0.4,Iris-setosa 17 | 5.4,3.9,1.3,0.4,Iris-setosa 18 | 5.1,3.5,1.4,0.3,Iris-setosa 19 | 5.7,3.8,1.7,0.3,Iris-setosa 20 | 5.1,3.8,1.5,0.3,Iris-setosa 21 | 5.4,3.4,1.7,0.2,Iris-setosa 22 | 5.1,3.7,1.5,0.4,Iris-setosa 23 | 4.6,3.6,1.0,0.2,Iris-setosa 24 | 5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 
5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 81 | 5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 
6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor 101 | 6.3,3.3,6.0,2.5,Iris-virginica 102 | 5.8,2.7,5.1,1.9,Iris-virginica 103 | 7.1,3.0,5.9,2.1,Iris-virginica 104 | 6.3,2.9,5.6,1.8,Iris-virginica 105 | 6.5,3.0,5.8,2.2,Iris-virginica 106 | 7.6,3.0,6.6,2.1,Iris-virginica 107 | 4.9,2.5,4.5,1.7,Iris-virginica 108 | 7.3,2.9,6.3,1.8,Iris-virginica 109 | 6.7,2.5,5.8,1.8,Iris-virginica 110 | 7.2,3.6,6.1,2.5,Iris-virginica 111 | 6.5,3.2,5.1,2.0,Iris-virginica 112 | 6.4,2.7,5.3,1.9,Iris-virginica 113 | 6.8,3.0,5.5,2.1,Iris-virginica 114 | 5.7,2.5,5.0,2.0,Iris-virginica 115 | 5.8,2.8,5.1,2.4,Iris-virginica 116 | 6.4,3.2,5.3,2.3,Iris-virginica 117 | 6.5,3.0,5.5,1.8,Iris-virginica 118 | 7.7,3.8,6.7,2.2,Iris-virginica 119 | 7.7,2.6,6.9,2.3,Iris-virginica 120 | 6.0,2.2,5.0,1.5,Iris-virginica 121 | 6.9,3.2,5.7,2.3,Iris-virginica 122 | 5.6,2.8,4.9,2.0,Iris-virginica 123 | 7.7,2.8,6.7,2.0,Iris-virginica 124 | 6.3,2.7,4.9,1.8,Iris-virginica 125 | 6.7,3.3,5.7,2.1,Iris-virginica 126 | 7.2,3.2,6.0,1.8,Iris-virginica 127 | 6.2,2.8,4.8,1.8,Iris-virginica 128 | 6.1,3.0,4.9,1.8,Iris-virginica 129 | 6.4,2.8,5.6,2.1,Iris-virginica 130 | 7.2,3.0,5.8,1.6,Iris-virginica 131 | 7.4,2.8,6.1,1.9,Iris-virginica 132 | 7.9,3.8,6.4,2.0,Iris-virginica 133 | 6.4,2.8,5.6,2.2,Iris-virginica 134 | 6.3,2.8,5.1,1.5,Iris-virginica 135 | 6.1,2.6,5.6,1.4,Iris-virginica 136 | 7.7,3.0,6.1,2.3,Iris-virginica 137 | 6.3,3.4,5.6,2.4,Iris-virginica 138 | 6.4,3.1,5.5,1.8,Iris-virginica 139 | 6.0,3.0,4.8,1.8,Iris-virginica 140 | 6.9,3.1,5.4,2.1,Iris-virginica 141 | 6.7,3.1,5.6,2.4,Iris-virginica 142 | 6.9,3.1,5.1,2.3,Iris-virginica 143 | 5.8,2.7,5.1,1.9,Iris-virginica 144 | 6.8,3.2,5.9,2.3,Iris-virginica 145 | 6.7,3.3,5.7,2.5,Iris-virginica 146 | 
6.7,3.0,5.2,2.3,Iris-virginica 147 | 6.3,2.5,5.0,1.9,Iris-virginica 148 | 6.5,3.0,5.2,2.0,Iris-virginica 149 | 6.2,3.4,5.4,2.3,Iris-virginica 150 | 5.9,3.0,5.1,1.8,Iris-virginica 151 | 152 | -------------------------------------------------------------------------------- /Apriori algorithm/apriori_algorithm.py: -------------------------------------------------------------------------------- 1 | # Apriori algorithm developed and used on a sample dataset 2 | # This code was written just for demonstration and learning purposes 3 | 4 | from collections import Counter 5 | from itertools import combinations 6 | import numpy as np 7 | 8 | # transactions = { 9 | # 1: ["a", "c", "d"], 10 | # 2: ["b", "c", "e"], 11 | # 3: ["a", "b", "c", "e"], 12 | # 5: ["b", "e"], 13 | # 6: ["a", "c", "e"] 14 | # } 15 | 16 | 17 | transactions = { 18 | 1: ["1", "3", "4"], 19 | 2: ["2", "3", "5"], 20 | 3: ["1", "2", "3", "5"], 21 | 5: ["2", "5"], 22 | 6: ["1", "3", "5"] 23 | } 24 | 25 | min_support_count = 2 26 | min_confidence_value = 0.6 27 | 28 | 29 | # apriori pruning concept 30 | def _pruning(current, previous, size): 31 | final_keys = [] 32 | previous = [tuple(i) for i in previous] 33 | for key in current: 34 | FLAG = False 35 | current_comb = list(combinations(key, size)) 36 | for i in current_comb: 37 | if i in previous or i[::-1] in previous: 38 | FLAG = True 39 | else: 40 | FLAG = False 41 | break 42 | 43 | if FLAG: 44 | final_keys.append(key) 45 | 46 | return final_keys 47 | 48 | 49 | def support_value(itemset_keys_, transactions): 50 | itemset = {key: 0 for key in itemset_keys_} 51 | 52 | for keys in itemset_keys_: 53 | for val in transactions.values(): 54 | if set(keys) & set(val) == set(keys): 55 | itemset[keys] += 1 56 | return itemset 57 | 58 | 59 | # creating frequent itemset 60 | def get_frequent_itemset(size=None, transactions=None, itemset=None): 61 | if size == 1: 62 | itemset = Counter() 63 | 64 | for val in transactions.values(): 65 | itemset.update(val) 66 | 67 | else: 
68 | 69 | prev_itemset_keys = list(itemset.keys()) 70 | prev_itemset = itemset.copy() 71 | 72 | valid_keys = list(set(itemset.keys())) 73 | # flatten list of tuple -> keys: [(), ()] -> [] 74 | # useful for running a combination of all the chosen features 75 | l = [] 76 | for row in valid_keys: 77 | l.extend(row) 78 | 79 | valid_keys = set(l) 80 | 81 | # candidate itemset keys 82 | itemset_keys_ = list(combinations(valid_keys, size)) 83 | 84 | # Apriori algorithm is based on the concept that a subset 85 | # of a frequent itemset must also be a frequent itemset 86 | # so we are pruning away those features whose subsets are not present 87 | # in the previous frequent itemset 88 | if size >= 2: 89 | itemset_keys_ = _pruning( 90 | itemset_keys_, prev_itemset_keys, size - 1) 91 | 92 | # finding support value for each of the selected itemset feature combination 93 | itemset = support_value(itemset_keys_, transactions) 94 | 95 | # defaulting back to the previous frequent itemset if 96 | # the iteration doesn't find any itemset which has the threshold required 97 | if itemset == {}: 98 | itemset = prev_itemset 99 | 100 | # getting frequent itemset from itemset 101 | # Frequent Itemset is an itemset whose support 102 | # value is greater than or equal to a threshold value (support).
103 | 104 | frequent_itemset = {} 105 | for key, val in itemset.items(): 106 | if val >= min_support_count: 107 | frequent_itemset[key] = val 108 | 109 | return frequent_itemset 110 | 111 | 112 | def finding_subsets(frequent_set): 113 | item_list = [] 114 | size = len(list(frequent_set.keys())[0]) 115 | for key in frequent_set.keys(): 116 | subsets = [] 117 | for i in range(1, size): 118 | subsets.append(list(combinations(key, i))) 119 | 120 | subsets = list(np.array(subsets).flatten()) 121 | subsets.insert(0, key) 122 | item_list.append(subsets) 123 | 124 | return item_list 125 | 126 | 127 | def finding_rules(itemset_sub): 128 | print("Antecedents --> Consequents --- Confidence") 129 | for i in range(1, len(itemset_sub)): 130 | 131 | # passing as list as we have designed support_value function as 132 | # a function that takes an iterable list of itemsets 133 | x = support_value([itemset_sub[0], ], transactions) 134 | y = support_value([itemset_sub[i], ], transactions) 135 | confidence = list(x.values())[0] / list(y.values())[0] 136 | if confidence >= min_confidence_value: 137 | print( 138 | f"{itemset_sub[i]} --> {itemset_sub[0]} --- {round(confidence, 2)}") 139 | 140 | 141 | print(""" 142 | ITEMS 143 | 1: Banana 144 | 2: Eggs 145 | 3: Milk 146 | 4: Tea 147 | 5: Bread 148 | 149 | """) 150 | 151 | f = {} 152 | 153 | for i in range(1, 5): 154 | f = get_frequent_itemset(size=i, transactions=transactions, 155 | itemset=f) 156 | 157 | # frequent_itemsets 158 | 159 | print("Frequent Itemsets...") 160 | for key, val in f.items(): 161 | print(f"Itemset: {key}, support value: {val}") 162 | 163 | 164 | subset = finding_subsets(f) 165 | 166 | for i in subset: 167 | print(f"Rules for itemset - {i[0]}") 168 | finding_rules(i) 169 | print() 170 | -------------------------------------------------------------------------------- /K-means/Spiral.txt: -------------------------------------------------------------------------------- 1 | f1 f2 label 2 | 31.95 7.95 3 3 | 31.15 7.3 3 4
| 30.45 6.65 3 5 | 29.7 6 3 6 | 28.9 5.55 3 7 | 28.05 5 3 8 | 27.2 4.55 3 9 | 26.35 4.15 3 10 | 25.4 3.85 3 11 | 24.6 3.6 3 12 | 23.6 3.3 3 13 | 22.75 3.15 3 14 | 21.85 3.05 3 15 | 20.9 3 3 16 | 20 2.9 3 17 | 19.1 3 3 18 | 18.2 3.2 3 19 | 17.3 3.25 3 20 | 16.55 3.5 3 21 | 15.7 3.7 3 22 | 14.85 4.1 3 23 | 14.15 4.4 3 24 | 13.4 4.75 3 25 | 12.7 5.2 3 26 | 12.05 5.65 3 27 | 11.45 6.15 3 28 | 10.9 6.65 3 29 | 10.3 7.25 3 30 | 9.7 7.85 3 31 | 9.35 8.35 3 32 | 8.9 9.05 3 33 | 8.55 9.65 3 34 | 8.15 10.35 3 35 | 7.95 10.95 3 36 | 7.75 11.7 3 37 | 7.55 12.35 3 38 | 7.45 13 3 39 | 7.35 13.75 3 40 | 7.3 14.35 3 41 | 7.35 14.95 3 42 | 7.35 15.75 3 43 | 7.55 16.35 3 44 | 7.7 16.95 3 45 | 7.8 17.55 3 46 | 8.05 18.15 3 47 | 8.3 18.75 3 48 | 8.65 19.3 3 49 | 8.9 19.85 3 50 | 9.3 20.3 3 51 | 9.65 20.8 3 52 | 10.2 21.25 3 53 | 10.6 21.65 3 54 | 11.1 22.15 3 55 | 11.55 22.45 3 56 | 11.95 22.7 3 57 | 12.55 23 3 58 | 13.05 23.2 3 59 | 13.45 23.4 3 60 | 14 23.55 3 61 | 14.55 23.6 3 62 | 15.1 23.75 3 63 | 15.7 23.75 3 64 | 16.15 23.85 3 65 | 16.7 23.8 3 66 | 17.15 23.75 3 67 | 17.75 23.75 3 68 | 18.2 23.6 3 69 | 18.65 23.5 3 70 | 19.1 23.35 3 71 | 19.6 23.15 3 72 | 20 22.95 3 73 | 20.4 22.7 3 74 | 20.7 22.55 3 75 | 21 22.15 3 76 | 21.45 21.95 3 77 | 21.75 21.55 3 78 | 22 21.25 3 79 | 22.25 21 3 80 | 22.5 20.7 3 81 | 22.65 20.35 3 82 | 22.75 20.05 3 83 | 22.9 19.65 3 84 | 23 19.35 3 85 | 23.1 19 3 86 | 23.15 18.65 3 87 | 23.2 18.25 3 88 | 23.2 18.05 3 89 | 23.2 17.8 3 90 | 23.1 17.45 3 91 | 23.05 17.15 3 92 | 22.9 16.9 3 93 | 22.85 16.6 3 94 | 22.7 16.4 3 95 | 22.6 16.2 3 96 | 22.55 16.05 3 97 | 22.4 15.95 3 98 | 22.35 15.8 3 99 | 22.2 15.65 3 100 | 22.15 15.55 3 101 | 22 15.4 3 102 | 21.9 15.3 3 103 | 21.85 15.25 3 104 | 21.75 15.15 3 105 | 21.65 15.05 3 106 | 21.55 15 3 107 | 21.5 14.9 3 108 | 19.35 31.65 1 109 | 20.35 31.45 1 110 | 21.35 31.1 1 111 | 22.25 30.9 1 112 | 23.2 30.45 1 113 | 23.95 30.05 1 114 | 24.9 29.65 1 115 | 25.6 29.05 1 116 | 26.35 28.5 1 117 | 27.15 27.9 1 118 | 
27.75 27.35 1 119 | 28.3 26.6 1 120 | 28.95 25.85 1 121 | 29.5 25.15 1 122 | 29.95 24.45 1 123 | 30.4 23.7 1 124 | 30.6 22.9 1 125 | 30.9 22.1 1 126 | 31.25 21.3 1 127 | 31.35 20.55 1 128 | 31.5 19.7 1 129 | 31.55 18.9 1 130 | 31.65 18.15 1 131 | 31.6 17.35 1 132 | 31.45 16.55 1 133 | 31.3 15.8 1 134 | 31.15 15.05 1 135 | 30.9 14.35 1 136 | 30.6 13.65 1 137 | 30.3 13 1 138 | 29.9 12.3 1 139 | 29.5 11.75 1 140 | 29 11.15 1 141 | 28.5 10.6 1 142 | 28 10.1 1 143 | 27.55 9.65 1 144 | 26.9 9.1 1 145 | 26.25 8.8 1 146 | 25.7 8.4 1 147 | 25.15 8.05 1 148 | 24.5 7.75 1 149 | 23.9 7.65 1 150 | 23.15 7.4 1 151 | 22.5 7.3 1 152 | 21.9 7.1 1 153 | 21.25 7.05 1 154 | 20.5 7 1 155 | 19.9 6.95 1 156 | 19.25 7.05 1 157 | 18.75 7.1 1 158 | 18.05 7.25 1 159 | 17.5 7.35 1 160 | 16.9 7.6 1 161 | 16.35 7.8 1 162 | 15.8 8.05 1 163 | 15.4 8.35 1 164 | 14.9 8.7 1 165 | 14.45 8.9 1 166 | 13.95 9.3 1 167 | 13.6 9.65 1 168 | 13.25 10.1 1 169 | 12.95 10.55 1 170 | 12.65 10.9 1 171 | 12.35 11.4 1 172 | 12.2 11.75 1 173 | 11.95 12.2 1 174 | 11.8 12.65 1 175 | 11.75 13.05 1 176 | 11.55 13.6 1 177 | 11.55 14 1 178 | 11.55 14.35 1 179 | 11.55 14.7 1 180 | 11.6 15.25 1 181 | 11.65 15.7 1 182 | 11.8 16.05 1 183 | 11.85 16.5 1 184 | 12 16.75 1 185 | 12.15 17.2 1 186 | 12.3 17.6 1 187 | 12.55 17.85 1 188 | 12.8 18.05 1 189 | 13.1 18.4 1 190 | 13.3 18.6 1 191 | 13.55 18.85 1 192 | 13.8 19.05 1 193 | 14.15 19.25 1 194 | 14.45 19.5 1 195 | 14.85 19.55 1 196 | 15 19.7 1 197 | 15.25 19.7 1 198 | 15.55 19.85 1 199 | 15.95 19.9 1 200 | 16.2 19.9 1 201 | 16.55 19.9 1 202 | 16.85 19.9 1 203 | 17.2 19.9 1 204 | 17.4 19.8 1 205 | 17.65 19.75 1 206 | 17.8 19.7 1 207 | 18 19.6 1 208 | 18.2 19.55 1 209 | 3.9 9.6 2 210 | 3.55 10.65 2 211 | 3.35 11.4 2 212 | 3.1 12.35 2 213 | 3.1 13.25 2 214 | 3.05 14.15 2 215 | 3 15.1 2 216 | 3.1 16 2 217 | 3.2 16.85 2 218 | 3.45 17.75 2 219 | 3.7 18.7 2 220 | 3.95 19.55 2 221 | 4.35 20.25 2 222 | 4.7 21.1 2 223 | 5.15 21.8 2 224 | 5.6 22.5 2 225 | 6.2 23.3 2 226 | 6.8 23.85 2 227 | 
7.35 24.45 2 228 | 8.05 24.95 2 229 | 8.8 25.45 2 230 | 9.5 26 2 231 | 10.2 26.35 2 232 | 10.9 26.75 2 233 | 11.7 27 2 234 | 12.45 27.25 2 235 | 13.3 27.6 2 236 | 14.05 27.6 2 237 | 14.7 27.75 2 238 | 15.55 27.75 2 239 | 16.4 27.75 2 240 | 17.1 27.75 2 241 | 17.9 27.75 2 242 | 18.55 27.7 2 243 | 19.35 27.6 2 244 | 20.1 27.35 2 245 | 20.7 27.1 2 246 | 21.45 26.8 2 247 | 22.05 26.5 2 248 | 22.7 26.15 2 249 | 23.35 25.65 2 250 | 23.8 25.3 2 251 | 24.3 24.85 2 252 | 24.75 24.35 2 253 | 25.25 23.95 2 254 | 25.65 23.45 2 255 | 26.05 23 2 256 | 26.2 22.3 2 257 | 26.6 21.8 2 258 | 26.75 21.25 2 259 | 27 20.7 2 260 | 27.15 20.15 2 261 | 27.15 19.6 2 262 | 27.35 19.1 2 263 | 27.35 18.45 2 264 | 27.4 18 2 265 | 27.3 17.4 2 266 | 27.15 16.9 2 267 | 27 16.4 2 268 | 27 15.9 2 269 | 26.75 15.35 2 270 | 26.55 14.85 2 271 | 26.3 14.45 2 272 | 25.95 14.1 2 273 | 25.75 13.7 2 274 | 25.35 13.3 2 275 | 25.05 12.95 2 276 | 24.8 12.7 2 277 | 24.4 12.45 2 278 | 24.05 12.2 2 279 | 23.55 11.85 2 280 | 23.2 11.65 2 281 | 22.75 11.4 2 282 | 22.3 11.3 2 283 | 21.9 11.1 2 284 | 21.45 11.05 2 285 | 21.1 11 2 286 | 20.7 10.95 2 287 | 20.35 10.95 2 288 | 19.95 11 2 289 | 19.55 11 2 290 | 19.15 11.05 2 291 | 18.85 11.1 2 292 | 18.45 11.25 2 293 | 18.15 11.35 2 294 | 17.85 11.5 2 295 | 17.5 11.7 2 296 | 17.2 11.95 2 297 | 17 12.05 2 298 | 16.75 12.2 2 299 | 16.65 12.35 2 300 | 16.5 12.5 2 301 | 16.35 12.7 2 302 | 16.2 12.8 2 303 | 16.15 12.95 2 304 | 16 13.1 2 305 | 15.95 13.25 2 306 | 15.9 13.4 2 307 | 15.8 13.5 2 308 | 15.8 13.65 2 309 | 15.75 13.85 2 310 | 15.65 14.05 2 311 | 15.65 14.25 2 312 | 15.65 14.5 2 313 | 15.65 14.6 2 314 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine Learning Algorithms 2 | 3 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/veb-101/Machine-Learning-Algorithms/master) [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/) [![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/veb-101/Machine-Learning-Algorithms/issues) [![PRs](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](https://github.com/veb-101/Machine-Learning-Algorithms/pulls) 4 | 5 | [![Try on repl.it](https://repl-badge.jajoosam.repl.co/try.png)](https://repl.it/repls/folder/machine%20learning%20algorithms?ref=button) 6 | 7 | --- 8 | 9 | #### Numpy implementation of some basic machine learning algorithms 10 | 11 | Image 12 | 13 | --- 14 | 15 | ##### Run Online 16 | 17 | | Sr. No. | Algorithm | View | Python File | Colab Notebook | 18 | | ------- | ----------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 19 | | 1.a | KNN - Classification | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/K-Nearest%20Neigbors/KNN-Classifier.ipynb) | [.py](https://repl.it/@VaibhavSingh4/1a-k-NN-classification) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/K-Nearest%20Neigbors/KNN-Classifier.ipynb) | 20 | | 1.b | KNN - Regression | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/K-Nearest%20Neigbors/KNN-Regression.ipynb) | 
[.py](https://repl.it/@VaibhavSingh4/1b-k-NN-Regression) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/K-Nearest%20Neigbors/KNN-Regression.ipynb) | 21 | | 1.c | KNN - Weighted Classification | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/K-Nearest%20Neigbors/KNN_weighted_classification.ipynb) | [.py](https://repl.it/@VaibhavSingh4/1c-KNN-weighted-classification) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/K-Nearest%20Neigbors/KNN_weighted_classification.ipynb) | 22 | | 2 | Linear Regression | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/Linear%20Regression/linear_regression.ipynb) | | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/Linear%20Regression/linear_regression.ipynb) | 23 | | 3 | Logistic Regression | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/Logistic%20Regression/logistic%20regression.ipynb) | [.py](https://repl.it/@VaibhavSingh4/Logistic-Regression) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/Logistic%20Regression/logistic%20regression.ipynb) | 24 | | 4 | Decision Trees | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/Decision%20Trees/decision%20tree.ipynb) | [.py](https://repl.it/@VaibhavSingh4/decision-tree) | [![Open In
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/Decision%20Trees/decision%20tree.ipynb) | 25 | | 5 | Naive Bayes | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/Naive%20Bayes/Naive%20Bayes.ipynb) | | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/Naive%20Bayes/Naive%20Bayes.ipynb) | 26 | | 6 | Random Forest | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/Random%20Forest/random_forest.ipynb) | | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/Random%20Forest/random_forest.ipynb) | 27 | | 7 | AdaBoost | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/Boosting%20-%20AdaBoost/adaboost.ipynb) | | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/Boosting%20-%20AdaBoost/adaboost.ipynb) | 28 | | 8 | K-means Clustering | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/K-means/K_means.ipynb) | [.py](https://repl.it/@VaibhavSingh4/k-means) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/K-means/K_means.ipynb) | 29 | | 9 | PCA | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/Principal%20Component%20Analysis/dimensionality%20reduction.ipynb) | | [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/Principal%20Component%20Analysis/dimensionality%20reduction.ipynb) | 30 | | 10 | Apriori Algorithm | | [.py](https://repl.it/@VaibhavSingh4/Apriori-Algorithm) | | -------------------------------------------------------------------------------- /Random Forest/random_forest.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "toc": true 7 | }, 8 | "source": [ 9 | "

Table of Contents

\n", 10 | "
" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": { 17 | "ExecuteTime": { 18 | "end_time": "2020-04-12T16:49:27.604267Z", 19 | "start_time": "2020-04-12T16:49:25.091457Z" 20 | } 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "import numpy as np\n", 25 | "\n", 26 | "from sklearn import datasets\n", 27 | "from sklearn.model_selection import train_test_split\n", 28 | "from sklearn.metrics import precision_score, recall_score, f1_score\n", 29 | "\n", 30 | "import pandas as pd\n", 31 | "\n", 32 | "from collections import Counter\n", 33 | "from decision_tree import DecisionTree" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "# Model Definition\n", 41 | "\n", 42 | "Using previously written Decision tree" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 2, 48 | "metadata": { 49 | "ExecuteTime": { 50 | "end_time": "2020-04-12T16:49:27.617602Z", 51 | "start_time": "2020-04-12T16:49:27.610032Z" 52 | } 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "def bootstrap_sample(X, y):\n", 57 | " n_samples = X.shape[0]\n", 58 | " idxs = np.random.choice(n_samples, n_samples, replace=True)\n", 59 | " return X[idxs], y[idxs]\n", 60 | "\n", 61 | "\n", 62 | "def most_common_label(y):\n", 63 | " counter = Counter(y)\n", 64 | " most_common = counter.most_common(1)[0][0]\n", 65 | " return most_common" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 3, 71 | "metadata": { 72 | "ExecuteTime": { 73 | "end_time": "2020-04-12T16:49:27.819326Z", 74 | "start_time": "2020-04-12T16:49:27.621157Z" 75 | } 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "class RandomForest:\n", 80 | "\n", 81 | " def __init__(self, n_trees=10, min_samples_split=2,\n", 82 | " max_depth=100, n_feats=None):\n", 83 | " self.n_trees = n_trees\n", 84 | " self.min_samples_split = min_samples_split\n", 85 | " self.max_depth = max_depth\n", 86 | " self.n_feats = n_feats\n", 87 | " 
self.trees = []\n", 88 | "\n", 89 | " def fit(self, X, y):\n", 90 | " self.trees = []\n", 91 | " for _ in range(self.n_trees):\n", 92 | " tree = DecisionTree(min_samples_split=self.min_samples_split,\n", 93 | " max_depth=self.max_depth, n_feats=self.n_feats)\n", 94 | " X_samp, y_samp = bootstrap_sample(X, y)\n", 95 | " tree.fit(X_samp, y_samp)\n", 96 | " self.trees.append(tree)\n", 97 | "\n", 98 | " def predict(self, X):\n", 99 | " tree_preds = np.array([tree.predict(X) for tree in self.trees])\n", 100 | " tree_preds = np.swapaxes(tree_preds, 0, 1)\n", 101 | " y_pred = [most_common_label(tree_pred) for tree_pred in tree_preds]\n", 102 | " return np.array(y_pred)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 4, 108 | "metadata": { 109 | "ExecuteTime": { 110 | "end_time": "2020-04-12T16:49:27.994854Z", 111 | "start_time": "2020-04-12T16:49:27.833497Z" 112 | } 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "def accuracy(y_true, y_pred):\n", 117 | " accuracy = np.sum(y_true == y_pred) / len(y_true)\n", 118 | " return accuracy" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "# Training and testing on Diabetes Dataset" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 5, 131 | "metadata": { 132 | "ExecuteTime": { 133 | "end_time": "2020-04-12T16:49:29.037211Z", 134 | "start_time": "2020-04-12T16:49:28.003482Z" 135 | } 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "cols = [\"Pregnancies\", \"Glucose\", \"BloodPressure\", \"SkinThickness\",\n", 140 | " \"Insulin\", \"BMI\", \"DiabetesPedigreeFunction\", \"Age\", \"Outcome\"]\n", 141 | "url = \"https://gist.githubusercontent.com/ktisha/c21e73a1bd1700294ef790c56c8aec1f/raw/819b69b5736821ccee93d05b51de0510bea00294/pima-indians-diabetes.csv\"\n", 142 | "diabetes_data = pd.read_csv(url, skiprows=9, header=None, names=cols)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 
6, 148 | "metadata": { 149 | "ExecuteTime": { 150 | "end_time": "2020-04-12T16:49:29.087001Z", 151 | "start_time": "2020-04-12T16:49:29.044736Z" 152 | } 153 | }, 154 | "outputs": [ 155 | { 156 | "data": { 157 | "text/plain": [ 158 | "(768, 9)" 159 | ] 160 | }, 161 | "execution_count": 6, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "diabetes_data.shape" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 7, 173 | "metadata": { 174 | "ExecuteTime": { 175 | "end_time": "2020-04-12T16:49:29.305869Z", 176 | "start_time": "2020-04-12T16:49:29.103088Z" 177 | } 178 | }, 179 | "outputs": [], 180 | "source": [ 181 | "X = diabetes_data[cols[:-1]].values\n", 182 | "y = diabetes_data[cols[-1]].values" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 8, 188 | "metadata": { 189 | "ExecuteTime": { 190 | "end_time": "2020-04-12T16:49:29.457842Z", 191 | "start_time": "2020-04-12T16:49:29.316553Z" 192 | } 193 | }, 194 | "outputs": [], 195 | "source": [ 196 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, stratify=y, random_state=42)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 9, 202 | "metadata": { 203 | "ExecuteTime": { 204 | "end_time": "2020-04-12T16:49:43.284602Z", 205 | "start_time": "2020-04-12T16:49:29.464289Z" 206 | } 207 | }, 208 | "outputs": [], 209 | "source": [ 210 | "rnd_clf = RandomForest(max_depth=10, n_trees=10, n_feats=6)\n", 211 | "rnd_clf.fit(X_train, y_train)\n", 212 | "y_pred = rnd_clf.predict(X_test)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "# Metrics" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 10, 225 | "metadata": { 226 | "ExecuteTime": { 227 | "end_time": "2020-04-12T16:49:43.300402Z", 228 | "start_time": "2020-04-12T16:49:43.288586Z" 229 | } 230 | }, 231 | "outputs": [ 232 | { 233 | "name": 
"stdout", 234 | "output_type": "stream", 235 | "text": [ 236 | "Accuracy: 0.8181818181818182\n" 237 | ] 238 | } 239 | ], 240 | "source": [ 241 | "print(f\"Accuracy: {accuracy(y_test, y_pred)}\")" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 11, 247 | "metadata": { 248 | "ExecuteTime": { 249 | "end_time": "2020-04-12T16:49:43.470452Z", 250 | "start_time": "2020-04-12T16:49:43.306229Z" 251 | } 252 | }, 253 | "outputs": [ 254 | { 255 | "name": "stdout", 256 | "output_type": "stream", 257 | "text": [ 258 | "Precision: 0.782608695652174\n", 259 | "Recall: 0.6666666666666666\n", 260 | "F1-Score: 0.72\n" 261 | ] 262 | } 263 | ], 264 | "source": [ 265 | "print(f\"Precision: {precision_score(y_test, y_pred)}\")\n", 266 | "print(f\"Recall: {recall_score(y_test, y_pred)}\")\n", 267 | "print(f\"F1-Score: {f1_score(y_test, y_pred)}\")" 268 | ] 269 | } 270 | ], 271 | "metadata": { 272 | "kernelspec": { 273 | "display_name": "Python 3", 274 | "language": "python", 275 | "name": "python3" 276 | }, 277 | "language_info": { 278 | "codemirror_mode": { 279 | "name": "ipython", 280 | "version": 3 281 | }, 282 | "file_extension": ".py", 283 | "mimetype": "text/x-python", 284 | "name": "python", 285 | "nbconvert_exporter": "python", 286 | "pygments_lexer": "ipython3", 287 | "version": "3.7.6" 288 | }, 289 | "toc": { 290 | "base_numbering": 1, 291 | "nav_menu": {}, 292 | "number_sections": true, 293 | "sideBar": true, 294 | "skip_h1_title": false, 295 | "title_cell": "Table of Contents", 296 | "title_sidebar": "Contents", 297 | "toc_cell": true, 298 | "toc_position": {}, 299 | "toc_section_display": true, 300 | "toc_window_display": false 301 | }, 302 | "varInspector": { 303 | "cols": { 304 | "lenName": 16, 305 | "lenType": 16, 306 | "lenVar": 40 307 | }, 308 | "kernels_config": { 309 | "python": { 310 | "delete_cmd_postfix": "", 311 | "delete_cmd_prefix": "del ", 312 | "library": "var_list.py", 313 | "varRefreshCmd": "print(var_dic_list())" 314 | }, 315 | 
"r": { 316 | "delete_cmd_postfix": ") ", 317 | "delete_cmd_prefix": "rm(", 318 | "library": "var_list.r", 319 | "varRefreshCmd": "cat(var_dic_list()) " 320 | } 321 | }, 322 | "types_to_exclude": [ 323 | "module", 324 | "function", 325 | "builtin_function_or_method", 326 | "instance", 327 | "_Feature" 328 | ], 329 | "window_display": false 330 | } 331 | }, 332 | "nbformat": 4, 333 | "nbformat_minor": 4 334 | } 335 | -------------------------------------------------------------------------------- /K-Nearest Neigbors/KNN-Classifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2020-02-16T23:06:30.008859Z", 9 | "start_time": "2020-02-16T23:06:30.002839Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "import csv\n", 15 | "import random\n", 16 | "import operator\n", 17 | "import math\n", 18 | "\n", 19 | "random.seed(47)" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": { 26 | "ExecuteTime": { 27 | "end_time": "2020-02-16T23:06:30.143114Z", 28 | "start_time": "2020-02-16T23:06:30.010816Z" 29 | } 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "def viewDataset(file):\n", 34 | " with open(file) as csvfile:\n", 35 | " lines = csv.reader(csvfile)\n", 36 | " for row in lines:\n", 37 | " print(', '.join(row))\n" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": { 44 | "ExecuteTime": { 45 | "end_time": "2020-02-16T23:06:30.290169Z", 46 | "start_time": "2020-02-16T23:06:30.149057Z" 47 | } 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "dataset = r'../datasets/iris.data'\n", 52 | "# viewDataset(dataset)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 4, 58 | "metadata": { 59 | "ExecuteTime": { 60 | "end_time": "2020-02-16T23:06:30.439251Z", 61 | "start_time": 
"2020-02-16T23:06:30.293164Z" 62 | } 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "def handleDataset(filename, split):\n", 67 | " trainingSet = []\n", 68 | " testSet = []\n", 69 | " with open(filename, 'r') as csvfile:\n", 70 | " lines = csv.reader(csvfile)\n", 71 | " dataset = list(lines)\n", 72 | " for x in range(len(dataset) - 1):\n", 73 | " for y in range(4):\n", 74 | " dataset[x][y] = float(dataset[x][y])\n", 75 | " if random.random() < split:\n", 76 | " trainingSet.append(dataset[x])\n", 77 | " else:\n", 78 | " testSet.append(dataset[x])\n", 79 | " return trainingSet, testSet" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 5, 85 | "metadata": { 86 | "ExecuteTime": { 87 | "end_time": "2020-02-16T23:06:30.580287Z", 88 | "start_time": "2020-02-16T23:06:30.440284Z" 89 | } 90 | }, 91 | "outputs": [ 92 | { 93 | "name": "stdout", 94 | "output_type": "stream", 95 | "text": [ 96 | "Train: 101\n", 97 | "Test: 49\n" 98 | ] 99 | } 100 | ], 101 | "source": [ 102 | "# test handleDataset\n", 103 | "\n", 104 | "trainingSet, testSet = handleDataset(dataset, 0.66)\n", 105 | "print ('Train: ' + repr(len(trainingSet)))\n", 106 | "print ('Test: ' + repr(len(testSet)))" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 6, 112 | "metadata": { 113 | "ExecuteTime": { 114 | "end_time": "2020-02-16T23:06:30.721943Z", 115 | "start_time": "2020-02-16T23:06:30.582311Z" 116 | } 117 | }, 118 | "outputs": [], 119 | "source": [ 120 | "def euclideanDistance(instance1, instance2, length):\n", 121 | " distance = 0\n", 122 | " for x in range(length):\n", 123 | " distance += pow((instance1[x] - instance2[x]), 2)\n", 124 | " return math.sqrt(distance)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 7, 130 | "metadata": { 131 | "ExecuteTime": { 132 | "end_time": "2020-02-16T23:06:30.858538Z", 133 | "start_time": "2020-02-16T23:06:30.731964Z" 134 | } 135 | }, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | 
"output_type": "stream", 140 | "text": [ 141 | "Distance: 3.4641016151377544\n" 142 | ] 143 | } 144 | ], 145 | "source": [ 146 | "# Test Euclidean Distance\n", 147 | "data1 = [2, 2, 2, 'a']\n", 148 | "data2 = [4, 4, 4, 'b']\n", 149 | "distance = euclideanDistance(data1, data2, 3)\n", 150 | "print('Distance: ' + repr(distance))" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 8, 156 | "metadata": { 157 | "ExecuteTime": { 158 | "end_time": "2020-02-16T23:06:30.995044Z", 159 | "start_time": "2020-02-16T23:06:30.861489Z" 160 | } 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "def getKNeighbors(trainingSet, testInstance, k):\n", 165 | " distances = []\n", 166 | " length = len(testInstance) - 1\n", 167 | " for x in range(len(trainingSet)):\n", 168 | " dist = euclideanDistance(testInstance, trainingSet[x], length)\n", 169 | " distances.append((trainingSet[x], dist))\n", 170 | " distances.sort(key=operator.itemgetter(1))\n", 171 | " neighbors = []\n", 172 | " for x in range(k):\n", 173 | " neighbors.append(distances[x][0])\n", 174 | " return neighbors" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 9, 180 | "metadata": { 181 | "ExecuteTime": { 182 | "end_time": "2020-02-16T23:06:31.162445Z", 183 | "start_time": "2020-02-16T23:06:30.996140Z" 184 | } 185 | }, 186 | "outputs": [ 187 | { 188 | "name": "stdout", 189 | "output_type": "stream", 190 | "text": [ 191 | "[[4, 4, 4, 'b']]\n" 192 | ] 193 | } 194 | ], 195 | "source": [ 196 | "# test getKNeighbors\n", 197 | "\n", 198 | "trainSet = [[2, 2, 2, 'a'], [4, 4, 4, 'b']]\n", 199 | "testInstance = [5, 5, 5]\n", 200 | "k = 1\n", 201 | "neighbors = getKNeighbors(trainSet, testInstance, 1)\n", 202 | "print(neighbors)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 10, 208 | "metadata": { 209 | "ExecuteTime": { 210 | "end_time": "2020-02-16T23:06:31.315818Z", 211 | "start_time": "2020-02-16T23:06:31.163407Z" 212 | } 213 | }, 214 | 
"outputs": [], 215 | "source": [ 216 | "def getResponse(neighbors):\n", 217 | " classVotes = {}\n", 218 | " for x in range(len(neighbors)):\n", 219 | " response = neighbors[x][-1]\n", 220 | " if response in classVotes:\n", 221 | " classVotes[response] += 1\n", 222 | " else:\n", 223 | " classVotes[response] = 1\n", 224 | "# print(classVotes)\n", 225 | " sortedVotes = sorted(classVotes.items(),\n", 226 | " key=operator.itemgetter(1), reverse=True)\n", 227 | "# print(sortedVotes)\n", 228 | " return sortedVotes[0][0]" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 11, 234 | "metadata": { 235 | "ExecuteTime": { 236 | "end_time": "2020-02-16T23:06:31.484233Z", 237 | "start_time": "2020-02-16T23:06:31.321761Z" 238 | } 239 | }, 240 | "outputs": [ 241 | { 242 | "name": "stdout", 243 | "output_type": "stream", 244 | "text": [ 245 | "a\n" 246 | ] 247 | } 248 | ], 249 | "source": [ 250 | "# test getResponse\n", 251 | "\n", 252 | "neighbors = [[1, 1, 1, 'a'], [2, 2, 2, 'a'], [3, 3, 3, 'b']]\n", 253 | "print(getResponse(neighbors))" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 12, 259 | "metadata": { 260 | "ExecuteTime": { 261 | "end_time": "2020-02-16T23:06:31.632980Z", 262 | "start_time": "2020-02-16T23:06:31.490217Z" 263 | } 264 | }, 265 | "outputs": [], 266 | "source": [ 267 | "def getAccuracy(testSet, predictions):\n", 268 | " correct = 0\n", 269 | " testSet_length = len(testSet)\n", 270 | " for x in range(testSet_length):\n", 271 | " if testSet[x][-1] == predictions[x]:\n", 272 | " correct += 1\n", 273 | " return (correct/testSet_length) * 100.0" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 13, 279 | "metadata": { 280 | "ExecuteTime": { 281 | "end_time": "2020-02-16T23:06:31.768505Z", 282 | "start_time": "2020-02-16T23:06:31.638927Z" 283 | } 284 | }, 285 | "outputs": [ 286 | { 287 | "name": "stdout", 288 | "output_type": "stream", 289 | "text": [ 290 | "66.66666666666666\n" 291 | 
] 292 | } 293 | ], 294 | "source": [ 295 | "# test getAccuracy\n", 296 | "\n", 297 | "testSet = [[1, 1, 1, 'a'], [2, 2, 2, 'a'], [3, 3, 3, 'b']]\n", 298 | "predictions = ['a', 'a', 'a']\n", 299 | "accuracy = getAccuracy(testSet, predictions)\n", 300 | "print(accuracy)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 14, 306 | "metadata": { 307 | "ExecuteTime": { 308 | "end_time": "2020-02-16T23:06:32.449281Z", 309 | "start_time": "2020-02-16T23:06:31.769490Z" 310 | } 311 | }, 312 | "outputs": [], 313 | "source": [ 314 | "from sklearn.metrics import accuracy_score" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 15, 320 | "metadata": { 321 | "ExecuteTime": { 322 | "end_time": "2020-02-16T23:06:32.465145Z", 323 | "start_time": "2020-02-16T23:06:32.455179Z" 324 | } 325 | }, 326 | "outputs": [], 327 | "source": [ 328 | "def main():\n", 329 | " # prepare data\n", 330 | " split = 0.8\n", 331 | " trainingSet, testSet = handleDataset(dataset, split)\n", 332 | " print('Train: ' + repr(len(trainingSet)))\n", 333 | " print('Test: ' + repr(len(testSet)))\n", 334 | " # generate predictions\n", 335 | " predictions = []\n", 336 | " k = 3\n", 337 | " for x in range(len(testSet)):\n", 338 | " neighbors = getKNeighbors(trainingSet, testSet[x], k)\n", 339 | " result = getResponse(neighbors) \n", 340 | " predictions.append(result)\n", 341 | " print(f'> predicted = {result}, actual = {testSet[x][-1]}')\n", 342 | " \n", 343 | " accuracy = getAccuracy(testSet, predictions)\n", 344 | " print(f'k: {k}, Accuracy: {round(accuracy,3)}%')" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": 16, 350 | "metadata": { 351 | "ExecuteTime": { 352 | "end_time": "2020-02-16T23:06:32.623369Z", 353 | "start_time": "2020-02-16T23:06:32.467140Z" 354 | } 355 | }, 356 | "outputs": [ 357 | { 358 | "name": "stdout", 359 | "output_type": "stream", 360 | "text": [ 361 | "Train: 121\n", 362 | "Test: 29\n", 363 | "> predicted = 
Iris-setosa, actual = Iris-setosa\n", 364 | "> predicted = Iris-setosa, actual = Iris-setosa\n", 365 | "> predicted = Iris-setosa, actual = Iris-setosa\n", 366 | "> predicted = Iris-setosa, actual = Iris-setosa\n", 367 | "> predicted = Iris-setosa, actual = Iris-setosa\n", 368 | "> predicted = Iris-setosa, actual = Iris-setosa\n", 369 | "> predicted = Iris-setosa, actual = Iris-setosa\n", 370 | "> predicted = Iris-setosa, actual = Iris-setosa\n", 371 | "> predicted = Iris-setosa, actual = Iris-setosa\n", 372 | "> predicted = Iris-setosa, actual = Iris-setosa\n", 373 | "> predicted = Iris-versicolor, actual = Iris-versicolor\n", 374 | "> predicted = Iris-versicolor, actual = Iris-versicolor\n", 375 | "> predicted = Iris-versicolor, actual = Iris-versicolor\n", 376 | "> predicted = Iris-versicolor, actual = Iris-versicolor\n", 377 | "> predicted = Iris-versicolor, actual = Iris-versicolor\n", 378 | "> predicted = Iris-versicolor, actual = Iris-versicolor\n", 379 | "> predicted = Iris-virginica, actual = Iris-versicolor\n", 380 | "> predicted = Iris-versicolor, actual = Iris-versicolor\n", 381 | "> predicted = Iris-virginica, actual = Iris-versicolor\n", 382 | "> predicted = Iris-versicolor, actual = Iris-versicolor\n", 383 | "> predicted = Iris-versicolor, actual = Iris-virginica\n", 384 | "> predicted = Iris-virginica, actual = Iris-virginica\n", 385 | "> predicted = Iris-virginica, actual = Iris-virginica\n", 386 | "> predicted = Iris-virginica, actual = Iris-virginica\n", 387 | "> predicted = Iris-virginica, actual = Iris-virginica\n", 388 | "> predicted = Iris-virginica, actual = Iris-virginica\n", 389 | "> predicted = Iris-virginica, actual = Iris-virginica\n", 390 | "> predicted = Iris-virginica, actual = Iris-virginica\n", 391 | "> predicted = Iris-virginica, actual = Iris-virginica\n", 392 | "k: 3, Accuracy: 89.655%\n" 393 | ] 394 | } 395 | ], 396 | "source": [ 397 | "main()" 398 | ] 399 | } 400 | ], 401 | "metadata": { 402 | "kernelspec": { 403 | 
"display_name": "Python 3", 404 | "language": "python", 405 | "name": "python3" 406 | }, 407 | "language_info": { 408 | "codemirror_mode": { 409 | "name": "ipython", 410 | "version": 3 411 | }, 412 | "file_extension": ".py", 413 | "mimetype": "text/x-python", 414 | "name": "python", 415 | "nbconvert_exporter": "python", 416 | "pygments_lexer": "ipython3", 417 | "version": "3.7.6" 418 | }, 419 | "toc": { 420 | "base_numbering": 1, 421 | "nav_menu": {}, 422 | "number_sections": true, 423 | "sideBar": true, 424 | "skip_h1_title": false, 425 | "title_cell": "Table of Contents", 426 | "title_sidebar": "Contents", 427 | "toc_cell": false, 428 | "toc_position": {}, 429 | "toc_section_display": true, 430 | "toc_window_display": false 431 | }, 432 | "varInspector": { 433 | "cols": { 434 | "lenName": 16, 435 | "lenType": 16, 436 | "lenVar": 40 437 | }, 438 | "kernels_config": { 439 | "python": { 440 | "delete_cmd_postfix": "", 441 | "delete_cmd_prefix": "del ", 442 | "library": "var_list.py", 443 | "varRefreshCmd": "print(var_dic_list())" 444 | }, 445 | "r": { 446 | "delete_cmd_postfix": ") ", 447 | "delete_cmd_prefix": "rm(", 448 | "library": "var_list.r", 449 | "varRefreshCmd": "cat(var_dic_list()) " 450 | } 451 | }, 452 | "types_to_exclude": [ 453 | "module", 454 | "function", 455 | "builtin_function_or_method", 456 | "instance", 457 | "_Feature" 458 | ], 459 | "window_display": false 460 | } 461 | }, 462 | "nbformat": 4, 463 | "nbformat_minor": 2 464 | } 465 | -------------------------------------------------------------------------------- /Naive Bayes/Naive Bayes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "**Gaussian Naive Bayes**\n", 8 | "\n", 9 | "***Bayes Theorem:***\n", 10 | "$P(A|B) = \\frac{P(B|A)P(A)}{P(B)}$\n", 11 | "\n", 12 | "\n", 13 | "***Naive Bayes:***\n", 14 | "\n", 15 | "$\\mathbf{P(y|X) = 
\\frac{P(X|y)P(y)}{P(X)}}$\n", 16 | "\n", 17 | "\n", 18 | "$\\mathbf{X = (x_{1}, x_{2}, x_{3}, x_{4}, x_{5},...,x_{n})}$\n", 19 | "\n", 20 | "\n", 21 | "$\\mathbf{P(y|X) = \\frac{P(x_{1}|y).P(x_{2}|y)....P(x_{n}|y).P(y)}{P(X)}}$" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "metadata": { 28 | "ExecuteTime": { 29 | "end_time": "2020-04-11T22:38:10.083585Z", 30 | "start_time": "2020-04-11T22:38:08.030037Z" 31 | } 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "import numpy as np\n", 36 | "import pandas as pd\n", 37 | "from sklearn.model_selection import train_test_split" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": { 44 | "ExecuteTime": { 45 | "end_time": "2020-04-11T22:38:10.137508Z", 46 | "start_time": "2020-04-11T22:38:10.091643Z" 47 | } 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "class NaiveBayes(object):\n", 52 | " \n", 53 | " def fit(self, X, y):\n", 54 | " n_samples, n_features = X.shape\n", 55 | " self._classes = np.unique(y)\n", 56 | " n_classes = len(self._classes)\n", 57 | " \n", 58 | " # mean, variance, priors\n", 59 | " self._mean = np.zeros((n_classes, n_features), dtype=np.float64)\n", 60 | " self._var = np.zeros((n_classes, n_features), dtype=np.float64)\n", 61 | " self._priors = np.zeros(n_classes, dtype=np.float64)\n", 62 | "\n", 63 | " # extracting mean, variance and priors for each class\n", 64 | " # useful in calculating pdf during prediction\n", 65 | " for c in self._classes:\n", 66 | " X_c = X[y==c]\n", 67 | " self._mean[c, :] = X_c.mean(axis=0)\n", 68 | " self._var[c, :] = X_c.var(axis=0)\n", 69 | " self._priors[c] = X_c.shape[0] / float(n_samples)\n", 70 | "\n", 71 | " def predict(self, X):\n", 72 | " y_pred = [self._predict(x) for x in X]\n", 73 | " return np.array(y_pred)\n", 74 | "\n", 75 | " def _predict(self, x):\n", 76 | " posteriors = []\n", 77 | "\n", 78 | " # calculate posterior probability for each class\n", 79 | " for idx, c in 
enumerate(self._classes):\n", 80 | " prior = np.log(self._priors[idx])\n", 81 | " class_conditional = np.sum(np.log(self.gaussian_pdf(idx, x)))\n", 82 | " posterior = prior + class_conditional\n", 83 | " posteriors.append(posterior)\n", 84 | " \n", 85 | " # return class with highest posterior probability\n", 86 | " return self._classes[np.argmax(posteriors)]\n", 87 | " \n", 88 | "\n", 89 | " def gaussian_pdf(self, class_idx, x):\n", 90 | " mean = self._mean[class_idx]\n", 91 | " var = self._var[class_idx]\n", 92 | " numerator = np.exp(- (x-mean)**2 / (2 * var))\n", 93 | " denominator = np.sqrt(2 * np.pi * var)\n", 94 | " return numerator / denominator" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 3, 100 | "metadata": { 101 | "ExecuteTime": { 102 | "end_time": "2020-04-11T22:38:10.283490Z", 103 | "start_time": "2020-04-11T22:38:10.144440Z" 104 | } 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "def accuracy(y_true, y_pred):\n", 109 | " accuracy = np.sum(y_true == y_pred) / len(y_true)\n", 110 | " return accuracy" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 4, 116 | "metadata": { 117 | "ExecuteTime": { 118 | "end_time": "2020-04-11T22:38:10.548884Z", 119 | "start_time": "2020-04-11T22:38:10.292962Z" 120 | } 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "from sklearn.datasets import load_iris\n", 125 | "\n", 126 | "data = load_iris()" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 5, 132 | "metadata": { 133 | "ExecuteTime": { 134 | "end_time": "2020-04-11T22:38:10.561691Z", 135 | "start_time": "2020-04-11T22:38:10.549919Z" 136 | } 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "X_train, X_test, y_train, y_test = train_test_split(data.data, data.target)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 6, 146 | "metadata": { 147 | "ExecuteTime": { 148 | "end_time": "2020-04-11T22:38:10.695826Z", 149 | "start_time": 
"2020-04-11T22:38:10.566489Z" 150 | } 151 | }, 152 | "outputs": [ 153 | { 154 | "name": "stdout", 155 | "output_type": "stream", 156 | "text": [ 157 | "(112, 4)\n", 158 | "(38, 4)\n", 159 | "(112,)\n", 160 | "(38,)\n" 161 | ] 162 | } 163 | ], 164 | "source": [ 165 | "print(X_train.shape)\n", 166 | "print(X_test.shape)\n", 167 | "print(y_train.shape)\n", 168 | "print(y_test.shape)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 7, 174 | "metadata": { 175 | "ExecuteTime": { 176 | "end_time": "2020-04-11T22:38:10.840526Z", 177 | "start_time": "2020-04-11T22:38:10.707061Z" 178 | } 179 | }, 180 | "outputs": [], 181 | "source": [ 182 | "nb = NaiveBayes()\n", 183 | "nb.fit(X_train, y_train.ravel())\n", 184 | "y_pred = nb.predict(X_test)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 8, 190 | "metadata": { 191 | "ExecuteTime": { 192 | "end_time": "2020-04-11T22:38:10.969671Z", 193 | "start_time": "2020-04-11T22:38:10.849490Z" 194 | } 195 | }, 196 | "outputs": [ 197 | { 198 | "name": "stdout", 199 | "output_type": "stream", 200 | "text": [ 201 | "Naive Bayes accuracy: 0.8947368421052632\n" 202 | ] 203 | } 204 | ], 205 | "source": [ 206 | "print(f\"Naive Bayes accuracy: {accuracy(y_test, y_pred)}\")" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 9, 212 | "metadata": { 213 | "ExecuteTime": { 214 | "end_time": "2020-04-11T22:38:11.108872Z", 215 | "start_time": "2020-04-11T22:38:10.977849Z" 216 | } 217 | }, 218 | "outputs": [], 219 | "source": [ 220 | "# Diabetes Dataset" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 10, 226 | "metadata": { 227 | "ExecuteTime": { 228 | "end_time": "2020-04-11T22:38:11.985904Z", 229 | "start_time": "2020-04-11T22:38:11.118415Z" 230 | } 231 | }, 232 | "outputs": [ 233 | { 234 | "data": { 235 | "text/html": [ 236 | "
\n", 237 | "\n", 250 | "\n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | "
PregnanciesGlucoseBloodPressureSkinThicknessInsulinBMIDiabetesPedigreeFunctionAgeOutcome
061487235033.60.627501
11856629026.60.351310
28183640023.30.672321
318966239428.10.167210
40137403516843.12.288331
\n", 328 | "
" 329 | ], 330 | "text/plain": [ 331 | " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n", 332 | "0 6 148 72 35 0 33.6 \n", 333 | "1 1 85 66 29 0 26.6 \n", 334 | "2 8 183 64 0 0 23.3 \n", 335 | "3 1 89 66 23 94 28.1 \n", 336 | "4 0 137 40 35 168 43.1 \n", 337 | "\n", 338 | " DiabetesPedigreeFunction Age Outcome \n", 339 | "0 0.627 50 1 \n", 340 | "1 0.351 31 0 \n", 341 | "2 0.672 32 1 \n", 342 | "3 0.167 21 0 \n", 343 | "4 2.288 33 1 " 344 | ] 345 | }, 346 | "execution_count": 10, 347 | "metadata": {}, 348 | "output_type": "execute_result" 349 | } 350 | ], 351 | "source": [ 352 | "cols = [\"Pregnancies\" ,\"Glucose\" ,\"BloodPressure\" ,\"SkinThickness\" ,\"Insulin\" ,\"BMI\" ,\"DiabetesPedigreeFunction\" ,\"Age\" ,\"Outcome\"]\n", 353 | "url = \"https://gist.githubusercontent.com/ktisha/c21e73a1bd1700294ef790c56c8aec1f/raw/819b69b5736821ccee93d05b51de0510bea00294/pima-indians-diabetes.csv\"\n", 354 | "\n", 355 | "diabetes_data = pd.read_csv(url, skiprows=9, header=None, names=cols)\n", 356 | "diabetes_data.head()" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 11, 362 | "metadata": { 363 | "ExecuteTime": { 364 | "end_time": "2020-04-11T22:38:11.999427Z", 365 | "start_time": "2020-04-11T22:38:11.989369Z" 366 | } 367 | }, 368 | "outputs": [ 369 | { 370 | "data": { 371 | "text/plain": [ 372 | "(768, 9)" 373 | ] 374 | }, 375 | "execution_count": 11, 376 | "metadata": {}, 377 | "output_type": "execute_result" 378 | } 379 | ], 380 | "source": [ 381 | "diabetes_data.shape" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 12, 387 | "metadata": { 388 | "ExecuteTime": { 389 | "end_time": "2020-04-11T22:38:12.130899Z", 390 | "start_time": "2020-04-11T22:38:12.003158Z" 391 | } 392 | }, 393 | "outputs": [], 394 | "source": [ 395 | "X = diabetes_data[cols[:-1]].values\n", 396 | "y = diabetes_data[cols[-1]].values" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": 13, 402 | 
"metadata": { 403 | "ExecuteTime": { 404 | "end_time": "2020-04-11T22:38:12.285074Z", 405 | "start_time": "2020-04-11T22:38:12.134911Z" 406 | } 407 | }, 408 | "outputs": [], 409 | "source": [ 410 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, stratify=y, random_state=42)" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": 14, 416 | "metadata": { 417 | "ExecuteTime": { 418 | "end_time": "2020-04-11T22:38:12.441193Z", 419 | "start_time": "2020-04-11T22:38:12.293223Z" 420 | } 421 | }, 422 | "outputs": [], 423 | "source": [ 424 | "nb = NaiveBayes()\n", 425 | "nb.fit(X_train, y_train.ravel())\n", 426 | "y_pred = nb.predict(X_test)" 427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": 15, 432 | "metadata": { 433 | "ExecuteTime": { 434 | "end_time": "2020-04-11T22:38:12.614652Z", 435 | "start_time": "2020-04-11T22:38:12.449128Z" 436 | } 437 | }, 438 | "outputs": [ 439 | { 440 | "name": "stdout", 441 | "output_type": "stream", 442 | "text": [ 443 | "Naive Bayes accuracy: 0.7532467532467533\n" 444 | ] 445 | } 446 | ], 447 | "source": [ 448 | "print(f\"Naive Bayes accuracy: {accuracy(y_test, y_pred)}\")" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": 16, 454 | "metadata": { 455 | "ExecuteTime": { 456 | "end_time": "2020-04-11T22:38:12.746000Z", 457 | "start_time": "2020-04-11T22:38:12.617873Z" 458 | } 459 | }, 460 | "outputs": [], 461 | "source": [ 462 | "from sklearn.metrics import precision_score, recall_score, f1_score" 463 | ] 464 | }, 465 | { 466 | "cell_type": "code", 467 | "execution_count": 17, 468 | "metadata": { 469 | "ExecuteTime": { 470 | "end_time": "2020-04-11T22:38:13.191309Z", 471 | "start_time": "2020-04-11T22:38:12.750193Z" 472 | } 473 | }, 474 | "outputs": [ 475 | { 476 | "name": "stdout", 477 | "output_type": "stream", 478 | "text": [ 479 | "Precision: 0.6428571428571429\n", 480 | "Recall: 0.6666666666666666\n", 481 | "F1-Score: 
0.6545454545454545\n" 482 | ] 483 | } 484 | ], 485 | "source": [ 486 | "print(f\"Precision: {precision_score(y_test, y_pred)}\")\n", 487 | "print(f\"Recall: {recall_score(y_test, y_pred)}\")\n", 488 | "print(f\"F1-Score: {f1_score(y_test, y_pred)}\")" 489 | ] 490 | } 491 | ], 492 | "metadata": { 493 | "kernelspec": { 494 | "display_name": "Python 3", 495 | "language": "python", 496 | "name": "python3" 497 | }, 498 | "language_info": { 499 | "codemirror_mode": { 500 | "name": "ipython", 501 | "version": 3 502 | }, 503 | "file_extension": ".py", 504 | "mimetype": "text/x-python", 505 | "name": "python", 506 | "nbconvert_exporter": "python", 507 | "pygments_lexer": "ipython3", 508 | "version": "3.7.6" 509 | }, 510 | "toc": { 511 | "base_numbering": 1, 512 | "nav_menu": {}, 513 | "number_sections": true, 514 | "sideBar": true, 515 | "skip_h1_title": false, 516 | "title_cell": "Table of Contents", 517 | "title_sidebar": "Contents", 518 | "toc_cell": false, 519 | "toc_position": {}, 520 | "toc_section_display": true, 521 | "toc_window_display": false 522 | }, 523 | "varInspector": { 524 | "cols": { 525 | "lenName": 16, 526 | "lenType": 16, 527 | "lenVar": 40 528 | }, 529 | "kernels_config": { 530 | "python": { 531 | "delete_cmd_postfix": "", 532 | "delete_cmd_prefix": "del ", 533 | "library": "var_list.py", 534 | "varRefreshCmd": "print(var_dic_list())" 535 | }, 536 | "r": { 537 | "delete_cmd_postfix": ") ", 538 | "delete_cmd_prefix": "rm(", 539 | "library": "var_list.r", 540 | "varRefreshCmd": "cat(var_dic_list()) " 541 | } 542 | }, 543 | "types_to_exclude": [ 544 | "module", 545 | "function", 546 | "builtin_function_or_method", 547 | "instance", 548 | "_Feature" 549 | ], 550 | "window_display": false 551 | } 552 | }, 553 | "nbformat": 4, 554 | "nbformat_minor": 4 555 | } 556 | -------------------------------------------------------------------------------- /datasets/diabetes_data.csv: -------------------------------------------------------------------------------- 1 | 
Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome 2 | 6,148,72,35,0,33.6,0.627,50,1 3 | 1,85,66,29,0,26.6,0.35100000000000003,31,0 4 | 8,183,64,0,0,23.3,0.672,32,1 5 | 1,89,66,23,94,28.1,0.16699999999999998,21,0 6 | 0,137,40,35,168,43.1,2.2880000000000003,33,1 7 | 5,116,74,0,0,25.6,0.201,30,0 8 | 3,78,50,32,88,31.0,0.248,26,1 9 | 10,115,0,0,0,35.3,0.134,29,0 10 | 2,197,70,45,543,30.5,0.158,53,1 11 | 8,125,96,0,0,0.0,0.23199999999999998,54,1 12 | 4,110,92,0,0,37.6,0.191,30,0 13 | 10,168,74,0,0,38.0,0.537,34,1 14 | 10,139,80,0,0,27.1,1.4409999999999998,57,0 15 | 1,189,60,23,846,30.1,0.39799999999999996,59,1 16 | 5,166,72,19,175,25.8,0.5870000000000001,51,1 17 | 7,100,0,0,0,30.0,0.484,32,1 18 | 0,118,84,47,230,45.8,0.551,31,1 19 | 7,107,74,0,0,29.6,0.254,31,1 20 | 1,103,30,38,83,43.3,0.183,33,0 21 | 1,115,70,30,96,34.6,0.529,32,1 22 | 3,126,88,41,235,39.3,0.7040000000000001,27,0 23 | 8,99,84,0,0,35.4,0.38799999999999996,50,0 24 | 7,196,90,0,0,39.8,0.451,41,1 25 | 9,119,80,35,0,29.0,0.263,29,1 26 | 11,143,94,33,146,36.6,0.254,51,1 27 | 10,125,70,26,115,31.1,0.205,41,1 28 | 7,147,76,0,0,39.4,0.257,43,1 29 | 1,97,66,15,140,23.2,0.48700000000000004,22,0 30 | 13,145,82,19,110,22.2,0.245,57,0 31 | 5,117,92,0,0,34.1,0.337,38,0 32 | 5,109,75,26,0,36.0,0.546,60,0 33 | 3,158,76,36,245,31.6,0.851,28,1 34 | 3,88,58,11,54,24.8,0.267,22,0 35 | 6,92,92,0,0,19.9,0.188,28,0 36 | 10,122,78,31,0,27.6,0.512,45,0 37 | 4,103,60,33,192,24.0,0.966,33,0 38 | 11,138,76,0,0,33.2,0.42,35,0 39 | 9,102,76,37,0,32.9,0.665,46,1 40 | 2,90,68,42,0,38.2,0.503,27,1 41 | 4,111,72,47,207,37.1,1.39,56,1 42 | 3,180,64,25,70,34.0,0.271,26,0 43 | 7,133,84,0,0,40.2,0.696,37,0 44 | 7,106,92,18,0,22.7,0.235,48,0 45 | 9,171,110,24,240,45.4,0.721,54,1 46 | 7,159,64,0,0,27.4,0.294,40,0 47 | 0,180,66,39,0,42.0,1.893,25,1 48 | 1,146,56,0,0,29.7,0.564,29,0 49 | 2,71,70,27,0,28.0,0.586,22,0 50 | 7,103,66,32,0,39.1,0.344,31,1 51 | 7,105,0,0,0,0.0,0.305,24,0 52 | 
1,103,80,11,82,19.4,0.491,22,0 53 | 1,101,50,15,36,24.2,0.526,26,0 54 | 5,88,66,21,23,24.4,0.342,30,0 55 | 8,176,90,34,300,33.7,0.467,58,1 56 | 7,150,66,42,342,34.7,0.718,42,0 57 | 1,73,50,10,0,23.0,0.248,21,0 58 | 7,187,68,39,304,37.7,0.254,41,1 59 | 0,100,88,60,110,46.8,0.9620000000000001,31,0 60 | 0,146,82,0,0,40.5,1.781,44,0 61 | 0,105,64,41,142,41.5,0.17300000000000001,22,0 62 | 2,84,0,0,0,0.0,0.304,21,0 63 | 8,133,72,0,0,32.9,0.27,39,1 64 | 5,44,62,0,0,25.0,0.5870000000000001,36,0 65 | 2,141,58,34,128,25.4,0.6990000000000001,24,0 66 | 7,114,66,0,0,32.8,0.258,42,1 67 | 5,99,74,27,0,29.0,0.203,32,0 68 | 0,109,88,30,0,32.5,0.855,38,1 69 | 2,109,92,0,0,42.7,0.845,54,0 70 | 1,95,66,13,38,19.6,0.33399999999999996,25,0 71 | 4,146,85,27,100,28.9,0.18899999999999997,27,0 72 | 2,100,66,20,90,32.9,0.867,28,1 73 | 5,139,64,35,140,28.6,0.41100000000000003,26,0 74 | 13,126,90,0,0,43.4,0.583,42,1 75 | 4,129,86,20,270,35.1,0.231,23,0 76 | 1,79,75,30,0,32.0,0.396,22,0 77 | 1,0,48,20,0,24.7,0.14,22,0 78 | 7,62,78,0,0,32.6,0.391,41,0 79 | 5,95,72,33,0,37.7,0.37,27,0 80 | 0,131,0,0,0,43.2,0.27,26,1 81 | 2,112,66,22,0,25.0,0.307,24,0 82 | 3,113,44,13,0,22.4,0.14,22,0 83 | 2,74,0,0,0,0.0,0.102,22,0 84 | 7,83,78,26,71,29.3,0.767,36,0 85 | 0,101,65,28,0,24.6,0.237,22,0 86 | 5,137,108,0,0,48.8,0.22699999999999998,37,1 87 | 2,110,74,29,125,32.4,0.698,27,0 88 | 13,106,72,54,0,36.6,0.17800000000000002,45,0 89 | 2,100,68,25,71,38.5,0.324,26,0 90 | 15,136,70,32,110,37.1,0.153,43,1 91 | 1,107,68,19,0,26.5,0.165,24,0 92 | 1,80,55,0,0,19.1,0.258,21,0 93 | 4,123,80,15,176,32.0,0.44299999999999995,34,0 94 | 7,81,78,40,48,46.7,0.261,42,0 95 | 4,134,72,0,0,23.8,0.27699999999999997,60,1 96 | 2,142,82,18,64,24.7,0.7609999999999999,21,0 97 | 6,144,72,27,228,33.9,0.255,40,0 98 | 2,92,62,28,0,31.6,0.13,24,0 99 | 1,71,48,18,76,20.4,0.32299999999999995,22,0 100 | 6,93,50,30,64,28.7,0.35600000000000004,23,0 101 | 1,122,90,51,220,49.7,0.325,31,1 102 | 1,163,72,0,0,39.0,1.222,33,1 103 | 
1,151,60,0,0,26.1,0.179,22,0 104 | 0,125,96,0,0,22.5,0.262,21,0 105 | 1,81,72,18,40,26.6,0.28300000000000003,24,0 106 | 2,85,65,0,0,39.6,0.93,27,0 107 | 1,126,56,29,152,28.7,0.8009999999999999,21,0 108 | 1,96,122,0,0,22.4,0.207,27,0 109 | 4,144,58,28,140,29.5,0.287,37,0 110 | 3,83,58,31,18,34.3,0.336,25,0 111 | 0,95,85,25,36,37.4,0.247,24,1 112 | 3,171,72,33,135,33.3,0.19899999999999998,24,1 113 | 8,155,62,26,495,34.0,0.5429999999999999,46,1 114 | 1,89,76,34,37,31.2,0.192,23,0 115 | 4,76,62,0,0,34.0,0.391,25,0 116 | 7,160,54,32,175,30.5,0.588,39,1 117 | 4,146,92,0,0,31.2,0.539,61,1 118 | 5,124,74,0,0,34.0,0.22,38,1 119 | 5,78,48,0,0,33.7,0.654,25,0 120 | 4,97,60,23,0,28.2,0.44299999999999995,22,0 121 | 4,99,76,15,51,23.2,0.223,21,0 122 | 0,162,76,56,100,53.2,0.759,25,1 123 | 6,111,64,39,0,34.2,0.26,24,0 124 | 2,107,74,30,100,33.6,0.40399999999999997,23,0 125 | 5,132,80,0,0,26.8,0.18600000000000003,69,0 126 | 0,113,76,0,0,33.3,0.278,23,1 127 | 1,88,30,42,99,55.0,0.496,26,1 128 | 3,120,70,30,135,42.9,0.452,30,0 129 | 1,118,58,36,94,33.3,0.261,23,0 130 | 1,117,88,24,145,34.5,0.40299999999999997,40,1 131 | 0,105,84,0,0,27.9,0.741,62,1 132 | 4,173,70,14,168,29.7,0.361,33,1 133 | 9,122,56,0,0,33.3,1.114,33,1 134 | 3,170,64,37,225,34.5,0.35600000000000004,30,1 135 | 8,84,74,31,0,38.3,0.457,39,0 136 | 2,96,68,13,49,21.1,0.647,26,0 137 | 2,125,60,20,140,33.8,0.08800000000000001,31,0 138 | 0,100,70,26,50,30.8,0.597,21,0 139 | 0,93,60,25,92,28.7,0.532,22,0 140 | 0,129,80,0,0,31.2,0.703,29,0 141 | 5,105,72,29,325,36.9,0.159,28,0 142 | 3,128,78,0,0,21.1,0.268,55,0 143 | 5,106,82,30,0,39.5,0.28600000000000003,38,0 144 | 2,108,52,26,63,32.5,0.318,22,0 145 | 10,108,66,0,0,32.4,0.272,42,1 146 | 4,154,62,31,284,32.8,0.237,23,0 147 | 0,102,75,23,0,0.0,0.5720000000000001,21,0 148 | 9,57,80,37,0,32.8,0.096,41,0 149 | 2,106,64,35,119,30.5,1.4,34,0 150 | 5,147,78,0,0,33.7,0.218,65,0 151 | 2,90,70,17,0,27.3,0.085,22,0 152 | 1,136,74,50,204,37.4,0.39899999999999997,24,0 153 | 
4,114,65,0,0,21.9,0.43200000000000005,37,0 154 | 9,156,86,28,155,34.3,1.189,42,1 155 | 1,153,82,42,485,40.6,0.687,23,0 156 | 8,188,78,0,0,47.9,0.13699999999999998,43,1 157 | 7,152,88,44,0,50.0,0.337,36,1 158 | 2,99,52,15,94,24.6,0.637,21,0 159 | 1,109,56,21,135,25.2,0.833,23,0 160 | 2,88,74,19,53,29.0,0.22899999999999998,22,0 161 | 17,163,72,41,114,40.9,0.8170000000000001,47,1 162 | 4,151,90,38,0,29.7,0.294,36,0 163 | 7,102,74,40,105,37.2,0.204,45,0 164 | 0,114,80,34,285,44.2,0.16699999999999998,27,0 165 | 2,100,64,23,0,29.7,0.368,21,0 166 | 0,131,88,0,0,31.6,0.743,32,1 167 | 6,104,74,18,156,29.9,0.722,41,1 168 | 3,148,66,25,0,32.5,0.256,22,0 169 | 4,120,68,0,0,29.6,0.7090000000000001,34,0 170 | 4,110,66,0,0,31.9,0.47100000000000003,29,0 171 | 3,111,90,12,78,28.4,0.495,29,0 172 | 6,102,82,0,0,30.8,0.18,36,1 173 | 6,134,70,23,130,35.4,0.542,29,1 174 | 2,87,0,23,0,28.9,0.773,25,0 175 | 1,79,60,42,48,43.5,0.6779999999999999,23,0 176 | 2,75,64,24,55,29.7,0.37,33,0 177 | 8,179,72,42,130,32.7,0.7190000000000001,36,1 178 | 6,85,78,0,0,31.2,0.382,42,0 179 | 0,129,110,46,130,67.1,0.319,26,1 180 | 5,143,78,0,0,45.0,0.19,47,0 181 | 5,130,82,0,0,39.1,0.956,37,1 182 | 6,87,80,0,0,23.2,0.084,32,0 183 | 0,119,64,18,92,34.9,0.725,23,0 184 | 1,0,74,20,23,27.7,0.299,21,0 185 | 5,73,60,0,0,26.8,0.268,27,0 186 | 4,141,74,0,0,27.6,0.244,40,0 187 | 7,194,68,28,0,35.9,0.745,41,1 188 | 8,181,68,36,495,30.1,0.615,60,1 189 | 1,128,98,41,58,32.0,1.321,33,1 190 | 8,109,76,39,114,27.9,0.64,31,1 191 | 5,139,80,35,160,31.6,0.361,25,1 192 | 3,111,62,0,0,22.6,0.142,21,0 193 | 9,123,70,44,94,33.1,0.374,40,0 194 | 7,159,66,0,0,30.4,0.38299999999999995,36,1 195 | 11,135,0,0,0,52.3,0.578,40,1 196 | 8,85,55,20,0,24.4,0.136,42,0 197 | 5,158,84,41,210,39.4,0.395,29,1 198 | 1,105,58,0,0,24.3,0.187,21,0 199 | 3,107,62,13,48,22.9,0.6779999999999999,23,1 200 | 4,109,64,44,99,34.8,0.905,26,1 201 | 4,148,60,27,318,30.9,0.15,29,1 202 | 0,113,80,16,0,31.0,0.8740000000000001,21,0 203 | 
1,138,82,0,0,40.1,0.23600000000000002,28,0 204 | 0,108,68,20,0,27.3,0.787,32,0 205 | 2,99,70,16,44,20.4,0.235,27,0 206 | 6,103,72,32,190,37.7,0.324,55,0 207 | 5,111,72,28,0,23.9,0.40700000000000003,27,0 208 | 8,196,76,29,280,37.5,0.605,57,1 209 | 5,162,104,0,0,37.7,0.151,52,1 210 | 1,96,64,27,87,33.2,0.289,21,0 211 | 7,184,84,33,0,35.5,0.355,41,1 212 | 2,81,60,22,0,27.7,0.29,25,0 213 | 0,147,85,54,0,42.8,0.375,24,0 214 | 7,179,95,31,0,34.2,0.16399999999999998,60,0 215 | 0,140,65,26,130,42.6,0.431,24,1 216 | 9,112,82,32,175,34.2,0.26,36,1 217 | 12,151,70,40,271,41.8,0.742,38,1 218 | 5,109,62,41,129,35.8,0.514,25,1 219 | 6,125,68,30,120,30.0,0.46399999999999997,32,0 220 | 5,85,74,22,0,29.0,1.224,32,1 221 | 5,112,66,0,0,37.8,0.261,41,1 222 | 0,177,60,29,478,34.6,1.072,21,1 223 | 2,158,90,0,0,31.6,0.805,66,1 224 | 7,119,0,0,0,25.2,0.209,37,0 225 | 7,142,60,33,190,28.8,0.687,61,0 226 | 1,100,66,15,56,23.6,0.6659999999999999,26,0 227 | 1,87,78,27,32,34.6,0.10099999999999999,22,0 228 | 0,101,76,0,0,35.7,0.198,26,0 229 | 3,162,52,38,0,37.2,0.652,24,1 230 | 4,197,70,39,744,36.7,2.329,31,0 231 | 0,117,80,31,53,45.2,0.08900000000000001,24,0 232 | 4,142,86,0,0,44.0,0.645,22,1 233 | 6,134,80,37,370,46.2,0.23800000000000002,46,1 234 | 1,79,80,25,37,25.4,0.583,22,0 235 | 4,122,68,0,0,35.0,0.39399999999999996,29,0 236 | 3,74,68,28,45,29.7,0.293,23,0 237 | 4,171,72,0,0,43.6,0.479,26,1 238 | 7,181,84,21,192,35.9,0.586,51,1 239 | 0,179,90,27,0,44.1,0.6859999999999999,23,1 240 | 9,164,84,21,0,30.8,0.831,32,1 241 | 0,104,76,0,0,18.4,0.5820000000000001,27,0 242 | 1,91,64,24,0,29.2,0.192,21,0 243 | 4,91,70,32,88,33.1,0.446,22,0 244 | 3,139,54,0,0,25.6,0.402,22,1 245 | 6,119,50,22,176,27.1,1.318,33,1 246 | 2,146,76,35,194,38.2,0.32899999999999996,29,0 247 | 9,184,85,15,0,30.0,1.213,49,1 248 | 10,122,68,0,0,31.2,0.258,41,0 249 | 0,165,90,33,680,52.3,0.42700000000000005,23,0 250 | 9,124,70,33,402,35.4,0.282,34,0 251 | 1,111,86,19,0,30.1,0.14300000000000002,23,0 252 | 
9,106,52,0,0,31.2,0.38,42,0 253 | 2,129,84,0,0,28.0,0.284,27,0 254 | 2,90,80,14,55,24.4,0.249,24,0 255 | 0,86,68,32,0,35.8,0.23800000000000002,25,0 256 | 12,92,62,7,258,27.6,0.9259999999999999,44,1 257 | 1,113,64,35,0,33.6,0.5429999999999999,21,1 258 | 3,111,56,39,0,30.1,0.557,30,0 259 | 2,114,68,22,0,28.7,0.092,25,0 260 | 1,193,50,16,375,25.9,0.655,24,0 261 | 11,155,76,28,150,33.3,1.3530000000000002,51,1 262 | 3,191,68,15,130,30.9,0.299,34,0 263 | 3,141,0,0,0,30.0,0.7609999999999999,27,1 264 | 4,95,70,32,0,32.1,0.612,24,0 265 | 3,142,80,15,0,32.4,0.2,63,0 266 | 4,123,62,0,0,32.0,0.226,35,1 267 | 5,96,74,18,67,33.6,0.997,43,0 268 | 0,138,0,0,0,36.3,0.9329999999999999,25,1 269 | 2,128,64,42,0,40.0,1.101,24,0 270 | 0,102,52,0,0,25.1,0.078,21,0 271 | 2,146,0,0,0,27.5,0.24,28,1 272 | 10,101,86,37,0,45.6,1.136,38,1 273 | 2,108,62,32,56,25.2,0.128,21,0 274 | 3,122,78,0,0,23.0,0.254,40,0 275 | 1,71,78,50,45,33.2,0.42200000000000004,21,0 276 | 13,106,70,0,0,34.2,0.251,52,0 277 | 2,100,70,52,57,40.5,0.677,25,0 278 | 7,106,60,24,0,26.5,0.29600000000000004,29,1 279 | 0,104,64,23,116,27.8,0.45399999999999996,23,0 280 | 5,114,74,0,0,24.9,0.7440000000000001,57,0 281 | 2,108,62,10,278,25.3,0.8809999999999999,22,0 282 | 0,146,70,0,0,37.9,0.33399999999999996,28,1 283 | 10,129,76,28,122,35.9,0.28,39,0 284 | 7,133,88,15,155,32.4,0.262,37,0 285 | 7,161,86,0,0,30.4,0.165,47,1 286 | 2,108,80,0,0,27.0,0.259,52,1 287 | 7,136,74,26,135,26.0,0.647,51,0 288 | 5,155,84,44,545,38.7,0.619,34,0 289 | 1,119,86,39,220,45.6,0.8079999999999999,29,1 290 | 4,96,56,17,49,20.8,0.34,26,0 291 | 5,108,72,43,75,36.1,0.263,33,0 292 | 0,78,88,29,40,36.9,0.434,21,0 293 | 0,107,62,30,74,36.6,0.757,25,1 294 | 2,128,78,37,182,43.3,1.224,31,1 295 | 1,128,48,45,194,40.5,0.613,24,1 296 | 0,161,50,0,0,21.9,0.254,65,0 297 | 6,151,62,31,120,35.5,0.6920000000000001,28,0 298 | 2,146,70,38,360,28.0,0.337,29,1 299 | 0,126,84,29,215,30.7,0.52,24,0 300 | 14,100,78,25,184,36.6,0.41200000000000003,46,1 301 | 
8,112,72,0,0,23.6,0.84,58,0 302 | 0,167,0,0,0,32.3,0.8390000000000001,30,1 303 | 2,144,58,33,135,31.6,0.42200000000000004,25,1 304 | 5,77,82,41,42,35.8,0.156,35,0 305 | 5,115,98,0,0,52.9,0.209,28,1 306 | 3,150,76,0,0,21.0,0.207,37,0 307 | 2,120,76,37,105,39.7,0.215,29,0 308 | 10,161,68,23,132,25.5,0.326,47,1 309 | 0,137,68,14,148,24.8,0.14300000000000002,21,0 310 | 0,128,68,19,180,30.5,1.391,25,1 311 | 2,124,68,28,205,32.9,0.875,30,1 312 | 6,80,66,30,0,26.2,0.313,41,0 313 | 0,106,70,37,148,39.4,0.605,22,0 314 | 2,155,74,17,96,26.6,0.433,27,1 315 | 3,113,50,10,85,29.5,0.626,25,0 316 | 7,109,80,31,0,35.9,1.127,43,1 317 | 2,112,68,22,94,34.1,0.315,26,0 318 | 3,99,80,11,64,19.3,0.284,30,0 319 | 3,182,74,0,0,30.5,0.345,29,1 320 | 3,115,66,39,140,38.1,0.15,28,0 321 | 6,194,78,0,0,23.5,0.129,59,1 322 | 4,129,60,12,231,27.5,0.527,31,0 323 | 3,112,74,30,0,31.6,0.19699999999999998,25,1 324 | 0,124,70,20,0,27.4,0.254,36,1 325 | 13,152,90,33,29,26.8,0.731,43,1 326 | 2,112,75,32,0,35.7,0.14800000000000002,21,0 327 | 1,157,72,21,168,25.6,0.12300000000000001,24,0 328 | 1,122,64,32,156,35.1,0.6920000000000001,30,1 329 | 10,179,70,0,0,35.1,0.2,37,0 330 | 2,102,86,36,120,45.5,0.127,23,1 331 | 6,105,70,32,68,30.8,0.122,37,0 332 | 8,118,72,19,0,23.1,1.476,46,0 333 | 2,87,58,16,52,32.7,0.166,25,0 334 | 1,180,0,0,0,43.3,0.282,41,1 335 | 12,106,80,0,0,23.6,0.13699999999999998,44,0 336 | 1,95,60,18,58,23.9,0.26,22,0 337 | 0,165,76,43,255,47.9,0.259,26,0 338 | 0,117,0,0,0,33.8,0.932,44,0 339 | 5,115,76,0,0,31.2,0.34299999999999997,44,1 340 | 9,152,78,34,171,34.2,0.893,33,1 341 | 7,178,84,0,0,39.9,0.331,41,1 342 | 1,130,70,13,105,25.9,0.47200000000000003,22,0 343 | 1,95,74,21,73,25.9,0.6729999999999999,36,0 344 | 1,0,68,35,0,32.0,0.389,22,0 345 | 5,122,86,0,0,34.7,0.29,33,0 346 | 8,95,72,0,0,36.8,0.485,57,0 347 | 8,126,88,36,108,38.5,0.349,49,0 348 | 1,139,46,19,83,28.7,0.654,22,0 349 | 3,116,0,0,0,23.5,0.187,23,0 350 | 3,99,62,19,74,21.8,0.27899999999999997,26,0 351 | 
5,0,80,32,0,41.0,0.34600000000000003,37,1 352 | 4,92,80,0,0,42.2,0.237,29,0 353 | 4,137,84,0,0,31.2,0.252,30,0 354 | 3,61,82,28,0,34.4,0.243,46,0 355 | 1,90,62,12,43,27.2,0.58,24,0 356 | 3,90,78,0,0,42.7,0.5589999999999999,21,0 357 | 9,165,88,0,0,30.4,0.302,49,1 358 | 1,125,50,40,167,33.3,0.9620000000000001,28,1 359 | 13,129,0,30,0,39.9,0.569,44,1 360 | 12,88,74,40,54,35.3,0.37799999999999995,48,0 361 | 1,196,76,36,249,36.5,0.875,29,1 362 | 5,189,64,33,325,31.2,0.583,29,1 363 | 5,158,70,0,0,29.8,0.207,63,0 364 | 5,103,108,37,0,39.2,0.305,65,0 365 | 4,146,78,0,0,38.5,0.52,67,1 366 | 4,147,74,25,293,34.9,0.385,30,0 367 | 5,99,54,28,83,34.0,0.499,30,0 368 | 6,124,72,0,0,27.6,0.368,29,1 369 | 0,101,64,17,0,21.0,0.252,21,0 370 | 3,81,86,16,66,27.5,0.306,22,0 371 | 1,133,102,28,140,32.8,0.23399999999999999,45,1 372 | 3,173,82,48,465,38.4,2.137,25,1 373 | 0,118,64,23,89,0.0,1.7309999999999999,21,0 374 | 0,84,64,22,66,35.8,0.545,21,0 375 | 2,105,58,40,94,34.9,0.225,25,0 376 | 2,122,52,43,158,36.2,0.816,28,0 377 | 12,140,82,43,325,39.2,0.528,58,1 378 | 0,98,82,15,84,25.2,0.299,22,0 379 | 1,87,60,37,75,37.2,0.509,22,0 380 | 4,156,75,0,0,48.3,0.23800000000000002,32,1 381 | 0,93,100,39,72,43.4,1.021,35,0 382 | 1,107,72,30,82,30.8,0.821,24,0 383 | 0,105,68,22,0,20.0,0.23600000000000002,22,0 384 | 1,109,60,8,182,25.4,0.9470000000000001,21,0 385 | 1,90,62,18,59,25.1,1.268,25,0 386 | 1,125,70,24,110,24.3,0.221,25,0 387 | 1,119,54,13,50,22.3,0.205,24,0 388 | 5,116,74,29,0,32.3,0.66,35,1 389 | 8,105,100,36,0,43.3,0.239,45,1 390 | 5,144,82,26,285,32.0,0.452,58,1 391 | 3,100,68,23,81,31.6,0.9490000000000001,28,0 392 | 1,100,66,29,196,32.0,0.444,42,0 393 | 5,166,76,0,0,45.7,0.34,27,1 394 | 1,131,64,14,415,23.7,0.389,21,0 395 | 4,116,72,12,87,22.1,0.46299999999999997,37,0 396 | 4,158,78,0,0,32.9,0.8029999999999999,31,1 397 | 2,127,58,24,275,27.7,1.6,25,0 398 | 3,96,56,34,115,24.7,0.9440000000000001,39,0 399 | 0,131,66,40,0,34.3,0.196,22,1 400 | 3,82,70,0,0,21.1,0.389,25,0 401 | 
3,193,70,31,0,34.9,0.24100000000000002,25,1 402 | 4,95,64,0,0,32.0,0.161,31,1 403 | 6,137,61,0,0,24.2,0.151,55,0 404 | 5,136,84,41,88,35.0,0.28600000000000003,35,1 405 | 9,72,78,25,0,31.6,0.28,38,0 406 | 5,168,64,0,0,32.9,0.135,41,1 407 | 2,123,48,32,165,42.1,0.52,26,0 408 | 4,115,72,0,0,28.9,0.376,46,1 409 | 0,101,62,0,0,21.9,0.336,25,0 410 | 8,197,74,0,0,25.9,1.1909999999999998,39,1 411 | 1,172,68,49,579,42.4,0.7020000000000001,28,1 412 | 6,102,90,39,0,35.7,0.674,28,0 413 | 1,112,72,30,176,34.4,0.528,25,0 414 | 1,143,84,23,310,42.4,1.0759999999999998,22,0 415 | 1,143,74,22,61,26.2,0.256,21,0 416 | 0,138,60,35,167,34.6,0.534,21,1 417 | 3,173,84,33,474,35.7,0.258,22,1 418 | 1,97,68,21,0,27.2,1.095,22,0 419 | 4,144,82,32,0,38.5,0.5539999999999999,37,1 420 | 1,83,68,0,0,18.2,0.624,27,0 421 | 3,129,64,29,115,26.4,0.21899999999999997,28,1 422 | 1,119,88,41,170,45.3,0.507,26,0 423 | 2,94,68,18,76,26.0,0.561,21,0 424 | 0,102,64,46,78,40.6,0.496,21,0 425 | 2,115,64,22,0,30.8,0.42100000000000004,21,0 426 | 8,151,78,32,210,42.9,0.516,36,1 427 | 4,184,78,39,277,37.0,0.264,31,1 428 | 0,94,0,0,0,0.0,0.256,25,0 429 | 1,181,64,30,180,34.1,0.32799999999999996,38,1 430 | 0,135,94,46,145,40.6,0.284,26,0 431 | 1,95,82,25,180,35.0,0.233,43,1 432 | 2,99,0,0,0,22.2,0.10800000000000001,23,0 433 | 3,89,74,16,85,30.4,0.551,38,0 434 | 1,80,74,11,60,30.0,0.527,22,0 435 | 2,139,75,0,0,25.6,0.16699999999999998,29,0 436 | 1,90,68,8,0,24.5,1.138,36,0 437 | 0,141,0,0,0,42.4,0.205,29,1 438 | 12,140,85,33,0,37.4,0.244,41,0 439 | 5,147,75,0,0,29.9,0.434,28,0 440 | 1,97,70,15,0,18.2,0.147,21,0 441 | 6,107,88,0,0,36.8,0.727,31,0 442 | 0,189,104,25,0,34.3,0.435,41,1 443 | 2,83,66,23,50,32.2,0.49700000000000005,22,0 444 | 4,117,64,27,120,33.2,0.23,24,0 445 | 8,108,70,0,0,30.5,0.955,33,1 446 | 4,117,62,12,0,29.7,0.38,30,1 447 | 0,180,78,63,14,59.4,2.42,25,1 448 | 1,100,72,12,70,25.3,0.6579999999999999,28,0 449 | 0,95,80,45,92,36.5,0.33,26,0 450 | 0,104,64,37,64,33.6,0.51,22,1 451 | 
0,120,74,18,63,30.5,0.285,26,0 452 | 1,82,64,13,95,21.2,0.415,23,0 453 | 2,134,70,0,0,28.9,0.542,23,1 454 | 0,91,68,32,210,39.9,0.381,25,0 455 | 2,119,0,0,0,19.6,0.8320000000000001,72,0 456 | 2,100,54,28,105,37.8,0.498,24,0 457 | 14,175,62,30,0,33.6,0.212,38,1 458 | 1,135,54,0,0,26.7,0.687,62,0 459 | 5,86,68,28,71,30.2,0.364,24,0 460 | 10,148,84,48,237,37.6,1.001,51,1 461 | 9,134,74,33,60,25.9,0.46,81,0 462 | 9,120,72,22,56,20.8,0.733,48,0 463 | 1,71,62,0,0,21.8,0.41600000000000004,26,0 464 | 8,74,70,40,49,35.3,0.705,39,0 465 | 5,88,78,30,0,27.6,0.258,37,0 466 | 10,115,98,0,0,24.0,1.022,34,0 467 | 0,124,56,13,105,21.8,0.452,21,0 468 | 0,74,52,10,36,27.8,0.26899999999999996,22,0 469 | 0,97,64,36,100,36.8,0.6,25,0 470 | 8,120,0,0,0,30.0,0.183,38,1 471 | 6,154,78,41,140,46.1,0.5710000000000001,27,0 472 | 1,144,82,40,0,41.3,0.607,28,0 473 | 0,137,70,38,0,33.2,0.17,22,0 474 | 0,119,66,27,0,38.8,0.259,22,0 475 | 7,136,90,0,0,29.9,0.21,50,0 476 | 4,114,64,0,0,28.9,0.126,24,0 477 | 0,137,84,27,0,27.3,0.231,59,0 478 | 2,105,80,45,191,33.7,0.711,29,1 479 | 7,114,76,17,110,23.8,0.466,31,0 480 | 8,126,74,38,75,25.9,0.162,39,0 481 | 4,132,86,31,0,28.0,0.419,63,0 482 | 3,158,70,30,328,35.5,0.344,35,1 483 | 0,123,88,37,0,35.2,0.19699999999999998,29,0 484 | 4,85,58,22,49,27.8,0.306,28,0 485 | 0,84,82,31,125,38.2,0.233,23,0 486 | 0,145,0,0,0,44.2,0.63,31,1 487 | 0,135,68,42,250,42.3,0.365,24,1 488 | 1,139,62,41,480,40.7,0.536,21,0 489 | 0,173,78,32,265,46.5,1.159,58,0 490 | 4,99,72,17,0,25.6,0.294,28,0 491 | 8,194,80,0,0,26.1,0.551,67,0 492 | 2,83,65,28,66,36.8,0.629,24,0 493 | 2,89,90,30,0,33.5,0.292,42,0 494 | 4,99,68,38,0,32.8,0.145,33,0 495 | 4,125,70,18,122,28.9,1.1440000000000001,45,1 496 | 3,80,0,0,0,0.0,0.174,22,0 497 | 6,166,74,0,0,26.6,0.304,66,0 498 | 5,110,68,0,0,26.0,0.292,30,0 499 | 2,81,72,15,76,30.1,0.547,25,0 500 | 7,195,70,33,145,25.1,0.163,55,1 501 | 6,154,74,32,193,29.3,0.8390000000000001,39,0 502 | 2,117,90,19,71,25.2,0.313,21,0 503 | 
3,84,72,32,0,37.2,0.267,28,0 504 | 6,0,68,41,0,39.0,0.727,41,1 505 | 7,94,64,25,79,33.3,0.738,41,0 506 | 3,96,78,39,0,37.3,0.23800000000000002,40,0 507 | 10,75,82,0,0,33.3,0.263,38,0 508 | 0,180,90,26,90,36.5,0.314,35,1 509 | 1,130,60,23,170,28.6,0.6920000000000001,21,0 510 | 2,84,50,23,76,30.4,0.968,21,0 511 | 8,120,78,0,0,25.0,0.409,64,0 512 | 12,84,72,31,0,29.7,0.297,46,1 513 | 0,139,62,17,210,22.1,0.207,21,0 514 | 9,91,68,0,0,24.2,0.2,58,0 515 | 2,91,62,0,0,27.3,0.525,22,0 516 | 3,99,54,19,86,25.6,0.154,24,0 517 | 3,163,70,18,105,31.6,0.268,28,1 518 | 9,145,88,34,165,30.3,0.7709999999999999,53,1 519 | 7,125,86,0,0,37.6,0.304,51,0 520 | 13,76,60,0,0,32.8,0.18,41,0 521 | 6,129,90,7,326,19.6,0.5820000000000001,60,0 522 | 2,68,70,32,66,25.0,0.187,25,0 523 | 3,124,80,33,130,33.2,0.305,26,0 524 | 6,114,0,0,0,0.0,0.18899999999999997,26,0 525 | 9,130,70,0,0,34.2,0.652,45,1 526 | 3,125,58,0,0,31.6,0.151,24,0 527 | 3,87,60,18,0,21.8,0.444,21,0 528 | 1,97,64,19,82,18.2,0.299,21,0 529 | 3,116,74,15,105,26.3,0.107,24,0 530 | 0,117,66,31,188,30.8,0.493,22,0 531 | 0,111,65,0,0,24.6,0.66,31,0 532 | 2,122,60,18,106,29.8,0.7170000000000001,22,0 533 | 0,107,76,0,0,45.3,0.6859999999999999,24,0 534 | 1,86,66,52,65,41.3,0.917,29,0 535 | 6,91,0,0,0,29.8,0.501,31,0 536 | 1,77,56,30,56,33.3,1.251,24,0 537 | 4,132,0,0,0,32.9,0.302,23,1 538 | 0,105,90,0,0,29.6,0.19699999999999998,46,0 539 | 0,57,60,0,0,21.7,0.735,67,0 540 | 0,127,80,37,210,36.3,0.804,23,0 541 | 3,129,92,49,155,36.4,0.968,32,1 542 | 8,100,74,40,215,39.4,0.6609999999999999,43,1 543 | 3,128,72,25,190,32.4,0.5489999999999999,27,1 544 | 10,90,85,32,0,34.9,0.825,56,1 545 | 4,84,90,23,56,39.5,0.159,25,0 546 | 1,88,78,29,76,32.0,0.365,29,0 547 | 8,186,90,35,225,34.5,0.423,37,1 548 | 5,187,76,27,207,43.6,1.034,53,1 549 | 4,131,68,21,166,33.1,0.16,28,0 550 | 1,164,82,43,67,32.8,0.341,50,0 551 | 4,189,110,31,0,28.5,0.68,37,0 552 | 1,116,70,28,0,27.4,0.204,21,0 553 | 3,84,68,30,106,31.9,0.591,25,0 554 | 6,114,88,0,0,27.8,0.247,66,0 
555 | 1,88,62,24,44,29.9,0.42200000000000004,23,0 556 | 1,84,64,23,115,36.9,0.47100000000000003,28,0 557 | 7,124,70,33,215,25.5,0.161,37,0 558 | 1,97,70,40,0,38.1,0.218,30,0 559 | 8,110,76,0,0,27.8,0.237,58,0 560 | 11,103,68,40,0,46.2,0.126,42,0 561 | 11,85,74,0,0,30.1,0.3,35,0 562 | 6,125,76,0,0,33.8,0.121,54,1 563 | 0,198,66,32,274,41.3,0.502,28,1 564 | 1,87,68,34,77,37.6,0.401,24,0 565 | 6,99,60,19,54,26.9,0.49700000000000005,32,0 566 | 0,91,80,0,0,32.4,0.601,27,0 567 | 2,95,54,14,88,26.1,0.748,22,0 568 | 1,99,72,30,18,38.6,0.41200000000000003,21,0 569 | 6,92,62,32,126,32.0,0.085,46,0 570 | 4,154,72,29,126,31.3,0.33799999999999997,37,0 571 | 0,121,66,30,165,34.3,0.203,33,1 572 | 3,78,70,0,0,32.5,0.27,39,0 573 | 2,130,96,0,0,22.6,0.268,21,0 574 | 3,111,58,31,44,29.5,0.43,22,0 575 | 2,98,60,17,120,34.7,0.198,22,0 576 | 1,143,86,30,330,30.1,0.892,23,0 577 | 1,119,44,47,63,35.5,0.28,25,0 578 | 6,108,44,20,130,24.0,0.813,35,0 579 | 2,118,80,0,0,42.9,0.693,21,1 580 | 10,133,68,0,0,27.0,0.245,36,0 581 | 2,197,70,99,0,34.7,0.575,62,1 582 | 0,151,90,46,0,42.1,0.371,21,1 583 | 6,109,60,27,0,25.0,0.20600000000000002,27,0 584 | 12,121,78,17,0,26.5,0.259,62,0 585 | 8,100,76,0,0,38.7,0.19,42,0 586 | 8,124,76,24,600,28.7,0.687,52,1 587 | 1,93,56,11,0,22.5,0.41700000000000004,22,0 588 | 8,143,66,0,0,34.9,0.129,41,1 589 | 6,103,66,0,0,24.3,0.249,29,0 590 | 3,176,86,27,156,33.3,1.1540000000000001,52,1 591 | 0,73,0,0,0,21.1,0.342,25,0 592 | 11,111,84,40,0,46.8,0.925,45,1 593 | 2,112,78,50,140,39.4,0.175,24,0 594 | 3,132,80,0,0,34.4,0.402,44,1 595 | 2,82,52,22,115,28.5,1.699,25,0 596 | 6,123,72,45,230,33.6,0.733,34,0 597 | 0,188,82,14,185,32.0,0.682,22,1 598 | 0,67,76,0,0,45.3,0.19399999999999998,46,0 599 | 1,89,24,19,25,27.8,0.5589999999999999,21,0 600 | 1,173,74,0,0,36.8,0.08800000000000001,38,1 601 | 1,109,38,18,120,23.1,0.40700000000000003,26,0 602 | 1,108,88,19,0,27.1,0.4,24,0 603 | 6,96,0,0,0,23.7,0.19,28,0 604 | 1,124,74,36,0,27.8,0.1,30,0 605 | 
7,150,78,29,126,35.2,0.6920000000000001,54,1 606 | 4,183,0,0,0,28.4,0.212,36,1 607 | 1,124,60,32,0,35.8,0.514,21,0 608 | 1,181,78,42,293,40.0,1.258,22,1 609 | 1,92,62,25,41,19.5,0.48200000000000004,25,0 610 | 0,152,82,39,272,41.5,0.27,27,0 611 | 1,111,62,13,182,24.0,0.138,23,0 612 | 3,106,54,21,158,30.9,0.292,24,0 613 | 3,174,58,22,194,32.9,0.593,36,1 614 | 7,168,88,42,321,38.2,0.787,40,1 615 | 6,105,80,28,0,32.5,0.878,26,0 616 | 11,138,74,26,144,36.1,0.557,50,1 617 | 3,106,72,0,0,25.8,0.207,27,0 618 | 6,117,96,0,0,28.7,0.157,30,0 619 | 2,68,62,13,15,20.1,0.257,23,0 620 | 9,112,82,24,0,28.2,1.2819999999999998,50,1 621 | 0,119,0,0,0,32.4,0.141,24,1 622 | 2,112,86,42,160,38.4,0.24600000000000002,28,0 623 | 2,92,76,20,0,24.2,1.6980000000000002,28,0 624 | 6,183,94,0,0,40.8,1.4609999999999999,45,0 625 | 0,94,70,27,115,43.5,0.34700000000000003,21,0 626 | 2,108,64,0,0,30.8,0.158,21,0 627 | 4,90,88,47,54,37.7,0.36200000000000004,29,0 628 | 0,125,68,0,0,24.7,0.20600000000000002,21,0 629 | 0,132,78,0,0,32.4,0.39299999999999996,21,0 630 | 5,128,80,0,0,34.6,0.14400000000000002,45,0 631 | 4,94,65,22,0,24.7,0.14800000000000002,21,0 632 | 7,114,64,0,0,27.4,0.732,34,1 633 | 0,102,78,40,90,34.5,0.23800000000000002,24,0 634 | 2,111,60,0,0,26.2,0.34299999999999997,23,0 635 | 1,128,82,17,183,27.5,0.115,22,0 636 | 10,92,62,0,0,25.9,0.16699999999999998,31,0 637 | 13,104,72,0,0,31.2,0.465,38,1 638 | 5,104,74,0,0,28.8,0.153,48,0 639 | 2,94,76,18,66,31.6,0.649,23,0 640 | 7,97,76,32,91,40.9,0.871,32,1 641 | 1,100,74,12,46,19.5,0.149,28,0 642 | 0,102,86,17,105,29.3,0.695,27,0 643 | 4,128,70,0,0,34.3,0.303,24,0 644 | 6,147,80,0,0,29.5,0.17800000000000002,50,1 645 | 4,90,0,0,0,28.0,0.61,31,0 646 | 3,103,72,30,152,27.6,0.73,27,0 647 | 2,157,74,35,440,39.4,0.134,30,0 648 | 1,167,74,17,144,23.4,0.447,33,1 649 | 0,179,50,36,159,37.8,0.455,22,1 650 | 11,136,84,35,130,28.3,0.26,42,1 651 | 0,107,60,25,0,26.4,0.133,23,0 652 | 1,91,54,25,100,25.2,0.23399999999999999,23,0 653 | 
1,117,60,23,106,33.8,0.466,27,0 654 | 5,123,74,40,77,34.1,0.26899999999999996,28,0 655 | 2,120,54,0,0,26.8,0.455,27,0 656 | 1,106,70,28,135,34.2,0.142,22,0 657 | 2,155,52,27,540,38.7,0.24,25,1 658 | 2,101,58,35,90,21.8,0.155,22,0 659 | 1,120,80,48,200,38.9,1.162,41,0 660 | 11,127,106,0,0,39.0,0.19,51,0 661 | 3,80,82,31,70,34.2,1.2919999999999998,27,1 662 | 10,162,84,0,0,27.7,0.182,54,0 663 | 1,199,76,43,0,42.9,1.3940000000000001,22,1 664 | 8,167,106,46,231,37.6,0.165,43,1 665 | 9,145,80,46,130,37.9,0.637,40,1 666 | 6,115,60,39,0,33.7,0.245,40,1 667 | 1,112,80,45,132,34.8,0.217,24,0 668 | 4,145,82,18,0,32.5,0.235,70,1 669 | 10,111,70,27,0,27.5,0.141,40,1 670 | 6,98,58,33,190,34.0,0.43,43,0 671 | 9,154,78,30,100,30.9,0.16399999999999998,45,0 672 | 6,165,68,26,168,33.6,0.631,49,0 673 | 1,99,58,10,0,25.4,0.551,21,0 674 | 10,68,106,23,49,35.5,0.285,47,0 675 | 3,123,100,35,240,57.3,0.88,22,0 676 | 8,91,82,0,0,35.6,0.5870000000000001,68,0 677 | 6,195,70,0,0,30.9,0.32799999999999996,31,1 678 | 9,156,86,0,0,24.8,0.23,53,1 679 | 0,93,60,0,0,35.3,0.263,25,0 680 | 3,121,52,0,0,36.0,0.127,25,1 681 | 2,101,58,17,265,24.2,0.614,23,0 682 | 2,56,56,28,45,24.2,0.332,22,0 683 | 0,162,76,36,0,49.6,0.364,26,1 684 | 0,95,64,39,105,44.6,0.366,22,0 685 | 4,125,80,0,0,32.3,0.536,27,1 686 | 5,136,82,0,0,0.0,0.64,69,0 687 | 2,129,74,26,205,33.2,0.591,25,0 688 | 3,130,64,0,0,23.1,0.314,22,0 689 | 1,107,50,19,0,28.3,0.18100000000000002,29,0 690 | 1,140,74,26,180,24.1,0.828,23,0 691 | 1,144,82,46,180,46.1,0.335,46,1 692 | 8,107,80,0,0,24.6,0.856,34,0 693 | 13,158,114,0,0,42.3,0.257,44,1 694 | 2,121,70,32,95,39.1,0.8859999999999999,23,0 695 | 7,129,68,49,125,38.5,0.439,43,1 696 | 2,90,60,0,0,23.5,0.191,25,0 697 | 7,142,90,24,480,30.4,0.128,43,1 698 | 3,169,74,19,125,29.9,0.268,31,1 699 | 0,99,0,0,0,25.0,0.253,22,0 700 | 4,127,88,11,155,34.5,0.598,28,0 701 | 4,118,70,0,0,44.5,0.904,26,0 702 | 2,122,76,27,200,35.9,0.483,26,0 703 | 6,125,78,31,0,27.6,0.565,49,1 704 | 1,168,88,29,0,35.0,0.905,52,1 
705 | 2,129,0,0,0,38.5,0.304,41,0 706 | 4,110,76,20,100,28.4,0.11800000000000001,27,0 707 | 6,80,80,36,0,39.8,0.177,28,0 708 | 10,115,0,0,0,0.0,0.261,30,1 709 | 2,127,46,21,335,34.4,0.17600000000000002,22,0 710 | 9,164,78,0,0,32.8,0.14800000000000002,45,1 711 | 2,93,64,32,160,38.0,0.674,23,1 712 | 3,158,64,13,387,31.2,0.295,24,0 713 | 5,126,78,27,22,29.6,0.439,40,0 714 | 10,129,62,36,0,41.2,0.441,38,1 715 | 0,134,58,20,291,26.4,0.35200000000000004,21,0 716 | 3,102,74,0,0,29.5,0.121,32,0 717 | 7,187,50,33,392,33.9,0.826,34,1 718 | 3,173,78,39,185,33.8,0.97,31,1 719 | 10,94,72,18,0,23.1,0.595,56,0 720 | 1,108,60,46,178,35.5,0.415,24,0 721 | 5,97,76,27,0,35.6,0.37799999999999995,52,1 722 | 4,83,86,19,0,29.3,0.317,34,0 723 | 1,114,66,36,200,38.1,0.289,21,0 724 | 1,149,68,29,127,29.3,0.349,42,1 725 | 5,117,86,30,105,39.1,0.251,42,0 726 | 1,111,94,0,0,32.8,0.265,45,0 727 | 4,112,78,40,0,39.4,0.23600000000000002,38,0 728 | 1,116,78,29,180,36.1,0.496,25,0 729 | 0,141,84,26,0,32.4,0.433,22,0 730 | 2,175,88,0,0,22.9,0.326,22,0 731 | 2,92,52,0,0,30.1,0.141,22,0 732 | 3,130,78,23,79,28.4,0.32299999999999995,34,1 733 | 8,120,86,0,0,28.4,0.259,22,1 734 | 2,174,88,37,120,44.5,0.6459999999999999,24,1 735 | 2,106,56,27,165,29.0,0.426,22,0 736 | 2,105,75,0,0,23.3,0.56,53,0 737 | 4,95,60,32,0,35.4,0.284,28,0 738 | 0,126,86,27,120,27.4,0.515,21,0 739 | 8,65,72,23,0,32.0,0.6,42,0 740 | 2,99,60,17,160,36.6,0.45299999999999996,21,0 741 | 1,102,74,0,0,39.5,0.293,42,1 742 | 11,120,80,37,150,42.3,0.785,48,1 743 | 3,102,44,20,94,30.8,0.4,26,0 744 | 1,109,58,18,116,28.5,0.21899999999999997,22,0 745 | 9,140,94,0,0,32.7,0.7340000000000001,45,1 746 | 13,153,88,37,140,40.6,1.1740000000000002,39,0 747 | 12,100,84,33,105,30.0,0.488,46,0 748 | 1,147,94,41,0,49.3,0.358,27,1 749 | 1,81,74,41,57,46.3,1.0959999999999999,32,0 750 | 3,187,70,22,200,36.4,0.408,36,1 751 | 6,162,62,0,0,24.3,0.17800000000000002,50,1 752 | 4,136,70,0,0,31.2,1.182,22,1 753 | 1,121,78,39,74,39.0,0.261,28,0 754 | 
3,108,62,24,0,26.0,0.223,25,0 755 | 0,181,88,44,510,43.3,0.222,26,1 756 | 8,154,78,32,0,32.4,0.44299999999999995,45,1 757 | 1,128,88,39,110,36.5,1.057,37,1 758 | 7,137,90,41,0,32.0,0.391,39,0 759 | 0,123,72,0,0,36.3,0.258,52,1 760 | 1,106,76,0,0,37.5,0.19699999999999998,26,0 761 | 6,190,92,0,0,35.5,0.278,66,1 762 | 2,88,58,26,16,28.4,0.7659999999999999,22,0 763 | 9,170,74,31,0,44.0,0.40299999999999997,43,1 764 | 9,89,62,0,0,22.5,0.142,33,0 765 | 10,101,76,48,180,32.9,0.171,63,0 766 | 2,122,70,27,0,36.8,0.34,27,0 767 | 5,121,72,23,112,26.2,0.245,30,0 768 | 1,126,60,0,0,30.1,0.349,47,1 769 | 1,93,70,31,0,30.4,0.315,23,0 770 | -------------------------------------------------------------------------------- /Principal Component Analysis/dimensionality reduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "toc": true 7 | }, 8 | "source": [ 9 | "

Table of Contents

\n", 10 | "
" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": { 17 | "ExecuteTime": { 18 | "end_time": "2020-04-17T16:53:43.976333Z", 19 | "start_time": "2020-04-17T16:53:40.941030Z" 20 | } 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "import numpy as np\n", 25 | "import matplotlib.pyplot as plt\n", 26 | "from sklearn.datasets import load_iris\n", 27 | "import pandas as pd\n", 28 | "from sklearn.model_selection import train_test_split\n", 29 | "from sklearn.linear_model import LogisticRegression\n", 30 | "from sklearn.metrics import accuracy_score" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": { 37 | "ExecuteTime": { 38 | "end_time": "2020-04-17T16:53:43.992027Z", 39 | "start_time": "2020-04-17T16:53:43.981690Z" 40 | } 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "import warnings\n", 45 | "warnings.filterwarnings(\"ignore\")" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "# Dimensionality reduction Using PCA" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 3, 58 | "metadata": { 59 | "ExecuteTime": { 60 | "end_time": "2020-04-17T16:53:44.209748Z", 61 | "start_time": "2020-04-17T16:53:43.997649Z" 62 | } 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "class PCA:\n", 67 | " \n", 68 | " def __init__(self, n_component, solver=\"svd\"):\n", 69 | " self.n_component = n_component\n", 70 | " self.solver=solver\n", 71 | " self.components = None\n", 72 | " self.mean = None\n", 73 | " \n", 74 | " \n", 75 | " def fit(self, X):\n", 76 | " self.mean = X.mean(axis=0)\n", 77 | " X = X - self.mean\n", 78 | " \n", 79 | " # expects row=feature, column=sample \n", 80 | " # cov = np.cov(X.T)\n", 81 | " cov = (X - X.mean(axis=0)).T.dot(X - X.mean(axis=0)) / (X.shape[0] - 1)\n", 82 | " \n", 83 | " \n", 84 | " if self.solver == \"eig\":\n", 85 | " # eigenvalue[i] -> eigenvector[:, i]\n", 86 | " eigenvalues, eigenvectors = 
np.linalg.eig(cov)\n", 87 | " \n", 88 | " eigenvectors = eigenvectors.T\n", 89 | " \n", 90 | " idxs = np.argsort(eigenvalues)[::-1]\n", 91 | " eigenvalues = eigenvalues[idxs]\n", 92 | " eigenvectors = eigenvectors[idxs]\n", 93 | " \n", 94 | " self.components = eigenvectors[0:self.n_component]\n", 95 | " \n", 96 | " \n", 97 | " else: \n", 98 | " # SVD\n", 99 | " _, S, Vt = np.linalg.svd(X)\n", 100 | " idxs = np.argsort(S)[::-1]\n", 101 | " \n", 102 | " S = S[idxs]\n", 103 | " Vt = Vt[idxs]\n", 104 | " \n", 105 | " self.components = Vt[0:self.n_component]\n", 106 | " \n", 107 | " \n", 108 | " def transform(self, X):\n", 109 | " X = X - self.mean\n", 110 | " return np.dot(X, self.components.T)\n", 111 | " \n", 112 | " def fit_transform(self, X):\n", 113 | " self.fit(X)\n", 114 | " return self.transform(X)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "# Feature Extraction on Diabetes dataset" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 4, 127 | "metadata": { 128 | "ExecuteTime": { 129 | "end_time": "2020-04-17T16:53:44.419215Z", 130 | "start_time": "2020-04-17T16:53:44.218687Z" 131 | } 132 | }, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/html": [ 137 | "
\n", 138 | "\n", 151 | "\n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | "
PregnanciesGlucoseBloodPressureSkinThicknessInsulinBMIDiabetesPedigreeFunctionAgeOutcome
061487235033.60.627501
11856629026.60.351310
28183640023.30.672321
318966239428.10.167210
40137403516843.12.288331
\n", 229 | "
" 230 | ], 231 | "text/plain": [ 232 | " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n", 233 | "0 6 148 72 35 0 33.6 \n", 234 | "1 1 85 66 29 0 26.6 \n", 235 | "2 8 183 64 0 0 23.3 \n", 236 | "3 1 89 66 23 94 28.1 \n", 237 | "4 0 137 40 35 168 43.1 \n", 238 | "\n", 239 | " DiabetesPedigreeFunction Age Outcome \n", 240 | "0 0.627 50 1 \n", 241 | "1 0.351 31 0 \n", 242 | "2 0.672 32 1 \n", 243 | "3 0.167 21 0 \n", 244 | "4 2.288 33 1 " 245 | ] 246 | }, 247 | "execution_count": 4, 248 | "metadata": {}, 249 | "output_type": "execute_result" 250 | } 251 | ], 252 | "source": [ 253 | "diabetes_data = pd.read_csv(r'../datasets/diabetes_data.csv')\n", 254 | "diabetes_data.head()" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 5, 260 | "metadata": { 261 | "ExecuteTime": { 262 | "end_time": "2020-04-17T16:53:44.499226Z", 263 | "start_time": "2020-04-17T16:53:44.428118Z" 264 | } 265 | }, 266 | "outputs": [], 267 | "source": [ 268 | "X = diabetes_data[diabetes_data.columns[:-1]].values\n", 269 | "y = diabetes_data[diabetes_data.columns[-1]].values" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": { 275 | "ExecuteTime": { 276 | "end_time": "2020-04-17T16:43:53.350520Z", 277 | "start_time": "2020-04-17T16:43:53.342122Z" 278 | } 279 | }, 280 | "source": [ 281 | "## Accuracy Before applying PCA" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 6, 287 | "metadata": { 288 | "ExecuteTime": { 289 | "end_time": "2020-04-17T16:53:44.652486Z", 290 | "start_time": "2020-04-17T16:53:44.507580Z" 291 | } 292 | }, 293 | "outputs": [], 294 | "source": [ 295 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 7, 301 | "metadata": { 302 | "ExecuteTime": { 303 | "end_time": "2020-04-17T16:53:44.948070Z", 304 | "start_time": "2020-04-17T16:53:44.666777Z" 305 | } 306 | 
}, 307 | "outputs": [ 308 | { 309 | "name": "stdout", 310 | "output_type": "stream", 311 | "text": [ 312 | "Before feature extraction\n", 313 | "Number of features of X: 8\n", 314 | "Accuracy: 0.7142857142857143\n" 315 | ] 316 | } 317 | ], 318 | "source": [ 319 | "# using Logistic Regression\n", 320 | "\n", 321 | "lr = LogisticRegression()\n", 322 | "lr.fit(X_train, y_train)\n", 323 | "y_pred = lr.predict(X_test)\n", 324 | "print(\"Before feature extraction\")\n", 325 | "print(f\"Number of features of X: {X_train.shape[1]}\")\n", 326 | "print(f\"Accuracy: {accuracy_score(y_test, y_pred)}\")" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "## Accuracy After applying PCA" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 8, 339 | "metadata": { 340 | "ExecuteTime": { 341 | "end_time": "2020-04-17T16:53:45.049918Z", 342 | "start_time": "2020-04-17T16:53:44.960489Z" 343 | } 344 | }, 345 | "outputs": [], 346 | "source": [ 347 | "# Transforming Dataset\n", 348 | "\n", 349 | "pca = PCA(n_component=6)\n", 350 | "X_transformed = pca.fit_transform(X)\n", 351 | "X_train, X_test, y_train, y_test = train_test_split(X_transformed, y, test_size=0.2, stratify=y, random_state=42)" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 9, 357 | "metadata": { 358 | "ExecuteTime": { 359 | "end_time": "2020-04-17T16:53:45.162775Z", 360 | "start_time": "2020-04-17T16:53:45.049918Z" 361 | } 362 | }, 363 | "outputs": [ 364 | { 365 | "name": "stdout", 366 | "output_type": "stream", 367 | "text": [ 368 | "After feature extraction\n", 369 | "Number of features of X: 6\n", 370 | "Accuracy: 0.7337662337662337\n" 371 | ] 372 | } 373 | ], 374 | "source": [ 375 | "lr = LogisticRegression()\n", 376 | "lr.fit(X_train, y_train)\n", 377 | "y_pred = lr.predict(X_test)\n", 378 | "print(\"After feature extraction\")\n", 379 | "print(f\"Number of features of X: {X_train.shape[1]}\")\n", 380 | 
"print(f\"Accuracy: {accuracy_score(y_test, y_pred)}\")" 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "metadata": { 386 | "ExecuteTime": { 387 | "end_time": "2020-04-17T16:53:17.733258Z", 388 | "start_time": "2020-04-17T16:53:17.726273Z" 389 | } 390 | }, 391 | "source": [ 392 | "# On Mnist Dataset\n", 393 | "\n", 394 | "Reducing number of features for visualization" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": 10, 400 | "metadata": { 401 | "ExecuteTime": { 402 | "end_time": "2020-04-17T16:53:45.271370Z", 403 | "start_time": "2020-04-17T16:53:45.165615Z" 404 | } 405 | }, 406 | "outputs": [], 407 | "source": [ 408 | "data = load_iris()\n", 409 | "X = data.data\n", 410 | "y = data.target" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": 11, 416 | "metadata": { 417 | "ExecuteTime": { 418 | "end_time": "2020-04-17T16:53:45.397396Z", 419 | "start_time": "2020-04-17T16:53:45.276300Z" 420 | } 421 | }, 422 | "outputs": [], 423 | "source": [ 424 | "pca = PCA(n_component=2)\n", 425 | "X_projected = pca.fit_transform(X)" 426 | ] 427 | }, 428 | { 429 | "cell_type": "code", 430 | "execution_count": 12, 431 | "metadata": { 432 | "ExecuteTime": { 433 | "end_time": "2020-04-17T16:53:45.527547Z", 434 | "start_time": "2020-04-17T16:53:45.404998Z" 435 | } 436 | }, 437 | "outputs": [ 438 | { 439 | "name": "stdout", 440 | "output_type": "stream", 441 | "text": [ 442 | "Shape of X: (150, 4)\n", 443 | "Shape of transformed X: (150, 2)\n" 444 | ] 445 | } 446 | ], 447 | "source": [ 448 | "print(f\"Shape of X: {X.shape}\")\n", 449 | "print(f\"Shape of transformed X: {X_projected.shape}\")" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": 13, 455 | "metadata": { 456 | "ExecuteTime": { 457 | "end_time": "2020-04-17T16:53:45.655688Z", 458 | "start_time": "2020-04-17T16:53:45.537553Z" 459 | } 460 | }, 461 | "outputs": [], 462 | "source": [ 463 | "x1 = X_projected[:, 0]\n", 464 | "x2 = 
X_projected[:, 1]" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": 14, 470 | "metadata": { 471 | "ExecuteTime": { 472 | "end_time": "2020-04-17T16:53:46.104685Z", 473 | "start_time": "2020-04-17T16:53:45.665223Z" 474 | } 475 | }, 476 | "outputs": [ 477 | { 478 | "data": { 479 | "text/plain": [ 480 | "(150, 3)" 481 | ] 482 | }, 483 | "execution_count": 14, 484 | "metadata": {}, 485 | "output_type": "execute_result" 486 | } 487 | ], 488 | "source": [ 489 | "X_trans = np.c_[x1, x2, y]\n", 490 | "X_trans.shape" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": 15, 496 | "metadata": { 497 | "ExecuteTime": { 498 | "end_time": "2020-04-17T16:53:47.087110Z", 499 | "start_time": "2020-04-17T16:53:46.115345Z" 500 | } 501 | }, 502 | "outputs": [ 503 | { 504 | "data": { 505 | "image/png": "\n", 506 | "text/plain": [ 507 | "
" 508 | ] 509 | }, 510 | "metadata": { 511 | "needs_background": "light" 512 | }, 513 | "output_type": "display_data" 514 | } 515 | ], 516 | "source": [ 517 | "colors = [\"red\", \"blue\", \"green\"]\n", 518 | "for i in range(3):\n", 519 | " plt.scatter(X_trans[X_trans[:, 2] == i][:, 0], X_trans[X_trans[:, 2] == i][:, 1], c=colors[i], \n", 520 | " edgecolors=\"k\", alpha=0.7, label=data.target_names[i])\n", 521 | "\n", 522 | "plt.xlabel(\"Principal Component 1\")\n", 523 | "plt.ylabel(\"Principal Component 2\")\n", 524 | "plt.legend()\n", 525 | "plt.show()" 526 | ] 527 | } 528 | ], 529 | "metadata": { 530 | "kernelspec": { 531 | "display_name": "Python 3", 532 | "language": "python", 533 | "name": "python3" 534 | }, 535 | "language_info": { 536 | "codemirror_mode": { 537 | "name": "ipython", 538 | "version": 3 539 | }, 540 | "file_extension": ".py", 541 | "mimetype": "text/x-python", 542 | "name": "python", 543 | "nbconvert_exporter": "python", 544 | "pygments_lexer": "ipython3", 545 | "version": "3.7.6" 546 | }, 547 | "toc": { 548 | "base_numbering": 1, 549 | "nav_menu": {}, 550 | "number_sections": true, 551 | "sideBar": true, 552 | "skip_h1_title": false, 553 | "title_cell": "Table of Contents", 554 | "title_sidebar": "Contents", 555 | "toc_cell": true, 556 | "toc_position": {}, 557 | "toc_section_display": true, 558 | "toc_window_display": false 559 | }, 560 | "varInspector": { 561 | "cols": { 562 | "lenName": 16, 563 | "lenType": 16, 564 | "lenVar": 40 565 | }, 566 | "kernels_config": { 567 | "python": { 568 | "delete_cmd_postfix": "", 569 | "delete_cmd_prefix": "del ", 570 | "library": "var_list.py", 571 | "varRefreshCmd": "print(var_dic_list())" 572 | }, 573 | "r": { 574 | "delete_cmd_postfix": ") ", 575 | "delete_cmd_prefix": "rm(", 576 | "library": "var_list.r", 577 | "varRefreshCmd": "cat(var_dic_list()) " 578 | } 579 | }, 580 | "types_to_exclude": [ 581 | "module", 582 | "function", 583 | "builtin_function_or_method", 584 | "instance", 585 | "_Feature" 
586 | ], 587 | "window_display": false 588 | } 589 | }, 590 | "nbformat": 4, 591 | "nbformat_minor": 4 592 | } 593 | -------------------------------------------------------------------------------- /K-Nearest Neigbors/KNN_weighted_classification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2020-02-16T23:07:58.179814Z", 9 | "start_time": "2020-02-16T23:07:56.458785Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "import pandas as pd\n", 15 | "import numpy as np\n", 16 | "import matplotlib.pyplot as plt\n", 17 | "from sklearn.model_selection import StratifiedShuffleSplit\n", 18 | "import operator" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": { 25 | "ExecuteTime": { 26 | "end_time": "2020-02-16T23:07:58.195684Z", 27 | "start_time": "2020-02-16T23:07:58.185150Z" 28 | } 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "def getDataset(loc):\n", 33 | " columns = ['sepal_length', 'sepal_width','petal_length','petal_width', 'class']\n", 34 | " data = pd.read_csv(loc, header=None, names=columns)\n", 35 | " return data" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": { 42 | "ExecuteTime": { 43 | "end_time": "2020-02-16T23:07:58.447880Z", 44 | "start_time": "2020-02-16T23:07:58.201359Z" 45 | } 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "def splitDataset(dataset, ratio):\n", 50 | " split = StratifiedShuffleSplit(n_splits=1, test_size=ratio, random_state=42)\n", 51 | " \n", 52 | " for train_index, test_index in split.split(dataset, dataset['class']):\n", 53 | " train_data = dataset.loc[train_index]\n", 54 | " test_data = dataset.loc[test_index]\n", 55 | " \n", 56 | " \n", 57 | " return train_data, test_data" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 4, 63 | 
"metadata": { 64 | "ExecuteTime": { 65 | "end_time": "2020-02-16T23:07:58.630056Z", 66 | "start_time": "2020-02-16T23:07:58.450499Z" 67 | } 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "def euclideanDistance(instance1, instance2):\n", 72 | " instance1 = np.array(instance1)\n", 73 | " instance2 = np.array(instance2)\n", 74 | " distance = np.sum(np.power(instance1 - instance2, 2))\n", 75 | " return np.sqrt(distance)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 5, 81 | "metadata": { 82 | "ExecuteTime": { 83 | "end_time": "2020-02-16T23:07:58.768916Z", 84 | "start_time": "2020-02-16T23:07:58.632827Z" 85 | } 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "# using pandas indexing methods\n", 90 | "\n", 91 | "def getKNeighbors(training_set, test_instance, k):\n", 92 | " distances = []\n", 93 | " classes = training_set['class'].unique()\n", 94 | " for row in range(len(training_set)):\n", 95 | " dist = euclideanDistance(training_set.iloc[row].values[:-1], test_instance[:-1])\n", 96 | " \n", 97 | " distances.append((training_set.iloc[row]['class'], dist))\n", 98 | "\n", 99 | " distances = sorted(distances, key=operator.itemgetter(1))[:k]\n", 100 | " inv_class_freq = {x:0 for x in classes}\n", 101 | " \n", 102 | " for cls, dist in distances:\n", 103 | " inv_class_freq[cls] += (1 / dist)\n", 104 | "\n", 105 | " return inv_class_freq" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 6, 111 | "metadata": { 112 | "ExecuteTime": { 113 | "end_time": "2020-02-16T23:07:58.941440Z", 114 | "start_time": "2020-02-16T23:07:58.771905Z" 115 | } 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "# faster cause turning the data into a list of lists\n", 120 | "# as the dataset is smaller in size\n", 121 | "\n", 122 | "def getKNeighbors(training_set, test_instance, k):\n", 123 | " distances = []\n", 124 | " classes = training_set['class'].unique()\n", 125 | " \n", 126 | " training_set = training_set.values\n", 127 | " \n", 128 
| " for row in range(len(training_set)):\n", 129 | " dist = euclideanDistance(training_set[row][:-1], test_instance[:-1])\n", 130 | " \n", 131 | " distances.append((training_set[row][-1], dist))\n", 132 | " distances = sorted(distances, key=operator.itemgetter(1))[:k]\n", 133 | " inv_class_freq = {x:0 for x in classes}\n", 134 | " \n", 135 | " for cls, dist in distances:\n", 136 | " inv_class_freq[cls] += (1 / dist)\n", 137 | "\n", 138 | " return inv_class_freq" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 7, 144 | "metadata": { 145 | "ExecuteTime": { 146 | "end_time": "2020-02-16T23:07:59.117381Z", 147 | "start_time": "2020-02-16T23:07:58.951616Z" 148 | } 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "def getResponse(inv_freq):\n", 153 | " predicted_class = max(inv_freq, key=inv_freq.get)\n", 154 | " return predicted_class" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 8, 160 | "metadata": { 161 | "ExecuteTime": { 162 | "end_time": "2020-02-16T23:07:59.266774Z", 163 | "start_time": "2020-02-16T23:07:59.123520Z" 164 | } 165 | }, 166 | "outputs": [], 167 | "source": [ 168 | "def getAccuracy(testSet, predictions):\n", 169 | " correct = 0\n", 170 | " for x in range(len(testSet)):\n", 171 | " if testSet.iloc[x]['class'] == predictions[x]:\n", 172 | " correct += 1\n", 173 | " return (correct/len(testSet)) * 100.0" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 9, 179 | "metadata": { 180 | "ExecuteTime": { 181 | "end_time": "2020-02-16T23:08:05.002270Z", 182 | "start_time": "2020-02-16T23:07:59.271965Z" 183 | } 184 | }, 185 | "outputs": [], 186 | "source": [ 187 | "if __name__ == '__main__':\n", 188 | " # get data\n", 189 | " data = getDataset(r'../datasets/iris_data.csv')\n", 190 | " \n", 191 | " # split data into stratified subsets\n", 192 | " ratio = 0.2\n", 193 | " trainingSet, testSet = splitDataset(data, ratio)\n", 194 | "\n", 195 | " trainingSet = 
trainingSet.reset_index(drop=True)\n", 196 | " testSet = testSet.reset_index(drop=True)\n", 197 | " accuracy_scores = []\n", 198 | " # generate predictions\n", 199 | " for k in range(1, 31):\n", 200 | " predictions = []\n", 201 | " for row in range(len(testSet)):\n", 202 | " inv_freq = getKNeighbors(trainingSet, testSet.iloc[row].values, k)\n", 203 | " result = getResponse(inv_freq)\n", 204 | " predictions.append(result)\n", 205 | "\n", 206 | " accuracy = getAccuracy(testSet, predictions)\n", 207 | " accuracy_scores.append(accuracy)\n", 208 | "# print(f'k: {k}, Accuracy: {round(accuracy,3)}%')" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 10, 214 | "metadata": { 215 | "ExecuteTime": { 216 | "end_time": "2020-02-16T23:08:05.817093Z", 217 | "start_time": "2020-02-16T23:08:05.006180Z" 218 | } 219 | }, 220 | "outputs": [ 221 | { 222 | "data": { 223 | "text/plain": [ 224 | "" 225 | ] 226 | }, 227 | "execution_count": 10, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | }, 231 | { 232 | "data": { 233 | "image/png": "\n", 234 | "text/plain": [ 235 | "
" 236 | ] 237 | }, 238 | "metadata": { 239 | "needs_background": "light" 240 | }, 241 | "output_type": "display_data" 242 | } 243 | ], 244 | "source": [ 245 | "plt.figure(figsize=(12, 10))\n", 246 | "plt.plot(list(range(1, 31)), accuracy_scores)\n", 247 | "plt.scatter(list(range(1, 31)), accuracy_scores, label=\"Accuracy\")\n", 248 | "plt.title('K vs Accuracy')\n", 249 | "plt.xticks(ticks=range(1, 31))\n", 250 | "plt.xlabel('K')\n", 251 | "plt.ylabel('Accuracy', rotation=0)\n", 252 | "plt.legend()" 253 | ] 254 | } 255 | ], 256 | "metadata": { 257 | "kernelspec": { 258 | "display_name": "Python 3", 259 | "language": "python", 260 | "name": "python3" 261 | }, 262 | "language_info": { 263 | "codemirror_mode": { 264 | "name": "ipython", 265 | "version": 3 266 | }, 267 | "file_extension": ".py", 268 | "mimetype": "text/x-python", 269 | "name": "python", 270 | "nbconvert_exporter": "python", 271 | "pygments_lexer": "ipython3", 272 | "version": "3.7.6" 273 | }, 274 | "toc": { 275 | "base_numbering": 1, 276 | "nav_menu": {}, 277 | "number_sections": true, 278 | "sideBar": true, 279 | "skip_h1_title": false, 280 | "title_cell": "Table of Contents", 281 | "title_sidebar": "Contents", 282 | "toc_cell": false, 283 | "toc_position": {}, 284 | "toc_section_display": true, 285 | "toc_window_display": false 286 | }, 287 | "varInspector": { 288 | "cols": { 289 | "lenName": 16, 290 | "lenType": 16, 291 | "lenVar": 40 292 | }, 293 | "kernels_config": { 294 | "python": { 295 | "delete_cmd_postfix": "", 296 | "delete_cmd_prefix": "del ", 297 | "library": "var_list.py", 298 | "varRefreshCmd": "print(var_dic_list())" 299 | }, 300 | "r": { 301 | "delete_cmd_postfix": ") ", 302 | "delete_cmd_prefix": "rm(", 303 | "library": "var_list.r", 304 | "varRefreshCmd": "cat(var_dic_list()) " 305 | } 306 | }, 307 | "types_to_exclude": [ 308 | "module", 309 | "function", 310 | "builtin_function_or_method", 311 | "instance", 312 | "_Feature" 313 | ], 314 | "window_display": false 315 | } 316 | }, 317 
| "nbformat": 4, 318 | "nbformat_minor": 2 319 | } 320 | --------------------------------------------------------------------------------