├── .gitattributes
├── logo.png
├── requirements.txt
├── datasets
├── voicegender.zip
├── iris.data
├── iris_data.csv
└── diabetes_data.csv
├── Logistic Regression
└── README.md
├── Apriori algorithm
├── README.md
└── apriori_algorithm.py
├── Naive Bayes
├── README.md
└── Naive Bayes.ipynb
├── Decision Trees
└── README.md
├── K-Nearest Neigbors
├── README.md
├── KNN-Classifier.ipynb
└── KNN_weighted_classification.ipynb
├── .gitignore
├── Linear Regression
└── README.md
├── Principal Component Analysis
├── README.md
└── dimensionality reduction.ipynb
├── Random Forest
├── decision_tree.py
└── random_forest.ipynb
├── K-means
└── Spiral.txt
└── README.md
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/veb-101/Machine-Learning-Algorithms/HEAD/logo.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pip
2 | numpy
3 | pandas
4 | scipy
5 | scikit-learn
6 | matplotlib
7 | seaborn
8 |
--------------------------------------------------------------------------------
/datasets/voicegender.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/veb-101/Machine-Learning-Algorithms/HEAD/datasets/voicegender.zip
--------------------------------------------------------------------------------
/Logistic Regression/README.md:
--------------------------------------------------------------------------------
1 | ## Logistic Regression
2 | _______________
3 |
4 | * Articles Used
5 | * [x] [Logistic Regression - YouTube](https://www.youtube.com/playlist?list=PLblh5JKOoLUKxzEP5HA2d-Li7IJkHfXSe)
6 | * [x] [TLM | Logistic Regression](https://www.thelearningmachine.ai/logistic)
7 | * [x] [Logistic regression - Wikipedia](https://en.wikipedia.org/wiki/Logistic_regression)
8 | * [x] [Maximum likelihood and gradient descent demonstration – Zlatan Kremonic](https://zlatankr.github.io/posts/2017/03/06/mle-gradient-descent)
9 | * [x] [An Introduction to Logistic Regression - Towards Data Science](https://towardsdatascience.com/an-introduction-to-logistic-regression-8136ad65da2e)
10 | * [x] [A Gentle Introduction to Logistic Regression With Maximum Likelihood Estimation](https://machinelearningmastery.com/logistic-regression-with-maximum-likelihood-estimation/)
11 | * [x] [Logistic model - Maximum likelihood](https://www.statlect.com/fundamentals-of-statistics/logistic-model-maximum-likelihood)
12 |
--------------------------------------------------------------------------------
/Apriori algorithm/README.md:
--------------------------------------------------------------------------------
1 | # Apriori Algorithm (Association Rule Mining)
2 |
3 | --------
4 |
5 |
6 | * Articles/videos used:
7 | * [x] [Apriori Algorithm : Know How to Find Frequent Itemsets | Edureka](https://www.edureka.co/blog/apriori-algorithm/)
8 | * [x] [Apriori Algorithm Explained | Association Rule Mining | Finding Frequent Itemset | Edureka - YouTube](https://www.youtube.com/watch?v=guVvtZ7ZClw)
9 | * [x] [Apriori Algorithm (Associated Learning) - Fun and Easy Machine Learning - YouTube](https://www.youtube.com/watch?v=WGlMlS_Yydk)
10 | * [x] [Apriori Algorithm - GeeksforGeeks](https://www.geeksforgeeks.org/apriori-algorithm/?ref=lbp)
11 | * [x] [Apriori Algorithm - GeeksforGeeks](https://www.geeksforgeeks.org/apriori-algorithm/)
12 | * [x] [Implementing Apriori algorithm in Python - GeeksforGeeks](https://www.geeksforgeeks.org/implementing-apriori-algorithm-in-python/)
13 | * [x] [Association Rule Mining via Apriori Algorithm in Python](https://stackabuse.com/association-rule-mining-via-apriori-algorithm-in-python/)
14 | * [ ] [Apriori Algorithm in Python - CodeSpeedy](https://www.codespeedy.com/apriori-algorithm-in-python/)
15 | * [ ] [Data Science Apriori Algorithm in Python- Market Basket Analysis - Intellipaat](https://intellipaat.com/blog/data-science-apriori-algorithm/)
16 | * [ ] [Apriori Algorithm from Scratch - Python](http://www.vucreations.com/articles/apriori-algorithm-from-scratch-Python.html)
17 |
18 |
--------------------------------------------------------------------------------
/Naive Bayes/README.md:
--------------------------------------------------------------------------------
1 | # Gaussian Naive Bayes
2 |
3 | ---
4 |
5 | - Articles used
6 | - [x] [3blue1brown- Bayes theorem, and making probability intuitive](https://www.youtube.com/watch?v=HZGCoVF3YvM)
7 | - [x] [3blue1brown- The quick proof of Bayes' theorem](https://www.youtube.com/watch?v=U_85TaXbeIo)
8 | - [x] [Luis Serrano - Naive Bayes classifier: A friendly approach](https://www.youtube.com/watch?v=Q8l0Vip5YUw)
9 | - [x] [Andrew Ng Naive Bayes Generative Learning Algorithms](https://www.youtube.com/watch?v=z5UQyCESW64)
10 | - [x] [Andrew Ng Naive Bayes Text Classification](https://www.youtube.com/watch?v=NFd0ZQk5bR4)
11 | - [x] [Brandon Rohrer - How Bayes Theorem works](https://www.youtube.com/watch?v=5NMxiOGL39M)
12 | - [x] [Naive Bayes Classifiers](https://www.geeksforgeeks.org/naive-bayes-classifiers)
13 | - [x] [In Depth: Naive Bayes Classification](https://jakevdp.github.io/PythonDataScienceHandbook/05.05-naive-bayes.html)
14 | - [x] [Naive Bayes Classifier in Python | Naive Bayes Algorithm | Machine Learning Algorithm | Edureka](https://www.youtube.com/watch?v=vz_xuxYS2PM&t=11s)
15 | - [x] [How to Develop a Naive Bayes Classifier from Scratch in Python](https://machinelearningmastery.com/classification-as-conditional-probability-and-the-naive-bayes-algorithm/)
16 | - [x] [Naive Bayes Classifier From Scratch](https://chrisalbon.com/machine_learning/naive_bayes/naive_bayes_classifier_from_scratch/)
17 | - [x] [KDnuggets - Naive Bayes from Scratch using Python only – No Fancy Frameworks](https://www.kdnuggets.com/2018/10/naive-bayes-from-scratch-python.html)
18 |
--------------------------------------------------------------------------------
/Decision Trees/README.md:
--------------------------------------------------------------------------------
1 | ## Decision Tree Classifier
2 | _______________
3 |
4 | * Articles/videos read and used
5 | * [x] [StatQuest: Decision Trees](https://www.youtube.com/watch?v=7VeUPuFGJHk&list=PLblh5JKOoLUICTaGLRoHQDuF_7q2GfuJF&index=34)
6 | * [x] [Decision tree learning](https://en.wikipedia.org/wiki/Decision_tree_learning)
7 | * [x] [Decision Tree Algorithm | Decision Tree in Python | Machine Learning Algorithms | Edureka](https://www.youtube.com/watch?v=qDcl-FRnwSU)
8 | * [x] [Classification And Regression Trees for Machine Learning](https://machinelearningmastery.com/classification-and-regression-trees-for-machine-learning/)
9 | * [x] [How To Implement The Decision Tree Algorithm From Scratch In Python](https://machinelearningmastery.com/implement-decision-tree-algorithm-scratch-python/)
10 | * [x] [Class - 5 Data Science Training | Decision Tree Classifier Explained | Edureka](https://www.youtube.com/watch?v=v3tsrs1wpi4)
11 | * [x] [Understanding Decision Trees for Classification in Python](https://www.kdnuggets.com/2019/08/understanding-decision-trees-classification-python.html)
12 | * [x] [A Simple Explanation of Information Gain and Entropy](https://victorzhou.com/blog/information-gain/)
13 | * [x] [A Simple Explanation of Gini Impurity](https://victorzhou.com/blog/gini-impurity/)
14 | * [x] [In-Depth: Decision Trees and Random Forests](https://jakevdp.github.io/PythonDataScienceHandbook/05.08-random-forests.html)
15 | * [x] [The Simple Math behind 3 Decision Tree Splitting criterions](https://towardsdatascience.com/the-simple-math-behind-3-decision-tree-splitting-criterions-85d4de2a75fe)
16 |
--------------------------------------------------------------------------------
/K-Nearest Neigbors/README.md:
--------------------------------------------------------------------------------
1 | ### k-Nearest Neighbors README
2 | --------------------
3 |
4 | * Articles used:
5 | 1. [A Detailed Introduction to K-Nearest Neighbor (KNN) Algorithm](https://saravananthirumuruganathan.wordpress.com/2010/05/17/a-detailed-introduction-to-k-nearest-neighbor-knn-algorithm/)
6 | 2. [A Complete Guide to K-Nearest-Neighbors with Applications in Python and R](https://kevinzakka.github.io/2016/07/13/k-nearest-neighbor/)
7 | 3. [Tutorial To Implement k-Nearest Neighbors in Python From Scratch](https://machinelearningmastery.com/tutorial-to-implement-k-nearest-neighbors-in-python-from-scratch/)
8 | 4. [A Practical Introduction to K-Nearest Neighbors Algorithm for Regression](https://www.analyticsvidhya.com/blog/2018/08/k-nearest-neighbor-introduction-regression-python/)
9 | 5. [Introduction to k-Nearest Neighbors: A powerful Machine Learning Algorithm](https://www.analyticsvidhya.com/blog/2018/03/introduction-k-neighbours-algorithm-clustering/)
10 | 6. [Wikipedia - k-nearest neighbors algorithm](https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm)
11 | 7. [Building a k-Nearest-Neighbors (k-NN) Model with Scikit-learn](https://towardsdatascience.com/building-a-k-nearest-neighbors-k-nn-model-with-scikit-learn-51209555453a)
12 | 8. [Understanding KNN(K-nearest neighbor) with example](https://kraj3.com.np/blog/2019/06/understanding-knnk-nearest-neighbor-with-example/)
13 | 9. [Dataset Extraction and analysis - Regression](https://towardsdatascience.com/linear-regression-on-boston-housing-dataset-f409b7e4a155)
14 | 10. [Weighted K-NN](https://www.geeksforgeeks.org/weighted-k-nn/)
15 |
16 | * Bias - Variance in K-NN
17 | * [Why does the variance decreases in KNN algorithm when we increase the K?](https://www.quora.com/Why-does-the-variance-decreases-in-KNN-algorithm-when-we-increase-the-K#)
18 | * [KNN: 1-nearest neighbor](https://stats.stackexchange.com/questions/151756/knn-1-nearest-neighbor/151770)
19 | * [Day 3 — K-Nearest Neighbors and Bias–Variance Tradeoff](https://medium.com/30-days-of-machine-learning/day-3-k-nearest-neighbors-and-bias-variance-tradeoff-75f84d515bdb)
20 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | .hypothesis/
51 | .pytest_cache/
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 | db.sqlite3
61 |
62 | # Flask stuff:
63 | instance/
64 | .webassets-cache
65 |
66 | # Scrapy stuff:
67 | .scrapy
68 |
69 | # Sphinx documentation
70 | docs/_build/
71 |
72 | # PyBuilder
73 | target/
74 |
75 | # Jupyter Notebook
76 | .ipynb_checkpoints
77 |
78 | # IPython
79 | profile_default/
80 | ipython_config.py
81 |
82 | # pyenv
83 | .python-version
84 |
85 | # pipenv
86 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
87 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
88 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
89 | # install all needed dependencies.
90 | #Pipfile.lock
91 |
92 | # celery beat schedule file
93 | celerybeat-schedule
94 |
95 | # SageMath parsed files
96 | *.sage.py
97 |
98 | # Environments
99 | .env
100 | .venv
101 | env/
102 | venv/
103 | ENV/
104 | env.bak/
105 | venv.bak/
106 |
107 | # Spyder project settings
108 | .spyderproject
109 | .spyproject
110 |
111 | # Rope project settings
112 | .ropeproject
113 |
114 | # mkdocs documentation
115 | /site
116 |
117 | # mypy
118 | .mypy_cache/
119 | .dmypy.json
120 | dmypy.json
121 |
122 | # Pyre type checker
123 | .pyre/
124 | Decision Trees/.vscode/settings.json
125 | .vscode/settings.json
126 |
--------------------------------------------------------------------------------
/Linear Regression/README.md:
--------------------------------------------------------------------------------
1 | - Linear Regression Articles and Videos
2 |
3 | - [x] [A BEGINNERS GUIDE TO REGRESSION TECHNIQUES](https://analyticsindiamag.com/a-beginners-guide-to-regression-techniques/)
4 | - [x] [Linear Regression Algorithm | Linear Regression in Python | Machine Learning Algorithm | Edureka - YouTube](https://www.youtube.com/watch?v=E5RjzSK0fvY)
5 | - [x] [In Depth: Linear Regression | Python Data Science Handbook](https://jakevdp.github.io/PythonDataScienceHandbook/05.06-linear-regression.html)
6 | - [x] [Linear Models - YouTube](https://www.youtube.com/playlist?list=PLblh5JKOoLUIzaEkCLIUxQFjPIlapw8nU)
7 | - [x] [Statistics 101: Linear Regression, The Very Basics - YouTube](https://www.youtube.com/watch?v=ZkjP5RJLQF4&list=PLIeGtxpvyG-LoKUpV0fSY8BGKIMIdmfCi&index=1)
8 | - [x] [How to Implement Linear Regression From Scratch in Python](https://machinelearningmastery.com/implement-linear-regression-stochastic-gradient-descent-scratch-python/)
9 | - [x] [Linear Regression using Python - Towards Data Science](https://towardsdatascience.com/linear-regression-using-python-b136c91bf0a2)
10 | - [x] [Mathematical explanation for Linear Regression working - GeeksforGeeks](https://www.geeksforgeeks.org/mathematical-explanation-for-linear-regression-working/)
11 | - [x] [Gradient Descent in Linear Regression - GeeksforGeeks](https://www.geeksforgeeks.org/gradient-descent-in-linear-regression/)
12 | - [x] [ML | Normal Equation in Linear Regression - GeeksforGeeks](https://www.geeksforgeeks.org/ml-normal-equation-in-linear-regression/)
13 | - [x] [Univariate Linear Regression in Python - GeeksforGeeks](https://www.geeksforgeeks.org/univariate-linear-regression-in-python/)
14 | - [x] [How to do Linear Regression and Logistic Regression in Machine Learning?](https://mlfromscratch.com/machine-learning-introduction-8-linear-regression-and-logistic-regression/#/)
15 | - [x] [Linear Regression (Python Implementation) - GeeksforGeeks](https://www.geeksforgeeks.org/linear-regression-python-implementation/)
16 | - [x] [ML | Multiple Linear Regression using Python - GeeksforGeeks](https://www.geeksforgeeks.org/ml-multiple-linear-regression-using-python/)
17 | - [x] [A Complete Tutorial on Ridge and Lasso Regression in Python](https://www.analyticsvidhya.com/blog/2016/01/complete-tutorial-ridge-lasso-regression-python/)
18 | - [x] [Python/linear_regression.py at master · TheAlgorithms/Python](https://github.com/TheAlgorithms/Python/blob/master/machine_learning/linear_regression.py)
19 | - [x] [Python | Linear Regression using sklearn - GeeksforGeeks](https://www.geeksforgeeks.org/python-linear-regression-using-sklearn/)
20 | - [ ] [ML | Locally weighted Linear Regression - GeeksforGeeks](https://www.geeksforgeeks.org/ml-locally-weighted-linear-regression/)
21 | - [x] [Statistics PL15 - Multiple Regression - YouTube](https://www.youtube.com/playlist?list=PLIeGtxpvyG-IqjoU8IiF0Yu1WtxNq_4z-)
22 | - [ ] [Statistics PL18 - Nonlinear Regression - YouTube](https://www.youtube.com/playlist?list=PLIeGtxpvyG-KE0M1r5cjbC_7Q_dVlKVq4)
23 | - [ ] [Isotonic Regression is THE Coolest Machine-Learning Model You Might Not Have Heard Of](https://towardsdatascience.com/isotonic-regression-is-the-coolest-machine-learning-model-you-might-not-have-heard-of-3ce14afc6d1e)
24 |
25 | - For 3-D plot:
26 | - [Multiple linear regression with Python, numpy, matplotlib, plot in 3d](https://www.aiproblog.com/index.php/forums/topic/multiple-linear-regression-with-python-numpy-matplotlib-plot-in-3d/)
27 |
--------------------------------------------------------------------------------
/Principal Component Analysis/README.md:
--------------------------------------------------------------------------------
1 | # Principal Component Analysis
2 |
3 | ---
4 | **Articles Used**
5 |
6 | * PCA
7 | * [x] [Principal Component Analysis](https://sebastianraschka.com/Articles/2015_pca_in_3_steps.html#preparing-the-iris-dataset)
8 | * [x] [Implementing a Principal Component Analysis (PCA)](https://sebastianraschka.com/Articles/2014_pca_step_by_step.html#4-computing-eigenvectors-and-corresponding-eigenvalues)
9 | * [x] [Kernel tricks and nonlinear dimensionality reduction via RBF kernel PCA](https://sebastianraschka.com/Articles/2014_kernel_pca.html)
10 | * [x] [The Mathematics Behind Principal Component Analysis](https://towardsdatascience.com/the-mathematics-behind-principal-component-analysis-fff2d7f4b643)
11 | * [x] [A tutorial on Principal Components Analysis](http://www.cs.otago.ac.nz/cosc453/student_tutorials/principal_components.pdf)
12 | * [x] [Principal Component Analysis - Youtube](https://www.youtube.com/playlist?list=PLBv09BD7ez_5_yapAg86Od6JeeypkS4YM)
13 | * [x] [Dimensionality Reduction For Dummies — Part 1: Intuition](https://towardsdatascience.com/https-medium-com-abdullatif-h-dimensionality-reduction-for-dummies-part-1-a8c9ec7b7e79)
14 | * [x] [Data Analysis 6: Principal Component Analysis (PCA) - Computerphile](https://www.youtube.com/watch?v=TJdH6rPA-TI)
15 | * [x] [Visual Explanation of Principal Component Analysis, Covariance, SVD](https://www.youtube.com/watch?v=5HNr_j6LmPc)
16 | * [x] [Luis Serrano - PCA](https://www.youtube.com/watch?v=g-Hb26agBFg)
17 | * [ ] [Dimensionality reduction and PCA](https://www.youtube.com/playlist?list=PLBv09BD7ez_4InDh85LM_43Bsw0cFDHdN)
18 | * [x] [What is an intuitive explanation for PCA? - Quora](https://www.quora.com/What-is-an-intuitive-explanation-for-PCA)
19 | * [x] [What is an intuitive explanation of the relation between PCA and SVD?](https://www.quora.com/What-is-an-intuitive-explanation-of-the-relation-between-PCA-and-SVD)
20 | * [x] [Why don't people use SVD in PCA rather than eigen value decomposition? - Quora](https://www.quora.com/Why-dont-people-use-SVD-in-PCA-rather-than-eigen-value-decomposition)
21 | * [x] [In Depth: Principal Component Analysis](https://jakevdp.github.io/PythonDataScienceHandbook/05.09-principal-component-analysis.html)
22 |
23 | * SVD
24 | * [x] [Gilbert Strang - SVD](https://www.youtube.com/watch?v=rYz83XPxiZo)
25 | * [x] [You Don’t Know SVD (Singular Value Decomposition)](https://towardsdatascience.com/svd-8c2f72e264f)
26 | * [x] [(114) A geometrical interpretation of the SVD - YouTube](https://www.youtube.com/watch?v=NsNNI_-JPUY)
27 | * [ ] [SVD playlist](https://www.youtube.com/playlist?list=PLMrJAkhIeNNSVjnsviglFoY2nXildDCcv)
28 | * [ ] [Gilbert Strang - Computing Eigenvalues and Singular Values](https://www.youtube.com/watch?v=d32WV1rKoVk)
29 | * [x] [Gilbert Strang - Singular Value Decomposition](https://www.youtube.com/watch?v=mBcLRGuAFUk)
30 | * [x] [Computing the SVD](https://www.youtube.com/watch?v=cOUTpqlX-Xs&t=22s)
31 | * [x] [Lecture 47 — Singular Value Decomposition | Stanford University](https://www.youtube.com/watch?v=P5mlg91as1c)
32 | * [x] [How to Calculate the Singular-Value Decomposition (SVD) from Scratch with Python](https://machinelearningmastery.com/singular-value-decomposition-for-machine-learning/)
33 | * [x] [What is an intuitive explanation of singular value decomposition (SVD)? - Quora](https://www.quora.com/What-is-an-intuitive-explanation-of-singular-value-decomposition-SVD)
34 | * [x] [What is the meaning behind the singular value in Singular Value Decomposition? - Quora](https://www.quora.com/What-is-the-meaning-behind-the-singular-value-in-Singular-Value-Decomposition)
35 | * [x] [What is the best way of introducing singular value decomposition (SVD) on a linear algebra course? Why is it so important? Are there any applications which have a real impact? - Quora](https://www.quora.com/What-is-the-best-way-of-introducing-singular-value-decomposition-SVD-on-a-linear-algebra-course-Why-is-it-so-important-Are-there-any-applications-which-have-a-real-impact)
36 | * [x] [What's the difference between SVD and SVD++? - Quora](https://www.quora.com/Whats-the-difference-between-SVD-and-SVD++)
37 | * [x] [What is the purpose of Singular Value Decomposition? - Quora](https://www.quora.com/What-is-the-purpose-of-Singular-Value-Decomposition)
38 |
--------------------------------------------------------------------------------
/Random Forest/decision_tree.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from collections import Counter
3 |
4 |
def giniImpurity(y):
    """Return the Gini impurity of the label array ``y``.

    The labels are counted with ``np.bincount``, converted to class
    proportions ``p`` and combined as ``sum(p * (1 - p))``.  Because
    ``np.bincount`` is used, ``y`` is expected to hold non-negative integers.
    """
    proportions = np.bincount(y) / len(y)
    return (proportions * (1 - proportions)).sum()
9 |
10 |
class Node:
    """A single node of the decision tree.

    Internal nodes carry ``feature``, ``threshold`` and the two children
    ``left`` / ``right``; leaves carry only ``value`` (keyword-only).
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, *, value=None):
        # Split description (used by internal nodes).
        self.feature, self.threshold = feature, threshold
        # Child sub-trees.
        self.left, self.right = left, right
        # Stored prediction; anything other than None marks the node as a leaf.
        self.value = value

    def is_leaf_node(self):
        """Return True when this node stores a value."""
        return not (self.value is None)
22 |
23 |
class DecisionTree():
    """Classification tree grown greedily by maximising Gini gain.

    Parameters
    ----------
    min_samples_split : int, default 5
        A node holding fewer samples than this becomes a leaf.
    max_depth : int, default 100
        A node reached at this depth becomes a leaf.
    n_feats : int or None, default None
        Number of feature columns sampled (without replacement) at every
        node.  A falsy value means "use every column"; the final value is
        resolved against the training data in ``fit``.
    """

    def __init__(self, min_samples_split=5, max_depth=100, n_feats=None):
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth
        self.n_feats = n_feats  # for random forest, choosing a subset of features
        self.root = None

    def fit(self, X, y):
        """Grow the tree from the 2-D feature array ``X`` and label array ``y``.

        ``self.n_feats`` is overwritten with the number of columns actually
        used (capped at ``X.shape[1]``), and the resulting tree is stored in
        ``self.root``.
        """
        self.n_feats = X.shape[1] if not self.n_feats else min(
            self.n_feats, X.shape[1])
        self.root = self._grow_tree(X, y)

    def _grow_tree(self, X, y, depth=0):
        """Recursively build and return the sub-tree for the samples ``X, y``."""
        n_samples, n_features = X.shape
        n_labels = len(np.unique(y))

        # stopping criteria
        if (depth >= self.max_depth or n_labels == 1 or n_samples < self.min_samples_split):
            return Node(value=self._most_common_label(y))

        feat_idxs = np.random.choice(n_features, self.n_feats, replace=False)

        # greedily select the best split according to the gini gain
        best_feat, best_thresh = self._best_criteria(X, y, feat_idxs)

        # No candidate feature at all: nothing to split on.
        if best_feat is None:
            return Node(value=self._most_common_label(y))

        left_idxs, right_idxs = self._split(X[:, best_feat], best_thresh)

        # A split that leaves one side empty does not separate anything (this
        # happens when every sampled feature is constant for these samples).
        # Recursing would hand an empty label array to _most_common_label,
        # which raises IndexError, so stop here with a majority-label leaf.
        if len(left_idxs) == 0 or len(right_idxs) == 0:
            return Node(value=self._most_common_label(y))

        # grow the children that result from the split
        left = self._grow_tree(X[left_idxs, :], y[left_idxs], depth + 1)
        right = self._grow_tree(X[right_idxs, :], y[right_idxs], depth + 1)
        return Node(best_feat, best_thresh, left, right)

    def _best_criteria(self, X, y, feat_idxs):
        """Return ``(feature_index, threshold)`` with the highest Gini gain.

        Every unique value of every column in ``feat_idxs`` is tried as a
        threshold.  Ties keep the first candidate found.  ``(None, None)`` is
        returned when there is no candidate at all.
        """
        best_gain = -1
        split_idx, split_thresh = None, None
        for feat_idx in feat_idxs:
            X_column = X[:, feat_idx]
            thresholds = np.unique(X_column)
            for threshold in thresholds:
                gain = self._gini_gain(y, X_column, threshold)

                if gain > best_gain:
                    best_gain = gain
                    split_idx = feat_idx
                    split_thresh = threshold

        return split_idx, split_thresh

    def _gini_gain(self, y, X_column, split_thresh):
        """Return parent impurity minus the weighted impurity of the children.

        A split that leaves either side empty has a gain of 0.
        """
        # parent
        parent_gini_impurity = giniImpurity(y)

        # generate split
        left_idxs, right_idxs = self._split(X_column, split_thresh)

        if len(left_idxs) == 0 or len(right_idxs) == 0:
            return 0

        # compute the weighted avg. for the children
        n = len(y)
        n_l, n_r = len(left_idxs), len(right_idxs)
        g_l, g_r = giniImpurity(y[left_idxs]), giniImpurity(y[right_idxs])
        child_gini_impurity = (n_l / n) * g_l + (n_r / n) * g_r

        # gini gain is difference in loss before vs. after split
        return parent_gini_impurity - child_gini_impurity

    def _split(self, X_column, split_thresh):
        """Return index arrays for ``X_column <= split_thresh`` and ``> split_thresh``."""
        left_idxs = np.argwhere(X_column <= split_thresh).flatten()
        right_idxs = np.argwhere(X_column > split_thresh).flatten()
        return left_idxs, right_idxs

    def _most_common_label(self, y):
        """Return the most frequent label in ``y`` (``y`` must not be empty)."""
        counter = Counter(y)
        most_common = counter.most_common(1)[0][0]
        return most_common

    def predict(self, X):
        """Return an array with one predicted label per row of ``X``."""
        return np.array([self._traverse_tree(x, self.root) for x in X])

    def _traverse_tree(self, x, node):
        """Walk from ``node`` down to a leaf for sample ``x`` and return its value."""
        if node.is_leaf_node():
            return node.value

        # Same rule as _split: values <= threshold belong to the left child.
        if x[node.feature] <= node.threshold:
            return self._traverse_tree(x, node.left)
        return self._traverse_tree(x, node.right)
113 |
--------------------------------------------------------------------------------
/datasets/iris.data:
--------------------------------------------------------------------------------
1 | 5.1,3.5,1.4,0.2,Iris-setosa
2 | 4.9,3.0,1.4,0.2,Iris-setosa
3 | 4.7,3.2,1.3,0.2,Iris-setosa
4 | 4.6,3.1,1.5,0.2,Iris-setosa
5 | 5.0,3.6,1.4,0.2,Iris-setosa
6 | 5.4,3.9,1.7,0.4,Iris-setosa
7 | 4.6,3.4,1.4,0.3,Iris-setosa
8 | 5.0,3.4,1.5,0.2,Iris-setosa
9 | 4.4,2.9,1.4,0.2,Iris-setosa
10 | 4.9,3.1,1.5,0.1,Iris-setosa
11 | 5.4,3.7,1.5,0.2,Iris-setosa
12 | 4.8,3.4,1.6,0.2,Iris-setosa
13 | 4.8,3.0,1.4,0.1,Iris-setosa
14 | 4.3,3.0,1.1,0.1,Iris-setosa
15 | 5.8,4.0,1.2,0.2,Iris-setosa
16 | 5.7,4.4,1.5,0.4,Iris-setosa
17 | 5.4,3.9,1.3,0.4,Iris-setosa
18 | 5.1,3.5,1.4,0.3,Iris-setosa
19 | 5.7,3.8,1.7,0.3,Iris-setosa
20 | 5.1,3.8,1.5,0.3,Iris-setosa
21 | 5.4,3.4,1.7,0.2,Iris-setosa
22 | 5.1,3.7,1.5,0.4,Iris-setosa
23 | 4.6,3.6,1.0,0.2,Iris-setosa
24 | 5.1,3.3,1.7,0.5,Iris-setosa
25 | 4.8,3.4,1.9,0.2,Iris-setosa
26 | 5.0,3.0,1.6,0.2,Iris-setosa
27 | 5.0,3.4,1.6,0.4,Iris-setosa
28 | 5.2,3.5,1.5,0.2,Iris-setosa
29 | 5.2,3.4,1.4,0.2,Iris-setosa
30 | 4.7,3.2,1.6,0.2,Iris-setosa
31 | 4.8,3.1,1.6,0.2,Iris-setosa
32 | 5.4,3.4,1.5,0.4,Iris-setosa
33 | 5.2,4.1,1.5,0.1,Iris-setosa
34 | 5.5,4.2,1.4,0.2,Iris-setosa
35 | 4.9,3.1,1.5,0.1,Iris-setosa
36 | 5.0,3.2,1.2,0.2,Iris-setosa
37 | 5.5,3.5,1.3,0.2,Iris-setosa
38 | 4.9,3.1,1.5,0.1,Iris-setosa
39 | 4.4,3.0,1.3,0.2,Iris-setosa
40 | 5.1,3.4,1.5,0.2,Iris-setosa
41 | 5.0,3.5,1.3,0.3,Iris-setosa
42 | 4.5,2.3,1.3,0.3,Iris-setosa
43 | 4.4,3.2,1.3,0.2,Iris-setosa
44 | 5.0,3.5,1.6,0.6,Iris-setosa
45 | 5.1,3.8,1.9,0.4,Iris-setosa
46 | 4.8,3.0,1.4,0.3,Iris-setosa
47 | 5.1,3.8,1.6,0.2,Iris-setosa
48 | 4.6,3.2,1.4,0.2,Iris-setosa
49 | 5.3,3.7,1.5,0.2,Iris-setosa
50 | 5.0,3.3,1.4,0.2,Iris-setosa
51 | 7.0,3.2,4.7,1.4,Iris-versicolor
52 | 6.4,3.2,4.5,1.5,Iris-versicolor
53 | 6.9,3.1,4.9,1.5,Iris-versicolor
54 | 5.5,2.3,4.0,1.3,Iris-versicolor
55 | 6.5,2.8,4.6,1.5,Iris-versicolor
56 | 5.7,2.8,4.5,1.3,Iris-versicolor
57 | 6.3,3.3,4.7,1.6,Iris-versicolor
58 | 4.9,2.4,3.3,1.0,Iris-versicolor
59 | 6.6,2.9,4.6,1.3,Iris-versicolor
60 | 5.2,2.7,3.9,1.4,Iris-versicolor
61 | 5.0,2.0,3.5,1.0,Iris-versicolor
62 | 5.9,3.0,4.2,1.5,Iris-versicolor
63 | 6.0,2.2,4.0,1.0,Iris-versicolor
64 | 6.1,2.9,4.7,1.4,Iris-versicolor
65 | 5.6,2.9,3.6,1.3,Iris-versicolor
66 | 6.7,3.1,4.4,1.4,Iris-versicolor
67 | 5.6,3.0,4.5,1.5,Iris-versicolor
68 | 5.8,2.7,4.1,1.0,Iris-versicolor
69 | 6.2,2.2,4.5,1.5,Iris-versicolor
70 | 5.6,2.5,3.9,1.1,Iris-versicolor
71 | 5.9,3.2,4.8,1.8,Iris-versicolor
72 | 6.1,2.8,4.0,1.3,Iris-versicolor
73 | 6.3,2.5,4.9,1.5,Iris-versicolor
74 | 6.1,2.8,4.7,1.2,Iris-versicolor
75 | 6.4,2.9,4.3,1.3,Iris-versicolor
76 | 6.6,3.0,4.4,1.4,Iris-versicolor
77 | 6.8,2.8,4.8,1.4,Iris-versicolor
78 | 6.7,3.0,5.0,1.7,Iris-versicolor
79 | 6.0,2.9,4.5,1.5,Iris-versicolor
80 | 5.7,2.6,3.5,1.0,Iris-versicolor
81 | 5.5,2.4,3.8,1.1,Iris-versicolor
82 | 5.5,2.4,3.7,1.0,Iris-versicolor
83 | 5.8,2.7,3.9,1.2,Iris-versicolor
84 | 6.0,2.7,5.1,1.6,Iris-versicolor
85 | 5.4,3.0,4.5,1.5,Iris-versicolor
86 | 6.0,3.4,4.5,1.6,Iris-versicolor
87 | 6.7,3.1,4.7,1.5,Iris-versicolor
88 | 6.3,2.3,4.4,1.3,Iris-versicolor
89 | 5.6,3.0,4.1,1.3,Iris-versicolor
90 | 5.5,2.5,4.0,1.3,Iris-versicolor
91 | 5.5,2.6,4.4,1.2,Iris-versicolor
92 | 6.1,3.0,4.6,1.4,Iris-versicolor
93 | 5.8,2.6,4.0,1.2,Iris-versicolor
94 | 5.0,2.3,3.3,1.0,Iris-versicolor
95 | 5.6,2.7,4.2,1.3,Iris-versicolor
96 | 5.7,3.0,4.2,1.2,Iris-versicolor
97 | 5.7,2.9,4.2,1.3,Iris-versicolor
98 | 6.2,2.9,4.3,1.3,Iris-versicolor
99 | 5.1,2.5,3.0,1.1,Iris-versicolor
100 | 5.7,2.8,4.1,1.3,Iris-versicolor
101 | 6.3,3.3,6.0,2.5,Iris-virginica
102 | 5.8,2.7,5.1,1.9,Iris-virginica
103 | 7.1,3.0,5.9,2.1,Iris-virginica
104 | 6.3,2.9,5.6,1.8,Iris-virginica
105 | 6.5,3.0,5.8,2.2,Iris-virginica
106 | 7.6,3.0,6.6,2.1,Iris-virginica
107 | 4.9,2.5,4.5,1.7,Iris-virginica
108 | 7.3,2.9,6.3,1.8,Iris-virginica
109 | 6.7,2.5,5.8,1.8,Iris-virginica
110 | 7.2,3.6,6.1,2.5,Iris-virginica
111 | 6.5,3.2,5.1,2.0,Iris-virginica
112 | 6.4,2.7,5.3,1.9,Iris-virginica
113 | 6.8,3.0,5.5,2.1,Iris-virginica
114 | 5.7,2.5,5.0,2.0,Iris-virginica
115 | 5.8,2.8,5.1,2.4,Iris-virginica
116 | 6.4,3.2,5.3,2.3,Iris-virginica
117 | 6.5,3.0,5.5,1.8,Iris-virginica
118 | 7.7,3.8,6.7,2.2,Iris-virginica
119 | 7.7,2.6,6.9,2.3,Iris-virginica
120 | 6.0,2.2,5.0,1.5,Iris-virginica
121 | 6.9,3.2,5.7,2.3,Iris-virginica
122 | 5.6,2.8,4.9,2.0,Iris-virginica
123 | 7.7,2.8,6.7,2.0,Iris-virginica
124 | 6.3,2.7,4.9,1.8,Iris-virginica
125 | 6.7,3.3,5.7,2.1,Iris-virginica
126 | 7.2,3.2,6.0,1.8,Iris-virginica
127 | 6.2,2.8,4.8,1.8,Iris-virginica
128 | 6.1,3.0,4.9,1.8,Iris-virginica
129 | 6.4,2.8,5.6,2.1,Iris-virginica
130 | 7.2,3.0,5.8,1.6,Iris-virginica
131 | 7.4,2.8,6.1,1.9,Iris-virginica
132 | 7.9,3.8,6.4,2.0,Iris-virginica
133 | 6.4,2.8,5.6,2.2,Iris-virginica
134 | 6.3,2.8,5.1,1.5,Iris-virginica
135 | 6.1,2.6,5.6,1.4,Iris-virginica
136 | 7.7,3.0,6.1,2.3,Iris-virginica
137 | 6.3,3.4,5.6,2.4,Iris-virginica
138 | 6.4,3.1,5.5,1.8,Iris-virginica
139 | 6.0,3.0,4.8,1.8,Iris-virginica
140 | 6.9,3.1,5.4,2.1,Iris-virginica
141 | 6.7,3.1,5.6,2.4,Iris-virginica
142 | 6.9,3.1,5.1,2.3,Iris-virginica
143 | 5.8,2.7,5.1,1.9,Iris-virginica
144 | 6.8,3.2,5.9,2.3,Iris-virginica
145 | 6.7,3.3,5.7,2.5,Iris-virginica
146 | 6.7,3.0,5.2,2.3,Iris-virginica
147 | 6.3,2.5,5.0,1.9,Iris-virginica
148 | 6.5,3.0,5.2,2.0,Iris-virginica
149 | 6.2,3.4,5.4,2.3,Iris-virginica
150 | 5.9,3.0,5.1,1.8,Iris-virginica
151 |
152 |
--------------------------------------------------------------------------------
/datasets/iris_data.csv:
--------------------------------------------------------------------------------
1 | 5.1,3.5,1.4,0.2,Iris-setosa
2 | 4.9,3.0,1.4,0.2,Iris-setosa
3 | 4.7,3.2,1.3,0.2,Iris-setosa
4 | 4.6,3.1,1.5,0.2,Iris-setosa
5 | 5.0,3.6,1.4,0.2,Iris-setosa
6 | 5.4,3.9,1.7,0.4,Iris-setosa
7 | 4.6,3.4,1.4,0.3,Iris-setosa
8 | 5.0,3.4,1.5,0.2,Iris-setosa
9 | 4.4,2.9,1.4,0.2,Iris-setosa
10 | 4.9,3.1,1.5,0.1,Iris-setosa
11 | 5.4,3.7,1.5,0.2,Iris-setosa
12 | 4.8,3.4,1.6,0.2,Iris-setosa
13 | 4.8,3.0,1.4,0.1,Iris-setosa
14 | 4.3,3.0,1.1,0.1,Iris-setosa
15 | 5.8,4.0,1.2,0.2,Iris-setosa
16 | 5.7,4.4,1.5,0.4,Iris-setosa
17 | 5.4,3.9,1.3,0.4,Iris-setosa
18 | 5.1,3.5,1.4,0.3,Iris-setosa
19 | 5.7,3.8,1.7,0.3,Iris-setosa
20 | 5.1,3.8,1.5,0.3,Iris-setosa
21 | 5.4,3.4,1.7,0.2,Iris-setosa
22 | 5.1,3.7,1.5,0.4,Iris-setosa
23 | 4.6,3.6,1.0,0.2,Iris-setosa
24 | 5.1,3.3,1.7,0.5,Iris-setosa
25 | 4.8,3.4,1.9,0.2,Iris-setosa
26 | 5.0,3.0,1.6,0.2,Iris-setosa
27 | 5.0,3.4,1.6,0.4,Iris-setosa
28 | 5.2,3.5,1.5,0.2,Iris-setosa
29 | 5.2,3.4,1.4,0.2,Iris-setosa
30 | 4.7,3.2,1.6,0.2,Iris-setosa
31 | 4.8,3.1,1.6,0.2,Iris-setosa
32 | 5.4,3.4,1.5,0.4,Iris-setosa
33 | 5.2,4.1,1.5,0.1,Iris-setosa
34 | 5.5,4.2,1.4,0.2,Iris-setosa
35 | 4.9,3.1,1.5,0.1,Iris-setosa
36 | 5.0,3.2,1.2,0.2,Iris-setosa
37 | 5.5,3.5,1.3,0.2,Iris-setosa
38 | 4.9,3.1,1.5,0.1,Iris-setosa
39 | 4.4,3.0,1.3,0.2,Iris-setosa
40 | 5.1,3.4,1.5,0.2,Iris-setosa
41 | 5.0,3.5,1.3,0.3,Iris-setosa
42 | 4.5,2.3,1.3,0.3,Iris-setosa
43 | 4.4,3.2,1.3,0.2,Iris-setosa
44 | 5.0,3.5,1.6,0.6,Iris-setosa
45 | 5.1,3.8,1.9,0.4,Iris-setosa
46 | 4.8,3.0,1.4,0.3,Iris-setosa
47 | 5.1,3.8,1.6,0.2,Iris-setosa
48 | 4.6,3.2,1.4,0.2,Iris-setosa
49 | 5.3,3.7,1.5,0.2,Iris-setosa
50 | 5.0,3.3,1.4,0.2,Iris-setosa
51 | 7.0,3.2,4.7,1.4,Iris-versicolor
52 | 6.4,3.2,4.5,1.5,Iris-versicolor
53 | 6.9,3.1,4.9,1.5,Iris-versicolor
54 | 5.5,2.3,4.0,1.3,Iris-versicolor
55 | 6.5,2.8,4.6,1.5,Iris-versicolor
56 | 5.7,2.8,4.5,1.3,Iris-versicolor
57 | 6.3,3.3,4.7,1.6,Iris-versicolor
58 | 4.9,2.4,3.3,1.0,Iris-versicolor
59 | 6.6,2.9,4.6,1.3,Iris-versicolor
60 | 5.2,2.7,3.9,1.4,Iris-versicolor
61 | 5.0,2.0,3.5,1.0,Iris-versicolor
62 | 5.9,3.0,4.2,1.5,Iris-versicolor
63 | 6.0,2.2,4.0,1.0,Iris-versicolor
64 | 6.1,2.9,4.7,1.4,Iris-versicolor
65 | 5.6,2.9,3.6,1.3,Iris-versicolor
66 | 6.7,3.1,4.4,1.4,Iris-versicolor
67 | 5.6,3.0,4.5,1.5,Iris-versicolor
68 | 5.8,2.7,4.1,1.0,Iris-versicolor
69 | 6.2,2.2,4.5,1.5,Iris-versicolor
70 | 5.6,2.5,3.9,1.1,Iris-versicolor
71 | 5.9,3.2,4.8,1.8,Iris-versicolor
72 | 6.1,2.8,4.0,1.3,Iris-versicolor
73 | 6.3,2.5,4.9,1.5,Iris-versicolor
74 | 6.1,2.8,4.7,1.2,Iris-versicolor
75 | 6.4,2.9,4.3,1.3,Iris-versicolor
76 | 6.6,3.0,4.4,1.4,Iris-versicolor
77 | 6.8,2.8,4.8,1.4,Iris-versicolor
78 | 6.7,3.0,5.0,1.7,Iris-versicolor
79 | 6.0,2.9,4.5,1.5,Iris-versicolor
80 | 5.7,2.6,3.5,1.0,Iris-versicolor
81 | 5.5,2.4,3.8,1.1,Iris-versicolor
82 | 5.5,2.4,3.7,1.0,Iris-versicolor
83 | 5.8,2.7,3.9,1.2,Iris-versicolor
84 | 6.0,2.7,5.1,1.6,Iris-versicolor
85 | 5.4,3.0,4.5,1.5,Iris-versicolor
86 | 6.0,3.4,4.5,1.6,Iris-versicolor
87 | 6.7,3.1,4.7,1.5,Iris-versicolor
88 | 6.3,2.3,4.4,1.3,Iris-versicolor
89 | 5.6,3.0,4.1,1.3,Iris-versicolor
90 | 5.5,2.5,4.0,1.3,Iris-versicolor
91 | 5.5,2.6,4.4,1.2,Iris-versicolor
92 | 6.1,3.0,4.6,1.4,Iris-versicolor
93 | 5.8,2.6,4.0,1.2,Iris-versicolor
94 | 5.0,2.3,3.3,1.0,Iris-versicolor
95 | 5.6,2.7,4.2,1.3,Iris-versicolor
96 | 5.7,3.0,4.2,1.2,Iris-versicolor
97 | 5.7,2.9,4.2,1.3,Iris-versicolor
98 | 6.2,2.9,4.3,1.3,Iris-versicolor
99 | 5.1,2.5,3.0,1.1,Iris-versicolor
100 | 5.7,2.8,4.1,1.3,Iris-versicolor
101 | 6.3,3.3,6.0,2.5,Iris-virginica
102 | 5.8,2.7,5.1,1.9,Iris-virginica
103 | 7.1,3.0,5.9,2.1,Iris-virginica
104 | 6.3,2.9,5.6,1.8,Iris-virginica
105 | 6.5,3.0,5.8,2.2,Iris-virginica
106 | 7.6,3.0,6.6,2.1,Iris-virginica
107 | 4.9,2.5,4.5,1.7,Iris-virginica
108 | 7.3,2.9,6.3,1.8,Iris-virginica
109 | 6.7,2.5,5.8,1.8,Iris-virginica
110 | 7.2,3.6,6.1,2.5,Iris-virginica
111 | 6.5,3.2,5.1,2.0,Iris-virginica
112 | 6.4,2.7,5.3,1.9,Iris-virginica
113 | 6.8,3.0,5.5,2.1,Iris-virginica
114 | 5.7,2.5,5.0,2.0,Iris-virginica
115 | 5.8,2.8,5.1,2.4,Iris-virginica
116 | 6.4,3.2,5.3,2.3,Iris-virginica
117 | 6.5,3.0,5.5,1.8,Iris-virginica
118 | 7.7,3.8,6.7,2.2,Iris-virginica
119 | 7.7,2.6,6.9,2.3,Iris-virginica
120 | 6.0,2.2,5.0,1.5,Iris-virginica
121 | 6.9,3.2,5.7,2.3,Iris-virginica
122 | 5.6,2.8,4.9,2.0,Iris-virginica
123 | 7.7,2.8,6.7,2.0,Iris-virginica
124 | 6.3,2.7,4.9,1.8,Iris-virginica
125 | 6.7,3.3,5.7,2.1,Iris-virginica
126 | 7.2,3.2,6.0,1.8,Iris-virginica
127 | 6.2,2.8,4.8,1.8,Iris-virginica
128 | 6.1,3.0,4.9,1.8,Iris-virginica
129 | 6.4,2.8,5.6,2.1,Iris-virginica
130 | 7.2,3.0,5.8,1.6,Iris-virginica
131 | 7.4,2.8,6.1,1.9,Iris-virginica
132 | 7.9,3.8,6.4,2.0,Iris-virginica
133 | 6.4,2.8,5.6,2.2,Iris-virginica
134 | 6.3,2.8,5.1,1.5,Iris-virginica
135 | 6.1,2.6,5.6,1.4,Iris-virginica
136 | 7.7,3.0,6.1,2.3,Iris-virginica
137 | 6.3,3.4,5.6,2.4,Iris-virginica
138 | 6.4,3.1,5.5,1.8,Iris-virginica
139 | 6.0,3.0,4.8,1.8,Iris-virginica
140 | 6.9,3.1,5.4,2.1,Iris-virginica
141 | 6.7,3.1,5.6,2.4,Iris-virginica
142 | 6.9,3.1,5.1,2.3,Iris-virginica
143 | 5.8,2.7,5.1,1.9,Iris-virginica
144 | 6.8,3.2,5.9,2.3,Iris-virginica
145 | 6.7,3.3,5.7,2.5,Iris-virginica
146 | 6.7,3.0,5.2,2.3,Iris-virginica
147 | 6.3,2.5,5.0,1.9,Iris-virginica
148 | 6.5,3.0,5.2,2.0,Iris-virginica
149 | 6.2,3.4,5.4,2.3,Iris-virginica
150 | 5.9,3.0,5.1,1.8,Iris-virginica
151 |
152 |
--------------------------------------------------------------------------------
/Apriori algorithm/apriori_algorithm.py:
--------------------------------------------------------------------------------
1 | # Apriori algorithm developed and used on a sample dataset
2 | # This code was written just for demonstration and learning purposes
3 |
4 | from collections import Counter
5 | from itertools import combinations
6 | import numpy as np
7 |
8 | # transactions = {
9 | # 1: ["a", "c", "d"],
10 | # 2: ["b", "c", "e"],
11 | # 3: ["a", "b", "c", "e"],
12 | # 5: ["b", "e"],
13 | # 6: ["a", "c", "e"]
14 | # }
15 |
16 |
# Sample transaction database: maps a transaction id to the list of item
# codes bought in that transaction.  The item codes are explained by the
# "ITEMS" legend printed further down in this script.
transactions = {
    1: ["1", "3", "4"],
    2: ["2", "3", "5"],
    3: ["1", "2", "3", "5"],
    5: ["2", "5"],
    6: ["1", "3", "5"]
}

# Minimum number of transactions an itemset must appear in to be "frequent".
min_support_count = 2
# Minimum confidence a rule must reach to be printed by finding_rules.
min_confidence_value = 0.6
27 |
28 |
29 | # apriori pruning concept
30 | def _pruning(current, previous, size):
31 | final_keys = []
32 | previous = [tuple(i) for i in previous]
33 | for key in current:
34 | FLAG = False
35 | current_comb = list(combinations(key, size))
36 | for i in current_comb:
37 | if i in previous or i[::-1] in previous:
38 | FLAG = True
39 | else:
40 | FLAG = False
41 | break
42 |
43 | if FLAG:
44 | final_keys.append(key)
45 |
46 | return final_keys
47 |
48 |
def support_value(itemset_keys_, transactions):
    """Count in how many transactions each candidate itemset occurs.

    Args:
        itemset_keys_: iterable of candidate itemsets. Each candidate is a
            tuple of items, or a single item given as a bare string.
        transactions: dict mapping a transaction id to the list of items in
            that transaction.

    Returns:
        Dict mapping each distinct candidate (in first-seen order) to the
        number of transactions that contain all of its items.
    """
    # Build each transaction's item set once instead of once per candidate.
    baskets = [set(items) for items in transactions.values()]

    itemset = {}
    for key in itemset_keys_:
        if key in itemset:
            # A repeated candidate must not have its support counted twice.
            continue
        # A bare string is a single item, not a collection of characters.
        needed = {key} if isinstance(key, str) else set(key)
        itemset[key] = sum(1 for basket in baskets if needed <= basket)
    return itemset
57 |
58 |
59 | # creating frequent itemset
def get_frequent_itemset(size=None, transactions=None, itemset=None):
    """Compute the frequent itemsets of a given size.

    Args:
        size: number of items per itemset at this level. ``1`` counts the
            individual items directly from ``transactions``; larger values
            build candidates from the items of the previous level.
        transactions: dict mapping a transaction id to its list of items.
        itemset: frequent itemsets returned by the previous level (ignored
            when ``size == 1``). ``None`` is treated as empty.

    Returns:
        Dict mapping each frequent itemset to its support count, keeping
        only entries whose support is at least ``min_support_count``. When
        no itemset of the requested size is frequent, the previous level's
        frequent itemsets are returned instead.
    """
    prev_itemset = {}

    if size == 1:
        # Level 1: count how often every individual item occurs.
        itemset = Counter()

        for val in transactions.values():
            itemset.update(val)

    else:
        prev_itemset = dict(itemset) if itemset else {}
        prev_itemset_keys = list(prev_itemset)

        # Flatten the previous keys into the distinct items still in play:
        # [(a, b), (b, c)] -> [a, b, c].  Level-1 keys are bare item strings
        # and must be kept whole rather than split into characters.
        items = []
        for row in prev_itemset_keys:
            if isinstance(row, str):
                items.append(row)
            else:
                items.extend(row)

        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # generated candidates do not depend on set iteration order.
        valid_keys = list(dict.fromkeys(items))

        # candidate itemset keys
        itemset_keys_ = list(combinations(valid_keys, size))

        # The Apriori algorithm is based on the concept that every subset of
        # a frequent itemset must itself be frequent, so candidates with a
        # subset missing from the previous frequent itemsets are pruned.
        if size >= 2:
            itemset_keys_ = _pruning(
                itemset_keys_, prev_itemset_keys, size - 1)

        # support value for each surviving candidate
        itemset = support_value(itemset_keys_, transactions)

    # A frequent itemset is one whose support reaches the threshold.
    frequent_itemset = {
        key: val for key, val in itemset.items() if val >= min_support_count
    }

    # Fall back to the previous level when this level produced nothing
    # frequent -- either because no candidate survived pruning or because
    # none of the candidates reached the support threshold.
    if not frequent_itemset and size != 1:
        frequent_itemset = {
            key: val
            for key, val in prev_itemset.items()
            if val >= min_support_count
        }

    return frequent_itemset
110 |
111 |
def finding_subsets(frequent_set):
    """List every frequent itemset together with its non-empty proper subsets.

    Args:
        frequent_set: dict whose keys are frequent itemsets. Each key is a
            tuple of items, or a single item given as a bare string.

    Returns:
        List with one entry per key. Each entry is a list whose first
        element is the itemset itself, followed by all of its non-empty
        proper subsets as tuples, ordered by subset size. An empty
        ``frequent_set`` yields an empty list.
    """
    item_list = []
    for key in frequent_set:
        if isinstance(key, str):
            # A single item has no non-empty proper subsets.
            proper_subsets = []
        else:
            # Build the flat list directly; the subsets have different
            # lengths, so they cannot be flattened through a NumPy array.
            proper_subsets = [
                sub
                for r in range(1, len(key))
                for sub in combinations(key, r)
            ]
        item_list.append([key] + proper_subsets)

    return item_list
125 |
126 |
def finding_rules(itemset_sub):
    """Print the rules for one itemset that reach the confidence threshold.

    The confidence of a rule is the support of the full itemset divided by
    the support of the antecedent. Supports are counted against the
    module-level ``transactions`` and compared with the module-level
    ``min_confidence_value``.

    Args:
        itemset_sub: list whose first element is the full itemset and whose
            remaining elements are the antecedents to try.

    Returns:
        None. Matching rules are written to standard output.
    """
    print("Antecedents --> Consequents --- Confidence")
    if len(itemset_sub) < 2:
        # Nothing but the itemset itself: there are no rules to evaluate.
        return

    full_itemset = itemset_sub[0]
    # support_value takes an iterable of itemsets, hence the one-element
    # lists.  The full itemset's support does not change between rules, so
    # it is computed once outside the loop.
    full_support = next(
        iter(support_value([full_itemset, ], transactions).values()))

    for antecedent in itemset_sub[1:]:
        antecedent_support = next(
            iter(support_value([antecedent, ], transactions).values()))
        if antecedent_support == 0:
            # Confidence is undefined for an antecedent that never occurs.
            continue

        confidence = full_support / antecedent_support
        if confidence >= min_confidence_value:
            print(
                f"{antecedent} --> {full_itemset} --- {round(confidence, 2)}")
139 |
140 |
141 | print("""
142 | ITEMS
143 | 1: Banana
144 | 2: Eggs
145 | 3: Milk
146 | 4: Tea
147 | 5: Bread
148 |
149 | """)
150 |
# Frequent itemsets found so far; the result of each level is passed back
# in as the starting point for the next one.
f = {}

# Grow the itemsets one item at a time, for sizes 1 through 4.
for i in range(1, 5):
    f = get_frequent_itemset(size=i, transactions=transactions,
                             itemset=f)

# frequent_itemsets

print("Frequent Itemsets...")
for key, val in f.items():
    print(f"Itemset: {key}, support value: {val}")


# For every frequent itemset, collect the itemset followed by its subsets.
subset = finding_subsets(f)

# Print the rules for each itemset; element 0 of each entry is the itemset.
for i in subset:
    print(f"Rules for itemset - {i[0]}")
    finding_rules(i)
    print()
170 |
--------------------------------------------------------------------------------
/K-means/Spiral.txt:
--------------------------------------------------------------------------------
1 | f1 f2 label
2 | 31.95 7.95 3
3 | 31.15 7.3 3
4 | 30.45 6.65 3
5 | 29.7 6 3
6 | 28.9 5.55 3
7 | 28.05 5 3
8 | 27.2 4.55 3
9 | 26.35 4.15 3
10 | 25.4 3.85 3
11 | 24.6 3.6 3
12 | 23.6 3.3 3
13 | 22.75 3.15 3
14 | 21.85 3.05 3
15 | 20.9 3 3
16 | 20 2.9 3
17 | 19.1 3 3
18 | 18.2 3.2 3
19 | 17.3 3.25 3
20 | 16.55 3.5 3
21 | 15.7 3.7 3
22 | 14.85 4.1 3
23 | 14.15 4.4 3
24 | 13.4 4.75 3
25 | 12.7 5.2 3
26 | 12.05 5.65 3
27 | 11.45 6.15 3
28 | 10.9 6.65 3
29 | 10.3 7.25 3
30 | 9.7 7.85 3
31 | 9.35 8.35 3
32 | 8.9 9.05 3
33 | 8.55 9.65 3
34 | 8.15 10.35 3
35 | 7.95 10.95 3
36 | 7.75 11.7 3
37 | 7.55 12.35 3
38 | 7.45 13 3
39 | 7.35 13.75 3
40 | 7.3 14.35 3
41 | 7.35 14.95 3
42 | 7.35 15.75 3
43 | 7.55 16.35 3
44 | 7.7 16.95 3
45 | 7.8 17.55 3
46 | 8.05 18.15 3
47 | 8.3 18.75 3
48 | 8.65 19.3 3
49 | 8.9 19.85 3
50 | 9.3 20.3 3
51 | 9.65 20.8 3
52 | 10.2 21.25 3
53 | 10.6 21.65 3
54 | 11.1 22.15 3
55 | 11.55 22.45 3
56 | 11.95 22.7 3
57 | 12.55 23 3
58 | 13.05 23.2 3
59 | 13.45 23.4 3
60 | 14 23.55 3
61 | 14.55 23.6 3
62 | 15.1 23.75 3
63 | 15.7 23.75 3
64 | 16.15 23.85 3
65 | 16.7 23.8 3
66 | 17.15 23.75 3
67 | 17.75 23.75 3
68 | 18.2 23.6 3
69 | 18.65 23.5 3
70 | 19.1 23.35 3
71 | 19.6 23.15 3
72 | 20 22.95 3
73 | 20.4 22.7 3
74 | 20.7 22.55 3
75 | 21 22.15 3
76 | 21.45 21.95 3
77 | 21.75 21.55 3
78 | 22 21.25 3
79 | 22.25 21 3
80 | 22.5 20.7 3
81 | 22.65 20.35 3
82 | 22.75 20.05 3
83 | 22.9 19.65 3
84 | 23 19.35 3
85 | 23.1 19 3
86 | 23.15 18.65 3
87 | 23.2 18.25 3
88 | 23.2 18.05 3
89 | 23.2 17.8 3
90 | 23.1 17.45 3
91 | 23.05 17.15 3
92 | 22.9 16.9 3
93 | 22.85 16.6 3
94 | 22.7 16.4 3
95 | 22.6 16.2 3
96 | 22.55 16.05 3
97 | 22.4 15.95 3
98 | 22.35 15.8 3
99 | 22.2 15.65 3
100 | 22.15 15.55 3
101 | 22 15.4 3
102 | 21.9 15.3 3
103 | 21.85 15.25 3
104 | 21.75 15.15 3
105 | 21.65 15.05 3
106 | 21.55 15 3
107 | 21.5 14.9 3
108 | 19.35 31.65 1
109 | 20.35 31.45 1
110 | 21.35 31.1 1
111 | 22.25 30.9 1
112 | 23.2 30.45 1
113 | 23.95 30.05 1
114 | 24.9 29.65 1
115 | 25.6 29.05 1
116 | 26.35 28.5 1
117 | 27.15 27.9 1
118 | 27.75 27.35 1
119 | 28.3 26.6 1
120 | 28.95 25.85 1
121 | 29.5 25.15 1
122 | 29.95 24.45 1
123 | 30.4 23.7 1
124 | 30.6 22.9 1
125 | 30.9 22.1 1
126 | 31.25 21.3 1
127 | 31.35 20.55 1
128 | 31.5 19.7 1
129 | 31.55 18.9 1
130 | 31.65 18.15 1
131 | 31.6 17.35 1
132 | 31.45 16.55 1
133 | 31.3 15.8 1
134 | 31.15 15.05 1
135 | 30.9 14.35 1
136 | 30.6 13.65 1
137 | 30.3 13 1
138 | 29.9 12.3 1
139 | 29.5 11.75 1
140 | 29 11.15 1
141 | 28.5 10.6 1
142 | 28 10.1 1
143 | 27.55 9.65 1
144 | 26.9 9.1 1
145 | 26.25 8.8 1
146 | 25.7 8.4 1
147 | 25.15 8.05 1
148 | 24.5 7.75 1
149 | 23.9 7.65 1
150 | 23.15 7.4 1
151 | 22.5 7.3 1
152 | 21.9 7.1 1
153 | 21.25 7.05 1
154 | 20.5 7 1
155 | 19.9 6.95 1
156 | 19.25 7.05 1
157 | 18.75 7.1 1
158 | 18.05 7.25 1
159 | 17.5 7.35 1
160 | 16.9 7.6 1
161 | 16.35 7.8 1
162 | 15.8 8.05 1
163 | 15.4 8.35 1
164 | 14.9 8.7 1
165 | 14.45 8.9 1
166 | 13.95 9.3 1
167 | 13.6 9.65 1
168 | 13.25 10.1 1
169 | 12.95 10.55 1
170 | 12.65 10.9 1
171 | 12.35 11.4 1
172 | 12.2 11.75 1
173 | 11.95 12.2 1
174 | 11.8 12.65 1
175 | 11.75 13.05 1
176 | 11.55 13.6 1
177 | 11.55 14 1
178 | 11.55 14.35 1
179 | 11.55 14.7 1
180 | 11.6 15.25 1
181 | 11.65 15.7 1
182 | 11.8 16.05 1
183 | 11.85 16.5 1
184 | 12 16.75 1
185 | 12.15 17.2 1
186 | 12.3 17.6 1
187 | 12.55 17.85 1
188 | 12.8 18.05 1
189 | 13.1 18.4 1
190 | 13.3 18.6 1
191 | 13.55 18.85 1
192 | 13.8 19.05 1
193 | 14.15 19.25 1
194 | 14.45 19.5 1
195 | 14.85 19.55 1
196 | 15 19.7 1
197 | 15.25 19.7 1
198 | 15.55 19.85 1
199 | 15.95 19.9 1
200 | 16.2 19.9 1
201 | 16.55 19.9 1
202 | 16.85 19.9 1
203 | 17.2 19.9 1
204 | 17.4 19.8 1
205 | 17.65 19.75 1
206 | 17.8 19.7 1
207 | 18 19.6 1
208 | 18.2 19.55 1
209 | 3.9 9.6 2
210 | 3.55 10.65 2
211 | 3.35 11.4 2
212 | 3.1 12.35 2
213 | 3.1 13.25 2
214 | 3.05 14.15 2
215 | 3 15.1 2
216 | 3.1 16 2
217 | 3.2 16.85 2
218 | 3.45 17.75 2
219 | 3.7 18.7 2
220 | 3.95 19.55 2
221 | 4.35 20.25 2
222 | 4.7 21.1 2
223 | 5.15 21.8 2
224 | 5.6 22.5 2
225 | 6.2 23.3 2
226 | 6.8 23.85 2
227 | 7.35 24.45 2
228 | 8.05 24.95 2
229 | 8.8 25.45 2
230 | 9.5 26 2
231 | 10.2 26.35 2
232 | 10.9 26.75 2
233 | 11.7 27 2
234 | 12.45 27.25 2
235 | 13.3 27.6 2
236 | 14.05 27.6 2
237 | 14.7 27.75 2
238 | 15.55 27.75 2
239 | 16.4 27.75 2
240 | 17.1 27.75 2
241 | 17.9 27.75 2
242 | 18.55 27.7 2
243 | 19.35 27.6 2
244 | 20.1 27.35 2
245 | 20.7 27.1 2
246 | 21.45 26.8 2
247 | 22.05 26.5 2
248 | 22.7 26.15 2
249 | 23.35 25.65 2
250 | 23.8 25.3 2
251 | 24.3 24.85 2
252 | 24.75 24.35 2
253 | 25.25 23.95 2
254 | 25.65 23.45 2
255 | 26.05 23 2
256 | 26.2 22.3 2
257 | 26.6 21.8 2
258 | 26.75 21.25 2
259 | 27 20.7 2
260 | 27.15 20.15 2
261 | 27.15 19.6 2
262 | 27.35 19.1 2
263 | 27.35 18.45 2
264 | 27.4 18 2
265 | 27.3 17.4 2
266 | 27.15 16.9 2
267 | 27 16.4 2
268 | 27 15.9 2
269 | 26.75 15.35 2
270 | 26.55 14.85 2
271 | 26.3 14.45 2
272 | 25.95 14.1 2
273 | 25.75 13.7 2
274 | 25.35 13.3 2
275 | 25.05 12.95 2
276 | 24.8 12.7 2
277 | 24.4 12.45 2
278 | 24.05 12.2 2
279 | 23.55 11.85 2
280 | 23.2 11.65 2
281 | 22.75 11.4 2
282 | 22.3 11.3 2
283 | 21.9 11.1 2
284 | 21.45 11.05 2
285 | 21.1 11 2
286 | 20.7 10.95 2
287 | 20.35 10.95 2
288 | 19.95 11 2
289 | 19.55 11 2
290 | 19.15 11.05 2
291 | 18.85 11.1 2
292 | 18.45 11.25 2
293 | 18.15 11.35 2
294 | 17.85 11.5 2
295 | 17.5 11.7 2
296 | 17.2 11.95 2
297 | 17 12.05 2
298 | 16.75 12.2 2
299 | 16.65 12.35 2
300 | 16.5 12.5 2
301 | 16.35 12.7 2
302 | 16.2 12.8 2
303 | 16.15 12.95 2
304 | 16 13.1 2
305 | 15.95 13.25 2
306 | 15.9 13.4 2
307 | 15.8 13.5 2
308 | 15.8 13.65 2
309 | 15.75 13.85 2
310 | 15.65 14.05 2
311 | 15.65 14.25 2
312 | 15.65 14.5 2
313 | 15.65 14.6 2
314 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Machine Learning Algorithms
2 |
3 | [](https://mybinder.org/v2/gh/veb-101/Machine-Learning-Algorithms/master) [](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/) [](https://github.com/veb-101/Machine-Learning-Algorithms/issues) [](https://github.com/veb-101/Machine-Learning-Algorithms/pulls)
4 |
5 | [](https://repl.it/repls/folder/machine%20learning%20algorithms?ref=button)
6 |
7 | ---
8 |
9 | #### Numpy implementation of some basic machine learning algorithms
10 |
11 |
12 |
13 | ---
14 |
15 | ##### Run Online
16 |
17 | | Sr. No. | Algorithm | View | Python File | Colab Notebook |
18 | | ------- | ----------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
19 | | 1.a | KNN - Classification | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/K-Nearest%20Neigbors/KNN-Classifier.ipynb) | [.py](https://repl.it/@VaibhavSingh4/1a-k-NN-classification) | [](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/K-Nearest%20Neigbors/KNN-Classifier.ipynb) |
20 | | 1.b | KNN - Regression | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/K-Nearest%20Neigbors/KNN-Regression.ipynb) | [.py](https://repl.it/@VaibhavSingh4/1b-k-NN-Regression) | [](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/K-Nearest%20Neigbors/KNN-Regression.ipynb) |
21 | | 1.c | KNN - Weighted Classification | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/K-Nearest%20Neigbors/KNN_weighted_classification.ipynb) | [.py](https://repl.it/@VaibhavSingh4/1c-KNN-weighted-classification) | [](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/K-Nearest%20Neigbors/KNN_weighted_classification.ipynb) |
22 | | 2 | Linear Regression | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/Linear%20Regression/linear_regression.ipynb) | | [](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/Linear%20Regression/linear_regression.ipynb) |
23 | | 3 | Logistic Regression | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/Logistic%20Regression/logistic%20regression.ipynb) | [.py](https://repl.it/@VaibhavSingh4/Logistic-Regression) | [](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/Logistic%20Regression/logistic%20regression.ipynb) |
24 | | 4 | Decision Trees | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/Decision%20Trees/decision%20tree.ipynb) | [.py](https://repl.it/@VaibhavSingh4/decision-tree) | [](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/Decision%20Trees/decision%20tree.ipynb) |
25 | | 5 | Naive Bayes | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/Naive%20Bayes/Naive%20Bayes.ipynb) | | [](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/Naive%20Bayes/Naive%20Bayes.ipynb) |
26 | | 6 | Random Forest | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/Random%20Forest/random_forest.ipynb) | | [](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/Random%20Forest/random_forest.ipynb) |
27 | | 7 | AdaBoost | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/Boosting%20-%20AdaBoost/adaboost.ipynb) | | [](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/Boosting%20-%20AdaBoost/adaboost.ipynb) |
28 | | 8 | K-means Clustering | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/K-means/K_means.ipynb) | [.py](https://repl.it/@VaibhavSingh4/k-means) | [](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/K-means/K_means.ipynb) |
29 | | 9 | PCA | [notebook](https://nbviewer.jupyter.org/github/veb-101/Machine-Learning-Algorithms/blob/master/Principal%20Component%20Analysis/dimensionality%20reduction.ipynb) | | [](https://colab.research.google.com/github/veb-101/Machine-Learning-Algorithms/blob/master/Principal%20Component%20Analysis/dimensionality%20reduction.ipynb) |
30 | | 10 | Apriori Algorithm | | [.py](https://repl.it/@VaibhavSingh4/Apriori-Algorithm) | |
--------------------------------------------------------------------------------
/Random Forest/random_forest.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "toc": true
7 | },
8 | "source": [
9 | "
Table of Contents
\n",
10 | ""
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 1,
16 | "metadata": {
17 | "ExecuteTime": {
18 | "end_time": "2020-04-12T16:49:27.604267Z",
19 | "start_time": "2020-04-12T16:49:25.091457Z"
20 | }
21 | },
22 | "outputs": [],
23 | "source": [
24 | "import numpy as np\n",
25 | "\n",
26 | "from sklearn import datasets\n",
27 | "from sklearn.model_selection import train_test_split\n",
28 | "from sklearn.metrics import precision_score, recall_score, f1_score\n",
29 | "\n",
30 | "import pandas as pd\n",
31 | "\n",
32 | "from collections import Counter\n",
33 | "from decision_tree import DecisionTree"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {},
39 | "source": [
40 | "# Model Definition\n",
41 | "\n",
42 | "Using previously written Decision tree"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 2,
48 | "metadata": {
49 | "ExecuteTime": {
50 | "end_time": "2020-04-12T16:49:27.617602Z",
51 | "start_time": "2020-04-12T16:49:27.610032Z"
52 | }
53 | },
54 | "outputs": [],
55 | "source": [
56 | "def bootstrap_sample(X, y):\n",
57 | " n_samples = X.shape[0]\n",
58 | " idxs = np.random.choice(n_samples, n_samples, replace=True)\n",
59 | " return X[idxs], y[idxs]\n",
60 | "\n",
61 | "\n",
62 | "def most_common_label(y):\n",
63 | " counter = Counter(y)\n",
64 | " most_common = counter.most_common(1)[0][0]\n",
65 | " return most_common"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": 3,
71 | "metadata": {
72 | "ExecuteTime": {
73 | "end_time": "2020-04-12T16:49:27.819326Z",
74 | "start_time": "2020-04-12T16:49:27.621157Z"
75 | }
76 | },
77 | "outputs": [],
78 | "source": [
79 | "class RandomForest:\n",
80 | "\n",
81 | " def __init__(self, n_trees=10, min_samples_split=2,\n",
82 | " max_depth=100, n_feats=None):\n",
83 | " self.n_trees = n_trees\n",
84 | " self.min_samples_split = min_samples_split\n",
85 | " self.max_depth = max_depth\n",
86 | " self.n_feats = n_feats\n",
87 | " self.trees = []\n",
88 | "\n",
89 | " def fit(self, X, y):\n",
90 | " self.trees = []\n",
91 | " for _ in range(self.n_trees):\n",
92 | " tree = DecisionTree(min_samples_split=self.min_samples_split,\n",
93 | " max_depth=self.max_depth, n_feats=self.n_feats)\n",
94 | " X_samp, y_samp = bootstrap_sample(X, y)\n",
95 | " tree.fit(X_samp, y_samp)\n",
96 | " self.trees.append(tree)\n",
97 | "\n",
98 | " def predict(self, X):\n",
99 | " tree_preds = np.array([tree.predict(X) for tree in self.trees])\n",
100 | " tree_preds = np.swapaxes(tree_preds, 0, 1)\n",
101 | " y_pred = [most_common_label(tree_pred) for tree_pred in tree_preds]\n",
102 | " return np.array(y_pred)"
103 | ]
104 | },
105 | {
106 | "cell_type": "code",
107 | "execution_count": 4,
108 | "metadata": {
109 | "ExecuteTime": {
110 | "end_time": "2020-04-12T16:49:27.994854Z",
111 | "start_time": "2020-04-12T16:49:27.833497Z"
112 | }
113 | },
114 | "outputs": [],
115 | "source": [
116 | "def accuracy(y_true, y_pred):\n",
117 | " accuracy = np.sum(y_true == y_pred) / len(y_true)\n",
118 | " return accuracy"
119 | ]
120 | },
121 | {
122 | "cell_type": "markdown",
123 | "metadata": {},
124 | "source": [
125 | "# Training and testing on Diabetes Dataset"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": 5,
131 | "metadata": {
132 | "ExecuteTime": {
133 | "end_time": "2020-04-12T16:49:29.037211Z",
134 | "start_time": "2020-04-12T16:49:28.003482Z"
135 | }
136 | },
137 | "outputs": [],
138 | "source": [
139 | "cols = [\"Pregnancies\", \"Glucose\", \"BloodPressure\", \"SkinThickness\",\n",
140 | " \"Insulin\", \"BMI\", \"DiabetesPedigreeFunction\", \"Age\", \"Outcome\"]\n",
141 | "url = \"https://gist.githubusercontent.com/ktisha/c21e73a1bd1700294ef790c56c8aec1f/raw/819b69b5736821ccee93d05b51de0510bea00294/pima-indians-diabetes.csv\"\n",
142 | "diabetes_data = pd.read_csv(url, skiprows=9, header=None, names=cols)"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 6,
148 | "metadata": {
149 | "ExecuteTime": {
150 | "end_time": "2020-04-12T16:49:29.087001Z",
151 | "start_time": "2020-04-12T16:49:29.044736Z"
152 | }
153 | },
154 | "outputs": [
155 | {
156 | "data": {
157 | "text/plain": [
158 | "(768, 9)"
159 | ]
160 | },
161 | "execution_count": 6,
162 | "metadata": {},
163 | "output_type": "execute_result"
164 | }
165 | ],
166 | "source": [
167 | "diabetes_data.shape"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": 7,
173 | "metadata": {
174 | "ExecuteTime": {
175 | "end_time": "2020-04-12T16:49:29.305869Z",
176 | "start_time": "2020-04-12T16:49:29.103088Z"
177 | }
178 | },
179 | "outputs": [],
180 | "source": [
181 | "X = diabetes_data[cols[:-1]].values\n",
182 | "y = diabetes_data[cols[-1]].values"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": 8,
188 | "metadata": {
189 | "ExecuteTime": {
190 | "end_time": "2020-04-12T16:49:29.457842Z",
191 | "start_time": "2020-04-12T16:49:29.316553Z"
192 | }
193 | },
194 | "outputs": [],
195 | "source": [
196 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, stratify=y, random_state=42)"
197 | ]
198 | },
199 | {
200 | "cell_type": "code",
201 | "execution_count": 9,
202 | "metadata": {
203 | "ExecuteTime": {
204 | "end_time": "2020-04-12T16:49:43.284602Z",
205 | "start_time": "2020-04-12T16:49:29.464289Z"
206 | }
207 | },
208 | "outputs": [],
209 | "source": [
210 | "rnd_clf = RandomForest(max_depth=10, n_trees=10, n_feats=6)\n",
211 | "rnd_clf.fit(X_train, y_train)\n",
212 | "y_pred = rnd_clf.predict(X_test)"
213 | ]
214 | },
215 | {
216 | "cell_type": "markdown",
217 | "metadata": {},
218 | "source": [
219 | "# Metrics"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": 10,
225 | "metadata": {
226 | "ExecuteTime": {
227 | "end_time": "2020-04-12T16:49:43.300402Z",
228 | "start_time": "2020-04-12T16:49:43.288586Z"
229 | }
230 | },
231 | "outputs": [
232 | {
233 | "name": "stdout",
234 | "output_type": "stream",
235 | "text": [
236 | "Accuracy: 0.8181818181818182\n"
237 | ]
238 | }
239 | ],
240 | "source": [
241 | "print(f\"Accuracy: {accuracy(y_test, y_pred)}\")"
242 | ]
243 | },
244 | {
245 | "cell_type": "code",
246 | "execution_count": 11,
247 | "metadata": {
248 | "ExecuteTime": {
249 | "end_time": "2020-04-12T16:49:43.470452Z",
250 | "start_time": "2020-04-12T16:49:43.306229Z"
251 | }
252 | },
253 | "outputs": [
254 | {
255 | "name": "stdout",
256 | "output_type": "stream",
257 | "text": [
258 | "Precision: 0.782608695652174\n",
259 | "Recall: 0.6666666666666666\n",
260 | "F1-Score: 0.72\n"
261 | ]
262 | }
263 | ],
264 | "source": [
265 | "print(f\"Precision: {precision_score(y_test, y_pred)}\")\n",
266 | "print(f\"Recall: {recall_score(y_test, y_pred)}\")\n",
267 | "print(f\"F1-Score: {f1_score(y_test, y_pred)}\")"
268 | ]
269 | }
270 | ],
271 | "metadata": {
272 | "kernelspec": {
273 | "display_name": "Python 3",
274 | "language": "python",
275 | "name": "python3"
276 | },
277 | "language_info": {
278 | "codemirror_mode": {
279 | "name": "ipython",
280 | "version": 3
281 | },
282 | "file_extension": ".py",
283 | "mimetype": "text/x-python",
284 | "name": "python",
285 | "nbconvert_exporter": "python",
286 | "pygments_lexer": "ipython3",
287 | "version": "3.7.6"
288 | },
289 | "toc": {
290 | "base_numbering": 1,
291 | "nav_menu": {},
292 | "number_sections": true,
293 | "sideBar": true,
294 | "skip_h1_title": false,
295 | "title_cell": "Table of Contents",
296 | "title_sidebar": "Contents",
297 | "toc_cell": true,
298 | "toc_position": {},
299 | "toc_section_display": true,
300 | "toc_window_display": false
301 | },
302 | "varInspector": {
303 | "cols": {
304 | "lenName": 16,
305 | "lenType": 16,
306 | "lenVar": 40
307 | },
308 | "kernels_config": {
309 | "python": {
310 | "delete_cmd_postfix": "",
311 | "delete_cmd_prefix": "del ",
312 | "library": "var_list.py",
313 | "varRefreshCmd": "print(var_dic_list())"
314 | },
315 | "r": {
316 | "delete_cmd_postfix": ") ",
317 | "delete_cmd_prefix": "rm(",
318 | "library": "var_list.r",
319 | "varRefreshCmd": "cat(var_dic_list()) "
320 | }
321 | },
322 | "types_to_exclude": [
323 | "module",
324 | "function",
325 | "builtin_function_or_method",
326 | "instance",
327 | "_Feature"
328 | ],
329 | "window_display": false
330 | }
331 | },
332 | "nbformat": 4,
333 | "nbformat_minor": 4
334 | }
335 |
--------------------------------------------------------------------------------
/K-Nearest Neigbors/KNN-Classifier.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "ExecuteTime": {
8 | "end_time": "2020-02-16T23:06:30.008859Z",
9 | "start_time": "2020-02-16T23:06:30.002839Z"
10 | }
11 | },
12 | "outputs": [],
13 | "source": [
14 | "import csv\n",
15 | "import random\n",
16 | "import operator\n",
17 | "import math\n",
18 | "\n",
19 | "random.seed(47)"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 2,
25 | "metadata": {
26 | "ExecuteTime": {
27 | "end_time": "2020-02-16T23:06:30.143114Z",
28 | "start_time": "2020-02-16T23:06:30.010816Z"
29 | }
30 | },
31 | "outputs": [],
32 | "source": [
33 | "def viewDataset(file):\n",
34 | " with open(file) as csvfile:\n",
35 | " lines = csv.reader(csvfile)\n",
36 | " for row in lines:\n",
37 | " print(', '.join(row))\n"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 3,
43 | "metadata": {
44 | "ExecuteTime": {
45 | "end_time": "2020-02-16T23:06:30.290169Z",
46 | "start_time": "2020-02-16T23:06:30.149057Z"
47 | }
48 | },
49 | "outputs": [],
50 | "source": [
51 | "dataset = r'../datasets/iris.data'\n",
52 | "# viewDataset(dataset)"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 4,
58 | "metadata": {
59 | "ExecuteTime": {
60 | "end_time": "2020-02-16T23:06:30.439251Z",
61 | "start_time": "2020-02-16T23:06:30.293164Z"
62 | }
63 | },
64 | "outputs": [],
65 | "source": [
66 | "def handleDataset(filename, split):\n",
67 | " trainingSet = []\n",
68 | " testSet = []\n",
69 | " with open(filename, 'r') as csvfile:\n",
70 | " lines = csv.reader(csvfile)\n",
71 | " dataset = list(lines)\n",
72 | " for x in range(len(dataset) - 1):\n",
73 | " for y in range(4):\n",
74 | " dataset[x][y] = float(dataset[x][y])\n",
75 | " if random.random() < split:\n",
76 | " trainingSet.append(dataset[x])\n",
77 | " else:\n",
78 | " testSet.append(dataset[x])\n",
79 | " return trainingSet, testSet"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 5,
85 | "metadata": {
86 | "ExecuteTime": {
87 | "end_time": "2020-02-16T23:06:30.580287Z",
88 | "start_time": "2020-02-16T23:06:30.440284Z"
89 | }
90 | },
91 | "outputs": [
92 | {
93 | "name": "stdout",
94 | "output_type": "stream",
95 | "text": [
96 | "Train: 101\n",
97 | "Test: 49\n"
98 | ]
99 | }
100 | ],
101 | "source": [
102 | "# test handleDataset\n",
103 | "\n",
104 | "trainingSet, testSet = handleDataset(dataset, 0.66)\n",
105 | "print ('Train: ' + repr(len(trainingSet)))\n",
106 | "print ('Test: ' + repr(len(testSet)))"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": 6,
112 | "metadata": {
113 | "ExecuteTime": {
114 | "end_time": "2020-02-16T23:06:30.721943Z",
115 | "start_time": "2020-02-16T23:06:30.582311Z"
116 | }
117 | },
118 | "outputs": [],
119 | "source": [
120 | "def euclideanDistance(instance1, instance2, length):\n",
121 | " distance = 0\n",
122 | " for x in range(length):\n",
123 | " distance += pow((instance1[x] - instance2[x]), 2)\n",
124 | " return math.sqrt(distance)"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 7,
130 | "metadata": {
131 | "ExecuteTime": {
132 | "end_time": "2020-02-16T23:06:30.858538Z",
133 | "start_time": "2020-02-16T23:06:30.731964Z"
134 | }
135 | },
136 | "outputs": [
137 | {
138 | "name": "stdout",
139 | "output_type": "stream",
140 | "text": [
141 | "Distance: 3.4641016151377544\n"
142 | ]
143 | }
144 | ],
145 | "source": [
146 | "# Test Euclidean Distance\n",
147 | "data1 = [2, 2, 2, 'a']\n",
148 | "data2 = [4, 4, 4, 'b']\n",
149 | "distance = euclideanDistance(data1, data2, 3)\n",
150 | "print('Distance: ' + repr(distance))"
151 | ]
152 | },
153 | {
154 | "cell_type": "code",
155 | "execution_count": 8,
156 | "metadata": {
157 | "ExecuteTime": {
158 | "end_time": "2020-02-16T23:06:30.995044Z",
159 | "start_time": "2020-02-16T23:06:30.861489Z"
160 | }
161 | },
162 | "outputs": [],
163 | "source": [
164 | "def getKNeighbors(trainingSet, testInstance, k):\n",
165 | " distances = []\n",
166 | " length = len(testInstance) - 1\n",
167 | " for x in range(len(trainingSet)):\n",
168 | " dist = euclideanDistance(testInstance, trainingSet[x], length)\n",
169 | " distances.append((trainingSet[x], dist))\n",
170 | " distances.sort(key=operator.itemgetter(1))\n",
171 | " neighbors = []\n",
172 | " for x in range(k):\n",
173 | " neighbors.append(distances[x][0])\n",
174 | " return neighbors"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": 9,
180 | "metadata": {
181 | "ExecuteTime": {
182 | "end_time": "2020-02-16T23:06:31.162445Z",
183 | "start_time": "2020-02-16T23:06:30.996140Z"
184 | }
185 | },
186 | "outputs": [
187 | {
188 | "name": "stdout",
189 | "output_type": "stream",
190 | "text": [
191 | "[[4, 4, 4, 'b']]\n"
192 | ]
193 | }
194 | ],
195 | "source": [
196 | "# test getKNeighbors\n",
197 | "\n",
198 | "trainSet = [[2, 2, 2, 'a'], [4, 4, 4, 'b']]\n",
199 | "testInstance = [5, 5, 5]\n",
200 | "k = 1\n",
201 | "neighbors = getKNeighbors(trainSet, testInstance, 1)\n",
202 | "print(neighbors)"
203 | ]
204 | },
205 | {
206 | "cell_type": "code",
207 | "execution_count": 10,
208 | "metadata": {
209 | "ExecuteTime": {
210 | "end_time": "2020-02-16T23:06:31.315818Z",
211 | "start_time": "2020-02-16T23:06:31.163407Z"
212 | }
213 | },
214 | "outputs": [],
215 | "source": [
216 | "def getResponse(neighbors):\n",
217 | " classVotes = {}\n",
218 | " for x in range(len(neighbors)):\n",
219 | " response = neighbors[x][-1]\n",
220 | " if response in classVotes:\n",
221 | " classVotes[response] += 1\n",
222 | " else:\n",
223 | " classVotes[response] = 1\n",
224 | "# print(classVotes)\n",
225 | " sortedVotes = sorted(classVotes.items(),\n",
226 | " key=operator.itemgetter(1), reverse=True)\n",
227 | "# print(sortedVotes)\n",
228 | " return sortedVotes[0][0]"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": 11,
234 | "metadata": {
235 | "ExecuteTime": {
236 | "end_time": "2020-02-16T23:06:31.484233Z",
237 | "start_time": "2020-02-16T23:06:31.321761Z"
238 | }
239 | },
240 | "outputs": [
241 | {
242 | "name": "stdout",
243 | "output_type": "stream",
244 | "text": [
245 | "a\n"
246 | ]
247 | }
248 | ],
249 | "source": [
250 | "# test getResponse\n",
251 | "\n",
252 | "neighbors = [[1, 1, 1, 'a'], [2, 2, 2, 'a'], [3, 3, 3, 'b']]\n",
253 | "print(getResponse(neighbors))"
254 | ]
255 | },
256 | {
257 | "cell_type": "code",
258 | "execution_count": 12,
259 | "metadata": {
260 | "ExecuteTime": {
261 | "end_time": "2020-02-16T23:06:31.632980Z",
262 | "start_time": "2020-02-16T23:06:31.490217Z"
263 | }
264 | },
265 | "outputs": [],
266 | "source": [
267 | "def getAccuracy(testSet, predictions):\n",
268 | " correct = 0\n",
269 | " testSet_length = len(testSet)\n",
270 | " for x in range(testSet_length):\n",
271 | " if testSet[x][-1] == predictions[x]:\n",
272 | " correct += 1\n",
273 | " return (correct/testSet_length) * 100.0"
274 | ]
275 | },
276 | {
277 | "cell_type": "code",
278 | "execution_count": 13,
279 | "metadata": {
280 | "ExecuteTime": {
281 | "end_time": "2020-02-16T23:06:31.768505Z",
282 | "start_time": "2020-02-16T23:06:31.638927Z"
283 | }
284 | },
285 | "outputs": [
286 | {
287 | "name": "stdout",
288 | "output_type": "stream",
289 | "text": [
290 | "66.66666666666666\n"
291 | ]
292 | }
293 | ],
294 | "source": [
295 | "# test getAccuracy\n",
296 | "\n",
297 | "testSet = [[1, 1, 1, 'a'], [2, 2, 2, 'a'], [3, 3, 3, 'b']]\n",
298 | "predictions = ['a', 'a', 'a']\n",
299 | "accuracy = getAccuracy(testSet, predictions)\n",
300 | "print(accuracy)"
301 | ]
302 | },
303 | {
304 | "cell_type": "code",
305 | "execution_count": 14,
306 | "metadata": {
307 | "ExecuteTime": {
308 | "end_time": "2020-02-16T23:06:32.449281Z",
309 | "start_time": "2020-02-16T23:06:31.769490Z"
310 | }
311 | },
312 | "outputs": [],
313 | "source": [
314 | "from sklearn.metrics import accuracy_score"
315 | ]
316 | },
317 | {
318 | "cell_type": "code",
319 | "execution_count": 15,
320 | "metadata": {
321 | "ExecuteTime": {
322 | "end_time": "2020-02-16T23:06:32.465145Z",
323 | "start_time": "2020-02-16T23:06:32.455179Z"
324 | }
325 | },
326 | "outputs": [],
327 | "source": [
328 | "def main():\n",
329 | " # prepare data\n",
330 | " split = 0.8\n",
331 | " trainingSet, testSet = handleDataset(dataset, split)\n",
332 | " print('Train: ' + repr(len(trainingSet)))\n",
333 | " print('Test: ' + repr(len(testSet)))\n",
334 | " # generate predictions\n",
335 | " predictions = []\n",
336 | " k = 3\n",
337 | " for x in range(len(testSet)):\n",
338 | " neighbors = getKNeighbors(trainingSet, testSet[x], k)\n",
339 | " result = getResponse(neighbors) \n",
340 | " predictions.append(result)\n",
341 | " print(f'> predicted = {result}, actual = {testSet[x][-1]}')\n",
342 | " \n",
343 | " accuracy = getAccuracy(testSet, predictions)\n",
344 | " print(f'k: {k}, Accuracy: {round(accuracy,3)}%')"
345 | ]
346 | },
347 | {
348 | "cell_type": "code",
349 | "execution_count": 16,
350 | "metadata": {
351 | "ExecuteTime": {
352 | "end_time": "2020-02-16T23:06:32.623369Z",
353 | "start_time": "2020-02-16T23:06:32.467140Z"
354 | }
355 | },
356 | "outputs": [
357 | {
358 | "name": "stdout",
359 | "output_type": "stream",
360 | "text": [
361 | "Train: 121\n",
362 | "Test: 29\n",
363 | "> predicted = Iris-setosa, actual = Iris-setosa\n",
364 | "> predicted = Iris-setosa, actual = Iris-setosa\n",
365 | "> predicted = Iris-setosa, actual = Iris-setosa\n",
366 | "> predicted = Iris-setosa, actual = Iris-setosa\n",
367 | "> predicted = Iris-setosa, actual = Iris-setosa\n",
368 | "> predicted = Iris-setosa, actual = Iris-setosa\n",
369 | "> predicted = Iris-setosa, actual = Iris-setosa\n",
370 | "> predicted = Iris-setosa, actual = Iris-setosa\n",
371 | "> predicted = Iris-setosa, actual = Iris-setosa\n",
372 | "> predicted = Iris-setosa, actual = Iris-setosa\n",
373 | "> predicted = Iris-versicolor, actual = Iris-versicolor\n",
374 | "> predicted = Iris-versicolor, actual = Iris-versicolor\n",
375 | "> predicted = Iris-versicolor, actual = Iris-versicolor\n",
376 | "> predicted = Iris-versicolor, actual = Iris-versicolor\n",
377 | "> predicted = Iris-versicolor, actual = Iris-versicolor\n",
378 | "> predicted = Iris-versicolor, actual = Iris-versicolor\n",
379 | "> predicted = Iris-virginica, actual = Iris-versicolor\n",
380 | "> predicted = Iris-versicolor, actual = Iris-versicolor\n",
381 | "> predicted = Iris-virginica, actual = Iris-versicolor\n",
382 | "> predicted = Iris-versicolor, actual = Iris-versicolor\n",
383 | "> predicted = Iris-versicolor, actual = Iris-virginica\n",
384 | "> predicted = Iris-virginica, actual = Iris-virginica\n",
385 | "> predicted = Iris-virginica, actual = Iris-virginica\n",
386 | "> predicted = Iris-virginica, actual = Iris-virginica\n",
387 | "> predicted = Iris-virginica, actual = Iris-virginica\n",
388 | "> predicted = Iris-virginica, actual = Iris-virginica\n",
389 | "> predicted = Iris-virginica, actual = Iris-virginica\n",
390 | "> predicted = Iris-virginica, actual = Iris-virginica\n",
391 | "> predicted = Iris-virginica, actual = Iris-virginica\n",
392 | "k: 3, Accuracy: 89.655%\n"
393 | ]
394 | }
395 | ],
396 | "source": [
397 | "main()"
398 | ]
399 | }
400 | ],
401 | "metadata": {
402 | "kernelspec": {
403 | "display_name": "Python 3",
404 | "language": "python",
405 | "name": "python3"
406 | },
407 | "language_info": {
408 | "codemirror_mode": {
409 | "name": "ipython",
410 | "version": 3
411 | },
412 | "file_extension": ".py",
413 | "mimetype": "text/x-python",
414 | "name": "python",
415 | "nbconvert_exporter": "python",
416 | "pygments_lexer": "ipython3",
417 | "version": "3.7.6"
418 | },
419 | "toc": {
420 | "base_numbering": 1,
421 | "nav_menu": {},
422 | "number_sections": true,
423 | "sideBar": true,
424 | "skip_h1_title": false,
425 | "title_cell": "Table of Contents",
426 | "title_sidebar": "Contents",
427 | "toc_cell": false,
428 | "toc_position": {},
429 | "toc_section_display": true,
430 | "toc_window_display": false
431 | },
432 | "varInspector": {
433 | "cols": {
434 | "lenName": 16,
435 | "lenType": 16,
436 | "lenVar": 40
437 | },
438 | "kernels_config": {
439 | "python": {
440 | "delete_cmd_postfix": "",
441 | "delete_cmd_prefix": "del ",
442 | "library": "var_list.py",
443 | "varRefreshCmd": "print(var_dic_list())"
444 | },
445 | "r": {
446 | "delete_cmd_postfix": ") ",
447 | "delete_cmd_prefix": "rm(",
448 | "library": "var_list.r",
449 | "varRefreshCmd": "cat(var_dic_list()) "
450 | }
451 | },
452 | "types_to_exclude": [
453 | "module",
454 | "function",
455 | "builtin_function_or_method",
456 | "instance",
457 | "_Feature"
458 | ],
459 | "window_display": false
460 | }
461 | },
462 | "nbformat": 4,
463 | "nbformat_minor": 2
464 | }
465 |
--------------------------------------------------------------------------------
/Naive Bayes/Naive Bayes.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "**Gaussian Naive Bayes**\n",
8 | "\n",
9 | "***Bayes Theorem:***\n",
10 | "$P(A|B) = \\frac{P(B|A)P(A)}{P(B)}$\n",
11 | "\n",
12 | "\n",
13 | "***Naive Bayes:***\n",
14 | "\n",
15 | "$\\mathbf{P(y|X) = \\frac{P(X|y)P(y)}{P(X)}}$\n",
16 | "\n",
17 | "\n",
18 | "$\\mathbf{X = (x_{1}, x_{2}, x_{3}, x_{4}, x_{5},...,x_{n})}$\n",
19 | "\n",
20 | "\n",
21 | "$\\mathbf{P(y|X) = \\frac{P(x_{1}|y).P(x_{2}|y)....P(x_{n}|y).P(y)}{P(X)}}$"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 1,
27 | "metadata": {
28 | "ExecuteTime": {
29 | "end_time": "2020-04-11T22:38:10.083585Z",
30 | "start_time": "2020-04-11T22:38:08.030037Z"
31 | }
32 | },
33 | "outputs": [],
34 | "source": [
35 | "import numpy as np\n",
36 | "import pandas as pd\n",
37 | "from sklearn.model_selection import train_test_split"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 2,
43 | "metadata": {
44 | "ExecuteTime": {
45 | "end_time": "2020-04-11T22:38:10.137508Z",
46 | "start_time": "2020-04-11T22:38:10.091643Z"
47 | }
48 | },
49 | "outputs": [],
50 | "source": [
51 | "class NaiveBayes(object):\n",
52 | " \n",
53 | " def fit(self, X, y):\n",
54 | " n_samples, n_features = X.shape\n",
55 | " self._classes = np.unique(y)\n",
56 | " n_classes = len(self._classes)\n",
57 | " \n",
58 | " # mean, variance, priors\n",
59 | " self._mean = np.zeros((n_classes, n_features), dtype=np.float64)\n",
60 | " self._var = np.zeros((n_classes, n_features), dtype=np.float64)\n",
61 | " self._priors = np.zeros(n_classes, dtype=np.float64)\n",
62 | "\n",
63 | " # extracting mean, variance and priors for each class\n",
64 | " # useful in calculating pdf during prediction\n",
65 | " for c in self._classes:\n",
66 | " X_c = X[y==c]\n",
67 | " self._mean[c, :] = X_c.mean(axis=0)\n",
68 | " self._var[c, :] = X_c.var(axis=0)\n",
69 | " self._priors[c] = X_c.shape[0] / float(n_samples)\n",
70 | "\n",
71 | " def predict(self, X):\n",
72 | " y_pred = [self._predict(x) for x in X]\n",
73 | " return np.array(y_pred)\n",
74 | "\n",
75 | " def _predict(self, x):\n",
76 | " posteriors = []\n",
77 | "\n",
78 | " # calculate posterior probability for each class\n",
79 | " for idx, c in enumerate(self._classes):\n",
80 | " prior = np.log(self._priors[idx])\n",
81 | " class_conditional = np.sum(np.log(self.gaussian_pdf(idx, x)))\n",
82 | " posterior = prior + class_conditional\n",
83 | " posteriors.append(posterior)\n",
84 | " \n",
85 | " # return class with highest posterior probability\n",
86 | " return self._classes[np.argmax(posteriors)]\n",
87 | " \n",
88 | "\n",
89 | " def gaussian_pdf(self, class_idx, x):\n",
90 | " mean = self._mean[class_idx]\n",
91 | " var = self._var[class_idx]\n",
92 | " numerator = np.exp(- (x-mean)**2 / (2 * var))\n",
93 | " denominator = np.sqrt(2 * np.pi * var)\n",
94 | " return numerator / denominator"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 3,
100 | "metadata": {
101 | "ExecuteTime": {
102 | "end_time": "2020-04-11T22:38:10.283490Z",
103 | "start_time": "2020-04-11T22:38:10.144440Z"
104 | }
105 | },
106 | "outputs": [],
107 | "source": [
108 | "def accuracy(y_true, y_pred):\n",
109 | " accuracy = np.sum(y_true == y_pred) / len(y_true)\n",
110 | " return accuracy"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 4,
116 | "metadata": {
117 | "ExecuteTime": {
118 | "end_time": "2020-04-11T22:38:10.548884Z",
119 | "start_time": "2020-04-11T22:38:10.292962Z"
120 | }
121 | },
122 | "outputs": [],
123 | "source": [
124 | "from sklearn.datasets import load_iris\n",
125 | "\n",
126 | "data = load_iris()"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": 5,
132 | "metadata": {
133 | "ExecuteTime": {
134 | "end_time": "2020-04-11T22:38:10.561691Z",
135 | "start_time": "2020-04-11T22:38:10.549919Z"
136 | }
137 | },
138 | "outputs": [],
139 | "source": [
140 | "X_train, X_test, y_train, y_test = train_test_split(data.data, data.target)"
141 | ]
142 | },
143 | {
144 | "cell_type": "code",
145 | "execution_count": 6,
146 | "metadata": {
147 | "ExecuteTime": {
148 | "end_time": "2020-04-11T22:38:10.695826Z",
149 | "start_time": "2020-04-11T22:38:10.566489Z"
150 | }
151 | },
152 | "outputs": [
153 | {
154 | "name": "stdout",
155 | "output_type": "stream",
156 | "text": [
157 | "(112, 4)\n",
158 | "(38, 4)\n",
159 | "(112,)\n",
160 | "(38,)\n"
161 | ]
162 | }
163 | ],
164 | "source": [
165 | "print(X_train.shape)\n",
166 | "print(X_test.shape)\n",
167 | "print(y_train.shape)\n",
168 | "print(y_test.shape)"
169 | ]
170 | },
171 | {
172 | "cell_type": "code",
173 | "execution_count": 7,
174 | "metadata": {
175 | "ExecuteTime": {
176 | "end_time": "2020-04-11T22:38:10.840526Z",
177 | "start_time": "2020-04-11T22:38:10.707061Z"
178 | }
179 | },
180 | "outputs": [],
181 | "source": [
182 | "nb = NaiveBayes()\n",
183 | "nb.fit(X_train, y_train.ravel())\n",
184 | "y_pred = nb.predict(X_test)"
185 | ]
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": 8,
190 | "metadata": {
191 | "ExecuteTime": {
192 | "end_time": "2020-04-11T22:38:10.969671Z",
193 | "start_time": "2020-04-11T22:38:10.849490Z"
194 | }
195 | },
196 | "outputs": [
197 | {
198 | "name": "stdout",
199 | "output_type": "stream",
200 | "text": [
201 | "Naive Bayes accuracy: 0.8947368421052632\n"
202 | ]
203 | }
204 | ],
205 | "source": [
206 | "print(f\"Naive Bayes accuracy: {accuracy(y_test, y_pred)}\")"
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": 9,
212 | "metadata": {
213 | "ExecuteTime": {
214 | "end_time": "2020-04-11T22:38:11.108872Z",
215 | "start_time": "2020-04-11T22:38:10.977849Z"
216 | }
217 | },
218 | "outputs": [],
219 | "source": [
220 | "# Diabetes Dataset"
221 | ]
222 | },
223 | {
224 | "cell_type": "code",
225 | "execution_count": 10,
226 | "metadata": {
227 | "ExecuteTime": {
228 | "end_time": "2020-04-11T22:38:11.985904Z",
229 | "start_time": "2020-04-11T22:38:11.118415Z"
230 | }
231 | },
232 | "outputs": [
233 | {
234 | "data": {
235 | "text/html": [
236 | "\n",
237 | "\n",
250 | "
\n",
251 | " \n",
252 | " \n",
253 | " | \n",
254 | " Pregnancies | \n",
255 | " Glucose | \n",
256 | " BloodPressure | \n",
257 | " SkinThickness | \n",
258 | " Insulin | \n",
259 | " BMI | \n",
260 | " DiabetesPedigreeFunction | \n",
261 | " Age | \n",
262 | " Outcome | \n",
263 | "
\n",
264 | " \n",
265 | " \n",
266 | " \n",
267 | " | 0 | \n",
268 | " 6 | \n",
269 | " 148 | \n",
270 | " 72 | \n",
271 | " 35 | \n",
272 | " 0 | \n",
273 | " 33.6 | \n",
274 | " 0.627 | \n",
275 | " 50 | \n",
276 | " 1 | \n",
277 | "
\n",
278 | " \n",
279 | " | 1 | \n",
280 | " 1 | \n",
281 | " 85 | \n",
282 | " 66 | \n",
283 | " 29 | \n",
284 | " 0 | \n",
285 | " 26.6 | \n",
286 | " 0.351 | \n",
287 | " 31 | \n",
288 | " 0 | \n",
289 | "
\n",
290 | " \n",
291 | " | 2 | \n",
292 | " 8 | \n",
293 | " 183 | \n",
294 | " 64 | \n",
295 | " 0 | \n",
296 | " 0 | \n",
297 | " 23.3 | \n",
298 | " 0.672 | \n",
299 | " 32 | \n",
300 | " 1 | \n",
301 | "
\n",
302 | " \n",
303 | " | 3 | \n",
304 | " 1 | \n",
305 | " 89 | \n",
306 | " 66 | \n",
307 | " 23 | \n",
308 | " 94 | \n",
309 | " 28.1 | \n",
310 | " 0.167 | \n",
311 | " 21 | \n",
312 | " 0 | \n",
313 | "
\n",
314 | " \n",
315 | " | 4 | \n",
316 | " 0 | \n",
317 | " 137 | \n",
318 | " 40 | \n",
319 | " 35 | \n",
320 | " 168 | \n",
321 | " 43.1 | \n",
322 | " 2.288 | \n",
323 | " 33 | \n",
324 | " 1 | \n",
325 | "
\n",
326 | " \n",
327 | "
\n",
328 | "
"
329 | ],
330 | "text/plain": [
331 | " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
332 | "0 6 148 72 35 0 33.6 \n",
333 | "1 1 85 66 29 0 26.6 \n",
334 | "2 8 183 64 0 0 23.3 \n",
335 | "3 1 89 66 23 94 28.1 \n",
336 | "4 0 137 40 35 168 43.1 \n",
337 | "\n",
338 | " DiabetesPedigreeFunction Age Outcome \n",
339 | "0 0.627 50 1 \n",
340 | "1 0.351 31 0 \n",
341 | "2 0.672 32 1 \n",
342 | "3 0.167 21 0 \n",
343 | "4 2.288 33 1 "
344 | ]
345 | },
346 | "execution_count": 10,
347 | "metadata": {},
348 | "output_type": "execute_result"
349 | }
350 | ],
351 | "source": [
352 | "cols = [\"Pregnancies\" ,\"Glucose\" ,\"BloodPressure\" ,\"SkinThickness\" ,\"Insulin\" ,\"BMI\" ,\"DiabetesPedigreeFunction\" ,\"Age\" ,\"Outcome\"]\n",
353 | "url = \"https://gist.githubusercontent.com/ktisha/c21e73a1bd1700294ef790c56c8aec1f/raw/819b69b5736821ccee93d05b51de0510bea00294/pima-indians-diabetes.csv\"\n",
354 | "\n",
355 | "diabetes_data = pd.read_csv(url, skiprows=9, header=None, names=cols)\n",
356 | "diabetes_data.head()"
357 | ]
358 | },
359 | {
360 | "cell_type": "code",
361 | "execution_count": 11,
362 | "metadata": {
363 | "ExecuteTime": {
364 | "end_time": "2020-04-11T22:38:11.999427Z",
365 | "start_time": "2020-04-11T22:38:11.989369Z"
366 | }
367 | },
368 | "outputs": [
369 | {
370 | "data": {
371 | "text/plain": [
372 | "(768, 9)"
373 | ]
374 | },
375 | "execution_count": 11,
376 | "metadata": {},
377 | "output_type": "execute_result"
378 | }
379 | ],
380 | "source": [
381 | "diabetes_data.shape"
382 | ]
383 | },
384 | {
385 | "cell_type": "code",
386 | "execution_count": 12,
387 | "metadata": {
388 | "ExecuteTime": {
389 | "end_time": "2020-04-11T22:38:12.130899Z",
390 | "start_time": "2020-04-11T22:38:12.003158Z"
391 | }
392 | },
393 | "outputs": [],
394 | "source": [
395 | "X = diabetes_data[cols[:-1]].values\n",
396 | "y = diabetes_data[cols[-1]].values"
397 | ]
398 | },
399 | {
400 | "cell_type": "code",
401 | "execution_count": 13,
402 | "metadata": {
403 | "ExecuteTime": {
404 | "end_time": "2020-04-11T22:38:12.285074Z",
405 | "start_time": "2020-04-11T22:38:12.134911Z"
406 | }
407 | },
408 | "outputs": [],
409 | "source": [
410 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, stratify=y, random_state=42)"
411 | ]
412 | },
413 | {
414 | "cell_type": "code",
415 | "execution_count": 14,
416 | "metadata": {
417 | "ExecuteTime": {
418 | "end_time": "2020-04-11T22:38:12.441193Z",
419 | "start_time": "2020-04-11T22:38:12.293223Z"
420 | }
421 | },
422 | "outputs": [],
423 | "source": [
424 | "nb = NaiveBayes()\n",
425 | "nb.fit(X_train, y_train.ravel())\n",
426 | "y_pred = nb.predict(X_test)"
427 | ]
428 | },
429 | {
430 | "cell_type": "code",
431 | "execution_count": 15,
432 | "metadata": {
433 | "ExecuteTime": {
434 | "end_time": "2020-04-11T22:38:12.614652Z",
435 | "start_time": "2020-04-11T22:38:12.449128Z"
436 | }
437 | },
438 | "outputs": [
439 | {
440 | "name": "stdout",
441 | "output_type": "stream",
442 | "text": [
443 | "Naive Bayes accuracy: 0.7532467532467533\n"
444 | ]
445 | }
446 | ],
447 | "source": [
448 | "print(f\"Naive Bayes accuracy: {accuracy(y_test, y_pred)}\")"
449 | ]
450 | },
451 | {
452 | "cell_type": "code",
453 | "execution_count": 16,
454 | "metadata": {
455 | "ExecuteTime": {
456 | "end_time": "2020-04-11T22:38:12.746000Z",
457 | "start_time": "2020-04-11T22:38:12.617873Z"
458 | }
459 | },
460 | "outputs": [],
461 | "source": [
462 | "from sklearn.metrics import precision_score, recall_score, f1_score"
463 | ]
464 | },
465 | {
466 | "cell_type": "code",
467 | "execution_count": 17,
468 | "metadata": {
469 | "ExecuteTime": {
470 | "end_time": "2020-04-11T22:38:13.191309Z",
471 | "start_time": "2020-04-11T22:38:12.750193Z"
472 | }
473 | },
474 | "outputs": [
475 | {
476 | "name": "stdout",
477 | "output_type": "stream",
478 | "text": [
479 | "Precision: 0.6428571428571429\n",
480 | "Recall: 0.6666666666666666\n",
481 | "F1-Score: 0.6545454545454545\n"
482 | ]
483 | }
484 | ],
485 | "source": [
486 | "print(f\"Precision: {precision_score(y_test, y_pred)}\")\n",
487 | "print(f\"Recall: {recall_score(y_test, y_pred)}\")\n",
488 | "print(f\"F1-Score: {f1_score(y_test, y_pred)}\")"
489 | ]
490 | }
491 | ],
492 | "metadata": {
493 | "kernelspec": {
494 | "display_name": "Python 3",
495 | "language": "python",
496 | "name": "python3"
497 | },
498 | "language_info": {
499 | "codemirror_mode": {
500 | "name": "ipython",
501 | "version": 3
502 | },
503 | "file_extension": ".py",
504 | "mimetype": "text/x-python",
505 | "name": "python",
506 | "nbconvert_exporter": "python",
507 | "pygments_lexer": "ipython3",
508 | "version": "3.7.6"
509 | },
510 | "toc": {
511 | "base_numbering": 1,
512 | "nav_menu": {},
513 | "number_sections": true,
514 | "sideBar": true,
515 | "skip_h1_title": false,
516 | "title_cell": "Table of Contents",
517 | "title_sidebar": "Contents",
518 | "toc_cell": false,
519 | "toc_position": {},
520 | "toc_section_display": true,
521 | "toc_window_display": false
522 | },
523 | "varInspector": {
524 | "cols": {
525 | "lenName": 16,
526 | "lenType": 16,
527 | "lenVar": 40
528 | },
529 | "kernels_config": {
530 | "python": {
531 | "delete_cmd_postfix": "",
532 | "delete_cmd_prefix": "del ",
533 | "library": "var_list.py",
534 | "varRefreshCmd": "print(var_dic_list())"
535 | },
536 | "r": {
537 | "delete_cmd_postfix": ") ",
538 | "delete_cmd_prefix": "rm(",
539 | "library": "var_list.r",
540 | "varRefreshCmd": "cat(var_dic_list()) "
541 | }
542 | },
543 | "types_to_exclude": [
544 | "module",
545 | "function",
546 | "builtin_function_or_method",
547 | "instance",
548 | "_Feature"
549 | ],
550 | "window_display": false
551 | }
552 | },
553 | "nbformat": 4,
554 | "nbformat_minor": 4
555 | }
556 |
--------------------------------------------------------------------------------
/datasets/diabetes_data.csv:
--------------------------------------------------------------------------------
1 | Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
2 | 6,148,72,35,0,33.6,0.627,50,1
3 | 1,85,66,29,0,26.6,0.35100000000000003,31,0
4 | 8,183,64,0,0,23.3,0.672,32,1
5 | 1,89,66,23,94,28.1,0.16699999999999998,21,0
6 | 0,137,40,35,168,43.1,2.2880000000000003,33,1
7 | 5,116,74,0,0,25.6,0.201,30,0
8 | 3,78,50,32,88,31.0,0.248,26,1
9 | 10,115,0,0,0,35.3,0.134,29,0
10 | 2,197,70,45,543,30.5,0.158,53,1
11 | 8,125,96,0,0,0.0,0.23199999999999998,54,1
12 | 4,110,92,0,0,37.6,0.191,30,0
13 | 10,168,74,0,0,38.0,0.537,34,1
14 | 10,139,80,0,0,27.1,1.4409999999999998,57,0
15 | 1,189,60,23,846,30.1,0.39799999999999996,59,1
16 | 5,166,72,19,175,25.8,0.5870000000000001,51,1
17 | 7,100,0,0,0,30.0,0.484,32,1
18 | 0,118,84,47,230,45.8,0.551,31,1
19 | 7,107,74,0,0,29.6,0.254,31,1
20 | 1,103,30,38,83,43.3,0.183,33,0
21 | 1,115,70,30,96,34.6,0.529,32,1
22 | 3,126,88,41,235,39.3,0.7040000000000001,27,0
23 | 8,99,84,0,0,35.4,0.38799999999999996,50,0
24 | 7,196,90,0,0,39.8,0.451,41,1
25 | 9,119,80,35,0,29.0,0.263,29,1
26 | 11,143,94,33,146,36.6,0.254,51,1
27 | 10,125,70,26,115,31.1,0.205,41,1
28 | 7,147,76,0,0,39.4,0.257,43,1
29 | 1,97,66,15,140,23.2,0.48700000000000004,22,0
30 | 13,145,82,19,110,22.2,0.245,57,0
31 | 5,117,92,0,0,34.1,0.337,38,0
32 | 5,109,75,26,0,36.0,0.546,60,0
33 | 3,158,76,36,245,31.6,0.851,28,1
34 | 3,88,58,11,54,24.8,0.267,22,0
35 | 6,92,92,0,0,19.9,0.188,28,0
36 | 10,122,78,31,0,27.6,0.512,45,0
37 | 4,103,60,33,192,24.0,0.966,33,0
38 | 11,138,76,0,0,33.2,0.42,35,0
39 | 9,102,76,37,0,32.9,0.665,46,1
40 | 2,90,68,42,0,38.2,0.503,27,1
41 | 4,111,72,47,207,37.1,1.39,56,1
42 | 3,180,64,25,70,34.0,0.271,26,0
43 | 7,133,84,0,0,40.2,0.696,37,0
44 | 7,106,92,18,0,22.7,0.235,48,0
45 | 9,171,110,24,240,45.4,0.721,54,1
46 | 7,159,64,0,0,27.4,0.294,40,0
47 | 0,180,66,39,0,42.0,1.893,25,1
48 | 1,146,56,0,0,29.7,0.564,29,0
49 | 2,71,70,27,0,28.0,0.586,22,0
50 | 7,103,66,32,0,39.1,0.344,31,1
51 | 7,105,0,0,0,0.0,0.305,24,0
52 | 1,103,80,11,82,19.4,0.491,22,0
53 | 1,101,50,15,36,24.2,0.526,26,0
54 | 5,88,66,21,23,24.4,0.342,30,0
55 | 8,176,90,34,300,33.7,0.467,58,1
56 | 7,150,66,42,342,34.7,0.718,42,0
57 | 1,73,50,10,0,23.0,0.248,21,0
58 | 7,187,68,39,304,37.7,0.254,41,1
59 | 0,100,88,60,110,46.8,0.9620000000000001,31,0
60 | 0,146,82,0,0,40.5,1.781,44,0
61 | 0,105,64,41,142,41.5,0.17300000000000001,22,0
62 | 2,84,0,0,0,0.0,0.304,21,0
63 | 8,133,72,0,0,32.9,0.27,39,1
64 | 5,44,62,0,0,25.0,0.5870000000000001,36,0
65 | 2,141,58,34,128,25.4,0.6990000000000001,24,0
66 | 7,114,66,0,0,32.8,0.258,42,1
67 | 5,99,74,27,0,29.0,0.203,32,0
68 | 0,109,88,30,0,32.5,0.855,38,1
69 | 2,109,92,0,0,42.7,0.845,54,0
70 | 1,95,66,13,38,19.6,0.33399999999999996,25,0
71 | 4,146,85,27,100,28.9,0.18899999999999997,27,0
72 | 2,100,66,20,90,32.9,0.867,28,1
73 | 5,139,64,35,140,28.6,0.41100000000000003,26,0
74 | 13,126,90,0,0,43.4,0.583,42,1
75 | 4,129,86,20,270,35.1,0.231,23,0
76 | 1,79,75,30,0,32.0,0.396,22,0
77 | 1,0,48,20,0,24.7,0.14,22,0
78 | 7,62,78,0,0,32.6,0.391,41,0
79 | 5,95,72,33,0,37.7,0.37,27,0
80 | 0,131,0,0,0,43.2,0.27,26,1
81 | 2,112,66,22,0,25.0,0.307,24,0
82 | 3,113,44,13,0,22.4,0.14,22,0
83 | 2,74,0,0,0,0.0,0.102,22,0
84 | 7,83,78,26,71,29.3,0.767,36,0
85 | 0,101,65,28,0,24.6,0.237,22,0
86 | 5,137,108,0,0,48.8,0.22699999999999998,37,1
87 | 2,110,74,29,125,32.4,0.698,27,0
88 | 13,106,72,54,0,36.6,0.17800000000000002,45,0
89 | 2,100,68,25,71,38.5,0.324,26,0
90 | 15,136,70,32,110,37.1,0.153,43,1
91 | 1,107,68,19,0,26.5,0.165,24,0
92 | 1,80,55,0,0,19.1,0.258,21,0
93 | 4,123,80,15,176,32.0,0.44299999999999995,34,0
94 | 7,81,78,40,48,46.7,0.261,42,0
95 | 4,134,72,0,0,23.8,0.27699999999999997,60,1
96 | 2,142,82,18,64,24.7,0.7609999999999999,21,0
97 | 6,144,72,27,228,33.9,0.255,40,0
98 | 2,92,62,28,0,31.6,0.13,24,0
99 | 1,71,48,18,76,20.4,0.32299999999999995,22,0
100 | 6,93,50,30,64,28.7,0.35600000000000004,23,0
101 | 1,122,90,51,220,49.7,0.325,31,1
102 | 1,163,72,0,0,39.0,1.222,33,1
103 | 1,151,60,0,0,26.1,0.179,22,0
104 | 0,125,96,0,0,22.5,0.262,21,0
105 | 1,81,72,18,40,26.6,0.28300000000000003,24,0
106 | 2,85,65,0,0,39.6,0.93,27,0
107 | 1,126,56,29,152,28.7,0.8009999999999999,21,0
108 | 1,96,122,0,0,22.4,0.207,27,0
109 | 4,144,58,28,140,29.5,0.287,37,0
110 | 3,83,58,31,18,34.3,0.336,25,0
111 | 0,95,85,25,36,37.4,0.247,24,1
112 | 3,171,72,33,135,33.3,0.19899999999999998,24,1
113 | 8,155,62,26,495,34.0,0.5429999999999999,46,1
114 | 1,89,76,34,37,31.2,0.192,23,0
115 | 4,76,62,0,0,34.0,0.391,25,0
116 | 7,160,54,32,175,30.5,0.588,39,1
117 | 4,146,92,0,0,31.2,0.539,61,1
118 | 5,124,74,0,0,34.0,0.22,38,1
119 | 5,78,48,0,0,33.7,0.654,25,0
120 | 4,97,60,23,0,28.2,0.44299999999999995,22,0
121 | 4,99,76,15,51,23.2,0.223,21,0
122 | 0,162,76,56,100,53.2,0.759,25,1
123 | 6,111,64,39,0,34.2,0.26,24,0
124 | 2,107,74,30,100,33.6,0.40399999999999997,23,0
125 | 5,132,80,0,0,26.8,0.18600000000000003,69,0
126 | 0,113,76,0,0,33.3,0.278,23,1
127 | 1,88,30,42,99,55.0,0.496,26,1
128 | 3,120,70,30,135,42.9,0.452,30,0
129 | 1,118,58,36,94,33.3,0.261,23,0
130 | 1,117,88,24,145,34.5,0.40299999999999997,40,1
131 | 0,105,84,0,0,27.9,0.741,62,1
132 | 4,173,70,14,168,29.7,0.361,33,1
133 | 9,122,56,0,0,33.3,1.114,33,1
134 | 3,170,64,37,225,34.5,0.35600000000000004,30,1
135 | 8,84,74,31,0,38.3,0.457,39,0
136 | 2,96,68,13,49,21.1,0.647,26,0
137 | 2,125,60,20,140,33.8,0.08800000000000001,31,0
138 | 0,100,70,26,50,30.8,0.597,21,0
139 | 0,93,60,25,92,28.7,0.532,22,0
140 | 0,129,80,0,0,31.2,0.703,29,0
141 | 5,105,72,29,325,36.9,0.159,28,0
142 | 3,128,78,0,0,21.1,0.268,55,0
143 | 5,106,82,30,0,39.5,0.28600000000000003,38,0
144 | 2,108,52,26,63,32.5,0.318,22,0
145 | 10,108,66,0,0,32.4,0.272,42,1
146 | 4,154,62,31,284,32.8,0.237,23,0
147 | 0,102,75,23,0,0.0,0.5720000000000001,21,0
148 | 9,57,80,37,0,32.8,0.096,41,0
149 | 2,106,64,35,119,30.5,1.4,34,0
150 | 5,147,78,0,0,33.7,0.218,65,0
151 | 2,90,70,17,0,27.3,0.085,22,0
152 | 1,136,74,50,204,37.4,0.39899999999999997,24,0
153 | 4,114,65,0,0,21.9,0.43200000000000005,37,0
154 | 9,156,86,28,155,34.3,1.189,42,1
155 | 1,153,82,42,485,40.6,0.687,23,0
156 | 8,188,78,0,0,47.9,0.13699999999999998,43,1
157 | 7,152,88,44,0,50.0,0.337,36,1
158 | 2,99,52,15,94,24.6,0.637,21,0
159 | 1,109,56,21,135,25.2,0.833,23,0
160 | 2,88,74,19,53,29.0,0.22899999999999998,22,0
161 | 17,163,72,41,114,40.9,0.8170000000000001,47,1
162 | 4,151,90,38,0,29.7,0.294,36,0
163 | 7,102,74,40,105,37.2,0.204,45,0
164 | 0,114,80,34,285,44.2,0.16699999999999998,27,0
165 | 2,100,64,23,0,29.7,0.368,21,0
166 | 0,131,88,0,0,31.6,0.743,32,1
167 | 6,104,74,18,156,29.9,0.722,41,1
168 | 3,148,66,25,0,32.5,0.256,22,0
169 | 4,120,68,0,0,29.6,0.7090000000000001,34,0
170 | 4,110,66,0,0,31.9,0.47100000000000003,29,0
171 | 3,111,90,12,78,28.4,0.495,29,0
172 | 6,102,82,0,0,30.8,0.18,36,1
173 | 6,134,70,23,130,35.4,0.542,29,1
174 | 2,87,0,23,0,28.9,0.773,25,0
175 | 1,79,60,42,48,43.5,0.6779999999999999,23,0
176 | 2,75,64,24,55,29.7,0.37,33,0
177 | 8,179,72,42,130,32.7,0.7190000000000001,36,1
178 | 6,85,78,0,0,31.2,0.382,42,0
179 | 0,129,110,46,130,67.1,0.319,26,1
180 | 5,143,78,0,0,45.0,0.19,47,0
181 | 5,130,82,0,0,39.1,0.956,37,1
182 | 6,87,80,0,0,23.2,0.084,32,0
183 | 0,119,64,18,92,34.9,0.725,23,0
184 | 1,0,74,20,23,27.7,0.299,21,0
185 | 5,73,60,0,0,26.8,0.268,27,0
186 | 4,141,74,0,0,27.6,0.244,40,0
187 | 7,194,68,28,0,35.9,0.745,41,1
188 | 8,181,68,36,495,30.1,0.615,60,1
189 | 1,128,98,41,58,32.0,1.321,33,1
190 | 8,109,76,39,114,27.9,0.64,31,1
191 | 5,139,80,35,160,31.6,0.361,25,1
192 | 3,111,62,0,0,22.6,0.142,21,0
193 | 9,123,70,44,94,33.1,0.374,40,0
194 | 7,159,66,0,0,30.4,0.38299999999999995,36,1
195 | 11,135,0,0,0,52.3,0.578,40,1
196 | 8,85,55,20,0,24.4,0.136,42,0
197 | 5,158,84,41,210,39.4,0.395,29,1
198 | 1,105,58,0,0,24.3,0.187,21,0
199 | 3,107,62,13,48,22.9,0.6779999999999999,23,1
200 | 4,109,64,44,99,34.8,0.905,26,1
201 | 4,148,60,27,318,30.9,0.15,29,1
202 | 0,113,80,16,0,31.0,0.8740000000000001,21,0
203 | 1,138,82,0,0,40.1,0.23600000000000002,28,0
204 | 0,108,68,20,0,27.3,0.787,32,0
205 | 2,99,70,16,44,20.4,0.235,27,0
206 | 6,103,72,32,190,37.7,0.324,55,0
207 | 5,111,72,28,0,23.9,0.40700000000000003,27,0
208 | 8,196,76,29,280,37.5,0.605,57,1
209 | 5,162,104,0,0,37.7,0.151,52,1
210 | 1,96,64,27,87,33.2,0.289,21,0
211 | 7,184,84,33,0,35.5,0.355,41,1
212 | 2,81,60,22,0,27.7,0.29,25,0
213 | 0,147,85,54,0,42.8,0.375,24,0
214 | 7,179,95,31,0,34.2,0.16399999999999998,60,0
215 | 0,140,65,26,130,42.6,0.431,24,1
216 | 9,112,82,32,175,34.2,0.26,36,1
217 | 12,151,70,40,271,41.8,0.742,38,1
218 | 5,109,62,41,129,35.8,0.514,25,1
219 | 6,125,68,30,120,30.0,0.46399999999999997,32,0
220 | 5,85,74,22,0,29.0,1.224,32,1
221 | 5,112,66,0,0,37.8,0.261,41,1
222 | 0,177,60,29,478,34.6,1.072,21,1
223 | 2,158,90,0,0,31.6,0.805,66,1
224 | 7,119,0,0,0,25.2,0.209,37,0
225 | 7,142,60,33,190,28.8,0.687,61,0
226 | 1,100,66,15,56,23.6,0.6659999999999999,26,0
227 | 1,87,78,27,32,34.6,0.10099999999999999,22,0
228 | 0,101,76,0,0,35.7,0.198,26,0
229 | 3,162,52,38,0,37.2,0.652,24,1
230 | 4,197,70,39,744,36.7,2.329,31,0
231 | 0,117,80,31,53,45.2,0.08900000000000001,24,0
232 | 4,142,86,0,0,44.0,0.645,22,1
233 | 6,134,80,37,370,46.2,0.23800000000000002,46,1
234 | 1,79,80,25,37,25.4,0.583,22,0
235 | 4,122,68,0,0,35.0,0.39399999999999996,29,0
236 | 3,74,68,28,45,29.7,0.293,23,0
237 | 4,171,72,0,0,43.6,0.479,26,1
238 | 7,181,84,21,192,35.9,0.586,51,1
239 | 0,179,90,27,0,44.1,0.6859999999999999,23,1
240 | 9,164,84,21,0,30.8,0.831,32,1
241 | 0,104,76,0,0,18.4,0.5820000000000001,27,0
242 | 1,91,64,24,0,29.2,0.192,21,0
243 | 4,91,70,32,88,33.1,0.446,22,0
244 | 3,139,54,0,0,25.6,0.402,22,1
245 | 6,119,50,22,176,27.1,1.318,33,1
246 | 2,146,76,35,194,38.2,0.32899999999999996,29,0
247 | 9,184,85,15,0,30.0,1.213,49,1
248 | 10,122,68,0,0,31.2,0.258,41,0
249 | 0,165,90,33,680,52.3,0.42700000000000005,23,0
250 | 9,124,70,33,402,35.4,0.282,34,0
251 | 1,111,86,19,0,30.1,0.14300000000000002,23,0
252 | 9,106,52,0,0,31.2,0.38,42,0
253 | 2,129,84,0,0,28.0,0.284,27,0
254 | 2,90,80,14,55,24.4,0.249,24,0
255 | 0,86,68,32,0,35.8,0.23800000000000002,25,0
256 | 12,92,62,7,258,27.6,0.9259999999999999,44,1
257 | 1,113,64,35,0,33.6,0.5429999999999999,21,1
258 | 3,111,56,39,0,30.1,0.557,30,0
259 | 2,114,68,22,0,28.7,0.092,25,0
260 | 1,193,50,16,375,25.9,0.655,24,0
261 | 11,155,76,28,150,33.3,1.3530000000000002,51,1
262 | 3,191,68,15,130,30.9,0.299,34,0
263 | 3,141,0,0,0,30.0,0.7609999999999999,27,1
264 | 4,95,70,32,0,32.1,0.612,24,0
265 | 3,142,80,15,0,32.4,0.2,63,0
266 | 4,123,62,0,0,32.0,0.226,35,1
267 | 5,96,74,18,67,33.6,0.997,43,0
268 | 0,138,0,0,0,36.3,0.9329999999999999,25,1
269 | 2,128,64,42,0,40.0,1.101,24,0
270 | 0,102,52,0,0,25.1,0.078,21,0
271 | 2,146,0,0,0,27.5,0.24,28,1
272 | 10,101,86,37,0,45.6,1.136,38,1
273 | 2,108,62,32,56,25.2,0.128,21,0
274 | 3,122,78,0,0,23.0,0.254,40,0
275 | 1,71,78,50,45,33.2,0.42200000000000004,21,0
276 | 13,106,70,0,0,34.2,0.251,52,0
277 | 2,100,70,52,57,40.5,0.677,25,0
278 | 7,106,60,24,0,26.5,0.29600000000000004,29,1
279 | 0,104,64,23,116,27.8,0.45399999999999996,23,0
280 | 5,114,74,0,0,24.9,0.7440000000000001,57,0
281 | 2,108,62,10,278,25.3,0.8809999999999999,22,0
282 | 0,146,70,0,0,37.9,0.33399999999999996,28,1
283 | 10,129,76,28,122,35.9,0.28,39,0
284 | 7,133,88,15,155,32.4,0.262,37,0
285 | 7,161,86,0,0,30.4,0.165,47,1
286 | 2,108,80,0,0,27.0,0.259,52,1
287 | 7,136,74,26,135,26.0,0.647,51,0
288 | 5,155,84,44,545,38.7,0.619,34,0
289 | 1,119,86,39,220,45.6,0.8079999999999999,29,1
290 | 4,96,56,17,49,20.8,0.34,26,0
291 | 5,108,72,43,75,36.1,0.263,33,0
292 | 0,78,88,29,40,36.9,0.434,21,0
293 | 0,107,62,30,74,36.6,0.757,25,1
294 | 2,128,78,37,182,43.3,1.224,31,1
295 | 1,128,48,45,194,40.5,0.613,24,1
296 | 0,161,50,0,0,21.9,0.254,65,0
297 | 6,151,62,31,120,35.5,0.6920000000000001,28,0
298 | 2,146,70,38,360,28.0,0.337,29,1
299 | 0,126,84,29,215,30.7,0.52,24,0
300 | 14,100,78,25,184,36.6,0.41200000000000003,46,1
301 | 8,112,72,0,0,23.6,0.84,58,0
302 | 0,167,0,0,0,32.3,0.8390000000000001,30,1
303 | 2,144,58,33,135,31.6,0.42200000000000004,25,1
304 | 5,77,82,41,42,35.8,0.156,35,0
305 | 5,115,98,0,0,52.9,0.209,28,1
306 | 3,150,76,0,0,21.0,0.207,37,0
307 | 2,120,76,37,105,39.7,0.215,29,0
308 | 10,161,68,23,132,25.5,0.326,47,1
309 | 0,137,68,14,148,24.8,0.14300000000000002,21,0
310 | 0,128,68,19,180,30.5,1.391,25,1
311 | 2,124,68,28,205,32.9,0.875,30,1
312 | 6,80,66,30,0,26.2,0.313,41,0
313 | 0,106,70,37,148,39.4,0.605,22,0
314 | 2,155,74,17,96,26.6,0.433,27,1
315 | 3,113,50,10,85,29.5,0.626,25,0
316 | 7,109,80,31,0,35.9,1.127,43,1
317 | 2,112,68,22,94,34.1,0.315,26,0
318 | 3,99,80,11,64,19.3,0.284,30,0
319 | 3,182,74,0,0,30.5,0.345,29,1
320 | 3,115,66,39,140,38.1,0.15,28,0
321 | 6,194,78,0,0,23.5,0.129,59,1
322 | 4,129,60,12,231,27.5,0.527,31,0
323 | 3,112,74,30,0,31.6,0.19699999999999998,25,1
324 | 0,124,70,20,0,27.4,0.254,36,1
325 | 13,152,90,33,29,26.8,0.731,43,1
326 | 2,112,75,32,0,35.7,0.14800000000000002,21,0
327 | 1,157,72,21,168,25.6,0.12300000000000001,24,0
328 | 1,122,64,32,156,35.1,0.6920000000000001,30,1
329 | 10,179,70,0,0,35.1,0.2,37,0
330 | 2,102,86,36,120,45.5,0.127,23,1
331 | 6,105,70,32,68,30.8,0.122,37,0
332 | 8,118,72,19,0,23.1,1.476,46,0
333 | 2,87,58,16,52,32.7,0.166,25,0
334 | 1,180,0,0,0,43.3,0.282,41,1
335 | 12,106,80,0,0,23.6,0.13699999999999998,44,0
336 | 1,95,60,18,58,23.9,0.26,22,0
337 | 0,165,76,43,255,47.9,0.259,26,0
338 | 0,117,0,0,0,33.8,0.932,44,0
339 | 5,115,76,0,0,31.2,0.34299999999999997,44,1
340 | 9,152,78,34,171,34.2,0.893,33,1
341 | 7,178,84,0,0,39.9,0.331,41,1
342 | 1,130,70,13,105,25.9,0.47200000000000003,22,0
343 | 1,95,74,21,73,25.9,0.6729999999999999,36,0
344 | 1,0,68,35,0,32.0,0.389,22,0
345 | 5,122,86,0,0,34.7,0.29,33,0
346 | 8,95,72,0,0,36.8,0.485,57,0
347 | 8,126,88,36,108,38.5,0.349,49,0
348 | 1,139,46,19,83,28.7,0.654,22,0
349 | 3,116,0,0,0,23.5,0.187,23,0
350 | 3,99,62,19,74,21.8,0.27899999999999997,26,0
351 | 5,0,80,32,0,41.0,0.34600000000000003,37,1
352 | 4,92,80,0,0,42.2,0.237,29,0
353 | 4,137,84,0,0,31.2,0.252,30,0
354 | 3,61,82,28,0,34.4,0.243,46,0
355 | 1,90,62,12,43,27.2,0.58,24,0
356 | 3,90,78,0,0,42.7,0.5589999999999999,21,0
357 | 9,165,88,0,0,30.4,0.302,49,1
358 | 1,125,50,40,167,33.3,0.9620000000000001,28,1
359 | 13,129,0,30,0,39.9,0.569,44,1
360 | 12,88,74,40,54,35.3,0.37799999999999995,48,0
361 | 1,196,76,36,249,36.5,0.875,29,1
362 | 5,189,64,33,325,31.2,0.583,29,1
363 | 5,158,70,0,0,29.8,0.207,63,0
364 | 5,103,108,37,0,39.2,0.305,65,0
365 | 4,146,78,0,0,38.5,0.52,67,1
366 | 4,147,74,25,293,34.9,0.385,30,0
367 | 5,99,54,28,83,34.0,0.499,30,0
368 | 6,124,72,0,0,27.6,0.368,29,1
369 | 0,101,64,17,0,21.0,0.252,21,0
370 | 3,81,86,16,66,27.5,0.306,22,0
371 | 1,133,102,28,140,32.8,0.23399999999999999,45,1
372 | 3,173,82,48,465,38.4,2.137,25,1
373 | 0,118,64,23,89,0.0,1.7309999999999999,21,0
374 | 0,84,64,22,66,35.8,0.545,21,0
375 | 2,105,58,40,94,34.9,0.225,25,0
376 | 2,122,52,43,158,36.2,0.816,28,0
377 | 12,140,82,43,325,39.2,0.528,58,1
378 | 0,98,82,15,84,25.2,0.299,22,0
379 | 1,87,60,37,75,37.2,0.509,22,0
380 | 4,156,75,0,0,48.3,0.23800000000000002,32,1
381 | 0,93,100,39,72,43.4,1.021,35,0
382 | 1,107,72,30,82,30.8,0.821,24,0
383 | 0,105,68,22,0,20.0,0.23600000000000002,22,0
384 | 1,109,60,8,182,25.4,0.9470000000000001,21,0
385 | 1,90,62,18,59,25.1,1.268,25,0
386 | 1,125,70,24,110,24.3,0.221,25,0
387 | 1,119,54,13,50,22.3,0.205,24,0
388 | 5,116,74,29,0,32.3,0.66,35,1
389 | 8,105,100,36,0,43.3,0.239,45,1
390 | 5,144,82,26,285,32.0,0.452,58,1
391 | 3,100,68,23,81,31.6,0.9490000000000001,28,0
392 | 1,100,66,29,196,32.0,0.444,42,0
393 | 5,166,76,0,0,45.7,0.34,27,1
394 | 1,131,64,14,415,23.7,0.389,21,0
395 | 4,116,72,12,87,22.1,0.46299999999999997,37,0
396 | 4,158,78,0,0,32.9,0.8029999999999999,31,1
397 | 2,127,58,24,275,27.7,1.6,25,0
398 | 3,96,56,34,115,24.7,0.9440000000000001,39,0
399 | 0,131,66,40,0,34.3,0.196,22,1
400 | 3,82,70,0,0,21.1,0.389,25,0
401 | 3,193,70,31,0,34.9,0.24100000000000002,25,1
402 | 4,95,64,0,0,32.0,0.161,31,1
403 | 6,137,61,0,0,24.2,0.151,55,0
404 | 5,136,84,41,88,35.0,0.28600000000000003,35,1
405 | 9,72,78,25,0,31.6,0.28,38,0
406 | 5,168,64,0,0,32.9,0.135,41,1
407 | 2,123,48,32,165,42.1,0.52,26,0
408 | 4,115,72,0,0,28.9,0.376,46,1
409 | 0,101,62,0,0,21.9,0.336,25,0
410 | 8,197,74,0,0,25.9,1.1909999999999998,39,1
411 | 1,172,68,49,579,42.4,0.7020000000000001,28,1
412 | 6,102,90,39,0,35.7,0.674,28,0
413 | 1,112,72,30,176,34.4,0.528,25,0
414 | 1,143,84,23,310,42.4,1.0759999999999998,22,0
415 | 1,143,74,22,61,26.2,0.256,21,0
416 | 0,138,60,35,167,34.6,0.534,21,1
417 | 3,173,84,33,474,35.7,0.258,22,1
418 | 1,97,68,21,0,27.2,1.095,22,0
419 | 4,144,82,32,0,38.5,0.5539999999999999,37,1
420 | 1,83,68,0,0,18.2,0.624,27,0
421 | 3,129,64,29,115,26.4,0.21899999999999997,28,1
422 | 1,119,88,41,170,45.3,0.507,26,0
423 | 2,94,68,18,76,26.0,0.561,21,0
424 | 0,102,64,46,78,40.6,0.496,21,0
425 | 2,115,64,22,0,30.8,0.42100000000000004,21,0
426 | 8,151,78,32,210,42.9,0.516,36,1
427 | 4,184,78,39,277,37.0,0.264,31,1
428 | 0,94,0,0,0,0.0,0.256,25,0
429 | 1,181,64,30,180,34.1,0.32799999999999996,38,1
430 | 0,135,94,46,145,40.6,0.284,26,0
431 | 1,95,82,25,180,35.0,0.233,43,1
432 | 2,99,0,0,0,22.2,0.10800000000000001,23,0
433 | 3,89,74,16,85,30.4,0.551,38,0
434 | 1,80,74,11,60,30.0,0.527,22,0
435 | 2,139,75,0,0,25.6,0.16699999999999998,29,0
436 | 1,90,68,8,0,24.5,1.138,36,0
437 | 0,141,0,0,0,42.4,0.205,29,1
438 | 12,140,85,33,0,37.4,0.244,41,0
439 | 5,147,75,0,0,29.9,0.434,28,0
440 | 1,97,70,15,0,18.2,0.147,21,0
441 | 6,107,88,0,0,36.8,0.727,31,0
442 | 0,189,104,25,0,34.3,0.435,41,1
443 | 2,83,66,23,50,32.2,0.49700000000000005,22,0
444 | 4,117,64,27,120,33.2,0.23,24,0
445 | 8,108,70,0,0,30.5,0.955,33,1
446 | 4,117,62,12,0,29.7,0.38,30,1
447 | 0,180,78,63,14,59.4,2.42,25,1
448 | 1,100,72,12,70,25.3,0.6579999999999999,28,0
449 | 0,95,80,45,92,36.5,0.33,26,0
450 | 0,104,64,37,64,33.6,0.51,22,1
451 | 0,120,74,18,63,30.5,0.285,26,0
452 | 1,82,64,13,95,21.2,0.415,23,0
453 | 2,134,70,0,0,28.9,0.542,23,1
454 | 0,91,68,32,210,39.9,0.381,25,0
455 | 2,119,0,0,0,19.6,0.8320000000000001,72,0
456 | 2,100,54,28,105,37.8,0.498,24,0
457 | 14,175,62,30,0,33.6,0.212,38,1
458 | 1,135,54,0,0,26.7,0.687,62,0
459 | 5,86,68,28,71,30.2,0.364,24,0
460 | 10,148,84,48,237,37.6,1.001,51,1
461 | 9,134,74,33,60,25.9,0.46,81,0
462 | 9,120,72,22,56,20.8,0.733,48,0
463 | 1,71,62,0,0,21.8,0.41600000000000004,26,0
464 | 8,74,70,40,49,35.3,0.705,39,0
465 | 5,88,78,30,0,27.6,0.258,37,0
466 | 10,115,98,0,0,24.0,1.022,34,0
467 | 0,124,56,13,105,21.8,0.452,21,0
468 | 0,74,52,10,36,27.8,0.26899999999999996,22,0
469 | 0,97,64,36,100,36.8,0.6,25,0
470 | 8,120,0,0,0,30.0,0.183,38,1
471 | 6,154,78,41,140,46.1,0.5710000000000001,27,0
472 | 1,144,82,40,0,41.3,0.607,28,0
473 | 0,137,70,38,0,33.2,0.17,22,0
474 | 0,119,66,27,0,38.8,0.259,22,0
475 | 7,136,90,0,0,29.9,0.21,50,0
476 | 4,114,64,0,0,28.9,0.126,24,0
477 | 0,137,84,27,0,27.3,0.231,59,0
478 | 2,105,80,45,191,33.7,0.711,29,1
479 | 7,114,76,17,110,23.8,0.466,31,0
480 | 8,126,74,38,75,25.9,0.162,39,0
481 | 4,132,86,31,0,28.0,0.419,63,0
482 | 3,158,70,30,328,35.5,0.344,35,1
483 | 0,123,88,37,0,35.2,0.19699999999999998,29,0
484 | 4,85,58,22,49,27.8,0.306,28,0
485 | 0,84,82,31,125,38.2,0.233,23,0
486 | 0,145,0,0,0,44.2,0.63,31,1
487 | 0,135,68,42,250,42.3,0.365,24,1
488 | 1,139,62,41,480,40.7,0.536,21,0
489 | 0,173,78,32,265,46.5,1.159,58,0
490 | 4,99,72,17,0,25.6,0.294,28,0
491 | 8,194,80,0,0,26.1,0.551,67,0
492 | 2,83,65,28,66,36.8,0.629,24,0
493 | 2,89,90,30,0,33.5,0.292,42,0
494 | 4,99,68,38,0,32.8,0.145,33,0
495 | 4,125,70,18,122,28.9,1.1440000000000001,45,1
496 | 3,80,0,0,0,0.0,0.174,22,0
497 | 6,166,74,0,0,26.6,0.304,66,0
498 | 5,110,68,0,0,26.0,0.292,30,0
499 | 2,81,72,15,76,30.1,0.547,25,0
500 | 7,195,70,33,145,25.1,0.163,55,1
501 | 6,154,74,32,193,29.3,0.8390000000000001,39,0
502 | 2,117,90,19,71,25.2,0.313,21,0
503 | 3,84,72,32,0,37.2,0.267,28,0
504 | 6,0,68,41,0,39.0,0.727,41,1
505 | 7,94,64,25,79,33.3,0.738,41,0
506 | 3,96,78,39,0,37.3,0.23800000000000002,40,0
507 | 10,75,82,0,0,33.3,0.263,38,0
508 | 0,180,90,26,90,36.5,0.314,35,1
509 | 1,130,60,23,170,28.6,0.6920000000000001,21,0
510 | 2,84,50,23,76,30.4,0.968,21,0
511 | 8,120,78,0,0,25.0,0.409,64,0
512 | 12,84,72,31,0,29.7,0.297,46,1
513 | 0,139,62,17,210,22.1,0.207,21,0
514 | 9,91,68,0,0,24.2,0.2,58,0
515 | 2,91,62,0,0,27.3,0.525,22,0
516 | 3,99,54,19,86,25.6,0.154,24,0
517 | 3,163,70,18,105,31.6,0.268,28,1
518 | 9,145,88,34,165,30.3,0.7709999999999999,53,1
519 | 7,125,86,0,0,37.6,0.304,51,0
520 | 13,76,60,0,0,32.8,0.18,41,0
521 | 6,129,90,7,326,19.6,0.5820000000000001,60,0
522 | 2,68,70,32,66,25.0,0.187,25,0
523 | 3,124,80,33,130,33.2,0.305,26,0
524 | 6,114,0,0,0,0.0,0.18899999999999997,26,0
525 | 9,130,70,0,0,34.2,0.652,45,1
526 | 3,125,58,0,0,31.6,0.151,24,0
527 | 3,87,60,18,0,21.8,0.444,21,0
528 | 1,97,64,19,82,18.2,0.299,21,0
529 | 3,116,74,15,105,26.3,0.107,24,0
530 | 0,117,66,31,188,30.8,0.493,22,0
531 | 0,111,65,0,0,24.6,0.66,31,0
532 | 2,122,60,18,106,29.8,0.7170000000000001,22,0
533 | 0,107,76,0,0,45.3,0.6859999999999999,24,0
534 | 1,86,66,52,65,41.3,0.917,29,0
535 | 6,91,0,0,0,29.8,0.501,31,0
536 | 1,77,56,30,56,33.3,1.251,24,0
537 | 4,132,0,0,0,32.9,0.302,23,1
538 | 0,105,90,0,0,29.6,0.19699999999999998,46,0
539 | 0,57,60,0,0,21.7,0.735,67,0
540 | 0,127,80,37,210,36.3,0.804,23,0
541 | 3,129,92,49,155,36.4,0.968,32,1
542 | 8,100,74,40,215,39.4,0.6609999999999999,43,1
543 | 3,128,72,25,190,32.4,0.5489999999999999,27,1
544 | 10,90,85,32,0,34.9,0.825,56,1
545 | 4,84,90,23,56,39.5,0.159,25,0
546 | 1,88,78,29,76,32.0,0.365,29,0
547 | 8,186,90,35,225,34.5,0.423,37,1
548 | 5,187,76,27,207,43.6,1.034,53,1
549 | 4,131,68,21,166,33.1,0.16,28,0
550 | 1,164,82,43,67,32.8,0.341,50,0
551 | 4,189,110,31,0,28.5,0.68,37,0
552 | 1,116,70,28,0,27.4,0.204,21,0
553 | 3,84,68,30,106,31.9,0.591,25,0
554 | 6,114,88,0,0,27.8,0.247,66,0
555 | 1,88,62,24,44,29.9,0.42200000000000004,23,0
556 | 1,84,64,23,115,36.9,0.47100000000000003,28,0
557 | 7,124,70,33,215,25.5,0.161,37,0
558 | 1,97,70,40,0,38.1,0.218,30,0
559 | 8,110,76,0,0,27.8,0.237,58,0
560 | 11,103,68,40,0,46.2,0.126,42,0
561 | 11,85,74,0,0,30.1,0.3,35,0
562 | 6,125,76,0,0,33.8,0.121,54,1
563 | 0,198,66,32,274,41.3,0.502,28,1
564 | 1,87,68,34,77,37.6,0.401,24,0
565 | 6,99,60,19,54,26.9,0.49700000000000005,32,0
566 | 0,91,80,0,0,32.4,0.601,27,0
567 | 2,95,54,14,88,26.1,0.748,22,0
568 | 1,99,72,30,18,38.6,0.41200000000000003,21,0
569 | 6,92,62,32,126,32.0,0.085,46,0
570 | 4,154,72,29,126,31.3,0.33799999999999997,37,0
571 | 0,121,66,30,165,34.3,0.203,33,1
572 | 3,78,70,0,0,32.5,0.27,39,0
573 | 2,130,96,0,0,22.6,0.268,21,0
574 | 3,111,58,31,44,29.5,0.43,22,0
575 | 2,98,60,17,120,34.7,0.198,22,0
576 | 1,143,86,30,330,30.1,0.892,23,0
577 | 1,119,44,47,63,35.5,0.28,25,0
578 | 6,108,44,20,130,24.0,0.813,35,0
579 | 2,118,80,0,0,42.9,0.693,21,1
580 | 10,133,68,0,0,27.0,0.245,36,0
581 | 2,197,70,99,0,34.7,0.575,62,1
582 | 0,151,90,46,0,42.1,0.371,21,1
583 | 6,109,60,27,0,25.0,0.20600000000000002,27,0
584 | 12,121,78,17,0,26.5,0.259,62,0
585 | 8,100,76,0,0,38.7,0.19,42,0
586 | 8,124,76,24,600,28.7,0.687,52,1
587 | 1,93,56,11,0,22.5,0.41700000000000004,22,0
588 | 8,143,66,0,0,34.9,0.129,41,1
589 | 6,103,66,0,0,24.3,0.249,29,0
590 | 3,176,86,27,156,33.3,1.1540000000000001,52,1
591 | 0,73,0,0,0,21.1,0.342,25,0
592 | 11,111,84,40,0,46.8,0.925,45,1
593 | 2,112,78,50,140,39.4,0.175,24,0
594 | 3,132,80,0,0,34.4,0.402,44,1
595 | 2,82,52,22,115,28.5,1.699,25,0
596 | 6,123,72,45,230,33.6,0.733,34,0
597 | 0,188,82,14,185,32.0,0.682,22,1
598 | 0,67,76,0,0,45.3,0.19399999999999998,46,0
599 | 1,89,24,19,25,27.8,0.5589999999999999,21,0
600 | 1,173,74,0,0,36.8,0.08800000000000001,38,1
601 | 1,109,38,18,120,23.1,0.40700000000000003,26,0
602 | 1,108,88,19,0,27.1,0.4,24,0
603 | 6,96,0,0,0,23.7,0.19,28,0
604 | 1,124,74,36,0,27.8,0.1,30,0
605 | 7,150,78,29,126,35.2,0.6920000000000001,54,1
606 | 4,183,0,0,0,28.4,0.212,36,1
607 | 1,124,60,32,0,35.8,0.514,21,0
608 | 1,181,78,42,293,40.0,1.258,22,1
609 | 1,92,62,25,41,19.5,0.48200000000000004,25,0
610 | 0,152,82,39,272,41.5,0.27,27,0
611 | 1,111,62,13,182,24.0,0.138,23,0
612 | 3,106,54,21,158,30.9,0.292,24,0
613 | 3,174,58,22,194,32.9,0.593,36,1
614 | 7,168,88,42,321,38.2,0.787,40,1
615 | 6,105,80,28,0,32.5,0.878,26,0
616 | 11,138,74,26,144,36.1,0.557,50,1
617 | 3,106,72,0,0,25.8,0.207,27,0
618 | 6,117,96,0,0,28.7,0.157,30,0
619 | 2,68,62,13,15,20.1,0.257,23,0
620 | 9,112,82,24,0,28.2,1.2819999999999998,50,1
621 | 0,119,0,0,0,32.4,0.141,24,1
622 | 2,112,86,42,160,38.4,0.24600000000000002,28,0
623 | 2,92,76,20,0,24.2,1.6980000000000002,28,0
624 | 6,183,94,0,0,40.8,1.4609999999999999,45,0
625 | 0,94,70,27,115,43.5,0.34700000000000003,21,0
626 | 2,108,64,0,0,30.8,0.158,21,0
627 | 4,90,88,47,54,37.7,0.36200000000000004,29,0
628 | 0,125,68,0,0,24.7,0.20600000000000002,21,0
629 | 0,132,78,0,0,32.4,0.39299999999999996,21,0
630 | 5,128,80,0,0,34.6,0.14400000000000002,45,0
631 | 4,94,65,22,0,24.7,0.14800000000000002,21,0
632 | 7,114,64,0,0,27.4,0.732,34,1
633 | 0,102,78,40,90,34.5,0.23800000000000002,24,0
634 | 2,111,60,0,0,26.2,0.34299999999999997,23,0
635 | 1,128,82,17,183,27.5,0.115,22,0
636 | 10,92,62,0,0,25.9,0.16699999999999998,31,0
637 | 13,104,72,0,0,31.2,0.465,38,1
638 | 5,104,74,0,0,28.8,0.153,48,0
639 | 2,94,76,18,66,31.6,0.649,23,0
640 | 7,97,76,32,91,40.9,0.871,32,1
641 | 1,100,74,12,46,19.5,0.149,28,0
642 | 0,102,86,17,105,29.3,0.695,27,0
643 | 4,128,70,0,0,34.3,0.303,24,0
644 | 6,147,80,0,0,29.5,0.17800000000000002,50,1
645 | 4,90,0,0,0,28.0,0.61,31,0
646 | 3,103,72,30,152,27.6,0.73,27,0
647 | 2,157,74,35,440,39.4,0.134,30,0
648 | 1,167,74,17,144,23.4,0.447,33,1
649 | 0,179,50,36,159,37.8,0.455,22,1
650 | 11,136,84,35,130,28.3,0.26,42,1
651 | 0,107,60,25,0,26.4,0.133,23,0
652 | 1,91,54,25,100,25.2,0.23399999999999999,23,0
653 | 1,117,60,23,106,33.8,0.466,27,0
654 | 5,123,74,40,77,34.1,0.26899999999999996,28,0
655 | 2,120,54,0,0,26.8,0.455,27,0
656 | 1,106,70,28,135,34.2,0.142,22,0
657 | 2,155,52,27,540,38.7,0.24,25,1
658 | 2,101,58,35,90,21.8,0.155,22,0
659 | 1,120,80,48,200,38.9,1.162,41,0
660 | 11,127,106,0,0,39.0,0.19,51,0
661 | 3,80,82,31,70,34.2,1.2919999999999998,27,1
662 | 10,162,84,0,0,27.7,0.182,54,0
663 | 1,199,76,43,0,42.9,1.3940000000000001,22,1
664 | 8,167,106,46,231,37.6,0.165,43,1
665 | 9,145,80,46,130,37.9,0.637,40,1
666 | 6,115,60,39,0,33.7,0.245,40,1
667 | 1,112,80,45,132,34.8,0.217,24,0
668 | 4,145,82,18,0,32.5,0.235,70,1
669 | 10,111,70,27,0,27.5,0.141,40,1
670 | 6,98,58,33,190,34.0,0.43,43,0
671 | 9,154,78,30,100,30.9,0.16399999999999998,45,0
672 | 6,165,68,26,168,33.6,0.631,49,0
673 | 1,99,58,10,0,25.4,0.551,21,0
674 | 10,68,106,23,49,35.5,0.285,47,0
675 | 3,123,100,35,240,57.3,0.88,22,0
676 | 8,91,82,0,0,35.6,0.5870000000000001,68,0
677 | 6,195,70,0,0,30.9,0.32799999999999996,31,1
678 | 9,156,86,0,0,24.8,0.23,53,1
679 | 0,93,60,0,0,35.3,0.263,25,0
680 | 3,121,52,0,0,36.0,0.127,25,1
681 | 2,101,58,17,265,24.2,0.614,23,0
682 | 2,56,56,28,45,24.2,0.332,22,0
683 | 0,162,76,36,0,49.6,0.364,26,1
684 | 0,95,64,39,105,44.6,0.366,22,0
685 | 4,125,80,0,0,32.3,0.536,27,1
686 | 5,136,82,0,0,0.0,0.64,69,0
687 | 2,129,74,26,205,33.2,0.591,25,0
688 | 3,130,64,0,0,23.1,0.314,22,0
689 | 1,107,50,19,0,28.3,0.18100000000000002,29,0
690 | 1,140,74,26,180,24.1,0.828,23,0
691 | 1,144,82,46,180,46.1,0.335,46,1
692 | 8,107,80,0,0,24.6,0.856,34,0
693 | 13,158,114,0,0,42.3,0.257,44,1
694 | 2,121,70,32,95,39.1,0.8859999999999999,23,0
695 | 7,129,68,49,125,38.5,0.439,43,1
696 | 2,90,60,0,0,23.5,0.191,25,0
697 | 7,142,90,24,480,30.4,0.128,43,1
698 | 3,169,74,19,125,29.9,0.268,31,1
699 | 0,99,0,0,0,25.0,0.253,22,0
700 | 4,127,88,11,155,34.5,0.598,28,0
701 | 4,118,70,0,0,44.5,0.904,26,0
702 | 2,122,76,27,200,35.9,0.483,26,0
703 | 6,125,78,31,0,27.6,0.565,49,1
704 | 1,168,88,29,0,35.0,0.905,52,1
705 | 2,129,0,0,0,38.5,0.304,41,0
706 | 4,110,76,20,100,28.4,0.11800000000000001,27,0
707 | 6,80,80,36,0,39.8,0.177,28,0
708 | 10,115,0,0,0,0.0,0.261,30,1
709 | 2,127,46,21,335,34.4,0.17600000000000002,22,0
710 | 9,164,78,0,0,32.8,0.14800000000000002,45,1
711 | 2,93,64,32,160,38.0,0.674,23,1
712 | 3,158,64,13,387,31.2,0.295,24,0
713 | 5,126,78,27,22,29.6,0.439,40,0
714 | 10,129,62,36,0,41.2,0.441,38,1
715 | 0,134,58,20,291,26.4,0.35200000000000004,21,0
716 | 3,102,74,0,0,29.5,0.121,32,0
717 | 7,187,50,33,392,33.9,0.826,34,1
718 | 3,173,78,39,185,33.8,0.97,31,1
719 | 10,94,72,18,0,23.1,0.595,56,0
720 | 1,108,60,46,178,35.5,0.415,24,0
721 | 5,97,76,27,0,35.6,0.37799999999999995,52,1
722 | 4,83,86,19,0,29.3,0.317,34,0
723 | 1,114,66,36,200,38.1,0.289,21,0
724 | 1,149,68,29,127,29.3,0.349,42,1
725 | 5,117,86,30,105,39.1,0.251,42,0
726 | 1,111,94,0,0,32.8,0.265,45,0
727 | 4,112,78,40,0,39.4,0.23600000000000002,38,0
728 | 1,116,78,29,180,36.1,0.496,25,0
729 | 0,141,84,26,0,32.4,0.433,22,0
730 | 2,175,88,0,0,22.9,0.326,22,0
731 | 2,92,52,0,0,30.1,0.141,22,0
732 | 3,130,78,23,79,28.4,0.32299999999999995,34,1
733 | 8,120,86,0,0,28.4,0.259,22,1
734 | 2,174,88,37,120,44.5,0.6459999999999999,24,1
735 | 2,106,56,27,165,29.0,0.426,22,0
736 | 2,105,75,0,0,23.3,0.56,53,0
737 | 4,95,60,32,0,35.4,0.284,28,0
738 | 0,126,86,27,120,27.4,0.515,21,0
739 | 8,65,72,23,0,32.0,0.6,42,0
740 | 2,99,60,17,160,36.6,0.45299999999999996,21,0
741 | 1,102,74,0,0,39.5,0.293,42,1
742 | 11,120,80,37,150,42.3,0.785,48,1
743 | 3,102,44,20,94,30.8,0.4,26,0
744 | 1,109,58,18,116,28.5,0.21899999999999997,22,0
745 | 9,140,94,0,0,32.7,0.7340000000000001,45,1
746 | 13,153,88,37,140,40.6,1.1740000000000002,39,0
747 | 12,100,84,33,105,30.0,0.488,46,0
748 | 1,147,94,41,0,49.3,0.358,27,1
749 | 1,81,74,41,57,46.3,1.0959999999999999,32,0
750 | 3,187,70,22,200,36.4,0.408,36,1
751 | 6,162,62,0,0,24.3,0.17800000000000002,50,1
752 | 4,136,70,0,0,31.2,1.182,22,1
753 | 1,121,78,39,74,39.0,0.261,28,0
754 | 3,108,62,24,0,26.0,0.223,25,0
755 | 0,181,88,44,510,43.3,0.222,26,1
756 | 8,154,78,32,0,32.4,0.44299999999999995,45,1
757 | 1,128,88,39,110,36.5,1.057,37,1
758 | 7,137,90,41,0,32.0,0.391,39,0
759 | 0,123,72,0,0,36.3,0.258,52,1
760 | 1,106,76,0,0,37.5,0.19699999999999998,26,0
761 | 6,190,92,0,0,35.5,0.278,66,1
762 | 2,88,58,26,16,28.4,0.7659999999999999,22,0
763 | 9,170,74,31,0,44.0,0.40299999999999997,43,1
764 | 9,89,62,0,0,22.5,0.142,33,0
765 | 10,101,76,48,180,32.9,0.171,63,0
766 | 2,122,70,27,0,36.8,0.34,27,0
767 | 5,121,72,23,112,26.2,0.245,30,0
768 | 1,126,60,0,0,30.1,0.349,47,1
769 | 1,93,70,31,0,30.4,0.315,23,0
770 |
--------------------------------------------------------------------------------
/Principal Component Analysis/dimensionality reduction.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "toc": true
7 | },
8 | "source": [
9 | "Table of Contents\n",
10 | ""
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 1,
16 | "metadata": {
17 | "ExecuteTime": {
18 | "end_time": "2020-04-17T16:53:43.976333Z",
19 | "start_time": "2020-04-17T16:53:40.941030Z"
20 | }
21 | },
22 | "outputs": [],
23 | "source": [
24 | "import numpy as np\n",
25 | "import matplotlib.pyplot as plt\n",
26 | "from sklearn.datasets import load_iris\n",
27 | "import pandas as pd\n",
28 | "from sklearn.model_selection import train_test_split\n",
29 | "from sklearn.linear_model import LogisticRegression\n",
30 | "from sklearn.metrics import accuracy_score"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 2,
36 | "metadata": {
37 | "ExecuteTime": {
38 | "end_time": "2020-04-17T16:53:43.992027Z",
39 | "start_time": "2020-04-17T16:53:43.981690Z"
40 | }
41 | },
42 | "outputs": [],
43 | "source": [
44 | "import warnings\n",
45 | "warnings.filterwarnings(\"ignore\")"
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "metadata": {},
51 | "source": [
52 | "# Dimensionality reduction Using PCA"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 3,
58 | "metadata": {
59 | "ExecuteTime": {
60 | "end_time": "2020-04-17T16:53:44.209748Z",
61 | "start_time": "2020-04-17T16:53:43.997649Z"
62 | }
63 | },
64 | "outputs": [],
65 | "source": [
66 | "class PCA:\n",
67 | " \n",
68 | " def __init__(self, n_component, solver=\"svd\"):\n",
69 | " self.n_component = n_component\n",
70 | " self.solver=solver\n",
71 | " self.components = None\n",
72 | " self.mean = None\n",
73 | " \n",
74 | " \n",
75 | " def fit(self, X):\n",
76 | " self.mean = X.mean(axis=0)\n",
77 | " X = X - self.mean\n",
78 | " \n",
79 | " # np.cov expects row=feature, column=sample, hence the transpose in the equivalent call below\n",
80 | " # cov = np.cov(X.T)\n",
81 | " cov = (X - X.mean(axis=0)).T.dot(X - X.mean(axis=0)) / (X.shape[0] - 1)\n",
82 | " \n",
83 | " \n",
84 | " if self.solver == \"eig\":\n",
85 | " # eigenvalue[i] -> eigenvector[:, i]\n",
86 | " eigenvalues, eigenvectors = np.linalg.eig(cov)\n",
87 | " \n",
88 | " eigenvectors = eigenvectors.T\n",
89 | " \n",
90 | " idxs = np.argsort(eigenvalues)[::-1]\n",
91 | " eigenvalues = eigenvalues[idxs]\n",
92 | " eigenvectors = eigenvectors[idxs]\n",
93 | " \n",
94 | " self.components = eigenvectors[0:self.n_component]\n",
95 | " \n",
96 | " \n",
97 | " else: \n",
98 | " # SVD\n",
99 | " _, S, Vt = np.linalg.svd(X)\n",
100 | " idxs = np.argsort(S)[::-1]\n",
101 | " \n",
102 | " S = S[idxs]\n",
103 | " Vt = Vt[idxs]\n",
104 | " \n",
105 | " self.components = Vt[0:self.n_component]\n",
106 | " \n",
107 | " \n",
108 | " def transform(self, X):\n",
109 | " X = X - self.mean\n",
110 | " return np.dot(X, self.components.T)\n",
111 | " \n",
112 | " def fit_transform(self, X):\n",
113 | " self.fit(X)\n",
114 | " return self.transform(X)"
115 | ]
116 | },
117 | {
118 | "cell_type": "markdown",
119 | "metadata": {},
120 | "source": [
121 | "# Feature Extraction on Diabetes dataset"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": 4,
127 | "metadata": {
128 | "ExecuteTime": {
129 | "end_time": "2020-04-17T16:53:44.419215Z",
130 | "start_time": "2020-04-17T16:53:44.218687Z"
131 | }
132 | },
133 | "outputs": [
134 | {
135 | "data": {
136 | "text/html": [
137 | "\n",
138 | "\n",
151 | "
\n",
152 | " \n",
153 | " \n",
154 | " | \n",
155 | " Pregnancies | \n",
156 | " Glucose | \n",
157 | " BloodPressure | \n",
158 | " SkinThickness | \n",
159 | " Insulin | \n",
160 | " BMI | \n",
161 | " DiabetesPedigreeFunction | \n",
162 | " Age | \n",
163 | " Outcome | \n",
164 | "
\n",
165 | " \n",
166 | " \n",
167 | " \n",
168 | " | 0 | \n",
169 | " 6 | \n",
170 | " 148 | \n",
171 | " 72 | \n",
172 | " 35 | \n",
173 | " 0 | \n",
174 | " 33.6 | \n",
175 | " 0.627 | \n",
176 | " 50 | \n",
177 | " 1 | \n",
178 | "
\n",
179 | " \n",
180 | " | 1 | \n",
181 | " 1 | \n",
182 | " 85 | \n",
183 | " 66 | \n",
184 | " 29 | \n",
185 | " 0 | \n",
186 | " 26.6 | \n",
187 | " 0.351 | \n",
188 | " 31 | \n",
189 | " 0 | \n",
190 | "
\n",
191 | " \n",
192 | " | 2 | \n",
193 | " 8 | \n",
194 | " 183 | \n",
195 | " 64 | \n",
196 | " 0 | \n",
197 | " 0 | \n",
198 | " 23.3 | \n",
199 | " 0.672 | \n",
200 | " 32 | \n",
201 | " 1 | \n",
202 | "
\n",
203 | " \n",
204 | " | 3 | \n",
205 | " 1 | \n",
206 | " 89 | \n",
207 | " 66 | \n",
208 | " 23 | \n",
209 | " 94 | \n",
210 | " 28.1 | \n",
211 | " 0.167 | \n",
212 | " 21 | \n",
213 | " 0 | \n",
214 | "
\n",
215 | " \n",
216 | " | 4 | \n",
217 | " 0 | \n",
218 | " 137 | \n",
219 | " 40 | \n",
220 | " 35 | \n",
221 | " 168 | \n",
222 | " 43.1 | \n",
223 | " 2.288 | \n",
224 | " 33 | \n",
225 | " 1 | \n",
226 | "
\n",
227 | " \n",
228 | "
\n",
229 | "
"
230 | ],
231 | "text/plain": [
232 | " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
233 | "0 6 148 72 35 0 33.6 \n",
234 | "1 1 85 66 29 0 26.6 \n",
235 | "2 8 183 64 0 0 23.3 \n",
236 | "3 1 89 66 23 94 28.1 \n",
237 | "4 0 137 40 35 168 43.1 \n",
238 | "\n",
239 | " DiabetesPedigreeFunction Age Outcome \n",
240 | "0 0.627 50 1 \n",
241 | "1 0.351 31 0 \n",
242 | "2 0.672 32 1 \n",
243 | "3 0.167 21 0 \n",
244 | "4 2.288 33 1 "
245 | ]
246 | },
247 | "execution_count": 4,
248 | "metadata": {},
249 | "output_type": "execute_result"
250 | }
251 | ],
252 | "source": [
253 | "diabetes_data = pd.read_csv(r'../datasets/diabetes_data.csv')\n",
254 | "diabetes_data.head()"
255 | ]
256 | },
257 | {
258 | "cell_type": "code",
259 | "execution_count": 5,
260 | "metadata": {
261 | "ExecuteTime": {
262 | "end_time": "2020-04-17T16:53:44.499226Z",
263 | "start_time": "2020-04-17T16:53:44.428118Z"
264 | }
265 | },
266 | "outputs": [],
267 | "source": [
268 | "X = diabetes_data[diabetes_data.columns[:-1]].values\n",
269 | "y = diabetes_data[diabetes_data.columns[-1]].values"
270 | ]
271 | },
272 | {
273 | "cell_type": "markdown",
274 | "metadata": {
275 | "ExecuteTime": {
276 | "end_time": "2020-04-17T16:43:53.350520Z",
277 | "start_time": "2020-04-17T16:43:53.342122Z"
278 | }
279 | },
280 | "source": [
281 | "## Accuracy Before applying PCA"
282 | ]
283 | },
284 | {
285 | "cell_type": "code",
286 | "execution_count": 6,
287 | "metadata": {
288 | "ExecuteTime": {
289 | "end_time": "2020-04-17T16:53:44.652486Z",
290 | "start_time": "2020-04-17T16:53:44.507580Z"
291 | }
292 | },
293 | "outputs": [],
294 | "source": [
295 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)"
296 | ]
297 | },
298 | {
299 | "cell_type": "code",
300 | "execution_count": 7,
301 | "metadata": {
302 | "ExecuteTime": {
303 | "end_time": "2020-04-17T16:53:44.948070Z",
304 | "start_time": "2020-04-17T16:53:44.666777Z"
305 | }
306 | },
307 | "outputs": [
308 | {
309 | "name": "stdout",
310 | "output_type": "stream",
311 | "text": [
312 | "Before feature extraction\n",
313 | "Number of features of X: 8\n",
314 | "Accuracy: 0.7142857142857143\n"
315 | ]
316 | }
317 | ],
318 | "source": [
319 | "# using Logistic Regression\n",
320 | "\n",
321 | "lr = LogisticRegression()\n",
322 | "lr.fit(X_train, y_train)\n",
323 | "y_pred = lr.predict(X_test)\n",
324 | "print(\"Before feature extraction\")\n",
325 | "print(f\"Number of features of X: {X_train.shape[1]}\")\n",
326 | "print(f\"Accuracy: {accuracy_score(y_test, y_pred)}\")"
327 | ]
328 | },
329 | {
330 | "cell_type": "markdown",
331 | "metadata": {},
332 | "source": [
333 | "## Accuracy After applying PCA"
334 | ]
335 | },
336 | {
337 | "cell_type": "code",
338 | "execution_count": 8,
339 | "metadata": {
340 | "ExecuteTime": {
341 | "end_time": "2020-04-17T16:53:45.049918Z",
342 | "start_time": "2020-04-17T16:53:44.960489Z"
343 | }
344 | },
345 | "outputs": [],
346 | "source": [
347 | "# Transforming Dataset\n",
348 | "\n",
349 | "pca = PCA(n_component=6)\n",
350 | "X_transformed = pca.fit_transform(X)\n",
351 | "X_train, X_test, y_train, y_test = train_test_split(X_transformed, y, test_size=0.2, stratify=y, random_state=42)"
352 | ]
353 | },
354 | {
355 | "cell_type": "code",
356 | "execution_count": 9,
357 | "metadata": {
358 | "ExecuteTime": {
359 | "end_time": "2020-04-17T16:53:45.162775Z",
360 | "start_time": "2020-04-17T16:53:45.049918Z"
361 | }
362 | },
363 | "outputs": [
364 | {
365 | "name": "stdout",
366 | "output_type": "stream",
367 | "text": [
368 | "After feature extraction\n",
369 | "Number of features of X: 6\n",
370 | "Accuracy: 0.7337662337662337\n"
371 | ]
372 | }
373 | ],
374 | "source": [
375 | "lr = LogisticRegression()\n",
376 | "lr.fit(X_train, y_train)\n",
377 | "y_pred = lr.predict(X_test)\n",
378 | "print(\"After feature extraction\")\n",
379 | "print(f\"Number of features of X: {X_train.shape[1]}\")\n",
380 | "print(f\"Accuracy: {accuracy_score(y_test, y_pred)}\")"
381 | ]
382 | },
383 | {
384 | "cell_type": "markdown",
385 | "metadata": {
386 | "ExecuteTime": {
387 | "end_time": "2020-04-17T16:53:17.733258Z",
388 | "start_time": "2020-04-17T16:53:17.726273Z"
389 | }
390 | },
391 | "source": [
392 | "# On Iris Dataset\n",
393 | "\n",
394 | "Reducing the number of features to two so the data can be visualized in a 2-D scatter plot"
395 | ]
396 | },
397 | {
398 | "cell_type": "code",
399 | "execution_count": 10,
400 | "metadata": {
401 | "ExecuteTime": {
402 | "end_time": "2020-04-17T16:53:45.271370Z",
403 | "start_time": "2020-04-17T16:53:45.165615Z"
404 | }
405 | },
406 | "outputs": [],
407 | "source": [
408 | "data = load_iris()\n",
409 | "X = data.data\n",
410 | "y = data.target"
411 | ]
412 | },
413 | {
414 | "cell_type": "code",
415 | "execution_count": 11,
416 | "metadata": {
417 | "ExecuteTime": {
418 | "end_time": "2020-04-17T16:53:45.397396Z",
419 | "start_time": "2020-04-17T16:53:45.276300Z"
420 | }
421 | },
422 | "outputs": [],
423 | "source": [
424 | "pca = PCA(n_component=2)\n",
425 | "X_projected = pca.fit_transform(X)"
426 | ]
427 | },
428 | {
429 | "cell_type": "code",
430 | "execution_count": 12,
431 | "metadata": {
432 | "ExecuteTime": {
433 | "end_time": "2020-04-17T16:53:45.527547Z",
434 | "start_time": "2020-04-17T16:53:45.404998Z"
435 | }
436 | },
437 | "outputs": [
438 | {
439 | "name": "stdout",
440 | "output_type": "stream",
441 | "text": [
442 | "Shape of X: (150, 4)\n",
443 | "Shape of transformed X: (150, 2)\n"
444 | ]
445 | }
446 | ],
447 | "source": [
448 | "print(f\"Shape of X: {X.shape}\")\n",
449 | "print(f\"Shape of transformed X: {X_projected.shape}\")"
450 | ]
451 | },
452 | {
453 | "cell_type": "code",
454 | "execution_count": 13,
455 | "metadata": {
456 | "ExecuteTime": {
457 | "end_time": "2020-04-17T16:53:45.655688Z",
458 | "start_time": "2020-04-17T16:53:45.537553Z"
459 | }
460 | },
461 | "outputs": [],
462 | "source": [
463 | "x1 = X_projected[:, 0]\n",
464 | "x2 = X_projected[:, 1]"
465 | ]
466 | },
467 | {
468 | "cell_type": "code",
469 | "execution_count": 14,
470 | "metadata": {
471 | "ExecuteTime": {
472 | "end_time": "2020-04-17T16:53:46.104685Z",
473 | "start_time": "2020-04-17T16:53:45.665223Z"
474 | }
475 | },
476 | "outputs": [
477 | {
478 | "data": {
479 | "text/plain": [
480 | "(150, 3)"
481 | ]
482 | },
483 | "execution_count": 14,
484 | "metadata": {},
485 | "output_type": "execute_result"
486 | }
487 | ],
488 | "source": [
489 | "X_trans = np.c_[x1, x2, y]\n",
490 | "X_trans.shape"
491 | ]
492 | },
493 | {
494 | "cell_type": "code",
495 | "execution_count": 15,
496 | "metadata": {
497 | "ExecuteTime": {
498 | "end_time": "2020-04-17T16:53:47.087110Z",
499 | "start_time": "2020-04-17T16:53:46.115345Z"
500 | }
501 | },
502 | "outputs": [
503 | {
504 | "data": {
505 | "image/png": "\n",
506 | "text/plain": [
507 | ""
508 | ]
509 | },
510 | "metadata": {
511 | "needs_background": "light"
512 | },
513 | "output_type": "display_data"
514 | }
515 | ],
516 | "source": [
517 | "colors = [\"red\", \"blue\", \"green\"]\n",
518 | "for i in range(3):\n",
519 | " plt.scatter(X_trans[X_trans[:, 2] == i][:, 0], X_trans[X_trans[:, 2] == i][:, 1], c=colors[i], \n",
520 | " edgecolors=\"k\", alpha=0.7, label=data.target_names[i])\n",
521 | "\n",
522 | "plt.xlabel(\"Principal Component 1\")\n",
523 | "plt.ylabel(\"Principal Component 2\")\n",
524 | "plt.legend()\n",
525 | "plt.show()"
526 | ]
527 | }
528 | ],
529 | "metadata": {
530 | "kernelspec": {
531 | "display_name": "Python 3",
532 | "language": "python",
533 | "name": "python3"
534 | },
535 | "language_info": {
536 | "codemirror_mode": {
537 | "name": "ipython",
538 | "version": 3
539 | },
540 | "file_extension": ".py",
541 | "mimetype": "text/x-python",
542 | "name": "python",
543 | "nbconvert_exporter": "python",
544 | "pygments_lexer": "ipython3",
545 | "version": "3.7.6"
546 | },
547 | "toc": {
548 | "base_numbering": 1,
549 | "nav_menu": {},
550 | "number_sections": true,
551 | "sideBar": true,
552 | "skip_h1_title": false,
553 | "title_cell": "Table of Contents",
554 | "title_sidebar": "Contents",
555 | "toc_cell": true,
556 | "toc_position": {},
557 | "toc_section_display": true,
558 | "toc_window_display": false
559 | },
560 | "varInspector": {
561 | "cols": {
562 | "lenName": 16,
563 | "lenType": 16,
564 | "lenVar": 40
565 | },
566 | "kernels_config": {
567 | "python": {
568 | "delete_cmd_postfix": "",
569 | "delete_cmd_prefix": "del ",
570 | "library": "var_list.py",
571 | "varRefreshCmd": "print(var_dic_list())"
572 | },
573 | "r": {
574 | "delete_cmd_postfix": ") ",
575 | "delete_cmd_prefix": "rm(",
576 | "library": "var_list.r",
577 | "varRefreshCmd": "cat(var_dic_list()) "
578 | }
579 | },
580 | "types_to_exclude": [
581 | "module",
582 | "function",
583 | "builtin_function_or_method",
584 | "instance",
585 | "_Feature"
586 | ],
587 | "window_display": false
588 | }
589 | },
590 | "nbformat": 4,
591 | "nbformat_minor": 4
592 | }
593 |
--------------------------------------------------------------------------------
/K-Nearest Neigbors/KNN_weighted_classification.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "ExecuteTime": {
8 | "end_time": "2020-02-16T23:07:58.179814Z",
9 | "start_time": "2020-02-16T23:07:56.458785Z"
10 | }
11 | },
12 | "outputs": [],
13 | "source": [
14 | "import pandas as pd\n",
15 | "import numpy as np\n",
16 | "import matplotlib.pyplot as plt\n",
17 | "from sklearn.model_selection import StratifiedShuffleSplit\n",
18 | "import operator"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {
25 | "ExecuteTime": {
26 | "end_time": "2020-02-16T23:07:58.195684Z",
27 | "start_time": "2020-02-16T23:07:58.185150Z"
28 | }
29 | },
30 | "outputs": [],
31 | "source": [
32 | "def getDataset(loc):\n",
33 | " columns = ['sepal_length', 'sepal_width','petal_length','petal_width', 'class']\n",
34 | " data = pd.read_csv(loc, header=None, names=columns)\n",
35 | " return data"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 3,
41 | "metadata": {
42 | "ExecuteTime": {
43 | "end_time": "2020-02-16T23:07:58.447880Z",
44 | "start_time": "2020-02-16T23:07:58.201359Z"
45 | }
46 | },
47 | "outputs": [],
48 | "source": [
49 | "def splitDataset(dataset, ratio):\n",
50 | " split = StratifiedShuffleSplit(n_splits=1, test_size=ratio, random_state=42)\n",
51 | " \n",
52 | " for train_index, test_index in split.split(dataset, dataset['class']):\n",
53 | " train_data = dataset.loc[train_index]\n",
54 | " test_data = dataset.loc[test_index]\n",
55 | " \n",
56 | " \n",
57 | " return train_data, test_data"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 4,
63 | "metadata": {
64 | "ExecuteTime": {
65 | "end_time": "2020-02-16T23:07:58.630056Z",
66 | "start_time": "2020-02-16T23:07:58.450499Z"
67 | }
68 | },
69 | "outputs": [],
70 | "source": [
71 | "def euclideanDistance(instance1, instance2):\n",
72 | " instance1 = np.array(instance1)\n",
73 | " instance2 = np.array(instance2)\n",
74 | " distance = np.sum(np.power(instance1 - instance2, 2))\n",
75 | " return np.sqrt(distance)"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": 5,
81 | "metadata": {
82 | "ExecuteTime": {
83 | "end_time": "2020-02-16T23:07:58.768916Z",
84 | "start_time": "2020-02-16T23:07:58.632827Z"
85 | }
86 | },
87 | "outputs": [],
88 | "source": [
89 | "# Baseline version: indexes the DataFrame with .iloc for every row (redefined in the next cell)\n",
90 | "\n",
91 | "def getKNeighbors(training_set, test_instance, k):\n",
92 | " distances = []\n",
93 | " classes = training_set['class'].unique()\n",
94 | " for row in range(len(training_set)):\n",
95 | " dist = euclideanDistance(training_set.iloc[row].values[:-1], test_instance[:-1])\n",
96 | " \n",
97 | " distances.append((training_set.iloc[row]['class'], dist))\n",
98 | "\n",
99 | " distances = sorted(distances, key=operator.itemgetter(1))[:k]\n",
100 | " inv_class_freq = {x:0 for x in classes}\n",
101 | " \n",
102 | " for cls, dist in distances:\n",
103 | " inv_class_freq[cls] += (1 / dist)\n",
104 | "\n",
105 | " return inv_class_freq"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": 6,
111 | "metadata": {
112 | "ExecuteTime": {
113 | "end_time": "2020-02-16T23:07:58.941440Z",
114 | "start_time": "2020-02-16T23:07:58.771905Z"
115 | }
116 | },
117 | "outputs": [],
118 | "source": [
119 | "# Faster version: converts the DataFrame to a NumPy array once instead of using .iloc per row;\n",
120 | "# holding the whole array in memory is fine because the dataset is small\n",
121 | "\n",
122 | "def getKNeighbors(training_set, test_instance, k):\n",
123 | " distances = []\n",
124 | " classes = training_set['class'].unique()\n",
125 | " \n",
126 | " training_set = training_set.values\n",
127 | " \n",
128 | " for row in range(len(training_set)):\n",
129 | " dist = euclideanDistance(training_set[row][:-1], test_instance[:-1])\n",
130 | " \n",
131 | " distances.append((training_set[row][-1], dist))\n",
132 | " distances = sorted(distances, key=operator.itemgetter(1))[:k]\n",
133 | " inv_class_freq = {x:0 for x in classes}\n",
134 | " \n",
135 | " for cls, dist in distances:\n",
136 | " inv_class_freq[cls] += (1 / dist)\n",
137 | "\n",
138 | " return inv_class_freq"
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": 7,
144 | "metadata": {
145 | "ExecuteTime": {
146 | "end_time": "2020-02-16T23:07:59.117381Z",
147 | "start_time": "2020-02-16T23:07:58.951616Z"
148 | }
149 | },
150 | "outputs": [],
151 | "source": [
152 | "def getResponse(inv_freq):\n",
153 | " predicted_class = max(inv_freq, key=inv_freq.get)\n",
154 | " return predicted_class"
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": 8,
160 | "metadata": {
161 | "ExecuteTime": {
162 | "end_time": "2020-02-16T23:07:59.266774Z",
163 | "start_time": "2020-02-16T23:07:59.123520Z"
164 | }
165 | },
166 | "outputs": [],
167 | "source": [
168 | "def getAccuracy(testSet, predictions):\n",
169 | " correct = 0\n",
170 | " for x in range(len(testSet)):\n",
171 | " if testSet.iloc[x]['class'] == predictions[x]:\n",
172 | " correct += 1\n",
173 | " return (correct/len(testSet)) * 100.0"
174 | ]
175 | },
176 | {
177 | "cell_type": "code",
178 | "execution_count": 9,
179 | "metadata": {
180 | "ExecuteTime": {
181 | "end_time": "2020-02-16T23:08:05.002270Z",
182 | "start_time": "2020-02-16T23:07:59.271965Z"
183 | }
184 | },
185 | "outputs": [],
186 | "source": [
187 | "if __name__ == '__main__':\n",
188 | " # get data\n",
189 | " data = getDataset(r'../datasets/iris_data.csv')\n",
190 | " \n",
191 | " # split data into stratified subsets\n",
192 | " ratio = 0.2\n",
193 | " trainingSet, testSet = splitDataset(data, ratio)\n",
194 | "\n",
195 | " trainingSet = trainingSet.reset_index(drop=True)\n",
196 | " testSet = testSet.reset_index(drop=True)\n",
197 | " accuracy_scores = []\n",
198 | " # generate predictions\n",
199 | " for k in range(1, 31):\n",
200 | " predictions = []\n",
201 | " for row in range(len(testSet)):\n",
202 | " inv_freq = getKNeighbors(trainingSet, testSet.iloc[row].values, k)\n",
203 | " result = getResponse(inv_freq)\n",
204 | " predictions.append(result)\n",
205 | "\n",
206 | " accuracy = getAccuracy(testSet, predictions)\n",
207 | " accuracy_scores.append(accuracy)\n",
208 | "# print(f'k: {k}, Accuracy: {round(accuracy,3)}%')"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": 10,
214 | "metadata": {
215 | "ExecuteTime": {
216 | "end_time": "2020-02-16T23:08:05.817093Z",
217 | "start_time": "2020-02-16T23:08:05.006180Z"
218 | }
219 | },
220 | "outputs": [
221 | {
222 | "data": {
223 | "text/plain": [
224 | ""
225 | ]
226 | },
227 | "execution_count": 10,
228 | "metadata": {},
229 | "output_type": "execute_result"
230 | },
231 | {
232 | "data": {
233 | "image/png": "\n",
234 | "text/plain": [
235 | ""
236 | ]
237 | },
238 | "metadata": {
239 | "needs_background": "light"
240 | },
241 | "output_type": "display_data"
242 | }
243 | ],
244 | "source": [
245 | "plt.figure(figsize=(12, 10))\n",
246 | "plt.plot(list(range(1, 31)), accuracy_scores)\n",
247 | "plt.scatter(list(range(1, 31)), accuracy_scores, label=\"Accuracy\")\n",
248 | "plt.title('K vs Accuracy')\n",
249 | "plt.xticks(ticks=range(1, 31))\n",
250 | "plt.xlabel('K')\n",
251 | "plt.ylabel('Accuracy', rotation=0)\n",
252 | "plt.legend()"
253 | ]
254 | }
255 | ],
256 | "metadata": {
257 | "kernelspec": {
258 | "display_name": "Python 3",
259 | "language": "python",
260 | "name": "python3"
261 | },
262 | "language_info": {
263 | "codemirror_mode": {
264 | "name": "ipython",
265 | "version": 3
266 | },
267 | "file_extension": ".py",
268 | "mimetype": "text/x-python",
269 | "name": "python",
270 | "nbconvert_exporter": "python",
271 | "pygments_lexer": "ipython3",
272 | "version": "3.7.6"
273 | },
274 | "toc": {
275 | "base_numbering": 1,
276 | "nav_menu": {},
277 | "number_sections": true,
278 | "sideBar": true,
279 | "skip_h1_title": false,
280 | "title_cell": "Table of Contents",
281 | "title_sidebar": "Contents",
282 | "toc_cell": false,
283 | "toc_position": {},
284 | "toc_section_display": true,
285 | "toc_window_display": false
286 | },
287 | "varInspector": {
288 | "cols": {
289 | "lenName": 16,
290 | "lenType": 16,
291 | "lenVar": 40
292 | },
293 | "kernels_config": {
294 | "python": {
295 | "delete_cmd_postfix": "",
296 | "delete_cmd_prefix": "del ",
297 | "library": "var_list.py",
298 | "varRefreshCmd": "print(var_dic_list())"
299 | },
300 | "r": {
301 | "delete_cmd_postfix": ") ",
302 | "delete_cmd_prefix": "rm(",
303 | "library": "var_list.r",
304 | "varRefreshCmd": "cat(var_dic_list()) "
305 | }
306 | },
307 | "types_to_exclude": [
308 | "module",
309 | "function",
310 | "builtin_function_or_method",
311 | "instance",
312 | "_Feature"
313 | ],
314 | "window_display": false
315 | }
316 | },
317 | "nbformat": 4,
318 | "nbformat_minor": 2
319 | }
320 |
--------------------------------------------------------------------------------