├── .gitattributes ├── .ipynb_checkpoints ├── 01-How To Build a Machine Learning Model-checkpoint.ipynb ├── 02-Scikit-Learn Part 1-checkpoint.ipynb ├── 03-Scikit-Learn Part 2-checkpoint.ipynb ├── 04-Feature Engineering-checkpoint.ipynb ├── 05-K Nearest Neighbor (KNN) Algorithm-checkpoint.ipynb ├── 06-Linear Regression-checkpoint.ipynb ├── 07-Linear Regression in Action-checkpoint.ipynb ├── 08-Ridge & Lasso Regression-checkpoint.ipynb ├── 09-Linear Models for Classification-checkpoint.ipynb ├── 10-Logistic Regression in Action-checkpoint.ipynb ├── 11-Naive Bayes Classification-checkpoint.ipynb ├── 12-Support Vector Machines-checkpoint.ipynb ├── 13-Decision Trees-checkpoint.ipynb ├── 14-Ensemble Learning-checkpoint.ipynb ├── 15-Artificial Neural Network-checkpoint.ipynb ├── 16-Unsupervised Learning & Data Scaling-checkpoint.ipynb ├── 17-Principal Component Analysis (PCA)-checkpoint.ipynb ├── 18-Manifold Learning-checkpoint.ipynb ├── 19-K Means Clustering-checkpoint.ipynb ├── 20-Agglomerative & Hierarchica & DBSCAN Clustering-checkpoint.ipynb ├── 21-Gaussion Mixture Models-checkpoint.ipynb ├── 22-Model Evaluation-checkpoint.ipynb ├── 23-Model Improvement-checkpoint.ipynb ├── 24-Evaluation Metrics -checkpoint.ipynb ├── 25-Pipelines-checkpoint.ipynb ├── 26-Text Analysis-checkpoint.ipynb ├── 27-Feature Engineering-checkpoint.ipynb ├── 28-Feature Selection-checkpoint.ipynb ├── 29-Introduction Machine Learning-checkpoint.ipynb ├── 30-Linear Regression From Scratch-checkpoint.ipynb ├── 31-Data Preprocessing - Part 1-checkpoint.ipynb ├── 32-Data Preprocessing - Part 2-checkpoint.ipynb ├── 33-Data Scaling & Regularization & Feature Engineering-checkpoint.ipynb ├── 34-Introduction to Scikit-Learn-checkpoint.ipynb ├── 35-Machine Learning with PyCaret-checkpoint.ipynb ├── 36-Principal Component Analysis-checkpoint.ipynb └── 37-Visualizing data with t-SNE-checkpoint.ipynb ├── 01-How To Build a Machine Learning Model.ipynb ├── 02-Scikit-Learn Part 1.ipynb ├── 03-Scikit-Learn Part 
2.ipynb ├── 04-FEATURE ENGINEERING.ipynb ├── 05-K Nearest Neighbor (KNN) Algorithm.ipynb ├── 06-LINEAR REGRESSION.ipynb ├── 07-LINEAR REGRESSION in ACTION.ipynb ├── 08-Ridge & Lasso Regression.ipynb ├── 09-LINEAR MODELS FOR CLASSIFICATION.ipynb ├── 10-LOGISTIC REGRESSION in ACTION.ipynb ├── 11-NAIVE BAYES CLASSIFICATION.ipynb ├── 12-SUPPORT VECTOR MACHINES.ipynb ├── 13-DECISION TREES.ipynb ├── 14-ENSEMBLE LEARNING.ipynb ├── 15-ARTIFICIAL NEURAL NETWORK.ipynb ├── 16-Unsupervised Learning & Data Scaling.ipynb ├── 17-Principal Component Analysis (PCA).ipynb ├── 18-MANIFOLD LEARNING.ipynb ├── 19-K Means Clustering.ipynb ├── 20-Agglomerative & Hierarchica & DBSCAN Clustering.ipynb ├── 21-Gaussion Mixture Models.ipynb ├── 22-MODEL EVALUATION.ipynb ├── 23-MODEL IMPROVEMENT.ipynb ├── 24-Evaluation Metrics .ipynb ├── 25-PIPELINES.ipynb ├── 26-TEXT ANALYSIS.ipynb ├── 27-FEATURE ENGINEERING.ipynb ├── 28-FEATURE SELECTION.ipynb ├── 29-INTRODUCTION MACHINE LEARNING.ipynb ├── 30-LINEAR REGRESSION FROM SCRATCH.ipynb ├── 31-Data Preprocessing - Part 1.ipynb ├── 32-Data Preprocessing - Part 2.ipynb ├── 33-Data Scaling & Regularization & Feature Engineering.ipynb ├── 34-Introduction to Scikit-Learn.ipynb ├── 35-Machine Learning with PyCaret.ipynb ├── 36-Housing Prices Prediction with Random Forest.ipynb ├── Datasets ├── BLI.csv ├── cars.csv ├── data.csv ├── heart.csv ├── housing.csv ├── insurance.csv ├── iris.txt ├── list.csv ├── student-mat.csv ├── tree.dot └── wine.data ├── Images ├── ML-tutorials.png └── Sklearn-Classification-Algorithms.png ├── Machine-Learning-101.ipynb └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/13-Decision Trees-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 
| "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Decision Trees" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Hi Guys, Welcome to [Tirendaz Academy](https://youtube.com/c/tirendazacademy) 😀\n", 15 | "
\n", 16 | "In this notebook, I'm going to talk about decision trees.\n", 17 | "
\n", 18 | "Happy learning 🐱‍🏍 " 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "## Importing the data" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "from sklearn.datasets import load_breast_cancer\n", 35 | "kanser=load_breast_cancer()" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## Preprocessing the data" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 2, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "from sklearn.model_selection import train_test_split\n", 52 | "X_train,X_test,y_train,y_test=train_test_split(kanser.data,kanser.target,stratify=kanser.target)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "## Building the model" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 3, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "text/plain": [ 70 | "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n", 71 | " max_features=None, max_leaf_nodes=None,\n", 72 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 73 | " min_samples_leaf=1, min_samples_split=2,\n", 74 | " min_weight_fraction_leaf=0.0, presort=False,\n", 75 | " random_state=None, splitter='best')" 76 | ] 77 | }, 78 | "execution_count": 3, 79 | "metadata": {}, 80 | "output_type": "execute_result" 81 | } 82 | ], 83 | "source": [ 84 | "from sklearn.tree import DecisionTreeClassifier\n", 85 | "tree=DecisionTreeClassifier()\n", 86 | "tree.fit(X_train,y_train)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "## Evaluating the model" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 4, 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "name": "stdout", 103 | "output_type": "stream", 104 | 
"text": [ 105 | "1.0\n", 106 | "0.9370629370629371\n" 107 | ] 108 | } 109 | ], 110 | "source": [ 111 | "print(tree.score(X_train,y_train))\n", 112 | "print(tree.score(X_test,y_test))" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "## Tuning the model" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 5, 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "data": { 129 | "text/plain": [ 130 | "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,\n", 131 | " max_features=None, max_leaf_nodes=None,\n", 132 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 133 | " min_samples_leaf=1, min_samples_split=2,\n", 134 | " min_weight_fraction_leaf=0.0, presort=False,\n", 135 | " random_state=None, splitter='best')" 136 | ] 137 | }, 138 | "execution_count": 5, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "tree=DecisionTreeClassifier(max_depth=4)\n", 145 | "tree.fit(X_train,y_train)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 6, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "name": "stdout", 155 | "output_type": "stream", 156 | "text": [ 157 | "0.9929577464788732\n", 158 | "0.9440559440559441\n" 159 | ] 160 | } 161 | ], 162 | "source": [ 163 | "print(tree.score(X_train,y_train))\n", 164 | "print(tree.score(X_test,y_test))" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "## Importing the data" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 11, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "from sklearn.datasets import load_iris\n", 181 | "iris=load_iris()" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "## Preprocessing the data" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 12, 
194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "X=iris.data[:,2:]\n", 198 | "y=iris.target" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "## Building the model" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 13, 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "data": { 215 | "text/plain": [ 216 | "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=2,\n", 217 | " max_features=None, max_leaf_nodes=None,\n", 218 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 219 | " min_samples_leaf=1, min_samples_split=2,\n", 220 | " min_weight_fraction_leaf=0.0, presort=False,\n", 221 | " random_state=None, splitter='best')" 222 | ] 223 | }, 224 | "execution_count": 13, 225 | "metadata": {}, 226 | "output_type": "execute_result" 227 | } 228 | ], 229 | "source": [ 230 | "tree=DecisionTreeClassifier(max_depth=2)\n", 231 | "tree.fit(X,y)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "## Data visualization" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 14, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [ 247 | "from sklearn.tree import export_graphviz" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 15, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "export_graphviz(tree,\n", 257 | " out_file='tree.dot',\n", 258 | " feature_names=iris.feature_names[2:],\n", 259 | " class_names=True,\n", 260 | " filled=True)" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 17, 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [ 269 | "import graphviz\n", 270 | "with open('tree.dot') as f:\n", 271 | " dot_graph=f.read()\n", 272 | "graphviz.Source(dot_graph)" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 18, 278 | "metadata": {}, 279 | "outputs": [ 
280 | { 281 | "data": { 282 | "text/plain": [ 283 | "DecisionTreeRegressor(criterion='mse', max_depth=2, max_features=None,\n", 284 | " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", 285 | " min_impurity_split=None, min_samples_leaf=1,\n", 286 | " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", 287 | " presort=False, random_state=None, splitter='best')" 288 | ] 289 | }, 290 | "execution_count": 18, 291 | "metadata": {}, 292 | "output_type": "execute_result" 293 | } 294 | ], 295 | "source": [ 296 | "from sklearn.tree import DecisionTreeRegressor\n", 297 | "tree_reg=DecisionTreeRegressor(max_depth=2)\n", 298 | "tree_reg.fit(X,y)" 299 | ] 300 | }, 301 | { 302 | "cell_type": "markdown", 303 | "metadata": {}, 304 | "source": [ 305 | "Don't forget to follow us on [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | [Twitter](http://twitter.com/tirendazacademy) | [GitHub](http://github.com/tirendazacademy) | [Linkedin](https://www.linkedin.com/in/tirendaz-academy) | [Kaggle](https://www.kaggle.com/tirendazacademy) 😎" 306 | ] 307 | } 308 | ], 309 | "metadata": { 310 | "kernelspec": { 311 | "display_name": "Python 3 (ipykernel)", 312 | "language": "python", 313 | "name": "python3" 314 | }, 315 | "language_info": { 316 | "codemirror_mode": { 317 | "name": "ipython", 318 | "version": 3 319 | }, 320 | "file_extension": ".py", 321 | "mimetype": "text/x-python", 322 | "name": "python", 323 | "nbconvert_exporter": "python", 324 | "pygments_lexer": "ipython3", 325 | "version": "3.8.12" 326 | } 327 | }, 328 | "nbformat": 4, 329 | "nbformat_minor": 2 330 | } 331 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/14-Ensemble Learning-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ensemble Learning" 8 | ] 9 | }, 10 | { 11 | 
"cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Hi Guys, Welcome to [Tirendaz Academy](https://youtube.com/c/tirendazacademy) 😀\n", 15 | "
\n", 16 | "In this notebook, I'm going to talk about ensemble learning.\n", 17 | "
\n", 18 | "Happy learning 🐱‍🏍 " 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "## Voting Classifiers" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "from sklearn.datasets import make_moons" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "X,y =make_moons(n_samples=100, noise=0.25, random_state=3)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 4, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "from sklearn.model_selection import train_test_split\n", 53 | "X_train, X_test, y_train, y_test=train_test_split(X,y,stratify=y)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 5, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "from sklearn.ensemble import RandomForestClassifier\n", 63 | "from sklearn.ensemble import VotingClassifier\n", 64 | "from sklearn.linear_model import LogisticRegression\n", 65 | "from sklearn.svm import SVC" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 18, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "log=LogisticRegression(solver='lbfgs').fit(X_train,y_train)\n", 75 | "rnd=RandomForestClassifier(n_estimators=10).fit(X_train,y_train)\n", 76 | "svm=SVC(gamma='auto').fit(X_train,y_train)\n", 77 | "voting=VotingClassifier(estimators=[('lr',log),('rf',rnd),('svc',svm)],\n", 78 | " voting='hard').fit(X_train,y_train)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 19, 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "name": "stdout", 88 | "output_type": "stream", 89 | "text": [ 90 | "0.76\n", 91 | "0.84\n", 92 | "0.88\n", 93 | "0.88\n" 94 | ] 95 | } 96 | ], 97 | "source": [ 98 | "print(log.score(X_test,y_test))\n", 99 | "print(rnd.score(X_test,y_test))\n", 100 | 
"print(svm.score(X_test,y_test))\n", 101 | "print(voting.score(X_test,y_test))" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "## Bagging and Pasting" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 20, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "from sklearn.ensemble import BaggingClassifier\n", 118 | "from sklearn.tree import DecisionTreeClassifier" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 21, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "from sklearn.datasets import make_blobs\n", 128 | "X,y=make_blobs(n_samples=300,centers=4,random_state=0, cluster_std=1)\n", 129 | "X_train,X_test,y_train,y_test=train_test_split(X,y,stratify=y)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 22, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "tree=DecisionTreeClassifier().fit(X_train,y_train)\n", 139 | "bag=BaggingClassifier(tree,\n", 140 | " n_estimators=100,\n", 141 | " max_samples=0.8,\n", 142 | " n_jobs=-1,\n", 143 | " random_state=1).fit(X_train,y_train)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 23, 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "name": "stdout", 153 | "output_type": "stream", 154 | "text": [ 155 | "0.9066666666666666\n", 156 | "0.9466666666666667\n" 157 | ] 158 | } 159 | ], 160 | "source": [ 161 | "print(tree.score(X_test,y_test))\n", 162 | "print(bag.score(X_test,y_test))" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "## Random Forests" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 24, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "X,y =make_moons(n_samples=100, noise=0.25, random_state=3)\n", 179 | "X_train, X_test, y_train, y_test=train_test_split(X,y,stratify=y)" 180 | ] 181 | }, 182 | { 
183 | "cell_type": "code", 184 | "execution_count": 25, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "forest=RandomForestClassifier(n_estimators=5).fit(X_train,y_train)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 26, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "from sklearn.datasets import load_breast_cancer\n", 198 | "kanser=load_breast_cancer()\n", 199 | "X_train,X_test,y_train,y_test=train_test_split(kanser.data,kanser.target, random_state=0)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 27, 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "0.972027972027972\n" 212 | ] 213 | } 214 | ], 215 | "source": [ 216 | "forest=RandomForestClassifier(n_estimators=100).fit(X_train,y_train)\n", 217 | "print(forest.score(X_test,y_test))" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": {}, 223 | "source": [ 224 | "## Gradient Boosting" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 32, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "from sklearn.ensemble import GradientBoostingClassifier\n", 234 | "gbrt=GradientBoostingClassifier(learning_rate=0.01,random_state=0).fit(X_train,y_train)" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 33, 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "name": "stdout", 244 | "output_type": "stream", 245 | "text": [ 246 | "0.9882629107981221\n", 247 | "0.965034965034965\n" 248 | ] 249 | } 250 | ], 251 | "source": [ 252 | "print(gbrt.score(X_train,y_train))\n", 253 | "print(gbrt.score(X_test,y_test))" 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "Don't forget to follow us on [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | 
[Twitter](http://twitter.com/tirendazacademy) | [GitHub](http://github.com/tirendazacademy) | [Linkedin](https://www.linkedin.com/in/tirendaz-academy) | [Kaggle](https://www.kaggle.com/tirendazacademy) 😎" 261 | ] 262 | } 263 | ], 264 | "metadata": { 265 | "kernelspec": { 266 | "display_name": "Python 3 (ipykernel)", 267 | "language": "python", 268 | "name": "python3" 269 | }, 270 | "language_info": { 271 | "codemirror_mode": { 272 | "name": "ipython", 273 | "version": 3 274 | }, 275 | "file_extension": ".py", 276 | "mimetype": "text/x-python", 277 | "name": "python", 278 | "nbconvert_exporter": "python", 279 | "pygments_lexer": "ipython3", 280 | "version": "3.8.12" 281 | } 282 | }, 283 | "nbformat": 4, 284 | "nbformat_minor": 2 285 | } 286 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/15-Artificial Neural Network-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Artificial Neural Network (ANN)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Hi Guys, Welcome to [Tirendaz Academy](https://youtube.com/c/tirendazacademy) 😀\n", 15 | "
\n", 16 | "In this notebook, I'm going to talk about ANN.\n", 17 | "
\n", 18 | "Happy learning 🐱‍🏍 " 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "from sklearn.neural_network import MLPClassifier\n", 28 | "from sklearn.datasets import make_moons" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "X,y=make_moons(n_samples=100, noise=0.25, random_state=3)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "from sklearn.model_selection import train_test_split" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 4, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "X_egitim,X_test,y_egitim,y_test=train_test_split(X,y,stratify=y, random_state=42)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 10, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "mlp=MLPClassifier(hidden_layer_sizes=[10],max_iter=10000,random_state=0).fit(X_egitim,y_egitim)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 11, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "from sklearn.datasets import load_breast_cancer\n", 74 | "kanser=load_breast_cancer()\n", 75 | "X_egitim,X_test,y_egitim,y_test=train_test_split(kanser.data,kanser.target,random_state=0)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 12, 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,\n", 87 | " beta_2=0.999, early_stopping=False, epsilon=1e-08,\n", 88 | " hidden_layer_sizes=(100,), learning_rate='constant',\n", 89 | " learning_rate_init=0.001, max_iter=200, momentum=0.9,\n", 90 | " n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,\n", 91 | " random_state=42, shuffle=True, 
solver='adam', tol=0.0001,\n", 92 | " validation_fraction=0.1, verbose=False, warm_start=False)" 93 | ] 94 | }, 95 | "execution_count": 12, 96 | "metadata": {}, 97 | "output_type": "execute_result" 98 | } 99 | ], 100 | "source": [ 101 | "mlp=MLPClassifier(random_state=42)\n", 102 | "mlp.fit(X_egitim,y_egitim)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 13, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "name": "stdout", 112 | "output_type": "stream", 113 | "text": [ 114 | "0.9389671361502347\n", 115 | "0.916083916083916\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "print(mlp.score(X_egitim,y_egitim))\n", 121 | "print(mlp.score(X_test,y_test))" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 16, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "from sklearn.preprocessing import StandardScaler\n", 131 | "scaler=StandardScaler()\n", 132 | "scaler.fit(X_egitim)\n", 133 | "scaler.fit(X_test)\n", 134 | "X_egitim_scaled=scaler.transform(X_egitim)\n", 135 | "X_test_scaled=scaler.transform(X_test)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 21, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "name": "stdout", 145 | "output_type": "stream", 146 | "text": [ 147 | "0.9882629107981221\n", 148 | "0.9790209790209791\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "mlp=MLPClassifier(alpha=1,max_iter=1000,random_state=42)\n", 154 | "mlp.fit(X_egitim_scaled,y_egitim)\n", 155 | "print(mlp.score(X_egitim_scaled,y_egitim))\n", 156 | "print(mlp.score(X_test_scaled,y_test))" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "Don't forget to follow us on [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | [Twitter](http://twitter.com/tirendazacademy) | [GitHub](http://github.com/tirendazacademy) | [Linkedin](https://www.linkedin.com/in/tirendaz-academy) | 
[Kaggle](https://www.kaggle.com/tirendazacademy) 😎" 164 | ] 165 | } 166 | ], 167 | "metadata": { 168 | "kernelspec": { 169 | "display_name": "Python 3 (ipykernel)", 170 | "language": "python", 171 | "name": "python3" 172 | }, 173 | "language_info": { 174 | "codemirror_mode": { 175 | "name": "ipython", 176 | "version": 3 177 | }, 178 | "file_extension": ".py", 179 | "mimetype": "text/x-python", 180 | "name": "python", 181 | "nbconvert_exporter": "python", 182 | "pygments_lexer": "ipython3", 183 | "version": "3.8.12" 184 | } 185 | }, 186 | "nbformat": 4, 187 | "nbformat_minor": 2 188 | } 189 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/16-Unsupervised Learning & Data Scaling-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Unsupervised Learning & Data Scaling" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Hi Guys, Welcome to [Tirendaz Academy](https://youtube.com/c/tirendazacademy) 😀\n", 15 | "
\n", 16 | "In this notebook, I'm going to talk about unsupervised learning & data scaling.\n", 17 | "
\n", 18 | "Happy learning 🐱‍🏍 " 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "from sklearn.datasets import load_breast_cancer" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "kanser=load_breast_cancer()\n", 37 | "from sklearn.model_selection import train_test_split\n", 38 | "X_egitim, X_test, y_egitim, y_test=train_test_split(kanser.data,kanser.target,\n", 39 | " random_state=1)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "data": { 49 | "text/plain": [ 50 | "MinMaxScaler(copy=True, feature_range=(0, 1))" 51 | ] 52 | }, 53 | "execution_count": 3, 54 | "metadata": {}, 55 | "output_type": "execute_result" 56 | } 57 | ], 58 | "source": [ 59 | "from sklearn.preprocessing import MinMaxScaler\n", 60 | "scaler=MinMaxScaler()\n", 61 | "scaler.fit(X_egitim)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 4, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "X_egitim_olcekli=scaler.transform(X_egitim)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 7, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/plain": [ 81 | "array([2.811e+01, 3.928e+01, 1.885e+02, 2.501e+03, 1.634e-01, 2.867e-01,\n", 82 | " 4.268e-01, 2.012e-01, 3.040e-01, 9.575e-02, 2.873e+00, 4.885e+00,\n", 83 | " 2.198e+01, 5.422e+02, 3.113e-02, 1.354e-01, 3.960e-01, 5.279e-02,\n", 84 | " 6.146e-02, 2.984e-02, 3.604e+01, 4.954e+01, 2.512e+02, 4.254e+03,\n", 85 | " 2.226e-01, 9.379e-01, 1.170e+00, 2.910e-01, 5.774e-01, 1.486e-01])" 86 | ] 87 | }, 88 | "execution_count": 7, 89 | "metadata": {}, 90 | "output_type": "execute_result" 91 | } 92 | ], 93 | "source": [ 94 | "X_egitim.max(axis=0)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 8, 100 | 
"metadata": {}, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/plain": [ 105 | "array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", 106 | " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])" 107 | ] 108 | }, 109 | "execution_count": 8, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "X_egitim_olcekli.max(axis=0)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 9, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "X_test_olcekli=scaler.transform(X_test)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 10, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/plain": [ 135 | "array([ 0.0336031 , 0.0226581 , 0.03144219, 0.01141039, 0.14128374,\n", 136 | " 0.04406704, 0. , 0. , 0.1540404 , -0.00615249,\n", 137 | " -0.00137796, 0.00594501, 0.00430665, 0.00079567, 0.03919502,\n", 138 | " 0.0112206 , 0. , 0. , -0.03191387, 0.00664013,\n", 139 | " 0.02660975, 0.05810235, 0.02031974, 0.00943767, 0.1094235 ,\n", 140 | " 0.02637792, 0. , 0. 
, -0.00023764, -0.00182032])" 141 | ] 142 | }, 143 | "execution_count": 10, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "X_test_olcekli.min(axis=0)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 11, 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "data": { 159 | "text/plain": [ 160 | "array([0.9578778 , 0.81501522, 0.95577362, 0.89353128, 0.81132075,\n", 161 | " 1.21958701, 0.87956888, 0.9333996 , 0.93232323, 1.0371347 ,\n", 162 | " 0.42669616, 0.49765736, 0.44117231, 0.28371044, 0.48703131,\n", 163 | " 0.73863671, 0.76717172, 0.62928585, 1.33685792, 0.39057253,\n", 164 | " 0.89612238, 0.79317697, 0.84859804, 0.74488793, 0.9154725 ,\n", 165 | " 1.13188961, 1.07008547, 0.92371134, 1.20532319, 1.63068851])" 166 | ] 167 | }, 168 | "execution_count": 11, 169 | "metadata": {}, 170 | "output_type": "execute_result" 171 | } 172 | ], 173 | "source": [ 174 | "X_test_olcekli.max(axis=0)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 16, 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "data": { 184 | "text/plain": [ 185 | "0.6293706293706294" 186 | ] 187 | }, 188 | "execution_count": 16, 189 | "metadata": {}, 190 | "output_type": "execute_result" 191 | } 192 | ], 193 | "source": [ 194 | "from sklearn.svm import SVC\n", 195 | "X_egitim, X_test, y_egitim, y_test=train_test_split(kanser.data,kanser.target,\n", 196 | " random_state=0)\n", 197 | "svm=SVC(C=100,gamma='auto')\n", 198 | "svm.fit(X_egitim,y_egitim)\n", 199 | "svm.score(X_test,y_test)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 17, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "scaler=MinMaxScaler()\n", 209 | "scaler.fit(X_egitim)\n", 210 | "X_egitim_olcekli=scaler.transform(X_egitim)\n", 211 | "X_test_olcekli=scaler.transform(X_test)" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 18, 217 | "metadata": 
{}, 218 | "outputs": [ 219 | { 220 | "data": { 221 | "text/plain": [ 222 | "0.965034965034965" 223 | ] 224 | }, 225 | "execution_count": 18, 226 | "metadata": {}, 227 | "output_type": "execute_result" 228 | } 229 | ], 230 | "source": [ 231 | "svm.fit(X_egitim_olcekli,y_egitim)\n", 232 | "svm.score(X_test_olcekli,y_test)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "Don't forget to follow us on [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | [Twitter](http://twitter.com/tirendazacademy) | [GitHub](http://github.com/tirendazacademy) | [Linkedin](https://www.linkedin.com/in/tirendaz-academy) | [Kaggle](https://www.kaggle.com/tirendazacademy) 😎" 240 | ] 241 | } 242 | ], 243 | "metadata": { 244 | "kernelspec": { 245 | "display_name": "Python 3 (ipykernel)", 246 | "language": "python", 247 | "name": "python3" 248 | }, 249 | "language_info": { 250 | "codemirror_mode": { 251 | "name": "ipython", 252 | "version": 3 253 | }, 254 | "file_extension": ".py", 255 | "mimetype": "text/x-python", 256 | "name": "python", 257 | "nbconvert_exporter": "python", 258 | "pygments_lexer": "ipython3", 259 | "version": "3.8.12" 260 | } 261 | }, 262 | "nbformat": 4, 263 | "nbformat_minor": 2 264 | } 265 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/25-Pipelines-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Pipelines" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Hi Guys, Welcome to [Tirendaz Academy](https://youtube.com/c/tirendazacademy) 😀\n", 15 | "
\n", 16 | "In this notebook, I'm going to talk about pipelines.\n", 17 | "
\n", 18 | "Happy learning 🐱‍🏍 " 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "from sklearn.datasets import samples_generator" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "X,y=samples_generator.make_classification(n_features=20,n_informative=3,\n", 37 | " n_redundant=0, n_classes=4,\n", 38 | " n_clusters_per_class=2)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "name": "stdout", 48 | "output_type": "stream", 49 | "text": [ 50 | "(100, 20)\n", 51 | "(100,)\n" 52 | ] 53 | } 54 | ], 55 | "source": [ 56 | "print(X.shape)\n", 57 | "print(y.shape)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 4, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "array([0, 1, 3, 2, 0, 3, 1, 2, 2, 0, 2, 0, 0, 1, 1, 1, 2, 1, 0, 1, 2, 2,\n", 69 | " 3, 2, 0, 3, 0, 3, 3, 2, 3, 2, 0, 2, 0, 2, 0, 0, 3, 3, 2, 1, 3, 2,\n", 70 | " 0, 3, 2, 0, 0, 1, 0, 3, 3, 3, 1, 3, 3, 2, 1, 1, 1, 0, 0, 1, 2, 2,\n", 71 | " 1, 1, 2, 2, 3, 3, 3, 3, 0, 1, 3, 0, 1, 1, 1, 3, 3, 2, 0, 2, 1, 2,\n", 72 | " 2, 3, 1, 3, 1, 1, 1, 0, 1, 0, 0, 2])" 73 | ] 74 | }, 75 | "execution_count": 4, 76 | "metadata": {}, 77 | "output_type": "execute_result" 78 | } 79 | ], 80 | "source": [ 81 | "y" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 5, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "from sklearn.model_selection import train_test_split\n", 91 | "X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=42)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 6, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "from sklearn.feature_selection import SelectKBest, f_regression" 101 | ] 102 | }, 103 | { 104 | "cell_type": 
"code", 105 | "execution_count": 7, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "anova_filter = SelectKBest(f_regression,k=3)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 8, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "from sklearn.svm import LinearSVC\n", 119 | "clf=LinearSVC()" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 9, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "from sklearn.pipeline import make_pipeline" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 10, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "anova_svm=make_pipeline(anova_filter,clf)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 11, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "Pipeline(memory=None,\n", 149 | " steps=[('selectkbest',\n", 150 | " SelectKBest(k=3,\n", 151 | " score_func=)),\n", 152 | " ('linearsvc',\n", 153 | " LinearSVC(C=1.0, class_weight=None, dual=True,\n", 154 | " fit_intercept=True, intercept_scaling=1,\n", 155 | " loss='squared_hinge', max_iter=1000,\n", 156 | " multi_class='ovr', penalty='l2', random_state=None,\n", 157 | " tol=0.0001, verbose=0))],\n", 158 | " verbose=False)" 159 | ] 160 | }, 161 | "execution_count": 11, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "anova_svm.fit(X_train,y_train)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 12, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "data": { 177 | "text/plain": [ 178 | "array([2, 3, 1, 2, 0, 3, 3, 1, 1, 2, 1, 2, 2, 1, 1, 1, 3, 0, 2, 2, 3, 2,\n", 179 | " 1, 3, 2])" 180 | ] 181 | }, 182 | "execution_count": 12, 183 | "metadata": {}, 184 | "output_type": "execute_result" 185 | } 186 | ], 187 | "source": [ 188 | "y_pred=anova_svm.predict(X_test)\n", 189 | "y_pred" 
190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 13, 195 | "metadata": {}, 196 | "outputs": [ 197 | { 198 | "data": { 199 | "text/plain": [ 200 | "0.56" 201 | ] 202 | }, 203 | "execution_count": 13, 204 | "metadata": {}, 205 | "output_type": "execute_result" 206 | } 207 | ], 208 | "source": [ 209 | "anova_svm.score(X_test,y_test)" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 14, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "from sklearn.metrics import classification_report" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 15, 224 | "metadata": {}, 225 | "outputs": [ 226 | { 227 | "name": "stdout", 228 | "output_type": "stream", 229 | "text": [ 230 | " precision recall f1-score support\n", 231 | "\n", 232 | " 0 1.00 0.29 0.44 7\n", 233 | " 1 0.25 1.00 0.40 2\n", 234 | " 2 0.44 0.67 0.53 6\n", 235 | " 3 1.00 0.60 0.75 10\n", 236 | "\n", 237 | " accuracy 0.56 25\n", 238 | " macro avg 0.67 0.64 0.53 25\n", 239 | "weighted avg 0.81 0.56 0.58 25\n", 240 | "\n" 241 | ] 242 | } 243 | ], 244 | "source": [ 245 | "print(classification_report(y_test,y_pred))" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "## Practice" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 16, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "from sklearn.datasets import load_breast_cancer" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 17, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "cancer=load_breast_cancer()" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 18, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "X_train,X_test,y_train,y_test=train_test_split(cancer.data,cancer.target, random_state=0)" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 19, 285 | 
"metadata": {}, 286 | "outputs": [], 287 | "source": [ 288 | "from sklearn.svm import SVC" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 20, 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [ 297 | "from sklearn.preprocessing import MinMaxScaler" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 22, 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "pp=make_pipeline(MinMaxScaler(),SVC(gamma='auto'))" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 23, 312 | "metadata": {}, 313 | "outputs": [ 314 | { 315 | "data": { 316 | "text/plain": [ 317 | "Pipeline(memory=None,\n", 318 | " steps=[('minmaxscaler', MinMaxScaler(copy=True, feature_range=(0, 1))),\n", 319 | " ('svc',\n", 320 | " SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", 321 | " decision_function_shape='ovr', degree=3, gamma='auto',\n", 322 | " kernel='rbf', max_iter=-1, probability=False,\n", 323 | " random_state=None, shrinking=True, tol=0.001,\n", 324 | " verbose=False))],\n", 325 | " verbose=False)" 326 | ] 327 | }, 328 | "execution_count": 23, 329 | "metadata": {}, 330 | "output_type": "execute_result" 331 | } 332 | ], 333 | "source": [ 334 | "pp.fit(X_train,y_train)" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 24, 340 | "metadata": {}, 341 | "outputs": [ 342 | { 343 | "data": { 344 | "text/plain": [ 345 | "0.951048951048951" 346 | ] 347 | }, 348 | "execution_count": 24, 349 | "metadata": {}, 350 | "output_type": "execute_result" 351 | } 352 | ], 353 | "source": [ 354 | "pp.score(X_test,y_test)" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": 25, 360 | "metadata": {}, 361 | "outputs": [], 362 | "source": [ 363 | "from sklearn.datasets import load_boston" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": 26, 369 | "metadata": {}, 370 | "outputs": [], 371 | "source": [ 372 | 
"boston=load_boston()" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": 27, 378 | "metadata": {}, 379 | "outputs": [], 380 | "source": [ 381 | "X_train,X_test,y_train,y_test=train_test_split(boston.data,boston.target, random_state=0)" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 28, 387 | "metadata": {}, 388 | "outputs": [], 389 | "source": [ 390 | "from sklearn.preprocessing import PolynomialFeatures\n", 391 | "from sklearn.preprocessing import StandardScaler\n", 392 | "from sklearn.linear_model import Ridge" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 29, 398 | "metadata": {}, 399 | "outputs": [], 400 | "source": [ 401 | "pp=make_pipeline(StandardScaler(),PolynomialFeatures(),Ridge())" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": 42, 407 | "metadata": {}, 408 | "outputs": [], 409 | "source": [ 410 | "param_grid={'polynomialfeatures__degree':[1,2,3],\n", 411 | " 'ridge__alpha':[0.001,0.01,0.1,1,10,100]}" 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": 43, 417 | "metadata": {}, 418 | "outputs": [], 419 | "source": [ 420 | "from sklearn.model_selection import GridSearchCV" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": 44, 426 | "metadata": {}, 427 | "outputs": [], 428 | "source": [ 429 | "grid=GridSearchCV(pp, param_grid=param_grid,cv=5,n_jobs=-1)" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 45, 435 | "metadata": {}, 436 | "outputs": [ 437 | { 438 | "data": { 439 | "text/plain": [ 440 | "GridSearchCV(cv=5, error_score='raise-deprecating',\n", 441 | " estimator=Pipeline(memory=None,\n", 442 | " steps=[('standardscaler',\n", 443 | " StandardScaler(copy=True,\n", 444 | " with_mean=True,\n", 445 | " with_std=True)),\n", 446 | " ('polynomialfeatures',\n", 447 | " PolynomialFeatures(degree=2,\n", 448 | " include_bias=True,\n", 449 | " 
interaction_only=False,\n", 450 | " order='C')),\n", 451 | " ('ridge',\n", 452 | " Ridge(alpha=1.0, copy_X=True,\n", 453 | " fit_intercept=True, max_iter=None,\n", 454 | " normalize=False,\n", 455 | " random_state=None, solver='auto',\n", 456 | " tol=0.001))],\n", 457 | " verbose=False),\n", 458 | " iid='warn', n_jobs=-1,\n", 459 | " param_grid={'polynomialfeatures__degree': [1, 2, 3],\n", 460 | " 'ridge__alpha': [0.001, 0.01, 0.1, 1, 10, 100]},\n", 461 | " pre_dispatch='2*n_jobs', refit=True, return_train_score=False,\n", 462 | " scoring=None, verbose=0)" 463 | ] 464 | }, 465 | "execution_count": 45, 466 | "metadata": {}, 467 | "output_type": "execute_result" 468 | } 469 | ], 470 | "source": [ 471 | "grid.fit(X_train,y_train)" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": 46, 477 | "metadata": {}, 478 | "outputs": [ 479 | { 480 | "data": { 481 | "text/plain": [ 482 | "{'polynomialfeatures__degree': 2, 'ridge__alpha': 10}" 483 | ] 484 | }, 485 | "execution_count": 46, 486 | "metadata": {}, 487 | "output_type": "execute_result" 488 | } 489 | ], 490 | "source": [ 491 | "grid.best_params_" 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": 47, 497 | "metadata": {}, 498 | "outputs": [ 499 | { 500 | "data": { 501 | "text/plain": [ 502 | "0.7683045464100146" 503 | ] 504 | }, 505 | "execution_count": 47, 506 | "metadata": {}, 507 | "output_type": "execute_result" 508 | } 509 | ], 510 | "source": [ 511 | "grid.score(X_test,y_test)" 512 | ] 513 | }, 514 | { 515 | "cell_type": "markdown", 516 | "metadata": {}, 517 | "source": [ 518 | "Don't forget to follow us on [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | [Twitter](http://twitter.com/tirendazacademy) | [GitHub](http://github.com/tirendazacademy) | [Linkedin](https://www.linkedin.com/in/tirendaz-academy) | [Kaggle](https://www.kaggle.com/tirendazacademy) 😎" 519 | ] 520 | } 521 | ], 522 | "metadata": { 523 | 
"kernelspec": { 524 | "display_name": "Python 3 (ipykernel)", 525 | "language": "python", 526 | "name": "python3" 527 | }, 528 | "language_info": { 529 | "codemirror_mode": { 530 | "name": "ipython", 531 | "version": 3 532 | }, 533 | "file_extension": ".py", 534 | "mimetype": "text/x-python", 535 | "name": "python", 536 | "nbconvert_exporter": "python", 537 | "pygments_lexer": "ipython3", 538 | "version": "3.8.12" 539 | } 540 | }, 541 | "nbformat": 4, 542 | "nbformat_minor": 2 543 | } 544 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/26-Text Analysis-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Text Analysis" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Hi Guys, Welcome to [Tirendaz Academy](https://youtube.com/c/tirendazacademy) 😀\n", 15 | "
\n", 16 | "In this notebook, I'm going to talk about text analysis.\n", 17 | "
\n", 18 | "Happy learning 🐱‍🏍 " 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 9, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "categories=['rec.motorcycles','rec.sport.baseball','comp.graphics','rec.sport.hockey']" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 10, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "from sklearn.datasets import load_files" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 32, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "twenty_train=load_files('Data/20newsbydate/20news-bydate-train/',\n", 46 | " categories=categories,\n", 47 | " shuffle=True,\n", 48 | " random_state=42,\n", 49 | " encoding='utf-8',\n", 50 | " decode_error='ignore')" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 33, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "data": { 60 | "text/plain": [ 61 | "sklearn.utils.Bunch" 62 | ] 63 | }, 64 | "execution_count": 33, 65 | "metadata": {}, 66 | "output_type": "execute_result" 67 | } 68 | ], 69 | "source": [ 70 | "type(twenty_train)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 34, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/plain": [ 81 | "['comp.graphics', 'rec.motorcycles', 'rec.sport.baseball', 'rec.sport.hockey']" 82 | ] 83 | }, 84 | "execution_count": 34, 85 | "metadata": {}, 86 | "output_type": "execute_result" 87 | } 88 | ], 89 | "source": [ 90 | "twenty_train.target_names" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 14, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "data": { 100 | "text/plain": [ 101 | "2379" 102 | ] 103 | }, 104 | "execution_count": 14, 105 | "metadata": {}, 106 | "output_type": "execute_result" 107 | } 108 | ], 109 | "source": [ 110 | "len(twenty_train.data)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 15, 116 | "metadata": {}, 
117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/plain": [ 121 | "array([1, 1, 1, 3, 2, 2, 3, 1, 0, 0])" 122 | ] 123 | }, 124 | "execution_count": 15, 125 | "metadata": {}, 126 | "output_type": "execute_result" 127 | } 128 | ], 129 | "source": [ 130 | "twenty_train.target[:10]" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 16, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "from sklearn.feature_extraction.text import CountVectorizer\n", 140 | "count_vect=CountVectorizer()" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 17, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "X_train_counts=count_vect.fit_transform(twenty_train.data)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 18, 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "data": { 159 | "text/plain": [ 160 | "(2379, 32550)" 161 | ] 162 | }, 163 | "execution_count": 18, 164 | "metadata": {}, 165 | "output_type": "execute_result" 166 | } 167 | ], 168 | "source": [ 169 | "X_train_counts.shape" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "## tf–idf technique" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 19, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "from sklearn.feature_extraction.text import TfidfTransformer\n", 186 | "tf_transformer=TfidfTransformer(use_idf=False).fit(X_train_counts)\n", 187 | "X_train_tf=tf_transformer.transform(X_train_counts)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 20, 193 | "metadata": {}, 194 | "outputs": [ 195 | { 196 | "data": { 197 | "text/plain": [ 198 | "(2379, 32550)" 199 | ] 200 | }, 201 | "execution_count": 20, 202 | "metadata": {}, 203 | "output_type": "execute_result" 204 | } 205 | ], 206 | "source": [ 207 | "X_train_tf.shape" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | 
"metadata": {}, 213 | "source": [ 214 | "## Building the model" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 21, 220 | "metadata": {}, 221 | "outputs": [], 222 | "source": [ 223 | "from sklearn.naive_bayes import MultinomialNB\n", 224 | "clf=MultinomialNB().fit(X_train_tf, twenty_train.target)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 22, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "docs_new=['brake-lamp is good','this computer is fast']" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 23, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "X_new_count=count_vect.transform(docs_new)\n", 243 | "X_new_tf=tf_transformer.transform(X_new_count)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "## Predicting the data" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 24, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "predicted=clf.predict(X_new_tf)" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 27, 265 | "metadata": {}, 266 | "outputs": [ 267 | { 268 | "name": "stdout", 269 | "output_type": "stream", 270 | "text": [ 271 | "'brake-lamp is good'=>rec.motorcycles\n", 272 | "'this computer is fast'=>comp.graphics\n" 273 | ] 274 | } 275 | ], 276 | "source": [ 277 | "for doc, category in zip(docs_new, predicted):\n", 278 | " print('%r=>%s' %(doc,twenty_train.target_names[category]))" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": [ 285 | "## Pipeline" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 28, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "from sklearn.pipeline import Pipeline" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 29, 300 | "metadata": {}, 301 | "outputs": [], 302 | 
"source": [ 303 | "text_clf=Pipeline([('vect', CountVectorizer()),\n", 304 | " ('tfidf', TfidfTransformer()),\n", 305 | " ('clf',MultinomialNB())])" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": 30, 311 | "metadata": {}, 312 | "outputs": [ 313 | { 314 | "data": { 315 | "text/plain": [ 316 | "Pipeline(memory=None,\n", 317 | " steps=[('vect',\n", 318 | " CountVectorizer(analyzer='word', binary=False,\n", 319 | " decode_error='strict',\n", 320 | " dtype=, encoding='utf-8',\n", 321 | " input='content', lowercase=True, max_df=1.0,\n", 322 | " max_features=None, min_df=1,\n", 323 | " ngram_range=(1, 1), preprocessor=None,\n", 324 | " stop_words=None, strip_accents=None,\n", 325 | " token_pattern='(?u)\\\\b\\\\w\\\\w+\\\\b',\n", 326 | " tokenizer=None, vocabulary=None)),\n", 327 | " ('tfidf',\n", 328 | " TfidfTransformer(norm='l2', smooth_idf=True,\n", 329 | " sublinear_tf=False, use_idf=True)),\n", 330 | " ('clf',\n", 331 | " MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))],\n", 332 | " verbose=False)" 333 | ] 334 | }, 335 | "execution_count": 30, 336 | "metadata": {}, 337 | "output_type": "execute_result" 338 | } 339 | ], 340 | "source": [ 341 | "text_clf.fit(twenty_train.data, twenty_train.target)" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 35, 347 | "metadata": {}, 348 | "outputs": [], 349 | "source": [ 350 | "twenty_test=load_files('Data/20newsbydate/20news-bydate-test/',\n", 351 | " categories=categories,\n", 352 | " shuffle=True,\n", 353 | " random_state=42,\n", 354 | " encoding='utf-8',\n", 355 | " decode_error='ignore')" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 36, 361 | "metadata": {}, 362 | "outputs": [], 363 | "source": [ 364 | "docs_test=twenty_test.data" 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": {}, 370 | "source": [ 371 | "## Predicting the model" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | 
"execution_count": 37, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [ 380 | "predicted=text_clf.predict(docs_test)" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": 39, 386 | "metadata": {}, 387 | "outputs": [ 388 | { 389 | "data": { 390 | "text/plain": [ 391 | "0.9576753000631711" 392 | ] 393 | }, 394 | "execution_count": 39, 395 | "metadata": {}, 396 | "output_type": "execute_result" 397 | } 398 | ], 399 | "source": [ 400 | "import numpy as np\n", 401 | "np.mean(predicted==twenty_test.target)" 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "metadata": {}, 407 | "source": [ 408 | "## SVM" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 40, 414 | "metadata": {}, 415 | "outputs": [], 416 | "source": [ 417 | "from sklearn.linear_model import SGDClassifier" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": 41, 423 | "metadata": {}, 424 | "outputs": [], 425 | "source": [ 426 | "text_clf=Pipeline([('vect', CountVectorizer()),\n", 427 | " ('tfidf', TfidfTransformer()),\n", 428 | " ('clf',SGDClassifier(loss='hinge',\n", 429 | " penalty='l2',\n", 430 | " alpha=1e-3,\n", 431 | " random_state=42,\n", 432 | " max_iter=5,\n", 433 | " tol=None))])" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": 42, 439 | "metadata": {}, 440 | "outputs": [ 441 | { 442 | "data": { 443 | "text/plain": [ 444 | "0.9696778269109286" 445 | ] 446 | }, 447 | "execution_count": 42, 448 | "metadata": {}, 449 | "output_type": "execute_result" 450 | } 451 | ], 452 | "source": [ 453 | "text_clf.fit(twenty_train.data, twenty_train.target)\n", 454 | "predicted=text_clf.predict(docs_test)\n", 455 | "np.mean(predicted==twenty_test.target)" 456 | ] 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": 43, 461 | "metadata": {}, 462 | "outputs": [ 463 | { 464 | "data": { 465 | "text/plain": [ 466 | "array([[382, 2, 5, 0],\n", 467 | " [ 3, 393, 1, 1],\n", 468 | 
" [ 6, 3, 369, 19],\n", 469 | " [ 1, 1, 6, 391]], dtype=int64)" 470 | ] 471 | }, 472 | "execution_count": 43, 473 | "metadata": {}, 474 | "output_type": "execute_result" 475 | } 476 | ], 477 | "source": [ 478 | "from sklearn import metrics\n", 479 | "metrics.confusion_matrix(twenty_test.target, predicted)" 480 | ] 481 | }, 482 | { 483 | "cell_type": "markdown", 484 | "metadata": {}, 485 | "source": [ 486 | "Don't forget to follow us on [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | [Twitter](http://twitter.com/tirendazacademy) | [GitHub](http://github.com/tirendazacademy) | [Linkedin](https://www.linkedin.com/in/tirendaz-academy) | [Kaggle](https://www.kaggle.com/tirendazacademy) 😎" 487 | ] 488 | } 489 | ], 490 | "metadata": { 491 | "kernelspec": { 492 | "display_name": "Python 3 (ipykernel)", 493 | "language": "python", 494 | "name": "python3" 495 | }, 496 | "language_info": { 497 | "codemirror_mode": { 498 | "name": "ipython", 499 | "version": 3 500 | }, 501 | "file_extension": ".py", 502 | "mimetype": "text/x-python", 503 | "name": "python", 504 | "nbconvert_exporter": "python", 505 | "pygments_lexer": "ipython3", 506 | "version": "3.8.12" 507 | } 508 | }, 509 | "nbformat": 4, 510 | "nbformat_minor": 2 511 | } 512 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/28-Feature Selection-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Feature Selection" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Hi Guys, Welcome to [Tirendaz Academy](https://youtube.com/c/tirendazacademy) 😀\n", 15 | "
\n", 16 | "In this notebook, I'm going to talk about feature selection.\n", 17 | "
\n", 18 | "Happy learning 🐱‍🏍 " 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "## VarianceThreshold" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "from sklearn.feature_selection import VarianceThreshold" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 1, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "from sklearn.datasets import load_iris\n", 44 | "iris=load_iris()\n", 45 | "X=iris.data\n", 46 | "y=iris.target" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 2, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "from sklearn.linear_model import LogisticRegression\n", 56 | "logreg = LogisticRegression()\n", 57 | "logreg=LogisticRegression(solver='lbfgs',\n", 58 | " multi_class='auto',\n", 59 | " max_iter=1000) " 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 7, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "text/plain": [ 70 | "array([0., 0., 0.])" 71 | ] 72 | }, 73 | "execution_count": 7, 74 | "metadata": {}, 75 | "output_type": "execute_result" 76 | } 77 | ], 78 | "source": [ 79 | "from sklearn.model_selection import KFold\n", 80 | "from sklearn.model_selection import cross_val_score\n", 81 | "kfold = KFold(n_splits=3, shuffle=True, random_state=0)\n", 82 | "cross_val_score(logreg, iris.data, iris.target, cv=kfold)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 14, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "thresholder=VarianceThreshold(threshold=.6)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 15, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "X_high_variance=thresholder.fit_transform(X)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 16, 106 | "metadata": {}, 107 | "outputs": [ 108 
| { 109 | "name": "stdout", 110 | "output_type": "stream", 111 | "text": [ 112 | "(150, 4)\n", 113 | "(150, 2)\n" 114 | ] 115 | } 116 | ], 117 | "source": [ 118 | "print(X.shape)\n", 119 | "print(X_high_variance.shape)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 17, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "from sklearn.feature_selection import SelectKBest\n", 129 | "from sklearn.feature_selection import chi2" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 18, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "X_new=SelectKBest(chi2,k=2).fit_transform(X,y)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 19, 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "name": "stdout", 148 | "output_type": "stream", 149 | "text": [ 150 | "(150, 4)\n", 151 | "(150, 2)\n" 152 | ] 153 | } 154 | ], 155 | "source": [ 156 | "print(X.shape)\n", 157 | "print(X_new.shape)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "## Univariate feature selection" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 20, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "from sklearn.feature_selection import f_classif" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 21, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "from sklearn.feature_selection import SelectPercentile" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 22, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "X_new2=SelectPercentile(f_classif, percentile=75)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 23, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "X_new2=X_new2.fit_transform(X,y)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | 
"execution_count": 24, 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "name": "stdout", 210 | "output_type": "stream", 211 | "text": [ 212 | "(150, 4)\n", 213 | "(150, 3)\n" 214 | ] 215 | } 216 | ], 217 | "source": [ 218 | "print(X.shape)\n", 219 | "print(X_new2.shape)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 15, 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "data": { 229 | "text/plain": [ 230 | "array([0.5 , 0.66666667, 1. ])" 231 | ] 232 | }, 233 | "execution_count": 15, 234 | "metadata": {}, 235 | "output_type": "execute_result" 236 | } 237 | ], 238 | "source": [ 239 | "from sklearn.model_selection import GroupKFold\n", 240 | "from sklearn.datasets import make_blobs\n", 241 | "X, y = make_blobs(n_samples=12, random_state=0)\n", 242 | "groups = [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]\n", 243 | "scores = cross_val_score(logreg, X, y, groups, \n", 244 | " cv=GroupKFold(n_splits=3))\n", 245 | "scores" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "Don't forget to follow us on [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | [Twitter](http://twitter.com/tirendazacademy) | [GitHub](http://github.com/tirendazacademy) | [Linkedin](https://www.linkedin.com/in/tirendaz-academy) | [Kaggle](https://www.kaggle.com/tirendazacademy) 😎" 253 | ] 254 | } 255 | ], 256 | "metadata": { 257 | "kernelspec": { 258 | "display_name": "Python 3 (ipykernel)", 259 | "language": "python", 260 | "name": "python3" 261 | }, 262 | "language_info": { 263 | "codemirror_mode": { 264 | "name": "ipython", 265 | "version": 3 266 | }, 267 | "file_extension": ".py", 268 | "mimetype": "text/x-python", 269 | "name": "python", 270 | "nbconvert_exporter": "python", 271 | "pygments_lexer": "ipython3", 272 | "version": "3.8.12" 273 | } 274 | }, 275 | "nbformat": 4, 276 | "nbformat_minor": 2 277 | } 278 | 
-------------------------------------------------------------------------------- /.ipynb_checkpoints/34-Introduction to Scikit-Learn-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6364347b", 6 | "metadata": {}, 7 | "source": [ 8 | "# Introduction to Scikit-Learn " 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "f762c904", 14 | "metadata": {}, 15 | "source": [ 16 | "Hi Guys, Welcome to [Tirendaz Academy](https://youtube.com/c/tirendazacademy) 😀\n", 17 | "\n", 18 | "---\n", 19 | "I'm going to talk about the Scikit-Learn library.\n", 20 | "In short, this notebook covers the following topics:\n", 21 | "- Building the model\n", 22 | "- Data scaling\n", 23 | "- Pipelines\n", 24 | "- Model evaluation\n", 25 | "- Automatic parameter searches\n", 26 | "---\n", 27 | "Happy Learning 😀" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "id": "30cdddb0", 33 | "metadata": {}, 34 | "source": [ 35 | "## Building the Model" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 1, 41 | "id": "480fe85e", 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "from sklearn.ensemble import RandomForestClassifier" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 2, 51 | "id": "4cad5f14", 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "clf = RandomForestClassifier(random_state=0)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 3, 61 | "id": "32955226", 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "X = [[1,2,3],\n", 66 | " [11,12,13]]\n", 67 | "y = [0,1]" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 4, 73 | "id": "d62cc629", 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "RandomForestClassifier(random_state=0)" 80 | ] 81 | }, 82 | "execution_count": 4, 83 | "metadata": {}, 84 | 
"output_type": "execute_result" 85 | } 86 | ], 87 | "source": [ 88 | "clf.fit(X,y)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 5, 94 | "id": "2ace347e", 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "data": { 99 | "text/plain": [ 100 | "array([0, 1])" 101 | ] 102 | }, 103 | "execution_count": 5, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 | ], 108 | "source": [ 109 | "clf.predict(X)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 6, 115 | "id": "cee2fa98", 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/plain": [ 121 | "array([0, 1])" 122 | ] 123 | }, 124 | "execution_count": 6, 125 | "metadata": {}, 126 | "output_type": "execute_result" 127 | } 128 | ], 129 | "source": [ 130 | "clf.predict([[4,5,6],[14,15,16]])" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "id": "66b5faca", 136 | "metadata": {}, 137 | "source": [ 138 | "## Data Scaling" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 7, 144 | "id": "da27d7f2", 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "from sklearn.preprocessing import StandardScaler" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 8, 154 | "id": "65a57d41", 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "X = [[0,15],\n", 159 | " [1,-10]]" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 9, 165 | "id": "cf25ea2e", 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "data": { 170 | "text/plain": [ 171 | "array([[-1., 1.],\n", 172 | " [ 1., -1.]])" 173 | ] 174 | }, 175 | "execution_count": 9, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "StandardScaler().fit(X).transform(X)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "id": "d73ec657", 187 | "metadata": {}, 188 | "source": [ 189 | "## Pipelines" 190 | ] 191 | 
}, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 10, 195 | "id": "dbd78b6d", 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "from sklearn.pipeline import make_pipeline" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 11, 205 | "id": "bc0fdc58", 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [ 209 | "from sklearn.linear_model import LogisticRegression" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 12, 215 | "id": "569ef689", 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "pipe = make_pipeline(\n", 220 | " StandardScaler(),\n", 221 | " LogisticRegression())" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 13, 227 | "id": "0d65c2e6", 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "from sklearn.datasets import load_iris" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 14, 237 | "id": "0bd2e277", 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "X, y = load_iris(return_X_y=True)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 15, 247 | "id": "efb8fd0a", 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [ 251 | "from sklearn.model_selection import train_test_split" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 16, 257 | "id": "7174c6b0", 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 17, 267 | "id": "c29bb899", 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "data": { 272 | "text/plain": [ 273 | "Pipeline(steps=[('standardscaler', StandardScaler()),\n", 274 | " ('logisticregression', LogisticRegression())])" 275 | ] 276 | }, 277 | "execution_count": 17, 278 | "metadata": {}, 279 | "output_type": 
"execute_result" 280 | } 281 | ], 282 | "source": [ 283 | "pipe.fit(X_train, y_train)" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 18, 289 | "id": "d1f495da", 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "from sklearn.metrics import accuracy_score" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 19, 299 | "id": "73aad939", 300 | "metadata": {}, 301 | "outputs": [ 302 | { 303 | "data": { 304 | "text/plain": [ 305 | "0.9736842105263158" 306 | ] 307 | }, 308 | "execution_count": 19, 309 | "metadata": {}, 310 | "output_type": "execute_result" 311 | } 312 | ], 313 | "source": [ 314 | "accuracy_score(pipe.predict(X_test), y_test)" 315 | ] 316 | }, 317 | { 318 | "cell_type": "markdown", 319 | "id": "31cf4451", 320 | "metadata": {}, 321 | "source": [ 322 | "## Model Evaluation" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": 20, 328 | "id": "596b5681", 329 | "metadata": {}, 330 | "outputs": [], 331 | "source": [ 332 | "from sklearn.datasets import make_regression" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 21, 338 | "id": "29dfa76e", 339 | "metadata": {}, 340 | "outputs": [], 341 | "source": [ 342 | "X, y = make_regression(n_samples=1000, random_state=0)" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 22, 348 | "id": "195c4393", 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [ 352 | "from sklearn.linear_model import LinearRegression" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": 23, 358 | "id": "641adfca", 359 | "metadata": {}, 360 | "outputs": [], 361 | "source": [ 362 | "lr = LinearRegression()" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": 24, 368 | "id": "8fe5ed29", 369 | "metadata": {}, 370 | "outputs": [], 371 | "source": [ 372 | "from sklearn.model_selection import cross_validate" 373 | ] 374 | }, 375 | { 376 | 
"cell_type": "code", 377 | "execution_count": 25, 378 | "id": "c5be67b4", 379 | "metadata": {}, 380 | "outputs": [], 381 | "source": [ 382 | "result = cross_validate(lr, X, y)" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 26, 388 | "id": "850270d8", 389 | "metadata": {}, 390 | "outputs": [ 391 | { 392 | "data": { 393 | "text/plain": [ 394 | "array([1., 1., 1., 1., 1.])" 395 | ] 396 | }, 397 | "execution_count": 26, 398 | "metadata": {}, 399 | "output_type": "execute_result" 400 | } 401 | ], 402 | "source": [ 403 | "result[\"test_score\"]" 404 | ] 405 | }, 406 | { 407 | "cell_type": "markdown", 408 | "id": "0673e6ea", 409 | "metadata": {}, 410 | "source": [ 411 | "## Automatic Parameter Searches" 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": 27, 417 | "id": "38f558da", 418 | "metadata": {}, 419 | "outputs": [], 420 | "source": [ 421 | "from sklearn.datasets import fetch_california_housing " 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 28, 427 | "id": "4c726be6", 428 | "metadata": {}, 429 | "outputs": [], 430 | "source": [ 431 | "X, y = fetch_california_housing(return_X_y=True)" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": 29, 437 | "id": "d1711a04", 438 | "metadata": {}, 439 | "outputs": [], 440 | "source": [ 441 | "X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=0)" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": 30, 447 | "id": "781a0520", 448 | "metadata": {}, 449 | "outputs": [], 450 | "source": [ 451 | "from sklearn.model_selection import RandomizedSearchCV" 452 | ] 453 | }, 454 | { 455 | "cell_type": "code", 456 | "execution_count": 31, 457 | "id": "ae20b66b", 458 | "metadata": {}, 459 | "outputs": [], 460 | "source": [ 461 | "from scipy.stats import randint" 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": 33, 467 | "id": "8d39412a", 468 | "metadata": {}, 
469 | "outputs": [], 470 | "source": [ 471 | "param_distributions = {\"n_estimators\" : randint(1,5),\n", 472 | " \"max_depth\" : randint(5,10)} " 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": 34, 478 | "id": "225a140f", 479 | "metadata": {}, 480 | "outputs": [], 481 | "source": [ 482 | "from sklearn.ensemble import RandomForestRegressor\n", 483 | "search = RandomizedSearchCV(\n", 484 | " estimator = RandomForestRegressor(random_state=0),\n", 485 | " n_iter = 5, \n", 486 | " param_distributions = param_distributions,\n", 487 | " random_state=0)" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": 35, 493 | "id": "6cb5ab7a", 494 | "metadata": {}, 495 | "outputs": [ 496 | { 497 | "data": { 498 | "text/plain": [ 499 | "RandomizedSearchCV(estimator=RandomForestRegressor(random_state=0), n_iter=5,\n", 500 | " param_distributions={'max_depth': ,\n", 501 | " 'n_estimators': },\n", 502 | " random_state=0)" 503 | ] 504 | }, 505 | "execution_count": 35, 506 | "metadata": {}, 507 | "output_type": "execute_result" 508 | } 509 | ], 510 | "source": [ 511 | "search.fit(X_train, y_train)" 512 | ] 513 | }, 514 | { 515 | "cell_type": "code", 516 | "execution_count": 36, 517 | "id": "150412c8", 518 | "metadata": {}, 519 | "outputs": [ 520 | { 521 | "data": { 522 | "text/plain": [ 523 | "{'max_depth': 9, 'n_estimators': 4}" 524 | ] 525 | }, 526 | "execution_count": 36, 527 | "metadata": {}, 528 | "output_type": "execute_result" 529 | } 530 | ], 531 | "source": [ 532 | "search.best_params_" 533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": 37, 538 | "id": "e18e5390", 539 | "metadata": {}, 540 | "outputs": [ 541 | { 542 | "data": { 543 | "text/plain": [ 544 | "0.735363411343253" 545 | ] 546 | }, 547 | "execution_count": 37, 548 | "metadata": {}, 549 | "output_type": "execute_result" 550 | } 551 | ], 552 | "source": [ 553 | "search.score(X_test, y_test)" 554 | ] 555 | }, 556 | { 557 | "cell_type": 
"markdown", 558 | "id": "6b493bac", 559 | "metadata": {}, 560 | "source": [ 561 | "Don't forget to follow us on [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | [Twitter](http://twitter.com/tirendazacademy) | [GitHub](http://github.com/tirendazacademy) | [Linkedin](https://www.linkedin.com/in/tirendaz-academy) | [Kaggle](https://www.kaggle.com/tirendazacademy) 😎" 562 | ] 563 | } 564 | ], 565 | "metadata": { 566 | "kernelspec": { 567 | "display_name": "Python 3 (ipykernel)", 568 | "language": "python", 569 | "name": "python3" 570 | }, 571 | "language_info": { 572 | "codemirror_mode": { 573 | "name": "ipython", 574 | "version": 3 575 | }, 576 | "file_extension": ".py", 577 | "mimetype": "text/x-python", 578 | "name": "python", 579 | "nbconvert_exporter": "python", 580 | "pygments_lexer": "ipython3", 581 | "version": "3.8.12" 582 | } 583 | }, 584 | "nbformat": 4, 585 | "nbformat_minor": 5 586 | } 587 | -------------------------------------------------------------------------------- /13-DECISION TREES.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Decision Trees" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Hi Guys, Welcome to [Tirendaz Academy](https://youtube.com/c/tirendazacademy) 😀\n", 15 | "
\n", 16 | "In this notebook, I'm going to talk about decision trees.\n", 17 | "
\n", 18 | "Happy learning 🐱‍🏍 " 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "## Importing the data" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "from sklearn.datasets import load_breast_cancer\n", 35 | "kanser=load_breast_cancer()" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## Preprocessing the data" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 2, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "from sklearn.model_selection import train_test_split\n", 52 | "X_train,X_test,y_train,y_test=train_test_split(kanser.data,kanser.target,stratify=kanser.target)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "## Building the model" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 3, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "text/plain": [ 70 | "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n", 71 | " max_features=None, max_leaf_nodes=None,\n", 72 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 73 | " min_samples_leaf=1, min_samples_split=2,\n", 74 | " min_weight_fraction_leaf=0.0, presort=False,\n", 75 | " random_state=None, splitter='best')" 76 | ] 77 | }, 78 | "execution_count": 3, 79 | "metadata": {}, 80 | "output_type": "execute_result" 81 | } 82 | ], 83 | "source": [ 84 | "from sklearn.tree import DecisionTreeClassifier\n", 85 | "tree=DecisionTreeClassifier()\n", 86 | "tree.fit(X_train,y_train)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "## Evaluating the model" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 4, 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "name": "stdout", 103 | "output_type": "stream", 104 | 
"text": [ 105 | "1.0\n", 106 | "0.9370629370629371\n" 107 | ] 108 | } 109 | ], 110 | "source": [ 111 | "print(tree.score(X_train,y_train))\n", 112 | "print(tree.score(X_test,y_test))" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "## Tuning the model" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 5, 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "data": { 129 | "text/plain": [ 130 | "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,\n", 131 | " max_features=None, max_leaf_nodes=None,\n", 132 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 133 | " min_samples_leaf=1, min_samples_split=2,\n", 134 | " min_weight_fraction_leaf=0.0, presort=False,\n", 135 | " random_state=None, splitter='best')" 136 | ] 137 | }, 138 | "execution_count": 5, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "tree=DecisionTreeClassifier(max_depth=4)\n", 145 | "tree.fit(X_train,y_train)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 6, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "name": "stdout", 155 | "output_type": "stream", 156 | "text": [ 157 | "0.9929577464788732\n", 158 | "0.9440559440559441\n" 159 | ] 160 | } 161 | ], 162 | "source": [ 163 | "print(tree.score(X_train,y_train))\n", 164 | "print(tree.score(X_test,y_test))" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "## Importing the data" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 11, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "from sklearn.datasets import load_iris\n", 181 | "iris=load_iris()" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "## Preprocessing the data" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 12, 
194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "X=iris.data[:,2:]\n", 198 | "y=iris.target" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "## Building the model" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 13, 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "data": { 215 | "text/plain": [ 216 | "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=2,\n", 217 | " max_features=None, max_leaf_nodes=None,\n", 218 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 219 | " min_samples_leaf=1, min_samples_split=2,\n", 220 | " min_weight_fraction_leaf=0.0, presort=False,\n", 221 | " random_state=None, splitter='best')" 222 | ] 223 | }, 224 | "execution_count": 13, 225 | "metadata": {}, 226 | "output_type": "execute_result" 227 | } 228 | ], 229 | "source": [ 230 | "tree=DecisionTreeClassifier(max_depth=2)\n", 231 | "tree.fit(X,y)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "## Data visualization" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 14, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [ 247 | "from sklearn.tree import export_graphviz" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 15, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "export_graphviz(tree,\n", 257 | " out_file='tree.dot',\n", 258 | " feature_names=iris.feature_names[2:],\n", 259 | " class_names=True,\n", 260 | " filled=True)" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 17, 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [ 269 | "import graphviz\n", 270 | "with open('tree.dot') as f:\n", 271 | " dot_graph=f.read()\n", 272 | "graphviz.Source(dot_graph)" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 18, 278 | "metadata": {}, 279 | "outputs": [ 
280 | { 281 | "data": { 282 | "text/plain": [ 283 | "DecisionTreeRegressor(criterion='mse', max_depth=2, max_features=None,\n", 284 | " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", 285 | " min_impurity_split=None, min_samples_leaf=1,\n", 286 | " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", 287 | " presort=False, random_state=None, splitter='best')" 288 | ] 289 | }, 290 | "execution_count": 18, 291 | "metadata": {}, 292 | "output_type": "execute_result" 293 | } 294 | ], 295 | "source": [ 296 | "from sklearn.tree import DecisionTreeRegressor\n", 297 | "tree_reg=DecisionTreeRegressor(max_depth=2)\n", 298 | "tree_reg.fit(X,y)" 299 | ] 300 | }, 301 | { 302 | "cell_type": "markdown", 303 | "metadata": {}, 304 | "source": [ 305 | "Don't forget to follow us on [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | [Twitter](http://twitter.com/tirendazacademy) | [GitHub](http://github.com/tirendazacademy) | [Linkedin](https://www.linkedin.com/in/tirendaz-academy) | [Kaggle](https://www.kaggle.com/tirendazacademy) 😎" 306 | ] 307 | } 308 | ], 309 | "metadata": { 310 | "kernelspec": { 311 | "display_name": "Python 3 (ipykernel)", 312 | "language": "python", 313 | "name": "python3" 314 | }, 315 | "language_info": { 316 | "codemirror_mode": { 317 | "name": "ipython", 318 | "version": 3 319 | }, 320 | "file_extension": ".py", 321 | "mimetype": "text/x-python", 322 | "name": "python", 323 | "nbconvert_exporter": "python", 324 | "pygments_lexer": "ipython3", 325 | "version": "3.8.12" 326 | } 327 | }, 328 | "nbformat": 4, 329 | "nbformat_minor": 2 330 | } 331 | -------------------------------------------------------------------------------- /14-ENSEMBLE LEARNING.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ensemble Learning" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | 
"metadata": {}, 13 | "source": [ 14 | "Hi Guys, Welcome to [Tirendaz Academy](https://youtube.com/c/tirendazacademy) 😀\n", 15 | "
\n", 16 | "In this notebook, I'm going to talk about ensemble learning.\n", 17 | "
\n", 18 | "Happy learning 🐱‍🏍 " 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "## Voting Classifiers" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "from sklearn.datasets import make_moons" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "X,y =make_moons(n_samples=100, noise=0.25, random_state=3)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 4, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "from sklearn.model_selection import train_test_split\n", 53 | "X_train, X_test, y_train, y_test=train_test_split(X,y,stratify=y)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 5, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "from sklearn.ensemble import RandomForestClassifier\n", 63 | "from sklearn.ensemble import VotingClassifier\n", 64 | "from sklearn.linear_model import LogisticRegression\n", 65 | "from sklearn.svm import SVC" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 18, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "log=LogisticRegression(solver='lbfgs').fit(X_train,y_train)\n", 75 | "rnd=RandomForestClassifier(n_estimators=10).fit(X_train,y_train)\n", 76 | "svm=SVC(gamma='auto').fit(X_train,y_train)\n", 77 | "voting=VotingClassifier(estimators=[('lr',log),('rf',rnd),('svc',svm)],\n", 78 | " voting='hard').fit(X_train,y_train)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 19, 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "name": "stdout", 88 | "output_type": "stream", 89 | "text": [ 90 | "0.76\n", 91 | "0.84\n", 92 | "0.88\n", 93 | "0.88\n" 94 | ] 95 | } 96 | ], 97 | "source": [ 98 | "print(log.score(X_test,y_test))\n", 99 | "print(rnd.score(X_test,y_test))\n", 100 | 
"print(svm.score(X_test,y_test))\n", 101 | "print(voting.score(X_test,y_test))" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "## Bagging and Pasting" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 20, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "from sklearn.ensemble import BaggingClassifier\n", 118 | "from sklearn.tree import DecisionTreeClassifier" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 21, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "from sklearn.datasets import make_blobs\n", 128 | "X,y=make_blobs(n_samples=300,centers=4,random_state=0, cluster_std=1)\n", 129 | "X_train,X_test,y_train,y_test=train_test_split(X,y,stratify=y)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 22, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "tree=DecisionTreeClassifier().fit(X_train,y_train)\n", 139 | "bag=BaggingClassifier(tree,\n", 140 | " n_estimators=100,\n", 141 | " max_samples=0.8,\n", 142 | " n_jobs=-1,\n", 143 | " random_state=1).fit(X_train,y_train)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 23, 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "name": "stdout", 153 | "output_type": "stream", 154 | "text": [ 155 | "0.9066666666666666\n", 156 | "0.9466666666666667\n" 157 | ] 158 | } 159 | ], 160 | "source": [ 161 | "print(tree.score(X_test,y_test))\n", 162 | "print(bag.score(X_test,y_test))" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "## Random Forests" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 24, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "X,y =make_moons(n_samples=100, noise=0.25, random_state=3)\n", 179 | "X_train, X_test, y_train, y_test=train_test_split(X,y,stratify=y)" 180 | ] 181 | }, 182 | { 
183 | "cell_type": "code", 184 | "execution_count": 25, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "forest=RandomForestClassifier(n_estimators=5).fit(X_train,y_train)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 26, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "from sklearn.datasets import load_breast_cancer\n", 198 | "kanser=load_breast_cancer()\n", 199 | "X_train,X_test,y_train,y_test=train_test_split(kanser.data,kanser.target, random_state=0)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 27, 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "0.972027972027972\n" 212 | ] 213 | } 214 | ], 215 | "source": [ 216 | "forest=RandomForestClassifier(n_estimators=100).fit(X_train,y_train)\n", 217 | "print(forest.score(X_test,y_test))" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": {}, 223 | "source": [ 224 | "## Gradient Boosting" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 32, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "from sklearn.ensemble import GradientBoostingClassifier\n", 234 | "gbrt=GradientBoostingClassifier(learning_rate=0.01,random_state=0).fit(X_train,y_train)" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 33, 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "name": "stdout", 244 | "output_type": "stream", 245 | "text": [ 246 | "0.9882629107981221\n", 247 | "0.965034965034965\n" 248 | ] 249 | } 250 | ], 251 | "source": [ 252 | "print(gbrt.score(X_train,y_train))\n", 253 | "print(gbrt.score(X_test,y_test))" 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "Don't forget to follow us on [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | 
[Twitter](http://twitter.com/tirendazacademy) | [GitHub](http://github.com/tirendazacademy) | [Linkedin](https://www.linkedin.com/in/tirendaz-academy) | [Kaggle](https://www.kaggle.com/tirendazacademy) 😎" 261 | ] 262 | } 263 | ], 264 | "metadata": { 265 | "kernelspec": { 266 | "display_name": "Python 3 (ipykernel)", 267 | "language": "python", 268 | "name": "python3" 269 | }, 270 | "language_info": { 271 | "codemirror_mode": { 272 | "name": "ipython", 273 | "version": 3 274 | }, 275 | "file_extension": ".py", 276 | "mimetype": "text/x-python", 277 | "name": "python", 278 | "nbconvert_exporter": "python", 279 | "pygments_lexer": "ipython3", 280 | "version": "3.8.12" 281 | } 282 | }, 283 | "nbformat": 4, 284 | "nbformat_minor": 2 285 | } 286 | -------------------------------------------------------------------------------- /15-ARTIFICIAL NEURAL NETWORK.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Artificial Neural Network (ANN)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Hi Guys, Welcome to [Tirendaz Academy](https://youtube.com/c/tirendazacademy) 😀\n", 15 | "
\n", 16 | "In this notebook, I'm going to talk about ANN.\n", 17 | "
\n", 18 | "Happy learning 🐱‍🏍 " 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "from sklearn.neural_network import MLPClassifier\n", 28 | "from sklearn.datasets import make_moons" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "X,y=make_moons(n_samples=100, noise=0.25, random_state=3)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "from sklearn.model_selection import train_test_split" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 4, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "X_egitim,X_test,y_egitim,y_test=train_test_split(X,y,stratify=y, random_state=42)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 10, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "mlp=MLPClassifier(hidden_layer_sizes=[10],max_iter=10000,random_state=0).fit(X_egitim,y_egitim)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 11, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "from sklearn.datasets import load_breast_cancer\n", 74 | "kanser=load_breast_cancer()\n", 75 | "X_egitim,X_test,y_egitim,y_test=train_test_split(kanser.data,kanser.target,random_state=0)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 12, 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,\n", 87 | " beta_2=0.999, early_stopping=False, epsilon=1e-08,\n", 88 | " hidden_layer_sizes=(100,), learning_rate='constant',\n", 89 | " learning_rate_init=0.001, max_iter=200, momentum=0.9,\n", 90 | " n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,\n", 91 | " random_state=42, shuffle=True, 
solver='adam', tol=0.0001,\n", 92 | " validation_fraction=0.1, verbose=False, warm_start=False)" 93 | ] 94 | }, 95 | "execution_count": 12, 96 | "metadata": {}, 97 | "output_type": "execute_result" 98 | } 99 | ], 100 | "source": [ 101 | "mlp=MLPClassifier(random_state=42)\n", 102 | "mlp.fit(X_egitim,y_egitim)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 13, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "name": "stdout", 112 | "output_type": "stream", 113 | "text": [ 114 | "0.9389671361502347\n", 115 | "0.916083916083916\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "print(mlp.score(X_egitim,y_egitim))\n", 121 | "print(mlp.score(X_test,y_test))" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 16, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "from sklearn.preprocessing import StandardScaler\n", 131 | "scaler=StandardScaler()\n", 132 | "scaler.fit(X_egitim)\n", 133 | "scaler.fit(X_test)\n", 134 | "X_egitim_scaled=scaler.transform(X_egitim)\n", 135 | "X_test_scaled=scaler.transform(X_test)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 21, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "name": "stdout", 145 | "output_type": "stream", 146 | "text": [ 147 | "0.9882629107981221\n", 148 | "0.9790209790209791\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "mlp=MLPClassifier(alpha=1,max_iter=1000,random_state=42)\n", 154 | "mlp.fit(X_egitim_scaled,y_egitim)\n", 155 | "print(mlp.score(X_egitim_scaled,y_egitim))\n", 156 | "print(mlp.score(X_test_scaled,y_test))" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "Don't forget to follow us on [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | [Twitter](http://twitter.com/tirendazacademy) | [GitHub](http://github.com/tirendazacademy) | [Linkedin](https://www.linkedin.com/in/tirendaz-academy) | 
[Kaggle](https://www.kaggle.com/tirendazacademy) 😎" 164 | ] 165 | } 166 | ], 167 | "metadata": { 168 | "kernelspec": { 169 | "display_name": "Python 3 (ipykernel)", 170 | "language": "python", 171 | "name": "python3" 172 | }, 173 | "language_info": { 174 | "codemirror_mode": { 175 | "name": "ipython", 176 | "version": 3 177 | }, 178 | "file_extension": ".py", 179 | "mimetype": "text/x-python", 180 | "name": "python", 181 | "nbconvert_exporter": "python", 182 | "pygments_lexer": "ipython3", 183 | "version": "3.8.12" 184 | } 185 | }, 186 | "nbformat": 4, 187 | "nbformat_minor": 2 188 | } 189 | -------------------------------------------------------------------------------- /16-Unsupervised Learning & Data Scaling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Unsupervised Learning & Data Scaling" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Hi Guys, Welcome to [Tirendaz Academy](https://youtube.com/c/tirendazacademy) 😀\n", 15 | "
\n", 16 | "In this notebook, I'm going to talk about unsupervised learning & data scaling.\n", 17 | "
\n", 18 | "Happy learning 🐱‍🏍 " 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "from sklearn.datasets import load_breast_cancer" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "kanser=load_breast_cancer()\n", 37 | "from sklearn.model_selection import train_test_split\n", 38 | "X_egitim, X_test, y_egitim, y_test=train_test_split(kanser.data,kanser.target,\n", 39 | " random_state=1)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "data": { 49 | "text/plain": [ 50 | "MinMaxScaler(copy=True, feature_range=(0, 1))" 51 | ] 52 | }, 53 | "execution_count": 3, 54 | "metadata": {}, 55 | "output_type": "execute_result" 56 | } 57 | ], 58 | "source": [ 59 | "from sklearn.preprocessing import MinMaxScaler\n", 60 | "scaler=MinMaxScaler()\n", 61 | "scaler.fit(X_egitim)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 4, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "X_egitim_olcekli=scaler.transform(X_egitim)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 7, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/plain": [ 81 | "array([2.811e+01, 3.928e+01, 1.885e+02, 2.501e+03, 1.634e-01, 2.867e-01,\n", 82 | " 4.268e-01, 2.012e-01, 3.040e-01, 9.575e-02, 2.873e+00, 4.885e+00,\n", 83 | " 2.198e+01, 5.422e+02, 3.113e-02, 1.354e-01, 3.960e-01, 5.279e-02,\n", 84 | " 6.146e-02, 2.984e-02, 3.604e+01, 4.954e+01, 2.512e+02, 4.254e+03,\n", 85 | " 2.226e-01, 9.379e-01, 1.170e+00, 2.910e-01, 5.774e-01, 1.486e-01])" 86 | ] 87 | }, 88 | "execution_count": 7, 89 | "metadata": {}, 90 | "output_type": "execute_result" 91 | } 92 | ], 93 | "source": [ 94 | "X_egitim.max(axis=0)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 8, 100 | 
"metadata": {}, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/plain": [ 105 | "array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", 106 | " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])" 107 | ] 108 | }, 109 | "execution_count": 8, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "X_egitim_olcekli.max(axis=0)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 9, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "X_test_olcekli=scaler.transform(X_test)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 10, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/plain": [ 135 | "array([ 0.0336031 , 0.0226581 , 0.03144219, 0.01141039, 0.14128374,\n", 136 | " 0.04406704, 0. , 0. , 0.1540404 , -0.00615249,\n", 137 | " -0.00137796, 0.00594501, 0.00430665, 0.00079567, 0.03919502,\n", 138 | " 0.0112206 , 0. , 0. , -0.03191387, 0.00664013,\n", 139 | " 0.02660975, 0.05810235, 0.02031974, 0.00943767, 0.1094235 ,\n", 140 | " 0.02637792, 0. , 0. 
, -0.00023764, -0.00182032])" 141 | ] 142 | }, 143 | "execution_count": 10, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "X_test_olcekli.min(axis=0)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 11, 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "data": { 159 | "text/plain": [ 160 | "array([0.9578778 , 0.81501522, 0.95577362, 0.89353128, 0.81132075,\n", 161 | " 1.21958701, 0.87956888, 0.9333996 , 0.93232323, 1.0371347 ,\n", 162 | " 0.42669616, 0.49765736, 0.44117231, 0.28371044, 0.48703131,\n", 163 | " 0.73863671, 0.76717172, 0.62928585, 1.33685792, 0.39057253,\n", 164 | " 0.89612238, 0.79317697, 0.84859804, 0.74488793, 0.9154725 ,\n", 165 | " 1.13188961, 1.07008547, 0.92371134, 1.20532319, 1.63068851])" 166 | ] 167 | }, 168 | "execution_count": 11, 169 | "metadata": {}, 170 | "output_type": "execute_result" 171 | } 172 | ], 173 | "source": [ 174 | "X_test_olcekli.max(axis=0)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 16, 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "data": { 184 | "text/plain": [ 185 | "0.6293706293706294" 186 | ] 187 | }, 188 | "execution_count": 16, 189 | "metadata": {}, 190 | "output_type": "execute_result" 191 | } 192 | ], 193 | "source": [ 194 | "from sklearn.svm import SVC\n", 195 | "X_egitim, X_test, y_egitim, y_test=train_test_split(kanser.data,kanser.target,\n", 196 | " random_state=0)\n", 197 | "svm=SVC(C=100,gamma='auto')\n", 198 | "svm.fit(X_egitim,y_egitim)\n", 199 | "svm.score(X_test,y_test)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 17, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "scaler=MinMaxScaler()\n", 209 | "scaler.fit(X_egitim)\n", 210 | "X_egitim_olcekli=scaler.transform(X_egitim)\n", 211 | "X_test_olcekli=scaler.transform(X_test)" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 18, 217 | "metadata": 
{}, 218 | "outputs": [ 219 | { 220 | "data": { 221 | "text/plain": [ 222 | "0.965034965034965" 223 | ] 224 | }, 225 | "execution_count": 18, 226 | "metadata": {}, 227 | "output_type": "execute_result" 228 | } 229 | ], 230 | "source": [ 231 | "svm.fit(X_egitim_olcekli,y_egitim)\n", 232 | "svm.score(X_test_olcekli,y_test)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "Don't forget to follow us on [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | [Twitter](http://twitter.com/tirendazacademy) | [GitHub](http://github.com/tirendazacademy) | [Linkedin](https://www.linkedin.com/in/tirendaz-academy) | [Kaggle](https://www.kaggle.com/tirendazacademy) 😎" 240 | ] 241 | } 242 | ], 243 | "metadata": { 244 | "kernelspec": { 245 | "display_name": "Python 3 (ipykernel)", 246 | "language": "python", 247 | "name": "python3" 248 | }, 249 | "language_info": { 250 | "codemirror_mode": { 251 | "name": "ipython", 252 | "version": 3 253 | }, 254 | "file_extension": ".py", 255 | "mimetype": "text/x-python", 256 | "name": "python", 257 | "nbconvert_exporter": "python", 258 | "pygments_lexer": "ipython3", 259 | "version": "3.8.12" 260 | } 261 | }, 262 | "nbformat": 4, 263 | "nbformat_minor": 2 264 | } 265 | -------------------------------------------------------------------------------- /25-PIPELINES.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Pipelines" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Hi Guys, Welcome to [Tirendaz Academy](https://youtube.com/c/tirendazacademy) 😀\n", 15 | "
\n", 16 | "In this notebook, I'm going to talk about pipelines.\n", 17 | "
\n", 18 | "Happy learning 🐱‍🏍 " 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "from sklearn.datasets import samples_generator" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "X,y=samples_generator.make_classification(n_features=20,n_informative=3,\n", 37 | " n_redundant=0, n_classes=4,\n", 38 | " n_clusters_per_class=2)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "name": "stdout", 48 | "output_type": "stream", 49 | "text": [ 50 | "(100, 20)\n", 51 | "(100,)\n" 52 | ] 53 | } 54 | ], 55 | "source": [ 56 | "print(X.shape)\n", 57 | "print(y.shape)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 4, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "array([0, 1, 3, 2, 0, 3, 1, 2, 2, 0, 2, 0, 0, 1, 1, 1, 2, 1, 0, 1, 2, 2,\n", 69 | " 3, 2, 0, 3, 0, 3, 3, 2, 3, 2, 0, 2, 0, 2, 0, 0, 3, 3, 2, 1, 3, 2,\n", 70 | " 0, 3, 2, 0, 0, 1, 0, 3, 3, 3, 1, 3, 3, 2, 1, 1, 1, 0, 0, 1, 2, 2,\n", 71 | " 1, 1, 2, 2, 3, 3, 3, 3, 0, 1, 3, 0, 1, 1, 1, 3, 3, 2, 0, 2, 1, 2,\n", 72 | " 2, 3, 1, 3, 1, 1, 1, 0, 1, 0, 0, 2])" 73 | ] 74 | }, 75 | "execution_count": 4, 76 | "metadata": {}, 77 | "output_type": "execute_result" 78 | } 79 | ], 80 | "source": [ 81 | "y" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 5, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "from sklearn.model_selection import train_test_split\n", 91 | "X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=42)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 6, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "from sklearn.feature_selection import SelectKBest, f_regression" 101 | ] 102 | }, 103 | { 104 | "cell_type": 
"code", 105 | "execution_count": 7, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "anova_filter = SelectKBest(f_regression,k=3)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 8, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "from sklearn.svm import LinearSVC\n", 119 | "clf=LinearSVC()" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 9, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "from sklearn.pipeline import make_pipeline" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 10, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "anova_svm=make_pipeline(anova_filter,clf)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 11, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "Pipeline(memory=None,\n", 149 | " steps=[('selectkbest',\n", 150 | " SelectKBest(k=3,\n", 151 | " score_func=)),\n", 152 | " ('linearsvc',\n", 153 | " LinearSVC(C=1.0, class_weight=None, dual=True,\n", 154 | " fit_intercept=True, intercept_scaling=1,\n", 155 | " loss='squared_hinge', max_iter=1000,\n", 156 | " multi_class='ovr', penalty='l2', random_state=None,\n", 157 | " tol=0.0001, verbose=0))],\n", 158 | " verbose=False)" 159 | ] 160 | }, 161 | "execution_count": 11, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "anova_svm.fit(X_train,y_train)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 12, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "data": { 177 | "text/plain": [ 178 | "array([2, 3, 1, 2, 0, 3, 3, 1, 1, 2, 1, 2, 2, 1, 1, 1, 3, 0, 2, 2, 3, 2,\n", 179 | " 1, 3, 2])" 180 | ] 181 | }, 182 | "execution_count": 12, 183 | "metadata": {}, 184 | "output_type": "execute_result" 185 | } 186 | ], 187 | "source": [ 188 | "y_pred=anova_svm.predict(X_test)\n", 189 | "y_pred" 
190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 13, 195 | "metadata": {}, 196 | "outputs": [ 197 | { 198 | "data": { 199 | "text/plain": [ 200 | "0.56" 201 | ] 202 | }, 203 | "execution_count": 13, 204 | "metadata": {}, 205 | "output_type": "execute_result" 206 | } 207 | ], 208 | "source": [ 209 | "anova_svm.score(X_test,y_test)" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 14, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "from sklearn.metrics import classification_report" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 15, 224 | "metadata": {}, 225 | "outputs": [ 226 | { 227 | "name": "stdout", 228 | "output_type": "stream", 229 | "text": [ 230 | " precision recall f1-score support\n", 231 | "\n", 232 | " 0 1.00 0.29 0.44 7\n", 233 | " 1 0.25 1.00 0.40 2\n", 234 | " 2 0.44 0.67 0.53 6\n", 235 | " 3 1.00 0.60 0.75 10\n", 236 | "\n", 237 | " accuracy 0.56 25\n", 238 | " macro avg 0.67 0.64 0.53 25\n", 239 | "weighted avg 0.81 0.56 0.58 25\n", 240 | "\n" 241 | ] 242 | } 243 | ], 244 | "source": [ 245 | "print(classification_report(y_test,y_pred))" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "## Practice" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 16, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "from sklearn.datasets import load_breast_cancer" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 17, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "cancer=load_breast_cancer()" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 18, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "X_train,X_test,y_train,y_test=train_test_split(cancer.data,cancer.target, random_state=0)" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 19, 285 | 
"metadata": {}, 286 | "outputs": [], 287 | "source": [ 288 | "from sklearn.svm import SVC" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 20, 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [ 297 | "from sklearn.preprocessing import MinMaxScaler" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 22, 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "pp=make_pipeline(MinMaxScaler(),SVC(gamma='auto'))" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 23, 312 | "metadata": {}, 313 | "outputs": [ 314 | { 315 | "data": { 316 | "text/plain": [ 317 | "Pipeline(memory=None,\n", 318 | " steps=[('minmaxscaler', MinMaxScaler(copy=True, feature_range=(0, 1))),\n", 319 | " ('svc',\n", 320 | " SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", 321 | " decision_function_shape='ovr', degree=3, gamma='auto',\n", 322 | " kernel='rbf', max_iter=-1, probability=False,\n", 323 | " random_state=None, shrinking=True, tol=0.001,\n", 324 | " verbose=False))],\n", 325 | " verbose=False)" 326 | ] 327 | }, 328 | "execution_count": 23, 329 | "metadata": {}, 330 | "output_type": "execute_result" 331 | } 332 | ], 333 | "source": [ 334 | "pp.fit(X_train,y_train)" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 24, 340 | "metadata": {}, 341 | "outputs": [ 342 | { 343 | "data": { 344 | "text/plain": [ 345 | "0.951048951048951" 346 | ] 347 | }, 348 | "execution_count": 24, 349 | "metadata": {}, 350 | "output_type": "execute_result" 351 | } 352 | ], 353 | "source": [ 354 | "pp.score(X_test,y_test)" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": 25, 360 | "metadata": {}, 361 | "outputs": [], 362 | "source": [ 363 | "from sklearn.datasets import load_boston" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": 26, 369 | "metadata": {}, 370 | "outputs": [], 371 | "source": [ 372 | 
"boston=load_boston()" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": 27, 378 | "metadata": {}, 379 | "outputs": [], 380 | "source": [ 381 | "X_train,X_test,y_train,y_test=train_test_split(boston.data,boston.target, random_state=0)" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 28, 387 | "metadata": {}, 388 | "outputs": [], 389 | "source": [ 390 | "from sklearn.preprocessing import PolynomialFeatures\n", 391 | "from sklearn.preprocessing import StandardScaler\n", 392 | "from sklearn.linear_model import Ridge" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 29, 398 | "metadata": {}, 399 | "outputs": [], 400 | "source": [ 401 | "pp=make_pipeline(StandardScaler(),PolynomialFeatures(),Ridge())" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": 42, 407 | "metadata": {}, 408 | "outputs": [], 409 | "source": [ 410 | "param_grid={'polynomialfeatures__degree':[1,2,3],\n", 411 | " 'ridge__alpha':[0.001,0.01,0.1,1,10,100]}" 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": 43, 417 | "metadata": {}, 418 | "outputs": [], 419 | "source": [ 420 | "from sklearn.model_selection import GridSearchCV" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": 44, 426 | "metadata": {}, 427 | "outputs": [], 428 | "source": [ 429 | "grid=GridSearchCV(pp, param_grid=param_grid,cv=5,n_jobs=-1)" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 45, 435 | "metadata": {}, 436 | "outputs": [ 437 | { 438 | "data": { 439 | "text/plain": [ 440 | "GridSearchCV(cv=5, error_score='raise-deprecating',\n", 441 | " estimator=Pipeline(memory=None,\n", 442 | " steps=[('standardscaler',\n", 443 | " StandardScaler(copy=True,\n", 444 | " with_mean=True,\n", 445 | " with_std=True)),\n", 446 | " ('polynomialfeatures',\n", 447 | " PolynomialFeatures(degree=2,\n", 448 | " include_bias=True,\n", 449 | " 
interaction_only=False,\n", 450 | " order='C')),\n", 451 | " ('ridge',\n", 452 | " Ridge(alpha=1.0, copy_X=True,\n", 453 | " fit_intercept=True, max_iter=None,\n", 454 | " normalize=False,\n", 455 | " random_state=None, solver='auto',\n", 456 | " tol=0.001))],\n", 457 | " verbose=False),\n", 458 | " iid='warn', n_jobs=-1,\n", 459 | " param_grid={'polynomialfeatures__degree': [1, 2, 3],\n", 460 | " 'ridge__alpha': [0.001, 0.01, 0.1, 1, 10, 100]},\n", 461 | " pre_dispatch='2*n_jobs', refit=True, return_train_score=False,\n", 462 | " scoring=None, verbose=0)" 463 | ] 464 | }, 465 | "execution_count": 45, 466 | "metadata": {}, 467 | "output_type": "execute_result" 468 | } 469 | ], 470 | "source": [ 471 | "grid.fit(X_train,y_train)" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": 46, 477 | "metadata": {}, 478 | "outputs": [ 479 | { 480 | "data": { 481 | "text/plain": [ 482 | "{'polynomialfeatures__degree': 2, 'ridge__alpha': 10}" 483 | ] 484 | }, 485 | "execution_count": 46, 486 | "metadata": {}, 487 | "output_type": "execute_result" 488 | } 489 | ], 490 | "source": [ 491 | "grid.best_params_" 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": 47, 497 | "metadata": {}, 498 | "outputs": [ 499 | { 500 | "data": { 501 | "text/plain": [ 502 | "0.7683045464100146" 503 | ] 504 | }, 505 | "execution_count": 47, 506 | "metadata": {}, 507 | "output_type": "execute_result" 508 | } 509 | ], 510 | "source": [ 511 | "grid.score(X_test,y_test)" 512 | ] 513 | }, 514 | { 515 | "cell_type": "markdown", 516 | "metadata": {}, 517 | "source": [ 518 | "Don't forget to follow us on [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | [Twitter](http://twitter.com/tirendazacademy) | [GitHub](http://github.com/tirendazacademy) | [Linkedin](https://www.linkedin.com/in/tirendaz-academy) | [Kaggle](https://www.kaggle.com/tirendazacademy) 😎" 519 | ] 520 | } 521 | ], 522 | "metadata": { 523 | 
"kernelspec": { 524 | "display_name": "Python 3 (ipykernel)", 525 | "language": "python", 526 | "name": "python3" 527 | }, 528 | "language_info": { 529 | "codemirror_mode": { 530 | "name": "ipython", 531 | "version": 3 532 | }, 533 | "file_extension": ".py", 534 | "mimetype": "text/x-python", 535 | "name": "python", 536 | "nbconvert_exporter": "python", 537 | "pygments_lexer": "ipython3", 538 | "version": "3.8.12" 539 | } 540 | }, 541 | "nbformat": 4, 542 | "nbformat_minor": 2 543 | } 544 | -------------------------------------------------------------------------------- /26-TEXT ANALYSIS.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Text Analysis" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Hi Guys, Welcome to [Tirendaz Academy](https://youtube.com/c/tirendazacademy) 😀\n", 15 | "
\n", 16 | "In this notebook, I'm going to talk about text analysis.\n", 17 | "
\n", 18 | "Happy learning 🐱‍🏍 " 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 9, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "categories=['rec.motorcycles','rec.sport.baseball','comp.graphics','rec.sport.hockey']" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 10, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "from sklearn.datasets import load_files" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 32, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "twenty_train=load_files('Data/20newsbydate/20news-bydate-train/',\n", 46 | " categories=categories,\n", 47 | " shuffle=True,\n", 48 | " random_state=42,\n", 49 | " encoding='utf-8',\n", 50 | " decode_error='ignore')" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 33, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "data": { 60 | "text/plain": [ 61 | "sklearn.utils.Bunch" 62 | ] 63 | }, 64 | "execution_count": 33, 65 | "metadata": {}, 66 | "output_type": "execute_result" 67 | } 68 | ], 69 | "source": [ 70 | "type(twenty_train)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 34, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/plain": [ 81 | "['comp.graphics', 'rec.motorcycles', 'rec.sport.baseball', 'rec.sport.hockey']" 82 | ] 83 | }, 84 | "execution_count": 34, 85 | "metadata": {}, 86 | "output_type": "execute_result" 87 | } 88 | ], 89 | "source": [ 90 | "twenty_train.target_names" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 14, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "data": { 100 | "text/plain": [ 101 | "2379" 102 | ] 103 | }, 104 | "execution_count": 14, 105 | "metadata": {}, 106 | "output_type": "execute_result" 107 | } 108 | ], 109 | "source": [ 110 | "len(twenty_train.data)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 15, 116 | "metadata": {}, 
117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/plain": [ 121 | "array([1, 1, 1, 3, 2, 2, 3, 1, 0, 0])" 122 | ] 123 | }, 124 | "execution_count": 15, 125 | "metadata": {}, 126 | "output_type": "execute_result" 127 | } 128 | ], 129 | "source": [ 130 | "twenty_train.target[:10]" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 16, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "from sklearn.feature_extraction.text import CountVectorizer\n", 140 | "count_vect=CountVectorizer()" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 17, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "X_train_counts=count_vect.fit_transform(twenty_train.data)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 18, 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "data": { 159 | "text/plain": [ 160 | "(2379, 32550)" 161 | ] 162 | }, 163 | "execution_count": 18, 164 | "metadata": {}, 165 | "output_type": "execute_result" 166 | } 167 | ], 168 | "source": [ 169 | "X_train_counts.shape" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "## tf–idf technique" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 19, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "from sklearn.feature_extraction.text import TfidfTransformer\n", 186 | "tf_transformer=TfidfTransformer(use_idf=False).fit(X_train_counts)\n", 187 | "X_train_tf=tf_transformer.transform(X_train_counts)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 20, 193 | "metadata": {}, 194 | "outputs": [ 195 | { 196 | "data": { 197 | "text/plain": [ 198 | "(2379, 32550)" 199 | ] 200 | }, 201 | "execution_count": 20, 202 | "metadata": {}, 203 | "output_type": "execute_result" 204 | } 205 | ], 206 | "source": [ 207 | "X_train_tf.shape" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | 
"metadata": {}, 213 | "source": [ 214 | "## Building the model" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 21, 220 | "metadata": {}, 221 | "outputs": [], 222 | "source": [ 223 | "from sklearn.naive_bayes import MultinomialNB\n", 224 | "clf=MultinomialNB().fit(X_train_tf, twenty_train.target)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 22, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "docs_new=['brake-lamp is good','this computer is fast']" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 23, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "X_new_count=count_vect.transform(docs_new)\n", 243 | "X_new_tf=tf_transformer.transform(X_new_count)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "## Predicting the data" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 24, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "predicted=clf.predict(X_new_tf)" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 27, 265 | "metadata": {}, 266 | "outputs": [ 267 | { 268 | "name": "stdout", 269 | "output_type": "stream", 270 | "text": [ 271 | "'brake-lamp is good'=>rec.motorcycles\n", 272 | "'this computer is fast'=>comp.graphics\n" 273 | ] 274 | } 275 | ], 276 | "source": [ 277 | "for doc, category in zip(docs_new, predicted):\n", 278 | " print('%r=>%s' %(doc,twenty_train.target_names[category]))" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": [ 285 | "## Pipeline" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 28, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "from sklearn.pipeline import Pipeline" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 29, 300 | "metadata": {}, 301 | "outputs": [], 302 | 
"source": [ 303 | "text_clf=Pipeline([('vect', CountVectorizer()),\n", 304 | " ('tfidf', TfidfTransformer()),\n", 305 | " ('clf',MultinomialNB())])" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": 30, 311 | "metadata": {}, 312 | "outputs": [ 313 | { 314 | "data": { 315 | "text/plain": [ 316 | "Pipeline(memory=None,\n", 317 | " steps=[('vect',\n", 318 | " CountVectorizer(analyzer='word', binary=False,\n", 319 | " decode_error='strict',\n", 320 | " dtype=, encoding='utf-8',\n", 321 | " input='content', lowercase=True, max_df=1.0,\n", 322 | " max_features=None, min_df=1,\n", 323 | " ngram_range=(1, 1), preprocessor=None,\n", 324 | " stop_words=None, strip_accents=None,\n", 325 | " token_pattern='(?u)\\\\b\\\\w\\\\w+\\\\b',\n", 326 | " tokenizer=None, vocabulary=None)),\n", 327 | " ('tfidf',\n", 328 | " TfidfTransformer(norm='l2', smooth_idf=True,\n", 329 | " sublinear_tf=False, use_idf=True)),\n", 330 | " ('clf',\n", 331 | " MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))],\n", 332 | " verbose=False)" 333 | ] 334 | }, 335 | "execution_count": 30, 336 | "metadata": {}, 337 | "output_type": "execute_result" 338 | } 339 | ], 340 | "source": [ 341 | "text_clf.fit(twenty_train.data, twenty_train.target)" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 35, 347 | "metadata": {}, 348 | "outputs": [], 349 | "source": [ 350 | "twenty_test=load_files('Data/20newsbydate/20news-bydate-test/',\n", 351 | " categories=categories,\n", 352 | " shuffle=True,\n", 353 | " random_state=42,\n", 354 | " encoding='utf-8',\n", 355 | " decode_error='ignore')" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 36, 361 | "metadata": {}, 362 | "outputs": [], 363 | "source": [ 364 | "docs_test=twenty_test.data" 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": {}, 370 | "source": [ 371 | "## Predicting the model" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | 
"execution_count": 37, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [ 380 | "predicted=text_clf.predict(docs_test)" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": 39, 386 | "metadata": {}, 387 | "outputs": [ 388 | { 389 | "data": { 390 | "text/plain": [ 391 | "0.9576753000631711" 392 | ] 393 | }, 394 | "execution_count": 39, 395 | "metadata": {}, 396 | "output_type": "execute_result" 397 | } 398 | ], 399 | "source": [ 400 | "import numpy as np\n", 401 | "np.mean(predicted==twenty_test.target)" 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "metadata": {}, 407 | "source": [ 408 | "## SVM" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 40, 414 | "metadata": {}, 415 | "outputs": [], 416 | "source": [ 417 | "from sklearn.linear_model import SGDClassifier" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": 41, 423 | "metadata": {}, 424 | "outputs": [], 425 | "source": [ 426 | "text_clf=Pipeline([('vect', CountVectorizer()),\n", 427 | " ('tfidf', TfidfTransformer()),\n", 428 | " ('clf',SGDClassifier(loss='hinge',\n", 429 | " penalty='l2',\n", 430 | " alpha=1e-3,\n", 431 | " random_state=42,\n", 432 | " max_iter=5,\n", 433 | " tol=None))])" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": 42, 439 | "metadata": {}, 440 | "outputs": [ 441 | { 442 | "data": { 443 | "text/plain": [ 444 | "0.9696778269109286" 445 | ] 446 | }, 447 | "execution_count": 42, 448 | "metadata": {}, 449 | "output_type": "execute_result" 450 | } 451 | ], 452 | "source": [ 453 | "text_clf.fit(twenty_train.data, twenty_train.target)\n", 454 | "predicted=text_clf.predict(docs_test)\n", 455 | "np.mean(predicted==twenty_test.target)" 456 | ] 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": 43, 461 | "metadata": {}, 462 | "outputs": [ 463 | { 464 | "data": { 465 | "text/plain": [ 466 | "array([[382, 2, 5, 0],\n", 467 | " [ 3, 393, 1, 1],\n", 468 | 
" [ 6, 3, 369, 19],\n", 469 | " [ 1, 1, 6, 391]], dtype=int64)" 470 | ] 471 | }, 472 | "execution_count": 43, 473 | "metadata": {}, 474 | "output_type": "execute_result" 475 | } 476 | ], 477 | "source": [ 478 | "from sklearn import metrics\n", 479 | "metrics.confusion_matrix(twenty_test.target, predicted)" 480 | ] 481 | }, 482 | { 483 | "cell_type": "markdown", 484 | "metadata": {}, 485 | "source": [ 486 | "Don't forget to follow us on [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | [Twitter](http://twitter.com/tirendazacademy) | [GitHub](http://github.com/tirendazacademy) | [Linkedin](https://www.linkedin.com/in/tirendaz-academy) | [Kaggle](https://www.kaggle.com/tirendazacademy) 😎" 487 | ] 488 | } 489 | ], 490 | "metadata": { 491 | "kernelspec": { 492 | "display_name": "Python 3 (ipykernel)", 493 | "language": "python", 494 | "name": "python3" 495 | }, 496 | "language_info": { 497 | "codemirror_mode": { 498 | "name": "ipython", 499 | "version": 3 500 | }, 501 | "file_extension": ".py", 502 | "mimetype": "text/x-python", 503 | "name": "python", 504 | "nbconvert_exporter": "python", 505 | "pygments_lexer": "ipython3", 506 | "version": "3.8.12" 507 | } 508 | }, 509 | "nbformat": 4, 510 | "nbformat_minor": 2 511 | } 512 | -------------------------------------------------------------------------------- /28-FEATURE SELECTION.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Feature Selection" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Hi Guys, Welcome to [Tirendaz Academy](https://youtube.com/c/tirendazacademy) 😀\n", 15 | "
\n", 16 | "In this notebook, I'm going to talk about feature selection.\n", 17 | "
\n", 18 | "Happy learning 🐱‍🏍 " 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "## VarianceThreshold" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "from sklearn.feature_selection import VarianceThreshold" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 1, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "from sklearn.datasets import load_iris\n", 44 | "iris=load_iris()\n", 45 | "X=iris.data\n", 46 | "y=iris.target" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 2, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "from sklearn.linear_model import LogisticRegression\n", 56 | "logreg = LogisticRegression()\n", 57 | "logreg=LogisticRegression(solver='lbfgs',\n", 58 | " multi_class='auto',\n", 59 | " max_iter=1000) " 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 7, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "text/plain": [ 70 | "array([0., 0., 0.])" 71 | ] 72 | }, 73 | "execution_count": 7, 74 | "metadata": {}, 75 | "output_type": "execute_result" 76 | } 77 | ], 78 | "source": [ 79 | "from sklearn.model_selection import KFold\n", 80 | "from sklearn.model_selection import cross_val_score\n", 81 | "kfold = KFold(n_splits=3, shuffle=True, random_state=0)\n", 82 | "cross_val_score(logreg, iris.data, iris.target, cv=kfold)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 14, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "thresholder=VarianceThreshold(threshold=.6)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 15, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "X_high_variance=thresholder.fit_transform(X)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 16, 106 | "metadata": {}, 107 | "outputs": [ 108 
| { 109 | "name": "stdout", 110 | "output_type": "stream", 111 | "text": [ 112 | "(150, 4)\n", 113 | "(150, 2)\n" 114 | ] 115 | } 116 | ], 117 | "source": [ 118 | "print(X.shape)\n", 119 | "print(X_high_variance.shape)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 17, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "from sklearn.feature_selection import SelectKBest\n", 129 | "from sklearn.feature_selection import chi2" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 18, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "X_new=SelectKBest(chi2,k=2).fit_transform(X,y)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 19, 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "name": "stdout", 148 | "output_type": "stream", 149 | "text": [ 150 | "(150, 4)\n", 151 | "(150, 2)\n" 152 | ] 153 | } 154 | ], 155 | "source": [ 156 | "print(X.shape)\n", 157 | "print(X_new.shape)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "## Univariate feature selection" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 20, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "from sklearn.feature_selection import f_classif" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 21, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "from sklearn.feature_selection import SelectPercentile" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 22, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "X_new2=SelectPercentile(f_classif, percentile=75)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 23, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "X_new2=X_new2.fit_transform(X,y)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | 
"execution_count": 24, 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "name": "stdout", 210 | "output_type": "stream", 211 | "text": [ 212 | "(150, 4)\n", 213 | "(150, 3)\n" 214 | ] 215 | } 216 | ], 217 | "source": [ 218 | "print(X.shape)\n", 219 | "print(X_new2.shape)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 15, 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "data": { 229 | "text/plain": [ 230 | "array([0.5 , 0.66666667, 1. ])" 231 | ] 232 | }, 233 | "execution_count": 15, 234 | "metadata": {}, 235 | "output_type": "execute_result" 236 | } 237 | ], 238 | "source": [ 239 | "from sklearn.model_selection import GroupKFold\n", 240 | "from sklearn.datasets import make_blobs\n", 241 | "X, y = make_blobs(n_samples=12, random_state=0)\n", 242 | "groups = [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]\n", 243 | "scores = cross_val_score(logreg, X, y, groups, \n", 244 | " cv=GroupKFold(n_splits=3))\n", 245 | "scores" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "Don't forget to follow us on [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | [Twitter](http://twitter.com/tirendazacademy) | [GitHub](http://github.com/tirendazacademy) | [Linkedin](https://www.linkedin.com/in/tirendaz-academy) | [Kaggle](https://www.kaggle.com/tirendazacademy) 😎" 253 | ] 254 | } 255 | ], 256 | "metadata": { 257 | "kernelspec": { 258 | "display_name": "Python 3 (ipykernel)", 259 | "language": "python", 260 | "name": "python3" 261 | }, 262 | "language_info": { 263 | "codemirror_mode": { 264 | "name": "ipython", 265 | "version": 3 266 | }, 267 | "file_extension": ".py", 268 | "mimetype": "text/x-python", 269 | "name": "python", 270 | "nbconvert_exporter": "python", 271 | "pygments_lexer": "ipython3", 272 | "version": "3.8.12" 273 | } 274 | }, 275 | "nbformat": 4, 276 | "nbformat_minor": 2 277 | } 278 | 
-------------------------------------------------------------------------------- /34-Introduction to Scikit-Learn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6364347b", 6 | "metadata": {}, 7 | "source": [ 8 | "# Introduction to Scikit-Learn " 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "f762c904", 14 | "metadata": {}, 15 | "source": [ 16 | "Hi Guys, Welcome to [Tirendaz Academy](https://youtube.com/c/tirendazacademy) 😀\n", 17 | "\n", 18 | "---\n", 19 | "I'm going to talk about the Scikit-Learn library.\n", 20 | "In short, the notebook covers the following topics:\n", 21 | "- Building the model\n", 22 | "- Data scaling\n", 23 | "- Pipelines\n", 24 | "- Model evaluation\n", 25 | "- Automatic parameter searches\n", 26 | "---\n", 27 | "Happy Learning 😀" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "id": "30cdddb0", 33 | "metadata": {}, 34 | "source": [ 35 | "## Building the Model" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 1, 41 | "id": "480fe85e", 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "from sklearn.ensemble import RandomForestClassifier" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 2, 51 | "id": "4cad5f14", 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "clf = RandomForestClassifier(random_state=0)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 3, 61 | "id": "32955226", 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "X = [[1,2,3],\n", 66 | " [11,12,13]]\n", 67 | "y = [0,1]" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 4, 73 | "id": "d62cc629", 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "RandomForestClassifier(random_state=0)" 80 | ] 81 | }, 82 | "execution_count": 4, 83 | "metadata": {}, 84 | "output_type": "execute_result" 85
| } 86 | ], 87 | "source": [ 88 | "clf.fit(X,y)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 5, 94 | "id": "2ace347e", 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "data": { 99 | "text/plain": [ 100 | "array([0, 1])" 101 | ] 102 | }, 103 | "execution_count": 5, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 | ], 108 | "source": [ 109 | "clf.predict(X)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 6, 115 | "id": "cee2fa98", 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/plain": [ 121 | "array([0, 1])" 122 | ] 123 | }, 124 | "execution_count": 6, 125 | "metadata": {}, 126 | "output_type": "execute_result" 127 | } 128 | ], 129 | "source": [ 130 | "clf.predict([[4,5,6],[14,15,16]])" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "id": "66b5faca", 136 | "metadata": {}, 137 | "source": [ 138 | "## Data Scaling" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 7, 144 | "id": "da27d7f2", 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "from sklearn.preprocessing import StandardScaler" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 8, 154 | "id": "65a57d41", 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "X = [[0,15],\n", 159 | " [1,-10]]" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 9, 165 | "id": "cf25ea2e", 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "data": { 170 | "text/plain": [ 171 | "array([[-1., 1.],\n", 172 | " [ 1., -1.]])" 173 | ] 174 | }, 175 | "execution_count": 9, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "StandardScaler().fit(X).transform(X)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "id": "d73ec657", 187 | "metadata": {}, 188 | "source": [ 189 | "## Pipelines" 190 | ] 191 | }, 192 | { 193 | "cell_type": 
"code", 194 | "execution_count": 10, 195 | "id": "dbd78b6d", 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "from sklearn.pipeline import make_pipeline" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 11, 205 | "id": "bc0fdc58", 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [ 209 | "from sklearn.linear_model import LogisticRegression" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 12, 215 | "id": "569ef689", 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "pipe = make_pipeline(\n", 220 | " StandardScaler(),\n", 221 | " LogisticRegression())" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 13, 227 | "id": "0d65c2e6", 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "from sklearn.datasets import load_iris" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 14, 237 | "id": "0bd2e277", 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "X, y = load_iris(return_X_y=True)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 15, 247 | "id": "efb8fd0a", 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [ 251 | "from sklearn.model_selection import train_test_split" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 16, 257 | "id": "7174c6b0", 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 17, 267 | "id": "c29bb899", 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "data": { 272 | "text/plain": [ 273 | "Pipeline(steps=[('standardscaler', StandardScaler()),\n", 274 | " ('logisticregression', LogisticRegression())])" 275 | ] 276 | }, 277 | "execution_count": 17, 278 | "metadata": {}, 279 | "output_type": "execute_result" 280 | } 281 | ], 282 | 
"source": [ 283 | "pipe.fit(X_train, y_train)" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 18, 289 | "id": "d1f495da", 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "from sklearn.metrics import accuracy_score" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 19, 299 | "id": "73aad939", 300 | "metadata": {}, 301 | "outputs": [ 302 | { 303 | "data": { 304 | "text/plain": [ 305 | "0.9736842105263158" 306 | ] 307 | }, 308 | "execution_count": 19, 309 | "metadata": {}, 310 | "output_type": "execute_result" 311 | } 312 | ], 313 | "source": [ 314 | "accuracy_score(pipe.predict(X_test), y_test)" 315 | ] 316 | }, 317 | { 318 | "cell_type": "markdown", 319 | "id": "31cf4451", 320 | "metadata": {}, 321 | "source": [ 322 | "## Model Evaluation" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": 20, 328 | "id": "596b5681", 329 | "metadata": {}, 330 | "outputs": [], 331 | "source": [ 332 | "from sklearn.datasets import make_regression" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 21, 338 | "id": "29dfa76e", 339 | "metadata": {}, 340 | "outputs": [], 341 | "source": [ 342 | "X, y = make_regression(n_samples=1000, random_state=0)" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 22, 348 | "id": "195c4393", 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [ 352 | "from sklearn.linear_model import LinearRegression" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": 23, 358 | "id": "641adfca", 359 | "metadata": {}, 360 | "outputs": [], 361 | "source": [ 362 | "lr = LinearRegression()" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": 24, 368 | "id": "8fe5ed29", 369 | "metadata": {}, 370 | "outputs": [], 371 | "source": [ 372 | "from sklearn.model_selection import cross_validate" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": 
25, 378 | "id": "c5be67b4", 379 | "metadata": {}, 380 | "outputs": [], 381 | "source": [ 382 | "result = cross_validate(lr, X, y)" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 26, 388 | "id": "850270d8", 389 | "metadata": {}, 390 | "outputs": [ 391 | { 392 | "data": { 393 | "text/plain": [ 394 | "array([1., 1., 1., 1., 1.])" 395 | ] 396 | }, 397 | "execution_count": 26, 398 | "metadata": {}, 399 | "output_type": "execute_result" 400 | } 401 | ], 402 | "source": [ 403 | "result[\"test_score\"]" 404 | ] 405 | }, 406 | { 407 | "cell_type": "markdown", 408 | "id": "0673e6ea", 409 | "metadata": {}, 410 | "source": [ 411 | "## Automatic Parameter Searches" 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": 27, 417 | "id": "38f558da", 418 | "metadata": {}, 419 | "outputs": [], 420 | "source": [ 421 | "from sklearn.datasets import fetch_california_housing " 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 28, 427 | "id": "4c726be6", 428 | "metadata": {}, 429 | "outputs": [], 430 | "source": [ 431 | "X, y = fetch_california_housing(return_X_y=True)" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": 29, 437 | "id": "d1711a04", 438 | "metadata": {}, 439 | "outputs": [], 440 | "source": [ 441 | "X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=0)" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": 30, 447 | "id": "781a0520", 448 | "metadata": {}, 449 | "outputs": [], 450 | "source": [ 451 | "from sklearn.model_selection import RandomizedSearchCV" 452 | ] 453 | }, 454 | { 455 | "cell_type": "code", 456 | "execution_count": 31, 457 | "id": "ae20b66b", 458 | "metadata": {}, 459 | "outputs": [], 460 | "source": [ 461 | "from scipy.stats import randint" 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": 33, 467 | "id": "8d39412a", 468 | "metadata": {}, 469 | "outputs": [], 470 | "source": [ 471 | 
"param_distributions = {\"n_estimators\" : randint(1,5),\n", 472 | " \"max_depth\" : randint(5,10)} " 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": 34, 478 | "id": "225a140f", 479 | "metadata": {}, 480 | "outputs": [], 481 | "source": [ 482 | "from sklearn.ensemble import RandomForestRegressor\n", 483 | "search = RandomizedSearchCV(\n", 484 | " estimator = RandomForestRegressor(random_state=0),\n", 485 | " n_iter = 5, \n", 486 | " param_distributions = param_distributions,\n", 487 | " random_state=0)" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": 35, 493 | "id": "6cb5ab7a", 494 | "metadata": {}, 495 | "outputs": [ 496 | { 497 | "data": { 498 | "text/plain": [ 499 | "RandomizedSearchCV(estimator=RandomForestRegressor(random_state=0), n_iter=5,\n", 500 | " param_distributions={'max_depth': ,\n", 501 | " 'n_estimators': },\n", 502 | " random_state=0)" 503 | ] 504 | }, 505 | "execution_count": 35, 506 | "metadata": {}, 507 | "output_type": "execute_result" 508 | } 509 | ], 510 | "source": [ 511 | "search.fit(X_train, y_train)" 512 | ] 513 | }, 514 | { 515 | "cell_type": "code", 516 | "execution_count": 36, 517 | "id": "150412c8", 518 | "metadata": {}, 519 | "outputs": [ 520 | { 521 | "data": { 522 | "text/plain": [ 523 | "{'max_depth': 9, 'n_estimators': 4}" 524 | ] 525 | }, 526 | "execution_count": 36, 527 | "metadata": {}, 528 | "output_type": "execute_result" 529 | } 530 | ], 531 | "source": [ 532 | "search.best_params_" 533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": 37, 538 | "id": "e18e5390", 539 | "metadata": {}, 540 | "outputs": [ 541 | { 542 | "data": { 543 | "text/plain": [ 544 | "0.735363411343253" 545 | ] 546 | }, 547 | "execution_count": 37, 548 | "metadata": {}, 549 | "output_type": "execute_result" 550 | } 551 | ], 552 | "source": [ 553 | "search.score(X_test, y_test)" 554 | ] 555 | }, 556 | { 557 | "cell_type": "markdown", 558 | "id": "6b493bac", 559 | "metadata": 
{}, 560 | "source": [ 561 | "Don't forget to follow us on [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | [Twitter](http://twitter.com/tirendazacademy) | [GitHub](http://github.com/tirendazacademy) | [Linkedin](https://www.linkedin.com/in/tirendaz-academy) | [Kaggle](https://www.kaggle.com/tirendazacademy) 😎" 562 | ] 563 | } 564 | ], 565 | "metadata": { 566 | "kernelspec": { 567 | "display_name": "Python 3 (ipykernel)", 568 | "language": "python", 569 | "name": "python3" 570 | }, 571 | "language_info": { 572 | "codemirror_mode": { 573 | "name": "ipython", 574 | "version": 3 575 | }, 576 | "file_extension": ".py", 577 | "mimetype": "text/x-python", 578 | "name": "python", 579 | "nbconvert_exporter": "python", 580 | "pygments_lexer": "ipython3", 581 | "version": "3.8.12" 582 | } 583 | }, 584 | "nbformat": 4, 585 | "nbformat_minor": 5 586 | } 587 | -------------------------------------------------------------------------------- /Datasets/heart.csv: -------------------------------------------------------------------------------- 1 | AGE_50,MD_50,SBP_50,DBP_50,HT_50,WT_50,CHOL_50,SES,CL_STATUS,MD_62,SBP_62,DBP_62,CHOL_62,WT_62,IHD_DX,DEATH 2 | 42,1,110,65,64,147,291,2,8,4,120,78,271,146,2,1 3 | 53,1,130,72,69,167,278,1,6,2,122,68,250,165,9,1 4 | 53,2,120,90,70,222,342,4,8,1,132,90,304,223,2,1 5 | 48,4,120,80,72,229,239,4,8,2,118,68,209,227,3,1 6 | 53,3,118,74,66,134,243,3,8,5,118,56,261,138,2,1 7 | 58,2,122,72,69,135,210,3,8,4,130,72,245,136,2,1 8 | 48,4,130,90,67,165,219,3,8,4,138,86,275,166,2,1 9 | 60,1,124,80,74,235,203,3,8,1,160,90,271,226,3,1 10 | 59,4,160,100,72,206,269,5,8,3,150,100,291,198,3,1 11 | 40,3,120,80,69,148,185,3,8,3,110,64,241,152,2,1 12 | 56,3,115,80,64,147,260,3,8,4,140,80,326,152,2,1 13 | 58,3,140,90,63,121,312,5,8,1,120,75,234,114,2,1 14 | 64,2,135,85,64,189,185,1,8,4,140,78,153,168,3,1 15 | 57,2,110,78,70,173,282,3,8,2,144,74,236,171,2,1 16 | 32,1,112,70,69,171,254,2,8,4,142,96,249,179,3,1 17 | 
59,1,140,90,65,150,303,2,8,1,205,85,302,153,2,1 18 | 48,1,130,80,64,147,271,4,8,3,165,85,251,163,3,1 19 | 47,2,115,84,67,211,304,1,8,1,155,80,278,149,9,1 20 | 47,2,130,80,67,147,334,1,8,3,138,85,303,147,2,0 21 | 28,1,120,86,70,189,328,3,8,2,128,88,300,194,6,0 22 | 37,3,95,55,69,190,226,3,8,3,155,105,311,191,2,0 23 | 54,1,141,100,65,171,363,3,7,2,180,100,276,154,2,1 24 | 38,1,130,90,67,170,399,2,8,2,132,86,353,167,2,0 25 | 52,2,125,90,65,141,199,2,8,2,152,100,234,135,6,0 26 | 46,1,110,70,67,159,271,3,8,3,152,88,299,164,6,1 27 | 51,4,120,80,70,139,261,3,8,1,130,95,285,173,6,0 28 | 49,1,120,80,68,194,263,3,8,1,178,76,230,196,9,0 29 | 46,4,110,70,66,160,242,3,8,3,130,90,254,175,6,0 30 | 26,1,110,80,70,206,260,3,8,3,130,76,325,231,2,0 31 | 35,2,120,80,72,191,321,3,8,1,130,80,334,169,6,0 32 | 45,2,108,80,70,155,258,5,8,4,138,88,259,182,2,0 33 | 57,1,130,80,69,184,167,3,8,3,155,90,237,173,2,0 34 | 24,3,104,75,70,157,185,5,8,2,120,80,236,166,7,0 35 | 64,1,144,95,66,191,244,1,8,2,198,110,227,187,7,0 36 | 34,3,142,102,71,176,314,1,7,1,145,100,233,176,9,0 37 | 30,3,110,80,71,198,234,2,8,1,100,65,227,187,2,0 38 | 52,4,145,90,66,183,289,3,7,3,150,88,299,143,6,0 39 | 56,2,125,75,65,122,329,5,8,1,140,80,253,130,2,1 40 | 44,1,125,90,65,156,439,1,8,1,130,80,342,152,2,0 41 | 45,3,130,90,73,143,243,4,8,4,158,78,249,146,6,0 42 | 29,2,140,95,64,148,419,5,7,1,130,85,254,139,5,0 43 | 42,2,108,80,67,145,285,2,8,1,115,70,249,146,6,0 44 | 46,3,134,90,73,198,271,3,8,1,125,80,219,205,4,0 45 | 45,4,150,104,71,187,278,1,7,1,210,110,368,189,2,0 46 | 51,3,120,90,66,163,226,3,8,1,130,80,271,167,1,0 47 | 44,2,120,90,72,211,188,3,8,2,138,88,240,196,2,1 48 | 32,2,108,78,66,151,235,4,8,2,120,70,226,155,6,0 49 | 34,2,130,90,61,120,317,2,5,2,122,68,248,118,6,0 50 | 30,2,120,84,68,170,258,3,8,1,165,110,359,181,4,0 51 | 40,1,112,80,69,167,334,3,8,2,100,70,306,154,6,0 52 | 34,3,124,38,66,195,345,3,8,1,130,85,309,183,4,0 53 | 43,3,118,72,71,149,224,3,8,2,120,70,209,147,2,0 54 | 
39,2,164,110,66,245,220,4,7,1,145,95,258,194,2,0 55 | 45,2,110,80,69,170,347,3,8,3,150,90,296,186,2,0 56 | 41,2,115,80,68,145,339,5,8,1,160,80,254,149,1,0 57 | 57,1,130,90,68,188,353,3,8,1,160,80,230,172,7,0 58 | 53,3,110,80,67,150,235,3,8,4,120,76,221,170,9,0 59 | 55,2,125,90,65,163,235,2,8,2,170,94,255,139,6,0 60 | 57,2,210,110,67,165,220,1,4,2,150,94,178,174,6,0 61 | 38,3,115,90,70,187,385,3,8,2,142,108,334,202,9,0 62 | 45,4,110,80,67,209,240,3,8,2,156,108,251,205,1,0 63 | 33,1,130,90,68,200,188,3,8,4,125,70,210,191,7,0 64 | 61,2,160,100,68,160,241,2,7,3,170,110,235,163,6,0 65 | 36,3,100,75,72,164,241,3,8,4,125,72,271,175,2,0 66 | 37,3,130,88,67,178,295,4,8,3,170,90,367,194,1,0 67 | 51,3,125,85,73,198,283,4,8,4,175,85,309,211,9,0 68 | 46,1,110,80,69,178,277,3,8,4,150,88,311,186,2,0 69 | 51,3,138,100,72,208,296,3,7,1,135,95,286,199,8,0 70 | 60,3,130,84,69,122,243,2,8,4,166,102,291,113,0,1 71 | 57,2,110,80,71,224,158,1,8,3,135,80,202,224,0,1 72 | 63,4,130,80,67,143,243,3,8,2,166,90,273,121,0,1 73 | 63,3,115,65,69,196,278,2,8,1,150,65,248,192,0,1 74 | 68,3,120,80,63,109,215,1,8,2,136,76,251,108,0,1 75 | 57,3,145,85,66,140,308,1,8,3,150,80,247,134,0,1 76 | 64,1,150,90,70,147,226,2,8,4,140,85,192,145,0,1 77 | 63,2,115,75,67,180,303,3,8,2,122,80,289,126,0,1 78 | 62,1,120,80,68,174,535,2,8,5,146,76,268,156,0,1 79 | 55,1,140,82,69,145,199,3,8,2,176,106,218,155,0,1 80 | 50,3,150,90,71,170,326,1,7,1,195,98,204,178,0,1 81 | 39,1,114,72,65,156,187,5,8,3,168,110,199,170,0,1 82 | 50,3,150,115,71,220,283,3,7,1,164,120,275,239,0,1 83 | 42,3,105,78,67,166,195,3,8,2,112,80,218,168,0,1 84 | 53,4,100,80,71,199,209,2,8,1,110,65,220,185,0,1 85 | 56,1,150,90,72,233,284,1,8,5,134,74,170,225,0,1 86 | 62,4,166,90,66,130,258,3,7,3,150,70,185,126,0,1 87 | 61,3,138,80,63,158,285,3,8,1,120,70,225,113,0,1 88 | 57,2,110,68,71,166,300,3,8,1,105,60,262,143,0,1 89 | 43,1,120,85,70,134,220,3,8,4,140,85,354,141,0,1 90 | 65,1,170,105,67,183,214,5,3,4,190,100,148,161,0,1 91 | 
49,2,120,90,69,139,273,3,8,3,160,104,308,163,0,1 92 | 47,1,110,70,70,130,203,3,8,1,125,75,198,127,0,1 93 | 59,1,110,80,70,167,220,3,8,1,125,75,275,152,0,1 94 | 53,4,120,90,62,166,253,3,8,4,134,78,316,168,0,1 95 | 43,1,120,80,72,171,198,3,8,3,152,86,190,178,0,1 96 | 52,3,140,86,67,128,300,1,8,1,185,95,260,144,0,1 97 | 65,4,125,85,67,164,228,5,8,1,130,70,240,152,0,1 98 | 50,4,110,70,67,162,239,3,8,3,70,70,233,155,0,1 99 | 33,4,106,80,67,151,191,2,8,4,128,70,207,142,0,1 100 | 49,4,120,80,63,142,283,3,8,5,116,88,311,160,0,1 101 | 62,2,100,65,69,141,224,3,8,2,170,78,242,125,0,1 102 | 45,4,130,90,70,200,220,3,8,2,130,90,203,168,0,1 103 | 53,3,125,88,67,167,226,3,8,3,160,100,212,165,0,1 104 | 30,2,160,85,71,155,187,3,8,1,130,95,184,156,0,1 105 | 25,1,110,80,74,190,235,3,8,2,116,90,280,210,0,1 106 | 26,3,118,80,65,120,328,2,8,3,132,90,354,137,0,1 107 | 69,1,160,90,67,185,314,4,8,3,170,85,191,159,0,1 108 | 60,2,140,80,68,170,356,1,6,5,192,74,250,150,0,0 109 | 45,3,130,100,72,168,252,5,3,1,150,100,234,176,0,0 110 | 55,1,110,80,75,198,358,3,8,3,110,80,264,177,0,0 111 | 63,3,190,100,66,187,207,5,3,2,148,70,210,188,0,0 112 | 52,1,170,100,65,164,218,5,7,1,110,70,261,130,0,0 113 | 53,1,200,140,67,197,210,3,3,1,215,100,139,164,0,1 114 | 55,3,118,82,69,124,265,3,8,5,132,90,284,124,0,1 115 | 48,3,120,85,68,161,267,2,8,1,110,75,258,162,0,0 116 | 50,3,105,70,65,161,325,4,8,3,125,75,186,114,0,0 117 | 44,1,130,80,69,202,246,3,8,1,130,60,176,165,0,1 118 | 49,4,120,80,69,189,295,3,8,1,140,75,305,203,0,0 119 | 61,1,150,90,69,142,247,3,6,2,150,90,218,174,0,1 120 | 42,4,120,85,67,192,250,5,8,1,162,98,268,207,0,0 121 | 47,1,110,80,71,228,250,5,8,2,128,88,249,207,0,0 122 | 40,2,100,70,68,169,260,2,6,1,112,70,269,151,0,0 123 | 33,3,125,88,67,149,220,3,8,1,135,80,225,166,0,0 124 | 29,3,130,90,70,173,280,3,8,1,145,90,308,180,0,0 125 | 43,3,120,80,71,164,260,3,8,1,125,85,271,176,0,0 126 | 51,4,130,90,69,193,290,2,8,3,120,80,254,178,0,0 127 | 57,3,160,90,63,144,280,4,7,3,170,80,321,152,0,0 128 | 
30,3,115,86,70,172,210,2,8,1,108,80,201,187,0,0 129 | 44,2,120,90,67,178,260,2,8,3,130,96,242,173,0,0 130 | 44,2,120,90,72,196,240,3,8,4,150,84,240,193,0,0 131 | 38,2,106,80,67,181,210,5,8,3,110,80,208,179,0,0 132 | 35,1,124,90,68,189,320,3,8,4,138,88,338,201,0,0 133 | 34,3,126,85,64,165,310,2,8,1,130,90,296,167,0,0 134 | 30,1,104,70,69,161,300,3,8,1,135,85,290,178,0,0 135 | 51,3,140,92,69,170,310,1,8,3,172,100,282,163,0,0 136 | 51,4,120,80,70,200,260,2,8,2,144,80,283,169,0,0 137 | 57,3,108,76,66,161,200,2,8,3,106,75,150,161,0,0 138 | 46,2,115,90,69,189,300,3,8,2,110,60,169,150,0,0 139 | 50,2,132,88,69,220,220,3,8,3,182,100,204,246,0,0 140 | 25,4,120,84,72,180,220,3,8,3,130,75,222,179,0,0 141 | 55,2,120,92,65,154,310,2,8,2,126,72,265,138,0,0 142 | 47,2,130,75,67,145,260,3,8,4,138,76,289,163,0,0 143 | 49,1,130,80,64,162,230,5,8,1,195,105,220,194,0,0 144 | 47,3,130,95,64,163,280,4,8,1,130,70,190,152,0,0 145 | 34,3,105,65,65,137,220,5,8,2,118,82,306,152,0,0 146 | 47,2,120,80,68,152,220,3,8,1,145,76,270,160,0,0 147 | 46,1,120,80,68,152,270,2,8,3,150,100,326,166,0,0 148 | 45,2,120,82,71,171,240,1,8,4,140,84,238,169,0,0 149 | 37,1,150,105,69,205,220,3,7,3,180,110,214,187,0,0 150 | 36,3,120,90,67,188,220,5,8,4,125,82,189,182,0,0 151 | 42,1,110,70,71,162,190,3,8,3,155,90,174,187,0,0 152 | 57,3,130,90,71,181,260,2,8,3,190,104,288,185,0,0 153 | 43,2,120,80,69,201,300,1,8,4,138,82,204,198,0,0 154 | 58,3,110,78,69,175,300,3,8,4,148,80,229,198,0,0 155 | 34,1,110,70,72,157,220,3,8,1,135,80,243,158,0,0 156 | 29,3,148,98,69,203,260,5,8,2,150,110,245,194,0,0 157 | 43,2,110,75,67,143,190,3,8,3,124,90,204,139,0,0 158 | 48,3,122,94,73,198,250,1,8,3,170,120,276,211,0,0 159 | 41,3,120,80,64,147,220,4,8,1,170,110,333,160,0,0 160 | 54,3,120,78,69,137,210,3,8,4,160,80,329,143,0,0 161 | 47,1,110,80,66,187,210,3,8,1,120,85,279,188,0,0 162 | 35,1,110,80,70,154,250,3,8,1,110,75,256,140,0,0 163 | 48,2,142,90,67,163,220,4,8,1,180,80,223,167,0,0 164 | 
49,1,112,78,64,149,240,3,8,4,125,72,253,153,0,0 165 | 25,3,115,78,73,180,160,3,8,1,120,80,216,200,0,0 166 | 48,4,150,110,66,189,220,1,3,3,180,105,224,178,0,0 167 | 42,1,120,90,75,207,210,3,8,3,140,94,219,224,0,0 168 | 34,3,152,102,68,185,300,3,7,3,195,125,280,180,0,0 169 | 39,3,130,92,67,144,210,3,8,2,128,88,204,151,0,0 170 | 46,3,108,75,71,140,260,3,8,3,120,75,228,148,0,0 171 | 30,2,106,80,70,173,260,4,8,5,118,80,254,186,0,0 172 | 64,4,104,74,63,146,300,3,8,1,130,80,249,143,0,0 173 | 47,3,104,80,73,177,390,3,8,1,110,70,339,189,0,0 174 | 36,3,135,80,69,155,290,3,8,2,120,84,238,157,0,0 175 | 30,4,120,80,70,130,260,3,8,4,135,80,260,157,0,0 176 | 48,3,130,92,70,198,230,3,8,3,140,100,235,204,0,0 177 | 34,1,110,80,66,155,250,3,8,1,180,100,278,121,0,0 178 | 26,3,110,75,66,136,230,3,8,3,118,76,183,134,0,0 179 | 22,4,120,90,69,192,240,5,8,1,150,105,269,201,0,0 180 | 26,2,88,60,70,178,300,2,8,2,112,76,308,206,0,0 181 | 54,2,100,64,71,173,300,4,8,1,125,75,259,202,0,0 182 | 53,3,138,90,66,215,310,4,8,2,148,88,336,202,0,0 183 | 48,2,120,75,70,203,220,3,8,1,130,70,273,213,0,0 184 | 42,3,115,75,71,204,220,2,8,3,140,80,225,224,0,0 185 | 42,3,122,78,66,128,190,2,8,2,112,66,196,127,0,0 186 | 60,4,128,80,70,141,230,3,8,3,140,80,224,122,0,0 187 | 35,2,100,78,68,141,220,3,8,2,144,90,239,138,0,0 188 | 52,2,120,70,68,135,250,3,8,1,135,80,300,137,0,0 189 | 44,2,150,95,61,127,260,3,3,1,155,95,312,130,0,0 190 | 41,2,142,96,66,145,300,2,8,4,148,88,315,143,0,0 191 | 28,2,115,65,66,150,220,2,8,3,124,86,276,149,0,0 192 | 36,3,118,84,63,153,300,2,8,2,124,80,252,149,0,0 193 | 54,3,148,90,71,140,220,1,8,1,230,105,265,143,0,0 194 | 49,2,110,75,70,141,150,3,8,4,170,98,221,147,0,0 195 | 49,4,105,75,69,144,250,3,8,1,110,70,254,144,0,0 196 | 61,3,122,78,64,133,180,3,8,2,170,72,229,142,0,0 197 | 50,1,115,80,66,148,300,2,8,1,115,65,273,152,0,0 198 | 23,1,110,70,69,137,120,3,8,2,112,76,198,153,0,0 199 | 20,3,130,80,66,150,210,5,0,1,130,85,274,158,0,0 200 | 46,3,140,84,66,138,130,4,6,2,148,88,160,157,0,0 
201 | 36,1,100,70,70,157,260,3,8,3,120,86,251,152,0,0 202 | -------------------------------------------------------------------------------- /Datasets/iris.txt: -------------------------------------------------------------------------------- 1 | "sepal_length" "sepal_width" "petal_length" "petal_width" "species" 2 | 5.1 3.5 1.4 0.2 "setosa" 3 | 4.9 3 1.4 0.2 "setosa" 4 | 4.7 3.2 1.3 0.2 "setosa" 5 | 4.6 3.1 1.5 0.2 "setosa" 6 | 5 3.6 1.4 0.2 "setosa" 7 | 5.4 3.9 1.7 0.4 "setosa" 8 | 4.6 3.4 1.4 0.3 "setosa" 9 | 5 3.4 1.5 0.2 "setosa" 10 | 4.4 2.9 1.4 0.2 "setosa" 11 | 4.9 3.1 1.5 0.1 "setosa" 12 | 5.4 3.7 1.5 0.2 "setosa" 13 | 4.8 3.4 1.6 0.2 "setosa" 14 | 4.8 3 1.4 0.1 "setosa" 15 | 4.3 3 1.1 0.1 "setosa" 16 | 5.8 4 1.2 0.2 "setosa" 17 | 5.7 4.4 1.5 0.4 "setosa" 18 | 5.4 3.9 1.3 0.4 "setosa" 19 | 5.1 3.5 1.4 0.3 "setosa" 20 | 5.7 3.8 1.7 0.3 "setosa" 21 | 5.1 3.8 1.5 0.3 "setosa" 22 | 5.4 3.4 1.7 0.2 "setosa" 23 | 5.1 3.7 1.5 0.4 "setosa" 24 | 4.6 3.6 1 0.2 "setosa" 25 | 5.1 3.3 1.7 0.5 "setosa" 26 | 4.8 3.4 1.9 0.2 "setosa" 27 | 5 3 1.6 0.2 "setosa" 28 | 5 3.4 1.6 0.4 "setosa" 29 | 5.2 3.5 1.5 0.2 "setosa" 30 | 5.2 3.4 1.4 0.2 "setosa" 31 | 4.7 3.2 1.6 0.2 "setosa" 32 | 4.8 3.1 1.6 0.2 "setosa" 33 | 5.4 3.4 1.5 0.4 "setosa" 34 | 5.2 4.1 1.5 0.1 "setosa" 35 | 5.5 4.2 1.4 0.2 "setosa" 36 | 4.9 3.1 1.5 0.2 "setosa" 37 | 5 3.2 1.2 0.2 "setosa" 38 | 5.5 3.5 1.3 0.2 "setosa" 39 | 4.9 3.6 1.4 0.1 "setosa" 40 | 4.4 3 1.3 0.2 "setosa" 41 | 5.1 3.4 1.5 0.2 "setosa" 42 | 5 3.5 1.3 0.3 "setosa" 43 | 4.5 2.3 1.3 0.3 "setosa" 44 | 4.4 3.2 1.3 0.2 "setosa" 45 | 5 3.5 1.6 0.6 "setosa" 46 | 5.1 3.8 1.9 0.4 "setosa" 47 | 4.8 3 1.4 0.3 "setosa" 48 | 5.1 3.8 1.6 0.2 "setosa" 49 | 4.6 3.2 1.4 0.2 "setosa" 50 | 5.3 3.7 1.5 0.2 "setosa" 51 | 5 3.3 1.4 0.2 "setosa" 52 | 7 3.2 4.7 1.4 "versicolor" 53 | 6.4 3.2 4.5 1.5 "versicolor" 54 | 6.9 3.1 4.9 1.5 "versicolor" 55 | 5.5 2.3 4 1.3 "versicolor" 56 | 6.5 2.8 4.6 1.5 "versicolor" 57 | 5.7 2.8 4.5 1.3 "versicolor" 58 | 6.3 3.3 4.7 1.6 
"versicolor" 59 | 4.9 2.4 3.3 1 "versicolor" 60 | 6.6 2.9 4.6 1.3 "versicolor" 61 | 5.2 2.7 3.9 1.4 "versicolor" 62 | 5 2 3.5 1 "versicolor" 63 | 5.9 3 4.2 1.5 "versicolor" 64 | 6 2.2 4 1 "versicolor" 65 | 6.1 2.9 4.7 1.4 "versicolor" 66 | 5.6 2.9 3.6 1.3 "versicolor" 67 | 6.7 3.1 4.4 1.4 "versicolor" 68 | 5.6 3 4.5 1.5 "versicolor" 69 | 5.8 2.7 4.1 1 "versicolor" 70 | 6.2 2.2 4.5 1.5 "versicolor" 71 | 5.6 2.5 3.9 1.1 "versicolor" 72 | 5.9 3.2 4.8 1.8 "versicolor" 73 | 6.1 2.8 4 1.3 "versicolor" 74 | 6.3 2.5 4.9 1.5 "versicolor" 75 | 6.1 2.8 4.7 1.2 "versicolor" 76 | 6.4 2.9 4.3 1.3 "versicolor" 77 | 6.6 3 4.4 1.4 "versicolor" 78 | 6.8 2.8 4.8 1.4 "versicolor" 79 | 6.7 3 5 1.7 "versicolor" 80 | 6 2.9 4.5 1.5 "versicolor" 81 | 5.7 2.6 3.5 1 "versicolor" 82 | 5.5 2.4 3.8 1.1 "versicolor" 83 | 5.5 2.4 3.7 1 "versicolor" 84 | 5.8 2.7 3.9 1.2 "versicolor" 85 | 6 2.7 5.1 1.6 "versicolor" 86 | 5.4 3 4.5 1.5 "versicolor" 87 | 6 3.4 4.5 1.6 "versicolor" 88 | 6.7 3.1 4.7 1.5 "versicolor" 89 | 6.3 2.3 4.4 1.3 "versicolor" 90 | 5.6 3 4.1 1.3 "versicolor" 91 | 5.5 2.5 4 1.3 "versicolor" 92 | 5.5 2.6 4.4 1.2 "versicolor" 93 | 6.1 3 4.6 1.4 "versicolor" 94 | 5.8 2.6 4 1.2 "versicolor" 95 | 5 2.3 3.3 1 "versicolor" 96 | 5.6 2.7 4.2 1.3 "versicolor" 97 | 5.7 3 4.2 1.2 "versicolor" 98 | 5.7 2.9 4.2 1.3 "versicolor" 99 | 6.2 2.9 4.3 1.3 "versicolor" 100 | 5.1 2.5 3 1.1 "versicolor" 101 | 5.7 2.8 4.1 1.3 "versicolor" 102 | 6.3 3.3 6 2.5 "virginica" 103 | 5.8 2.7 5.1 1.9 "virginica" 104 | 7.1 3 5.9 2.1 "virginica" 105 | 6.3 2.9 5.6 1.8 "virginica" 106 | 6.5 3 5.8 2.2 "virginica" 107 | 7.6 3 6.6 2.1 "virginica" 108 | 4.9 2.5 4.5 1.7 "virginica" 109 | 7.3 2.9 6.3 1.8 "virginica" 110 | 6.7 2.5 5.8 1.8 "virginica" 111 | 7.2 3.6 6.1 2.5 "virginica" 112 | 6.5 3.2 5.1 2 "virginica" 113 | 6.4 2.7 5.3 1.9 "virginica" 114 | 6.8 3 5.5 2.1 "virginica" 115 | 5.7 2.5 5 2 "virginica" 116 | 5.8 2.8 5.1 2.4 "virginica" 117 | 6.4 3.2 5.3 2.3 "virginica" 118 | 6.5 3 5.5 1.8 "virginica" 119 | 7.7 3.8 6.7 
2.2 "virginica" 120 | 7.7 2.6 6.9 2.3 "virginica" 121 | 6 2.2 5 1.5 "virginica" 122 | 6.9 3.2 5.7 2.3 "virginica" 123 | 5.6 2.8 4.9 2 "virginica" 124 | 7.7 2.8 6.7 2 "virginica" 125 | 6.3 2.7 4.9 1.8 "virginica" 126 | 6.7 3.3 5.7 2.1 "virginica" 127 | 7.2 3.2 6 1.8 "virginica" 128 | 6.2 2.8 4.8 1.8 "virginica" 129 | 6.1 3 4.9 1.8 "virginica" 130 | 6.4 2.8 5.6 2.1 "virginica" 131 | 7.2 3 5.8 1.6 "virginica" 132 | 7.4 2.8 6.1 1.9 "virginica" 133 | 7.9 3.8 6.4 2 "virginica" 134 | 6.4 2.8 5.6 2.2 "virginica" 135 | 6.3 2.8 5.1 1.5 "virginica" 136 | 6.1 2.6 5.6 1.4 "virginica" 137 | 7.7 3 6.1 2.3 "virginica" 138 | 6.3 3.4 5.6 2.4 "virginica" 139 | 6.4 3.1 5.5 1.8 "virginica" 140 | 6 3 4.8 1.8 "virginica" 141 | 6.9 3.1 5.4 2.1 "virginica" 142 | 6.7 3.1 5.6 2.4 "virginica" 143 | 6.9 3.1 5.1 2.3 "virginica" 144 | 5.8 2.7 5.1 1.9 "virginica" 145 | 6.8 3.2 5.9 2.3 "virginica" 146 | 6.7 3.3 5.7 2.5 "virginica" 147 | 6.7 3 5.2 2.3 "virginica" 148 | 6.3 2.5 5 1.9 "virginica" 149 | 6.5 3 5.2 2 "virginica" 150 | 6.2 3.4 5.4 2.3 "virginica" 151 | 5.9 3 5.1 1.8 "virginica" 152 | -------------------------------------------------------------------------------- /Datasets/list.csv: -------------------------------------------------------------------------------- 1 | newsgroup,document_id 2 | talk.religion.misc,82757 3 | talk.religion.misc,82758 4 | talk.religion.misc,82759 5 | talk.religion.misc,82760 6 | talk.religion.misc,82763 7 | talk.religion.misc,82766 8 | talk.religion.misc,82767 9 | talk.religion.misc,82771 10 | talk.religion.misc,82772 11 | talk.religion.misc,82774 12 | talk.religion.misc,82775 13 | talk.religion.misc,82776 14 | talk.religion.misc,82777 15 | talk.religion.misc,82778 16 | talk.religion.misc,82779 17 | talk.religion.misc,82781 18 | talk.religion.misc,82782 19 | talk.religion.misc,82783 20 | talk.religion.misc,82784 21 | talk.religion.misc,82785 22 | talk.religion.misc,82786 23 | talk.religion.misc,82787 24 | talk.religion.misc,82788 25 | talk.religion.misc,82792 
26 | talk.religion.misc,82793 27 | talk.religion.misc,82794 28 | talk.religion.misc,82795 29 | talk.religion.misc,82796 30 | talk.religion.misc,82797 31 | talk.religion.misc,82798 32 | talk.religion.misc,82799 33 | talk.religion.misc,82800 34 | talk.religion.misc,82801 35 | talk.religion.misc,82802 36 | talk.religion.misc,82804 37 | talk.religion.misc,82806 38 | talk.religion.misc,82807 39 | talk.religion.misc,82808 40 | talk.religion.misc,82810 41 | talk.religion.misc,82812 42 | talk.religion.misc,82813 43 | talk.religion.misc,82814 44 | talk.religion.misc,82815 45 | talk.religion.misc,82816 46 | talk.religion.misc,82818 47 | talk.religion.misc,82819 48 | talk.religion.misc,83437 49 | talk.religion.misc,83438 50 | talk.religion.misc,83439 51 | talk.religion.misc,83440 52 | talk.religion.misc,83441 53 | talk.religion.misc,83442 54 | talk.religion.misc,83444 55 | talk.religion.misc,83445 56 | talk.religion.misc,83447 57 | talk.religion.misc,83449 58 | talk.religion.misc,83450 59 | talk.religion.misc,83451 60 | talk.religion.misc,83453 61 | talk.religion.misc,83454 62 | talk.religion.misc,83455 63 | talk.religion.misc,83456 64 | talk.religion.misc,83457 65 | talk.religion.misc,83460 66 | talk.religion.misc,83461 67 | talk.religion.misc,83463 68 | talk.religion.misc,83467 69 | talk.religion.misc,83468 70 | talk.religion.misc,83469 71 | talk.religion.misc,83470 72 | talk.religion.misc,83471 73 | talk.religion.misc,83472 74 | talk.religion.misc,83473 75 | talk.religion.misc,83476 76 | talk.religion.misc,83477 77 | talk.religion.misc,83478 78 | talk.religion.misc,83479 79 | talk.religion.misc,83481 80 | talk.religion.misc,83482 81 | talk.religion.misc,83483 82 | talk.religion.misc,83484 83 | talk.religion.misc,83485 84 | talk.religion.misc,83486 85 | talk.religion.misc,83487 86 | talk.religion.misc,83488 87 | talk.religion.misc,83490 88 | talk.religion.misc,83491 89 | talk.religion.misc,83492 90 | talk.religion.misc,83494 91 | talk.religion.misc,83495 92 | 
talk.religion.misc,83496 93 | talk.religion.misc,83497 94 | talk.religion.misc,83498 95 | talk.religion.misc,83499 96 | talk.religion.misc,83500 97 | talk.religion.misc,83501 98 | talk.religion.misc,83503 99 | talk.religion.misc,83504 100 | talk.religion.misc,83505 101 | talk.religion.misc,83506 102 | talk.religion.misc,83507 103 | talk.religion.misc,83508 104 | talk.religion.misc,83509 105 | talk.religion.misc,83510 106 | talk.religion.misc,83511 107 | talk.religion.misc,83512 108 | talk.religion.misc,83513 109 | talk.religion.misc,83514 110 | talk.religion.misc,83515 111 | talk.religion.misc,83516 112 | talk.religion.misc,83517 113 | talk.religion.misc,83518 114 | talk.religion.misc,83519 115 | talk.religion.misc,83520 116 | talk.religion.misc,83522 117 | talk.religion.misc,83523 118 | talk.religion.misc,83524 119 | talk.religion.misc,83525 120 | talk.religion.misc,83526 121 | talk.religion.misc,83527 122 | talk.religion.misc,83528 123 | talk.religion.misc,83529 124 | talk.religion.misc,83535 125 | talk.religion.misc,83544 126 | talk.religion.misc,83547 127 | talk.religion.misc,83558 128 | talk.religion.misc,83561 129 | talk.religion.misc,83562 130 | talk.religion.misc,83568 131 | talk.religion.misc,83572 132 | talk.religion.misc,83581 133 | talk.religion.misc,83582 134 | talk.religion.misc,83585 135 | talk.religion.misc,83586 136 | talk.religion.misc,83587 137 | talk.religion.misc,83591 138 | talk.religion.misc,83592 139 | talk.religion.misc,83593 140 | talk.religion.misc,83594 141 | talk.religion.misc,83601 142 | talk.religion.misc,83602 143 | talk.religion.misc,83608 144 | talk.religion.misc,83609 145 | talk.religion.misc,83610 146 | talk.religion.misc,83614 147 | talk.religion.misc,83617 148 | talk.religion.misc,83620 149 | talk.religion.misc,83621 150 | talk.religion.misc,83622 151 | talk.religion.misc,83623 152 | talk.religion.misc,83624 153 | talk.religion.misc,83627 154 | talk.religion.misc,83629 155 | talk.religion.misc,83630 156 | 
talk.religion.misc,83642 157 | talk.religion.misc,83643 158 | talk.religion.misc,83646 159 | talk.religion.misc,83650 160 | talk.religion.misc,83651 161 | talk.religion.misc,83658 162 | talk.religion.misc,83659 163 | talk.religion.misc,83660 164 | talk.religion.misc,83661 165 | talk.religion.misc,83662 166 | talk.religion.misc,83668 167 | talk.religion.misc,83669 168 | talk.religion.misc,83670 169 | talk.religion.misc,83671 170 | talk.religion.misc,83672 171 | talk.religion.misc,83673 172 | talk.religion.misc,83674 173 | talk.religion.misc,83677 174 | talk.religion.misc,83680 175 | talk.religion.misc,83681 176 | talk.religion.misc,83686 177 | talk.religion.misc,83687 178 | talk.religion.misc,83688 179 | talk.religion.misc,83689 180 | talk.religion.misc,83690 181 | talk.religion.misc,83691 182 | talk.religion.misc,83701 183 | talk.religion.misc,83704 184 | talk.religion.misc,83705 185 | talk.religion.misc,83706 186 | talk.religion.misc,83708 187 | talk.religion.misc,83711 188 | talk.religion.misc,83713 189 | talk.religion.misc,83714 190 | talk.religion.misc,83719 191 | talk.religion.misc,83722 192 | talk.religion.misc,83725 193 | talk.religion.misc,83727 194 | talk.religion.misc,83728 195 | talk.religion.misc,83729 196 | talk.religion.misc,83732 197 | talk.religion.misc,83736 198 | talk.religion.misc,83738 199 | talk.religion.misc,83740 200 | talk.religion.misc,83741 201 | talk.religion.misc,83744 202 | talk.religion.misc,83745 203 | talk.religion.misc,83776 204 | talk.religion.misc,83777 205 | talk.religion.misc,83781 206 | talk.religion.misc,83783 207 | talk.religion.misc,83786 208 | talk.religion.misc,83788 209 | talk.religion.misc,83789 210 | talk.religion.misc,83790 211 | talk.religion.misc,83795 212 | talk.religion.misc,83798 213 | talk.religion.misc,83799 214 | talk.religion.misc,83800 215 | talk.religion.misc,83801 216 | talk.religion.misc,83803 217 | talk.religion.misc,83805 218 | talk.religion.misc,83807 219 | talk.religion.misc,83808 220 | 
talk.religion.misc,83811 221 | talk.religion.misc,83812 222 | talk.religion.misc,83816 223 | talk.religion.misc,83817 224 | talk.religion.misc,83818 225 | talk.religion.misc,83823 226 | talk.religion.misc,83827 227 | talk.religion.misc,83828 228 | talk.religion.misc,83829 229 | talk.religion.misc,83830 230 | talk.religion.misc,83835 231 | talk.religion.misc,83841 232 | talk.religion.misc,83842 233 | talk.religion.misc,83843 234 | talk.religion.misc,83844 235 | talk.religion.misc,83845 236 | talk.religion.misc,83846 237 | talk.religion.misc,83847 238 | talk.religion.misc,83848 239 | talk.religion.misc,83849 240 | talk.religion.misc,83852 241 | talk.religion.misc,83855 242 | talk.religion.misc,83856 243 | talk.religion.misc,83860 244 | talk.religion.misc,83863 245 | talk.religion.misc,83867 246 | talk.religion.misc,83876 247 | talk.religion.misc,83880 248 | talk.religion.misc,83882 249 | talk.religion.misc,83884 250 | talk.religion.misc,83885 251 | talk.religion.misc,83891 252 | talk.religion.misc,83892 253 | talk.religion.misc,83895 254 | talk.religion.misc,83897 255 | talk.religion.misc,83899 256 | talk.religion.misc,83900 257 | talk.religion.misc,83901 258 | talk.religion.misc,83902 259 | talk.religion.misc,83905 260 | talk.religion.misc,83907 261 | talk.religion.misc,83910 262 | talk.religion.misc,83911 263 | talk.religion.misc,83913 264 | talk.religion.misc,83915 265 | talk.religion.misc,83918 266 | talk.religion.misc,83919 267 | talk.religion.misc,83922 268 | talk.religion.misc,83929 269 | talk.religion.misc,83931 270 | talk.religion.misc,83932 271 | talk.religion.misc,83934 272 | talk.religion.misc,83936 273 | talk.religion.misc,83937 274 | talk.religion.misc,83943 275 | talk.religion.misc,83944 276 | talk.religion.misc,83957 277 | talk.religion.misc,83967 278 | talk.religion.misc,83968 279 | talk.religion.misc,83971 280 | talk.religion.misc,83972 281 | talk.religion.misc,83974 282 | talk.religion.misc,83975 283 | talk.religion.misc,83976 284 | 
talk.religion.misc,83977 285 | talk.religion.misc,83978 286 | talk.religion.misc,83979 287 | talk.religion.misc,83980 288 | talk.religion.misc,83983 289 | talk.religion.misc,83985 290 | talk.religion.misc,83986 291 | talk.religion.misc,83992 292 | talk.religion.misc,83994 293 | talk.religion.misc,83995 294 | talk.religion.misc,83998 295 | talk.religion.misc,83999 296 | talk.religion.misc,84004 297 | talk.religion.misc,84006 298 | talk.religion.misc,84007 299 | talk.religion.misc,84008 300 | talk.religion.misc,84009 301 | talk.religion.misc,84011 302 | talk.religion.misc,84014 303 | talk.religion.misc,84015 304 | talk.religion.misc,84018 305 | talk.religion.misc,84019 306 | talk.religion.misc,84020 307 | talk.religion.misc,84021 308 | talk.religion.misc,84022 309 | talk.religion.misc,84023 310 | talk.religion.misc,84024 311 | talk.religion.misc,84025 312 | talk.religion.misc,84033 313 | talk.religion.misc,84042 314 | talk.religion.misc,84043 315 | talk.religion.misc,84047 316 | talk.religion.misc,84048 317 | talk.religion.misc,84052 318 | talk.religion.misc,84053 319 | talk.religion.misc,84054 320 | talk.religion.misc,84055 321 | talk.religion.misc,84056 322 | talk.religion.misc,84057 323 | talk.religion.misc,84058 324 | talk.religion.misc,84059 325 | talk.religion.misc,84060 326 | talk.religion.misc,84061 327 | talk.religion.misc,84062 328 | talk.religion.misc,84063 329 | talk.religion.misc,84064 330 | talk.religion.misc,84065 331 | talk.religion.misc,84066 332 | talk.religion.misc,84067 333 | talk.religion.misc,84069 334 | talk.religion.misc,84070 335 | talk.religion.misc,84071 336 | talk.religion.misc,84072 337 | talk.religion.misc,84073 338 | talk.religion.misc,84074 339 | talk.religion.misc,84075 340 | talk.religion.misc,84076 341 | talk.religion.misc,84077 342 | talk.religion.misc,84078 343 | talk.religion.misc,84079 344 | talk.religion.misc,84080 345 | talk.religion.misc,84081 346 | talk.religion.misc,84082 347 | talk.religion.misc,84083 348 | 
talk.religion.misc,84084 349 | talk.religion.misc,84085 350 | talk.religion.misc,84086 351 | talk.religion.misc,84087 352 | talk.religion.misc,84091 353 | talk.religion.misc,84092 354 | talk.religion.misc,84096 355 | talk.religion.misc,84097 356 | talk.religion.misc,84098 357 | talk.religion.misc,84099 358 | talk.religion.misc,84100 359 | talk.religion.misc,84101 360 | talk.religion.misc,84105 361 | talk.religion.misc,84106 362 | talk.religion.misc,84113 363 | talk.religion.misc,84120 364 | talk.religion.misc,84121 365 | talk.religion.misc,84122 366 | talk.religion.misc,84123 367 | talk.religion.misc,84124 368 | talk.religion.misc,84125 369 | talk.religion.misc,84127 370 | talk.religion.misc,84128 371 | talk.religion.misc,84129 372 | talk.religion.misc,84130 373 | talk.religion.misc,84131 374 | talk.religion.misc,84132 375 | talk.religion.misc,84133 376 | talk.religion.misc,84134 377 | talk.religion.misc,84135 378 | talk.religion.misc,84136 379 | talk.religion.misc,84137 380 | talk.religion.misc,84138 381 | talk.religion.misc,84139 382 | talk.religion.misc,84140 383 | talk.religion.misc,84141 384 | talk.religion.misc,84142 385 | talk.religion.misc,84143 386 | talk.religion.misc,84144 387 | talk.religion.misc,84145 388 | talk.religion.misc,84146 389 | talk.religion.misc,84147 390 | talk.religion.misc,84148 391 | talk.religion.misc,84149 392 | talk.religion.misc,84150 393 | talk.religion.misc,84151 394 | talk.religion.misc,84152 395 | talk.religion.misc,84153 396 | talk.religion.misc,84156 397 | talk.religion.misc,84157 398 | talk.religion.misc,84158 399 | talk.religion.misc,84159 400 | talk.religion.misc,84164 401 | talk.religion.misc,84165 402 | talk.religion.misc,84169 403 | talk.religion.misc,84170 404 | talk.religion.misc,84174 405 | talk.religion.misc,84175 406 | talk.religion.misc,84178 407 | talk.religion.misc,84182 408 | talk.religion.misc,84183 409 | talk.religion.misc,84184 410 | talk.religion.misc,84185 411 | talk.religion.misc,84186 412 | 
talk.religion.misc,84187 413 | talk.religion.misc,84188 414 | talk.religion.misc,84189 415 | talk.religion.misc,84190 416 | talk.religion.misc,84192 417 | talk.religion.misc,84193 418 | talk.religion.misc,84194 419 | talk.religion.misc,84195 420 | talk.religion.misc,84196 421 | talk.religion.misc,84197 422 | talk.religion.misc,84198 423 | talk.religion.misc,84199 424 | talk.religion.misc,84200 425 | talk.religion.misc,84201 426 | talk.religion.misc,84202 427 | talk.religion.misc,84203 428 | talk.religion.misc,84204 429 | talk.religion.misc,84205 430 | talk.religion.misc,84206 431 | talk.religion.misc,84207 432 | talk.religion.misc,84208 433 | talk.religion.misc,84209 434 | talk.religion.misc,84210 435 | talk.religion.misc,84211 436 | talk.religion.misc,84212 437 | talk.religion.misc,84213 438 | talk.religion.misc,84214 439 | talk.religion.misc,84215 440 | talk.religion.misc,84216 441 | talk.religion.misc,84217 442 | talk.religion.misc,84218 443 | talk.religion.misc,84219 444 | talk.religion.misc,84220 445 | talk.religion.misc,84221 446 | talk.religion.misc,84222 447 | talk.religion.misc,84223 448 | talk.religion.misc,84224 449 | talk.religion.misc,84225 450 | talk.religion.misc,84226 451 | talk.religion.misc,84227 452 | talk.religion.misc,84228 453 | talk.religion.misc,84229 454 | talk.religion.misc,84230 455 | talk.religion.misc,84231 456 | talk.religion.misc,84232 457 | talk.religion.misc,84233 458 | talk.religion.misc,84234 459 | talk.religion.misc,84235 460 | talk.religion.misc,84236 461 | talk.religion.misc,84237 462 | talk.religion.misc,84238 463 | talk.religion.misc,84239 464 | talk.religion.misc,84240 465 | talk.religion.misc,84241 466 | talk.religion.misc,84242 467 | talk.religion.misc,84243 468 | talk.religion.misc,84244 469 | talk.religion.misc,84245 470 | talk.religion.misc,84246 471 | talk.religion.misc,84247 472 | talk.religion.misc,84248 473 | talk.religion.misc,84249 474 | talk.religion.misc,84250 475 | talk.religion.misc,84251 476 | 
talk.religion.misc,84252 477 | talk.religion.misc,84253 478 | talk.religion.misc,84254 479 | talk.religion.misc,84255 480 | talk.religion.misc,84256 481 | talk.religion.misc,84257 482 | talk.religion.misc,84258 483 | talk.religion.misc,84259 484 | talk.religion.misc,84260 485 | talk.religion.misc,84261 486 | talk.religion.misc,84262 487 | talk.religion.misc,84263 488 | talk.religion.misc,84264 489 | talk.religion.misc,84265 490 | talk.religion.misc,84266 491 | talk.religion.misc,84267 492 | talk.religion.misc,84268 493 | talk.religion.misc,84269 494 | talk.religion.misc,84270 495 | talk.religion.misc,84271 496 | talk.religion.misc,84272 497 | talk.religion.misc,84273 498 | talk.religion.misc,84274 499 | talk.religion.misc,84275 500 | talk.religion.misc,84276 501 | talk.religion.misc,84277 502 | talk.religion.misc,84278 503 | talk.religion.misc,84279 504 | talk.religion.misc,84280 505 | talk.religion.misc,84281 506 | talk.religion.misc,84282 507 | talk.religion.misc,84283 508 | talk.religion.misc,84284 509 | talk.religion.misc,84285 510 | talk.religion.misc,84286 511 | talk.religion.misc,84288 512 | talk.religion.misc,84289 513 | talk.religion.misc,84290 514 | talk.religion.misc,84291 515 | talk.religion.misc,84292 516 | talk.religion.misc,84293 517 | talk.religion.misc,84294 518 | talk.religion.misc,84295 519 | talk.religion.misc,84296 520 | talk.religion.misc,84297 521 | talk.religion.misc,84298 522 | talk.religion.misc,84299 523 | talk.religion.misc,84300 524 | talk.religion.misc,84301 525 | talk.religion.misc,84302 526 | talk.religion.misc,84303 527 | talk.religion.misc,84304 528 | talk.religion.misc,84305 529 | talk.religion.misc,84306 530 | talk.religion.misc,84307 531 | talk.religion.misc,84308 532 | talk.religion.misc,84309 533 | talk.religion.misc,84310 534 | talk.religion.misc,84311 535 | talk.religion.misc,84312 536 | talk.religion.misc,84313 537 | talk.religion.misc,84314 538 | talk.religion.misc,84315 539 | talk.religion.misc,84316 540 | 
talk.religion.misc,84319 541 | talk.religion.misc,84320 542 | talk.religion.misc,84321 543 | talk.religion.misc,84323 544 | talk.religion.misc,84324 545 | talk.religion.misc,84325 546 | talk.religion.misc,84326 547 | talk.religion.misc,84327 548 | talk.religion.misc,84328 549 | talk.religion.misc,84329 550 | talk.religion.misc,84331 551 | talk.religion.misc,84332 552 | talk.religion.misc,84334 553 | talk.religion.misc,84336 554 | talk.religion.misc,84338 555 | talk.religion.misc,84339 556 | talk.religion.misc,84340 557 | talk.religion.misc,84341 558 | talk.religion.misc,84342 559 | talk.religion.misc,84343 560 | talk.religion.misc,84344 561 | talk.religion.misc,84345 562 | talk.religion.misc,84346 563 | talk.religion.misc,84347 564 | talk.religion.misc,84348 565 | talk.religion.misc,84349 566 | talk.religion.misc,84350 567 | talk.religion.misc,84351 568 | talk.religion.misc,84352 569 | talk.religion.misc,84353 570 | talk.religion.misc,84354 571 | talk.religion.misc,84357 572 | talk.religion.misc,84358 573 | talk.religion.misc,84360 574 | talk.religion.misc,84380 575 | talk.religion.misc,84395 576 | talk.religion.misc,84396 577 | talk.religion.misc,84397 578 | talk.religion.misc,84398 579 | talk.religion.misc,84399 580 | talk.religion.misc,84401 581 | talk.religion.misc,84414 582 | talk.religion.misc,84422 583 | talk.religion.misc,84423 584 | talk.religion.misc,84428 585 | talk.religion.misc,84429 586 | talk.religion.misc,84430 587 | talk.religion.misc,84431 588 | talk.religion.misc,84433 589 | talk.religion.misc,84434 590 | talk.religion.misc,84435 591 | talk.religion.misc,84436 592 | talk.religion.misc,84437 593 | talk.religion.misc,84438 594 | talk.religion.misc,84439 595 | talk.religion.misc,84440 596 | talk.religion.misc,84441 597 | talk.religion.misc,84442 598 | talk.religion.misc,84443 599 | talk.religion.misc,84444 600 | talk.religion.misc,84445 601 | talk.religion.misc,84446 602 | talk.religion.misc,84447 603 | talk.religion.misc,84448 604 | 
talk.religion.misc,84449 605 | talk.religion.misc,84450 606 | talk.religion.misc,84451 607 | talk.religion.misc,84452 608 | talk.religion.misc,84506 609 | talk.religion.misc,84507 610 | talk.religion.misc,84508 611 | talk.religion.misc,84509 612 | talk.religion.misc,84510 613 | talk.religion.misc,84511 614 | talk.religion.misc,84538 615 | talk.religion.misc,84552 616 | talk.religion.misc,84553 617 | talk.religion.misc,84554 618 | talk.religion.misc,84555 619 | talk.religion.misc,84557 620 | talk.religion.misc,84558 621 | talk.religion.misc,84559 622 | talk.religion.misc,84560 623 | talk.religion.misc,84562 624 | talk.religion.misc,84563 625 | talk.religion.misc,84564 626 | talk.religion.misc,84565 627 | talk.religion.misc,84568 628 | talk.religion.misc,84569 629 | talk.religion.misc,84570 630 | -------------------------------------------------------------------------------- /Datasets/tree.dot: -------------------------------------------------------------------------------- 1 | digraph Tree { 2 | node [shape=box, style="filled", color="black"] ; 3 | 0 [label="petal width (cm) <= 0.8\ngini = 0.667\nsamples = 150\nvalue = [50, 50, 50]\nclass = y[0]", fillcolor="#ffffff"] ; 4 | 1 [label="gini = 0.0\nsamples = 50\nvalue = [50, 0, 0]\nclass = y[0]", fillcolor="#e58139"] ; 5 | 0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ; 6 | 2 [label="petal width (cm) <= 1.75\ngini = 0.5\nsamples = 100\nvalue = [0, 50, 50]\nclass = y[1]", fillcolor="#ffffff"] ; 7 | 0 -> 2 [labeldistance=2.5, labelangle=-45, headlabel="False"] ; 8 | 3 [label="gini = 0.168\nsamples = 54\nvalue = [0, 49, 5]\nclass = y[1]", fillcolor="#4de88e"] ; 9 | 2 -> 3 ; 10 | 4 [label="gini = 0.043\nsamples = 46\nvalue = [0, 1, 45]\nclass = y[2]", fillcolor="#843de6"] ; 11 | 2 -> 4 ; 12 | } -------------------------------------------------------------------------------- /Datasets/wine.data: -------------------------------------------------------------------------------- 1 | 
1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065 2 | 1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050 3 | 1,13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185 4 | 1,14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480 5 | 1,13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735 6 | 1,14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450 7 | 1,14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290 8 | 1,14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295 9 | 1,14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045 10 | 1,13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045 11 | 1,14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510 12 | 1,14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280 13 | 1,13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320 14 | 1,14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150 15 | 1,14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547 16 | 1,13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310 17 | 1,14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280 18 | 1,13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130 19 | 1,14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680 20 | 1,13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845 21 | 1,14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780 22 | 1,12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770 23 | 1,13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035 24 | 1,12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015 25 | 1,13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845 26 | 1,13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830 27 | 1,13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195 28 | 1,13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285 29 | 1,13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915 30 | 
1,14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035 31 | 1,13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285 32 | 1,13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515 33 | 1,13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990 34 | 1,13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235 35 | 1,13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095 36 | 1,13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920 37 | 1,13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880 38 | 1,13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105 39 | 1,13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020 40 | 1,14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760 41 | 1,13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795 42 | 1,13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035 43 | 1,13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095 44 | 1,13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680 45 | 1,13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885 46 | 1,14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080 47 | 1,14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065 48 | 1,13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985 49 | 1,14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060 50 | 1,13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260 51 | 1,13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150 52 | 1,13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265 53 | 1,13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190 54 | 1,13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375 55 | 1,13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060 56 | 1,13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120 57 | 1,14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970 58 | 1,13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270 59 | 
1,13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285 60 | 2,12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520 61 | 2,12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680 62 | 2,12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450 63 | 2,13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630 64 | 2,12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420 65 | 2,12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355 66 | 2,12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678 67 | 2,13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502 68 | 2,12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510 69 | 2,13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750 70 | 2,12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718 71 | 2,12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870 72 | 2,13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410 73 | 2,13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472 74 | 2,12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985 75 | 2,11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886 76 | 2,11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428 77 | 2,13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392 78 | 2,11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500 79 | 2,12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750 80 | 2,12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463 81 | 2,12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278 82 | 2,12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714 83 | 2,12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630 84 | 2,13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515 85 | 2,11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520 86 | 2,12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450 87 | 2,12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495 88 | 2,11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562 89 | 
2,11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680 90 | 2,12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625 91 | 2,12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480 92 | 2,12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450 93 | 2,12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495 94 | 2,12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290 95 | 2,11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345 96 | 2,12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937 97 | 2,11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625 98 | 2,12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428 99 | 2,12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660 100 | 2,12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406 101 | 2,12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710 102 | 2,12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562 103 | 2,12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438 104 | 2,11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415 105 | 2,12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672 106 | 2,12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315 107 | 2,12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510 108 | 2,12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488 109 | 2,12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312 110 | 2,11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680 111 | 2,11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562 112 | 2,12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325 113 | 2,11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607 114 | 2,11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434 115 | 2,12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385 116 | 2,11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407 117 | 2,11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495 118 | 2,12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345 119 | 
2,12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372 120 | 2,12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564 121 | 2,11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625 122 | 2,11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465 123 | 2,12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365 124 | 2,13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380 125 | 2,11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380 126 | 2,12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378 127 | 2,12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352 128 | 2,11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466 129 | 2,12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342 130 | 2,12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580 131 | 3,12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630 132 | 3,12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530 133 | 3,12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560 134 | 3,12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600 135 | 3,12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650 136 | 3,12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695 137 | 3,12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720 138 | 3,12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515 139 | 3,13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580 140 | 3,12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590 141 | 3,12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600 142 | 3,13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780 143 | 3,13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520 144 | 3,13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550 145 | 3,12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855 146 | 3,13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830 147 | 3,13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415 148 | 3,12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625 149 | 3,13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650 150 | 
3,13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550 151 | 3,13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500 152 | 3,12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480 153 | 3,13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425 154 | 3,13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675 155 | 3,12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640 156 | 3,13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725 157 | 3,13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480 158 | 3,12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880 159 | 3,14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660 160 | 3,13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620 161 | 3,12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520 162 | 3,13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680 163 | 3,12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570 164 | 3,12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675 165 | 3,13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615 166 | 3,13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520 167 | 3,13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695 168 | 3,12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685 169 | 3,13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750 170 | 3,13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630 171 | 3,12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510 172 | 3,12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470 173 | 3,14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660 174 | 3,13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740 175 | 3,13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750 176 | 3,13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835 177 | 3,13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840 178 | 3,14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560 179 | -------------------------------------------------------------------------------- 
/Images/ML-tutorials.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TirendazAcademy/Machine-Learning-Tutorial/c9df9fcf8ea06ab0b00048ea5bc3afb2c5ce2b72/Images/ML-tutorials.png -------------------------------------------------------------------------------- /Images/Sklearn-Classification-Algorithms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TirendazAcademy/Machine-Learning-Tutorial/c9df9fcf8ea06ab0b00048ea5bc3afb2c5ce2b72/Images/Sklearn-Classification-Algorithms.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![ML](https://github.com/TirendazAcademy/Machine-Learning-Tutorial/blob/main/Images/ML-tutorials.png) 2 | 3 | [![](https://img.shields.io/badge/Python-blue?&style=plastic&logo=python&logoColor=white)]() 4 | [![](https://img.shields.io/badge/Pandas-navy?style=plastic&logo=pandas&logoColor=white)]() 5 | [![](https://img.shields.io/badge/Numpy-black?style=plastic&logo=numpy&logoColor=white)]() 6 | [![](https://img.shields.io/badge/Matplotlib-yellow?style=plastic&logo=matplotlib&logoColor=white)]() 7 | [![](https://img.shields.io/badge/Seaborn-purple?style=plastic&logo=seaborn&logoColor=white)]() 8 | [![](https://img.shields.io/badge/ScikitLearn-darkgreen?style=plastic&logo=scikit-learn&logoColor=white)]() 9 | 10 | Welcome to the Machine Learning with Python repository! This repository is designed to be a comprehensive resource for anyone looking to learn about machine learning using Python. 11 | 12 | This repo contains the Jupyter notebooks and datasets I used on the [Tirendaz Akademi YouTube](https://www.youtube.com/c/TirendazAkademi) channel.
In these tutorials, I used Pandas for data preprocessing, NumPy for multidimensional arrays and matrix operations, Matplotlib and Seaborn for data visualization, and Scikit-Learn to implement machine learning algorithms. Whether you're a beginner or an experienced programmer, this repo has something for everyone. 13 | 14 | The repo is organized into different sections, each focusing on a specific topic related to machine learning. You'll find sections on data preprocessing, regression, classification, clustering, and more. Each section contains a variety of resources, including Jupyter notebooks, Python scripts, and links to external resources. 15 | 16 | Whether you're looking to build predictive models, perform data analysis, or just learn more about machine learning, this repo has everything you need to get started. So dive in and start learning today! 17 | 18 | Let me know if you have any questions. If you enjoy these notebooks and videos, don't forget to give a star ✨. 19 | 20 | # [![](https://img.shields.io/badge/YouTube-Turkish-deeppink?&logo=youtube&logoColor=white)](https://www.youtube.com/watch?v=7p-an2KTO5o&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF) 21 | 22 | - [HOW TO CREATE A MACHINE LEARNING MODEL](https://www.youtube.com/watch?v=4gy_tt9l0fE&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=3) 23 | - [SCIKIT-LEARN TUTORIAL-1](https://www.youtube.com/watch?v=glcVaiuQYK4&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=4) 24 | - [SCIKIT-LEARN TUTORIAL-2](https://www.youtube.com/watch?v=fCoGvpbEtYo&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=5) 25 | - [FEATURE ENGINEERING](https://www.youtube.com/watch?v=zrNDviBZKgA&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=6) 26 | - [K-NEAREST NEIGHBOR (KNN)](https://www.youtube.com/watch?v=IlMzkTcIqjA&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=7) 27 | - [LINEAR REGRESSION](https://www.youtube.com/watch?v=gFzq_oz_mSQ&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=8) 28 | - [LINEAR REGRESSION in 
ACTION](https://www.youtube.com/watch?v=gFzq_oz_mSQ&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=8) 29 | - [RIDGE AND LASSO REGRESSION](https://www.youtube.com/watch?v=9SUXFv8941A&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=9) 30 | - [LINEAR MODELS FOR CLASSIFICATION](https://www.youtube.com/watch?v=boL4lvARJfc&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=10) 31 | - [LOGISTIC REGRESSION in ACTION](https://www.youtube.com/watch?v=Cb77_3veTTU&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=11) 32 | - [NAIVE BAYES CLASSIFICATION](https://www.youtube.com/watch?v=Nj_vkiubCoM&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=12) 33 | - [SUPPORT VECTOR MACHINES](https://www.youtube.com/watch?v=yx3zkenooN0&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=13) 34 | - [DECISION TREES](https://www.youtube.com/watch?v=k1liepnnSjA&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=14) 35 | - [ENSEMBLE LEARNING](https://www.youtube.com/watch?v=KaBxQFJZTVQ&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=15) 36 | - [ARTIFICIAL NEURAL NETWORK](https://www.youtube.com/watch?v=QUp1nVyBEic&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=16) 37 | - [DATA SCALING](https://www.youtube.com/watch?v=ZLUy5YeTbWA&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=17) 38 | - [PRINCIPAL COMPONENT ANALYSIS]() 39 | - [MANIFOLD LEARNING](https://www.youtube.com/watch?v=xJsNVzo_xAs&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=18) 40 | - [K-MEANS CLUSTERING](https://www.youtube.com/watch?v=5aUPhfjcuiA&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=20) 41 | - [AGGLOMERATIVE-HIERARCHICAL-DBSCAN CLUSTERING](https://www.youtube.com/watch?v=6BcJxBE5J5I&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=21) 42 | - [GAUSSIAN MIXTURE MODELS](https://www.youtube.com/watch?v=_9wobRYDNnQ&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=22) 43 | - [MODEL EVALUATION](https://www.youtube.com/watch?v=c-qHZb_cfms&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=23) 44 | - [MODEL 
IMPROVEMENT](https://www.youtube.com/watch?v=S1nxwkTJpo0&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=24) 45 | - [EVALUATION METRICS](https://www.youtube.com/watch?v=5U8hc4irdbQ&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=25) 46 | - [PIPELINES](https://www.youtube.com/watch?v=8UMDSfoi2KA&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=26) 47 | - [TEXT ANALYSIS](https://www.youtube.com/watch?v=JnAOuZ94y68&list=PLfMRLSpipmfuumcvO3fObVAUpSqYAcZmF&index=27) 48 | - [FEATURE ENGINEERING](https://www.youtube.com/watch?v=Gh4DijnuX0o) 49 | - [FEATURE SELECTION](https://www.youtube.com/watch?v=857SKdW-Pvg&t=4s) 50 | - [INTRODUCTION TO MACHINE LEARNING](https://www.youtube.com/watch?v=146vch4ZZ14) 51 | 52 | Let's connect: [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | [Twitter](http://twitter.com/tirendazacademy) | [Instagram](https://www.instagram.com/tirendazacademy) | [GitHub](http://github.com/tirendazacademy) | [LinkedIn](https://www.linkedin.com/in/tirendaz-academy) | [Kaggle](https://www.kaggle.com/tirendazacademy) 😎 53 | 54 | Happy learning ... ✌️ 55 | 56 | 57 | --------------------------------------------------------------------------------