├── All Regression Check ├── Algerian Forest.pdf ├── Tips FE Automation │ ├── FE Automation.ipynb │ └── index.css └── index.php ├── Decision Tree ├── Carseats.csv ├── Decision Tree.ipynb ├── index.css └── index.php ├── Hyper Parameter Tuning ├── HyperParameter Tuning.pdf └── index.css ├── KNN ├── KNN.ipynb └── index.css ├── Linear Regression ├── LINEAR REGRESSION .pdf ├── Tv Ads Linear Regression.pdf ├── index.html └── tvmarketing.csv ├── Logistic Regression ├── Logistic Regression .pdf └── index.html ├── ML]]Preprocessing Automation ├── Heart Stroke Jup Notebook.ipynb ├── healthcare-dataset-stroke-data.csv └── index.html ├── Multi Linear Regression ├── Multi Linear Regressions ├── 50_Startups.csv ├── Startup Profits Prediction.ipynb └── index.php ├── PCA ├── PCA.ipynb └── index.php └── SVC ├── SVC - Jupyter Notebook.pdf ├── SVC.ipynb ├── Social_Network_Ads.csv └── index.php /All Regression Check/Algerian Forest.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harshithvarmapothuri/ML-Algorithms/cc69de9f9340833d9f3cbad8f8d82dbfcc48fabb/All Regression Check/Algerian Forest.pdf -------------------------------------------------------------------------------- /All Regression Check/Tips FE Automation/FE Automation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "954bf0ce-f43f-4bf7-b72c-4cebc5481215", 6 | "metadata": {}, 7 | "source": [ 8 | "# ML Feature Engineering Automation" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "c089e171-700b-4033-8043-cdee4b785f59", 14 | "metadata": {}, 15 | "source": [ 16 | "## Import Necessary Libraries" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "id": "5547d609-4d6b-4fa1-abb4-fe56c92c77bd", 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import pandas as pd\n", 27 | "import numpy as np\n", 28 | "import matplotlib.pyplot as plt\n", 29 | "import seaborn as sns\n", 30 | "import warnings\n", 31 | "warnings.filterwarnings(\"ignore\")" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "id": "62a65aae-088a-4218-a0fa-8d852d4b20da", 37 | "metadata": {}, 38 | "source": [ 39 | "## Load the Tips Dataset" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "id": "f1a1e18d-842f-4bee-be3e-373ae312c0bc", 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "df=sns.load_dataset(\"tips\")" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 3, 55 | "id": "ec1125bd-abb1-49ba-9cc6-97edcd7293f4", 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "data": { 60 | "text/html": [ 61 | "
\n", 79 | " | total_bill | \n", 80 | "tip | \n", 81 | "sex | \n", 82 | "smoker | \n", 83 | "day | \n", 84 | "time | \n", 85 | "size | \n", 86 | "
---|---|---|---|---|---|---|---|
0 | \n", 91 | "16.99 | \n", 92 | "1.01 | \n", 93 | "Female | \n", 94 | "No | \n", 95 | "Sun | \n", 96 | "Dinner | \n", 97 | "2 | \n", 98 | "
1 | \n", 101 | "10.34 | \n", 102 | "1.66 | \n", 103 | "Male | \n", 104 | "No | \n", 105 | "Sun | \n", 106 | "Dinner | \n", 107 | "3 | \n", 108 | "
2 | \n", 111 | "21.01 | \n", 112 | "3.50 | \n", 113 | "Male | \n", 114 | "No | \n", 115 | "Sun | \n", 116 | "Dinner | \n", 117 | "3 | \n", 118 | "
3 | \n", 121 | "23.68 | \n", 122 | "3.31 | \n", 123 | "Male | \n", 124 | "No | \n", 125 | "Sun | \n", 126 | "Dinner | \n", 127 | "2 | \n", 128 | "
4 | \n", 131 | "24.59 | \n", 132 | "3.61 | \n", 133 | "Female | \n", 134 | "No | \n", 135 | "Sun | \n", 136 | "Dinner | \n", 137 | "4 | \n", 138 | "
... | \n", 141 | "... | \n", 142 | "... | \n", 143 | "... | \n", 144 | "... | \n", 145 | "... | \n", 146 | "... | \n", 147 | "... | \n", 148 | "
239 | \n", 151 | "29.03 | \n", 152 | "5.92 | \n", 153 | "Male | \n", 154 | "No | \n", 155 | "Sat | \n", 156 | "Dinner | \n", 157 | "3 | \n", 158 | "
240 | \n", 161 | "27.18 | \n", 162 | "2.00 | \n", 163 | "Female | \n", 164 | "Yes | \n", 165 | "Sat | \n", 166 | "Dinner | \n", 167 | "2 | \n", 168 | "
241 | \n", 171 | "22.67 | \n", 172 | "2.00 | \n", 173 | "Male | \n", 174 | "Yes | \n", 175 | "Sat | \n", 176 | "Dinner | \n", 177 | "2 | \n", 178 | "
242 | \n", 181 | "17.82 | \n", 182 | "1.75 | \n", 183 | "Male | \n", 184 | "No | \n", 185 | "Sat | \n", 186 | "Dinner | \n", 187 | "2 | \n", 188 | "
243 | \n", 191 | "18.78 | \n", 192 | "3.00 | \n", 193 | "Female | \n", 194 | "No | \n", 195 | "Thur | \n", 196 | "Dinner | \n", 197 | "2 | \n", 198 | "
244 rows × 7 columns
\n", 202 | "GridSearchCV(cv=5, estimator=RandomForestClassifier(),\n", 537 | " param_grid={'criterion': ('gini', 'entropy', 'log_loss'),\n", 538 | " 'max_features': ('sqrt', 'auto', None),\n", 539 | " 'n_estimators': [100, 200, 300, 400, 500]})In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GridSearchCV(cv=5, estimator=RandomForestClassifier(),\n", 540 | " param_grid={'criterion': ('gini', 'entropy', 'log_loss'),\n", 541 | " 'max_features': ('sqrt', 'auto', None),\n", 542 | " 'n_estimators': [100, 200, 300, 400, 500]})
RandomForestClassifier()
RandomForestClassifier()
RandomForestClassifier(max_features=None, n_estimators=400)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestClassifier(max_features=None, n_estimators=400)
\n", 77 | " | sepal_length | \n", 78 | "sepal_width | \n", 79 | "petal_length | \n", 80 | "petal_width | \n", 81 | "species | \n", 82 | "
---|---|---|---|---|---|
0 | \n", 87 | "5.1 | \n", 88 | "3.5 | \n", 89 | "1.4 | \n", 90 | "0.2 | \n", 91 | "setosa | \n", 92 | "
1 | \n", 95 | "4.9 | \n", 96 | "3.0 | \n", 97 | "1.4 | \n", 98 | "0.2 | \n", 99 | "setosa | \n", 100 | "
2 | \n", 103 | "4.7 | \n", 104 | "3.2 | \n", 105 | "1.3 | \n", 106 | "0.2 | \n", 107 | "setosa | \n", 108 | "
3 | \n", 111 | "4.6 | \n", 112 | "3.1 | \n", 113 | "1.5 | \n", 114 | "0.2 | \n", 115 | "setosa | \n", 116 | "
4 | \n", 119 | "5.0 | \n", 120 | "3.6 | \n", 121 | "1.4 | \n", 122 | "0.2 | \n", 123 | "setosa | \n", 124 | "
... | \n", 127 | "... | \n", 128 | "... | \n", 129 | "... | \n", 130 | "... | \n", 131 | "... | \n", 132 | "
145 | \n", 135 | "6.7 | \n", 136 | "3.0 | \n", 137 | "5.2 | \n", 138 | "2.3 | \n", 139 | "virginica | \n", 140 | "
146 | \n", 143 | "6.3 | \n", 144 | "2.5 | \n", 145 | "5.0 | \n", 146 | "1.9 | \n", 147 | "virginica | \n", 148 | "
147 | \n", 151 | "6.5 | \n", 152 | "3.0 | \n", 153 | "5.2 | \n", 154 | "2.0 | \n", 155 | "virginica | \n", 156 | "
148 | \n", 159 | "6.2 | \n", 160 | "3.4 | \n", 161 | "5.4 | \n", 162 | "2.3 | \n", 163 | "virginica | \n", 164 | "
149 | \n", 167 | "5.9 | \n", 168 | "3.0 | \n", 169 | "5.1 | \n", 170 | "1.8 | \n", 171 | "virginica | \n", 172 | "
150 rows × 5 columns
\n", 176 | "\n", 298 | " | sepal_length | \n", 299 | "sepal_width | \n", 300 | "petal_length | \n", 301 | "petal_width | \n", 302 | "species | \n", 303 | "sp | \n", 304 | "
---|---|---|---|---|---|---|
0 | \n", 309 | "5.1 | \n", 310 | "3.5 | \n", 311 | "1.4 | \n", 312 | "0.2 | \n", 313 | "setosa | \n", 314 | "0 | \n", 315 | "
1 | \n", 318 | "4.9 | \n", 319 | "3.0 | \n", 320 | "1.4 | \n", 321 | "0.2 | \n", 322 | "setosa | \n", 323 | "0 | \n", 324 | "
2 | \n", 327 | "4.7 | \n", 328 | "3.2 | \n", 329 | "1.3 | \n", 330 | "0.2 | \n", 331 | "setosa | \n", 332 | "0 | \n", 333 | "
3 | \n", 336 | "4.6 | \n", 337 | "3.1 | \n", 338 | "1.5 | \n", 339 | "0.2 | \n", 340 | "setosa | \n", 341 | "0 | \n", 342 | "
4 | \n", 345 | "5.0 | \n", 346 | "3.6 | \n", 347 | "1.4 | \n", 348 | "0.2 | \n", 349 | "setosa | \n", 350 | "0 | \n", 351 | "
... | \n", 354 | "... | \n", 355 | "... | \n", 356 | "... | \n", 357 | "... | \n", 358 | "... | \n", 359 | "... | \n", 360 | "
145 | \n", 363 | "6.7 | \n", 364 | "3.0 | \n", 365 | "5.2 | \n", 366 | "2.3 | \n", 367 | "virginica | \n", 368 | "2 | \n", 369 | "
146 | \n", 372 | "6.3 | \n", 373 | "2.5 | \n", 374 | "5.0 | \n", 375 | "1.9 | \n", 376 | "virginica | \n", 377 | "2 | \n", 378 | "
147 | \n", 381 | "6.5 | \n", 382 | "3.0 | \n", 383 | "5.2 | \n", 384 | "2.0 | \n", 385 | "virginica | \n", 386 | "2 | \n", 387 | "
148 | \n", 390 | "6.2 | \n", 391 | "3.4 | \n", 392 | "5.4 | \n", 393 | "2.3 | \n", 394 | "virginica | \n", 395 | "2 | \n", 396 | "
149 | \n", 399 | "5.9 | \n", 400 | "3.0 | \n", 401 | "5.1 | \n", 402 | "1.8 | \n", 403 | "virginica | \n", 404 | "2 | \n", 405 | "
150 rows × 6 columns
\n", 409 | "KNeighborsRegressor(n_neighbors=3)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KNeighborsRegressor(n_neighbors=3)
KNeighborsClassifier(n_neighbors=3)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KNeighborsClassifier(n_neighbors=3)
\n", 79 | " | id | \n", 80 | "gender | \n", 81 | "age | \n", 82 | "hypertension | \n", 83 | "heart_disease | \n", 84 | "ever_married | \n", 85 | "work_type | \n", 86 | "Residence_type | \n", 87 | "avg_glucose_level | \n", 88 | "bmi | \n", 89 | "smoking_status | \n", 90 | "stroke | \n", 91 | "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", 96 | "9046 | \n", 97 | "Male | \n", 98 | "67.0 | \n", 99 | "0 | \n", 100 | "1 | \n", 101 | "Yes | \n", 102 | "Private | \n", 103 | "Urban | \n", 104 | "228.69 | \n", 105 | "36.6 | \n", 106 | "formerly smoked | \n", 107 | "1 | \n", 108 | "
1 | \n", 111 | "51676 | \n", 112 | "Female | \n", 113 | "61.0 | \n", 114 | "0 | \n", 115 | "0 | \n", 116 | "Yes | \n", 117 | "Self-employed | \n", 118 | "Rural | \n", 119 | "202.21 | \n", 120 | "NaN | \n", 121 | "never smoked | \n", 122 | "1 | \n", 123 | "
2 | \n", 126 | "31112 | \n", 127 | "Male | \n", 128 | "80.0 | \n", 129 | "0 | \n", 130 | "1 | \n", 131 | "Yes | \n", 132 | "Private | \n", 133 | "Rural | \n", 134 | "105.92 | \n", 135 | "32.5 | \n", 136 | "never smoked | \n", 137 | "1 | \n", 138 | "
3 | \n", 141 | "60182 | \n", 142 | "Female | \n", 143 | "49.0 | \n", 144 | "0 | \n", 145 | "0 | \n", 146 | "Yes | \n", 147 | "Private | \n", 148 | "Urban | \n", 149 | "171.23 | \n", 150 | "34.4 | \n", 151 | "smokes | \n", 152 | "1 | \n", 153 | "
4 | \n", 156 | "1665 | \n", 157 | "Female | \n", 158 | "79.0 | \n", 159 | "1 | \n", 160 | "0 | \n", 161 | "Yes | \n", 162 | "Self-employed | \n", 163 | "Rural | \n", 164 | "174.12 | \n", 165 | "24.0 | \n", 166 | "never smoked | \n", 167 | "1 | \n", 168 | "
... | \n", 171 | "... | \n", 172 | "... | \n", 173 | "... | \n", 174 | "... | \n", 175 | "... | \n", 176 | "... | \n", 177 | "... | \n", 178 | "... | \n", 179 | "... | \n", 180 | "... | \n", 181 | "... | \n", 182 | "... | \n", 183 | "
5105 | \n", 186 | "18234 | \n", 187 | "Female | \n", 188 | "80.0 | \n", 189 | "1 | \n", 190 | "0 | \n", 191 | "Yes | \n", 192 | "Private | \n", 193 | "Urban | \n", 194 | "83.75 | \n", 195 | "NaN | \n", 196 | "never smoked | \n", 197 | "0 | \n", 198 | "
5106 | \n", 201 | "44873 | \n", 202 | "Female | \n", 203 | "81.0 | \n", 204 | "0 | \n", 205 | "0 | \n", 206 | "Yes | \n", 207 | "Self-employed | \n", 208 | "Urban | \n", 209 | "125.20 | \n", 210 | "40.0 | \n", 211 | "never smoked | \n", 212 | "0 | \n", 213 | "
5107 | \n", 216 | "19723 | \n", 217 | "Female | \n", 218 | "35.0 | \n", 219 | "0 | \n", 220 | "0 | \n", 221 | "Yes | \n", 222 | "Self-employed | \n", 223 | "Rural | \n", 224 | "82.99 | \n", 225 | "30.6 | \n", 226 | "never smoked | \n", 227 | "0 | \n", 228 | "
5108 | \n", 231 | "37544 | \n", 232 | "Male | \n", 233 | "51.0 | \n", 234 | "0 | \n", 235 | "0 | \n", 236 | "Yes | \n", 237 | "Private | \n", 238 | "Rural | \n", 239 | "166.29 | \n", 240 | "25.6 | \n", 241 | "formerly smoked | \n", 242 | "0 | \n", 243 | "
5109 | \n", 246 | "44679 | \n", 247 | "Female | \n", 248 | "44.0 | \n", 249 | "0 | \n", 250 | "0 | \n", 251 | "Yes | \n", 252 | "Govt_job | \n", 253 | "Urban | \n", 254 | "85.28 | \n", 255 | "26.2 | \n", 256 | "Unknown | \n", 257 | "0 | \n", 258 | "
5110 rows × 12 columns
\n", 262 | "\n", 361 | " | gender | \n", 362 | "age | \n", 363 | "hypertension | \n", 364 | "heart_disease | \n", 365 | "ever_married | \n", 366 | "work_type | \n", 367 | "Residence_type | \n", 368 | "avg_glucose_level | \n", 369 | "bmi | \n", 370 | "smoking_status | \n", 371 | "stroke | \n", 372 | "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", 377 | "Male | \n", 378 | "67.0 | \n", 379 | "0 | \n", 380 | "1 | \n", 381 | "Yes | \n", 382 | "Private | \n", 383 | "Urban | \n", 384 | "228.69 | \n", 385 | "36.6 | \n", 386 | "formerly smoked | \n", 387 | "1 | \n", 388 | "
1 | \n", 391 | "Female | \n", 392 | "61.0 | \n", 393 | "0 | \n", 394 | "0 | \n", 395 | "Yes | \n", 396 | "Self-employed | \n", 397 | "Rural | \n", 398 | "202.21 | \n", 399 | "NaN | \n", 400 | "never smoked | \n", 401 | "1 | \n", 402 | "
2 | \n", 405 | "Male | \n", 406 | "80.0 | \n", 407 | "0 | \n", 408 | "1 | \n", 409 | "Yes | \n", 410 | "Private | \n", 411 | "Rural | \n", 412 | "105.92 | \n", 413 | "32.5 | \n", 414 | "never smoked | \n", 415 | "1 | \n", 416 | "
3 | \n", 419 | "Female | \n", 420 | "49.0 | \n", 421 | "0 | \n", 422 | "0 | \n", 423 | "Yes | \n", 424 | "Private | \n", 425 | "Urban | \n", 426 | "171.23 | \n", 427 | "34.4 | \n", 428 | "smokes | \n", 429 | "1 | \n", 430 | "
4 | \n", 433 | "Female | \n", 434 | "79.0 | \n", 435 | "1 | \n", 436 | "0 | \n", 437 | "Yes | \n", 438 | "Self-employed | \n", 439 | "Rural | \n", 440 | "174.12 | \n", 441 | "24.0 | \n", 442 | "never smoked | \n", 443 | "1 | \n", 444 | "
\n", 560 | " | gender | \n", 561 | "age | \n", 562 | "hypertension | \n", 563 | "heart_disease | \n", 564 | "ever_married | \n", 565 | "work_type | \n", 566 | "Residence_type | \n", 567 | "avg_glucose_level | \n", 568 | "bmi | \n", 569 | "smoking_status | \n", 570 | "stroke | \n", 571 | "
---|---|---|---|---|---|---|---|---|---|---|---|
3520 | \n", 576 | "Male | \n", 577 | "18.0 | \n", 578 | "0 | \n", 579 | "0 | \n", 580 | "No | \n", 581 | "Self-employed | \n", 582 | "Urban | \n", 583 | "74.00 | \n", 584 | "23.7 | \n", 585 | "Unknown | \n", 586 | "0 | \n", 587 | "
4494 | \n", 590 | "Male | \n", 591 | "54.0 | \n", 592 | "0 | \n", 593 | "0 | \n", 594 | "No | \n", 595 | "Private | \n", 596 | "Rural | \n", 597 | "106.52 | \n", 598 | "27.4 | \n", 599 | "formerly smoked | \n", 600 | "0 | \n", 601 | "
154 | \n", 604 | "Female | \n", 605 | "55.0 | \n", 606 | "0 | \n", 607 | "0 | \n", 608 | "Yes | \n", 609 | "Self-employed | \n", 610 | "Rural | \n", 611 | "92.98 | \n", 612 | "25.6 | \n", 613 | "never smoked | \n", 614 | "1 | \n", 615 | "
119 | \n", 618 | "Female | \n", 619 | "77.0 | \n", 620 | "0 | \n", 621 | "0 | \n", 622 | "Yes | \n", 623 | "Private | \n", 624 | "Urban | \n", 625 | "105.22 | \n", 626 | "31.0 | \n", 627 | "never smoked | \n", 628 | "1 | \n", 629 | "
38 | \n", 632 | "Male | \n", 633 | "58.0 | \n", 634 | "0 | \n", 635 | "0 | \n", 636 | "No | \n", 637 | "Private | \n", 638 | "Rural | \n", 639 | "92.62 | \n", 640 | "32.0 | \n", 641 | "Unknown | \n", 642 | "1 | \n", 643 | "
... | \n", 646 | "... | \n", 647 | "... | \n", 648 | "... | \n", 649 | "... | \n", 650 | "... | \n", 651 | "... | \n", 652 | "... | \n", 653 | "... | \n", 654 | "... | \n", 655 | "... | \n", 656 | "... | \n", 657 | "
2377 | \n", 660 | "Female | \n", 661 | "28.0 | \n", 662 | "0 | \n", 663 | "0 | \n", 664 | "Yes | \n", 665 | "Private | \n", 666 | "Rural | \n", 667 | "97.06 | \n", 668 | "23.2 | \n", 669 | "Unknown | \n", 670 | "0 | \n", 671 | "
82 | \n", 674 | "Female | \n", 675 | "79.0 | \n", 676 | "0 | \n", 677 | "0 | \n", 678 | "No | \n", 679 | "Private | \n", 680 | "Rural | \n", 681 | "88.92 | \n", 682 | "22.9 | \n", 683 | "never smoked | \n", 684 | "1 | \n", 685 | "
1572 | \n", 688 | "Male | \n", 689 | "12.0 | \n", 690 | "0 | \n", 691 | "0 | \n", 692 | "No | \n", 693 | "children | \n", 694 | "Urban | \n", 695 | "64.08 | \n", 696 | "18.2 | \n", 697 | "Unknown | \n", 698 | "0 | \n", 699 | "
4112 | \n", 702 | "Female | \n", 703 | "36.0 | \n", 704 | "0 | \n", 705 | "0 | \n", 706 | "Yes | \n", 707 | "Private | \n", 708 | "Urban | \n", 709 | "72.16 | \n", 710 | "23.2 | \n", 711 | "never smoked | \n", 712 | "0 | \n", 713 | "
152 | \n", 716 | "Female | \n", 717 | "80.0 | \n", 718 | "0 | \n", 719 | "0 | \n", 720 | "Yes | \n", 721 | "Self-employed | \n", 722 | "Urban | \n", 723 | "76.57 | \n", 724 | "34.1 | \n", 725 | "never smoked | \n", 726 | "1 | \n", 727 | "
9722 rows × 11 columns
\n", 731 | "\n", 827 | " | gender | \n", 828 | "age | \n", 829 | "hypertension | \n", 830 | "heart_disease | \n", 831 | "ever_married | \n", 832 | "work_type | \n", 833 | "Residence_type | \n", 834 | "avg_glucose_level | \n", 835 | "bmi | \n", 836 | "smoking_status | \n", 837 | "
---|---|---|---|---|---|---|---|---|---|---|
3520 | \n", 842 | "Male | \n", 843 | "18.0 | \n", 844 | "0 | \n", 845 | "0 | \n", 846 | "No | \n", 847 | "Self-employed | \n", 848 | "Urban | \n", 849 | "74.00 | \n", 850 | "23.7 | \n", 851 | "Unknown | \n", 852 | "
4494 | \n", 855 | "Male | \n", 856 | "54.0 | \n", 857 | "0 | \n", 858 | "0 | \n", 859 | "No | \n", 860 | "Private | \n", 861 | "Rural | \n", 862 | "106.52 | \n", 863 | "27.4 | \n", 864 | "formerly smoked | \n", 865 | "
154 | \n", 868 | "Female | \n", 869 | "55.0 | \n", 870 | "0 | \n", 871 | "0 | \n", 872 | "Yes | \n", 873 | "Self-employed | \n", 874 | "Rural | \n", 875 | "92.98 | \n", 876 | "25.6 | \n", 877 | "never smoked | \n", 878 | "
119 | \n", 881 | "Female | \n", 882 | "77.0 | \n", 883 | "0 | \n", 884 | "0 | \n", 885 | "Yes | \n", 886 | "Private | \n", 887 | "Urban | \n", 888 | "105.22 | \n", 889 | "31.0 | \n", 890 | "never smoked | \n", 891 | "
38 | \n", 894 | "Male | \n", 895 | "58.0 | \n", 896 | "0 | \n", 897 | "0 | \n", 898 | "No | \n", 899 | "Private | \n", 900 | "Rural | \n", 901 | "92.62 | \n", 902 | "32.0 | \n", 903 | "Unknown | \n", 904 | "
... | \n", 907 | "... | \n", 908 | "... | \n", 909 | "... | \n", 910 | "... | \n", 911 | "... | \n", 912 | "... | \n", 913 | "... | \n", 914 | "... | \n", 915 | "... | \n", 916 | "... | \n", 917 | "
2377 | \n", 920 | "Female | \n", 921 | "28.0 | \n", 922 | "0 | \n", 923 | "0 | \n", 924 | "Yes | \n", 925 | "Private | \n", 926 | "Rural | \n", 927 | "97.06 | \n", 928 | "23.2 | \n", 929 | "Unknown | \n", 930 | "
82 | \n", 933 | "Female | \n", 934 | "79.0 | \n", 935 | "0 | \n", 936 | "0 | \n", 937 | "No | \n", 938 | "Private | \n", 939 | "Rural | \n", 940 | "88.92 | \n", 941 | "22.9 | \n", 942 | "never smoked | \n", 943 | "
1572 | \n", 946 | "Male | \n", 947 | "12.0 | \n", 948 | "0 | \n", 949 | "0 | \n", 950 | "No | \n", 951 | "children | \n", 952 | "Urban | \n", 953 | "64.08 | \n", 954 | "18.2 | \n", 955 | "Unknown | \n", 956 | "
4112 | \n", 959 | "Female | \n", 960 | "36.0 | \n", 961 | "0 | \n", 962 | "0 | \n", 963 | "Yes | \n", 964 | "Private | \n", 965 | "Urban | \n", 966 | "72.16 | \n", 967 | "23.2 | \n", 968 | "never smoked | \n", 969 | "
152 | \n", 972 | "Female | \n", 973 | "80.0 | \n", 974 | "0 | \n", 975 | "0 | \n", 976 | "Yes | \n", 977 | "Self-employed | \n", 978 | "Urban | \n", 979 | "76.57 | \n", 980 | "34.1 | \n", 981 | "never smoked | \n", 982 | "
9722 rows × 10 columns
\n", 986 | "RandomForestClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestClassifier()
\n", 75 | " | method | \n", 76 | "number | \n", 77 | "orbital_period | \n", 78 | "mass | \n", 79 | "distance | \n", 80 | "year | \n", 81 | "
---|---|---|---|---|---|---|
0 | \n", 86 | "Radial Velocity | \n", 87 | "1 | \n", 88 | "269.300000 | \n", 89 | "7.10 | \n", 90 | "77.40 | \n", 91 | "2006 | \n", 92 | "
1 | \n", 95 | "Radial Velocity | \n", 96 | "1 | \n", 97 | "874.774000 | \n", 98 | "2.21 | \n", 99 | "56.95 | \n", 100 | "2008 | \n", 101 | "
2 | \n", 104 | "Radial Velocity | \n", 105 | "1 | \n", 106 | "763.000000 | \n", 107 | "2.60 | \n", 108 | "19.84 | \n", 109 | "2011 | \n", 110 | "
3 | \n", 113 | "Radial Velocity | \n", 114 | "1 | \n", 115 | "326.030000 | \n", 116 | "19.40 | \n", 117 | "110.62 | \n", 118 | "2007 | \n", 119 | "
4 | \n", 122 | "Radial Velocity | \n", 123 | "1 | \n", 124 | "516.220000 | \n", 125 | "10.50 | \n", 126 | "119.47 | \n", 127 | "2009 | \n", 128 | "
... | \n", 131 | "... | \n", 132 | "... | \n", 133 | "... | \n", 134 | "... | \n", 135 | "... | \n", 136 | "... | \n", 137 | "
1030 | \n", 140 | "Transit | \n", 141 | "1 | \n", 142 | "3.941507 | \n", 143 | "NaN | \n", 144 | "172.00 | \n", 145 | "2006 | \n", 146 | "
1031 | \n", 149 | "Transit | \n", 150 | "1 | \n", 151 | "2.615864 | \n", 152 | "NaN | \n", 153 | "148.00 | \n", 154 | "2007 | \n", 155 | "
1032 | \n", 158 | "Transit | \n", 159 | "1 | \n", 160 | "3.191524 | \n", 161 | "NaN | \n", 162 | "174.00 | \n", 163 | "2007 | \n", 164 | "
1033 | \n", 167 | "Transit | \n", 168 | "1 | \n", 169 | "4.125083 | \n", 170 | "NaN | \n", 171 | "293.00 | \n", 172 | "2008 | \n", 173 | "
1034 | \n", 176 | "Transit | \n", 177 | "1 | \n", 178 | "4.187757 | \n", 179 | "NaN | \n", 180 | "260.00 | \n", 181 | "2008 | \n", 182 | "
1035 rows × 6 columns
\n", 186 | "\n", 261 | " | number | \n", 262 | "orbital_period | \n", 263 | "mass | \n", 264 | "distance | \n", 265 | "year | \n", 266 | "
---|---|---|---|---|---|
0 | \n", 271 | "1 | \n", 272 | "269.30000 | \n", 273 | "7.100 | \n", 274 | "77.40 | \n", 275 | "2006 | \n", 276 | "
1 | \n", 279 | "1 | \n", 280 | "874.77400 | \n", 281 | "2.210 | \n", 282 | "56.95 | \n", 283 | "2008 | \n", 284 | "
2 | \n", 287 | "1 | \n", 288 | "763.00000 | \n", 289 | "2.600 | \n", 290 | "19.84 | \n", 291 | "2011 | \n", 292 | "
3 | \n", 295 | "1 | \n", 296 | "326.03000 | \n", 297 | "19.400 | \n", 298 | "110.62 | \n", 299 | "2007 | \n", 300 | "
4 | \n", 303 | "1 | \n", 304 | "516.22000 | \n", 305 | "10.500 | \n", 306 | "119.47 | \n", 307 | "2009 | \n", 308 | "
... | \n", 311 | "... | \n", 312 | "... | \n", 313 | "... | \n", 314 | "... | \n", 315 | "... | \n", 316 | "
640 | \n", 319 | "1 | \n", 320 | "111.70000 | \n", 321 | "2.100 | \n", 322 | "14.90 | \n", 323 | "2009 | \n", 324 | "
641 | \n", 327 | "1 | \n", 328 | "5.05050 | \n", 329 | "1.068 | \n", 330 | "44.46 | \n", 331 | "2013 | \n", 332 | "
642 | \n", 335 | "1 | \n", 336 | "311.28800 | \n", 337 | "1.940 | \n", 338 | "17.24 | \n", 339 | "1999 | \n", 340 | "
649 | \n", 343 | "1 | \n", 344 | "2.70339 | \n", 345 | "1.470 | \n", 346 | "178.00 | \n", 347 | "2013 | \n", 348 | "
784 | \n", 351 | "3 | \n", 352 | "580.00000 | \n", 353 | "0.947 | \n", 354 | "135.00 | \n", 355 | "2012 | \n", 356 | "
498 rows × 5 columns
\n", 360 | "\n", 498 | " | F1 | \n", 499 | "F2 | \n", 500 | "F3 | \n", 501 | "
---|---|---|---|
0 | \n", 506 | "-566.504590 | \n", 507 | "24.780669 | \n", 508 | "3.449467 | \n", 509 | "
1 | \n", 512 | "38.989887 | \n", 513 | "4.932340 | \n", 514 | "-0.616922 | \n", 515 | "
2 | \n", 518 | "-72.741937 | \n", 519 | "-32.223337 | \n", 520 | "-3.343070 | \n", 521 | "
3 | \n", 524 | "-509.805791 | \n", 525 | "58.337215 | \n", 526 | "8.138673 | \n", 527 | "
4 | \n", 530 | "-319.629461 | \n", 531 | "67.231795 | \n", 532 | "2.486263 | \n", 533 | "
... | \n", 536 | "... | \n", 537 | "... | \n", 538 | "... | \n", 539 | "
493 | \n", 542 | "-724.036578 | \n", 543 | "-37.923212 | \n", 544 | "-1.785338 | \n", 545 | "
494 | \n", 548 | "-830.718586 | \n", 549 | "-8.445276 | \n", 550 | "-5.654267 | \n", 551 | "
495 | \n", 554 | "-524.453396 | \n", 555 | "-35.544418 | \n", 556 | "7.117729 | \n", 557 | "
496 | \n", 560 | "-833.214190 | \n", 561 | "125.044259 | \n", 562 | "-4.721034 | \n", 563 | "
497 | \n", 566 | "-255.870669 | \n", 567 | "82.653148 | \n", 568 | "-4.432730 | \n", 569 | "
498 rows × 3 columns
\n", 573 | "