├── README.md └── Predict_survival.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # predict_survival 2 | 3 | The data has been split into two groups: 4 | 5 | training set (train.csv) 6 | test set (test.csv) 7 | The training set should be used to build your machine learning models. For the training set, we provide the outcome (also known as the “ground truth”) for each passenger. Your model will be based on “features” like passengers’ gender and class. You can also use feature engineering to create new features. 8 | 9 | The test set should be used to see how well your model performs on unseen data. For the test set, we do not provide the ground truth for each passenger. It is your job to predict these outcomes. For each passenger in the test set, use the model you trained to predict whether or not they survived the sinking of the Titanic. 10 | 11 | We also include gender_submission.csv, a set of predictions that assume all and only female passengers survive, as an example of what a submission file should look like. 
12 | -------------------------------------------------------------------------------- /Predict_survival.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Import data wrangling and analytics libraries" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 7, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "import seaborn as sns\n", 20 | "import os\n", 21 | "\n", 22 | "\n", 23 | "from sklearn.model_selection import train_test_split\n", 24 | "from sklearn.ensemble import RandomForestClassifier\n", 25 | "from sklearn.linear_model import LogisticRegression\n", 26 | "from sklearn.naive_bayes import GaussianNB\n", 27 | "from sklearn.tree import DecisionTreeClassifier\n", 28 | "from sklearn.neighbors import KNeighborsClassifier\n", 29 | "from sklearn.preprocessing import MinMaxScaler\n", 30 | "from sklearn import metrics\n", 31 | "from sklearn.preprocessing import LabelEncoder\n", 32 | "from sklearn.feature_selection import RFE\n", 33 | "from sklearn import metrics\n", 34 | "\n", 35 | "\n", 36 | "import warnings\n", 37 | "from sklearn.exceptions import DataConversionWarning\n", 38 | "warnings.filterwarnings(action='ignore', category=UserWarning)\n", 39 | "\n", 40 | "\n", 41 | "import warnings\n", 42 | "from sklearn.exceptions import DataConversionWarning\n", 43 | "warnings.filterwarnings(action='ignore', category=DataConversionWarning)\n", 44 | "\n", 45 | "import warnings\n", 46 | "from sklearn.exceptions import DataConversionWarning\n", 47 | "warnings.filterwarnings(action='ignore', category=DataConversionWarning)\n", 48 | "\n", 49 | "import warnings\n", 50 | "warnings.simplefilter(action='ignore', category=FutureWarning)\n" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | 
"source": [ 57 | "# Data Loading and Analysis" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 8, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "trainDf = pd.read_csv('train.csv')\n", 67 | "testDf = pd.read_csv('test.csv')" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 9, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "data": { 77 | "text/html": [ 78 | "
\n", 79 | "\n", 92 | "\n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", 188 | "
" 189 | ], 190 | "text/plain": [ 191 | " PassengerId Survived Pclass \\\n", 192 | "0 1 0 3 \n", 193 | "1 2 1 1 \n", 194 | "2 3 1 3 \n", 195 | "3 4 1 1 \n", 196 | "4 5 0 3 \n", 197 | "\n", 198 | " Name Sex Age SibSp \\\n", 199 | "0 Braund, Mr. Owen Harris male 22.0 1 \n", 200 | "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", 201 | "2 Heikkinen, Miss. Laina female 26.0 0 \n", 202 | "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", 203 | "4 Allen, Mr. William Henry male 35.0 0 \n", 204 | "\n", 205 | " Parch Ticket Fare Cabin Embarked \n", 206 | "0 0 A/5 21171 7.2500 NaN S \n", 207 | "1 0 PC 17599 71.2833 C85 C \n", 208 | "2 0 STON/O2. 3101282 7.9250 NaN S \n", 209 | "3 0 113803 53.1000 C123 S \n", 210 | "4 0 373450 8.0500 NaN S " 211 | ] 212 | }, 213 | "execution_count": 9, 214 | "metadata": {}, 215 | "output_type": "execute_result" 216 | } 217 | ], 218 | "source": [ 219 | "trainDf.head()" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 10, 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "name": "stdout", 229 | "output_type": "stream", 230 | "text": [ 231 | "\n", 232 | "RangeIndex: 891 entries, 0 to 890\n", 233 | "Data columns (total 12 columns):\n", 234 | " # Column Non-Null Count Dtype \n", 235 | "--- ------ -------------- ----- \n", 236 | " 0 PassengerId 891 non-null int64 \n", 237 | " 1 Survived 891 non-null int64 \n", 238 | " 2 Pclass 891 non-null int64 \n", 239 | " 3 Name 891 non-null object \n", 240 | " 4 Sex 891 non-null object \n", 241 | " 5 Age 714 non-null float64\n", 242 | " 6 SibSp 891 non-null int64 \n", 243 | " 7 Parch 891 non-null int64 \n", 244 | " 8 Ticket 891 non-null object \n", 245 | " 9 Fare 891 non-null float64\n", 246 | " 10 Cabin 204 non-null object \n", 247 | " 11 Embarked 889 non-null object \n", 248 | "dtypes: float64(2), int64(5), object(5)\n", 249 | "memory usage: 83.7+ KB\n" 250 | ] 251 | } 252 | ], 253 | "source": [ 254 | "trainDf.info()" 255 | ] 256 | }, 257 | 
{ 258 | "cell_type": "code", 259 | "execution_count": 11, 260 | "metadata": {}, 261 | "outputs": [ 262 | { 263 | "name": "stdout", 264 | "output_type": "stream", 265 | "text": [ 266 | "\n", 267 | "RangeIndex: 418 entries, 0 to 417\n", 268 | "Data columns (total 11 columns):\n", 269 | " # Column Non-Null Count Dtype \n", 270 | "--- ------ -------------- ----- \n", 271 | " 0 PassengerId 418 non-null int64 \n", 272 | " 1 Pclass 418 non-null int64 \n", 273 | " 2 Name 418 non-null object \n", 274 | " 3 Sex 418 non-null object \n", 275 | " 4 Age 332 non-null float64\n", 276 | " 5 SibSp 418 non-null int64 \n", 277 | " 6 Parch 418 non-null int64 \n", 278 | " 7 Ticket 418 non-null object \n", 279 | " 8 Fare 417 non-null float64\n", 280 | " 9 Cabin 91 non-null object \n", 281 | " 10 Embarked 418 non-null object \n", 282 | "dtypes: float64(2), int64(4), object(5)\n", 283 | "memory usage: 36.0+ KB\n" 284 | ] 285 | } 286 | ], 287 | "source": [ 288 | "testDf.info()" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 6, 294 | "metadata": { 295 | "scrolled": true 296 | }, 297 | "outputs": [ 298 | { 299 | "ename": "KeyboardInterrupt", 300 | "evalue": "", 301 | "output_type": "error", 302 | "traceback": [ 303 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 304 | "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 305 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas_profiling\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mProfileReport\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mProfileReport\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrainDf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 306 | "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\pandas_profiling\\__init__.py\u001b[0m in 
\u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas_profiling\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconfig\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mConfig\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mconfig\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas_profiling\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcontroller\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mpandas_decorator\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 8\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas_profiling\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprofile_report\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mProfileReport\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas_profiling\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mversion\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0m__version__\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 307 | "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\pandas_profiling\\controller\\pandas_decorator.py\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas_profiling\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprofile_report\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mProfileReport\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 308 | 
"\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\pandas_profiling\\profile_report.py\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 18\u001b[0m )\n\u001b[0;32m 19\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas_profiling\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mserialize_report\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mSerializeReport\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 20\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas_profiling\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mutils\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdataframe\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mhash_dataframe\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrename_index\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 21\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas_profiling\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mutils\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpaths\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mget_config\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 22\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 309 | "\u001b[1;32m~\\anaconda3\\lib\\importlib\\_bootstrap.py\u001b[0m in \u001b[0;36m_find_and_load\u001b[1;34m(name, import_)\u001b[0m\n", 310 | "\u001b[1;32m~\\anaconda3\\lib\\importlib\\_bootstrap.py\u001b[0m in \u001b[0;36m_find_and_load_unlocked\u001b[1;34m(name, import_)\u001b[0m\n", 311 | "\u001b[1;32m~\\anaconda3\\lib\\importlib\\_bootstrap.py\u001b[0m in \u001b[0;36m_load_unlocked\u001b[1;34m(spec)\u001b[0m\n", 312 | "\u001b[1;32m~\\anaconda3\\lib\\importlib\\_bootstrap_external.py\u001b[0m in \u001b[0;36mexec_module\u001b[1;34m(self, module)\u001b[0m\n", 313 | "\u001b[1;32m~\\anaconda3\\lib\\importlib\\_bootstrap_external.py\u001b[0m in \u001b[0;36mget_code\u001b[1;34m(self, fullname)\u001b[0m\n", 314 | "\u001b[1;32m~\\anaconda3\\lib\\importlib\\_bootstrap_external.py\u001b[0m in \u001b[0;36mget_data\u001b[1;34m(self, 
path)\u001b[0m\n", 315 | "\u001b[1;31mKeyboardInterrupt\u001b[0m: " 316 | ] 317 | } 318 | ], 319 | "source": [ 320 | "from pandas_profiling import ProfileReport\n", 321 | "ProfileReport(trainDf)" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | "# Data Cleaning" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 12, 334 | "metadata": { 335 | "scrolled": true 336 | }, 337 | "outputs": [ 338 | { 339 | "data": { 340 | "text/plain": [ 341 | "PassengerId 0\n", 342 | "Survived 0\n", 343 | "Pclass 0\n", 344 | "Name 0\n", 345 | "Sex 0\n", 346 | "Age 177\n", 347 | "SibSp 0\n", 348 | "Parch 0\n", 349 | "Ticket 0\n", 350 | "Fare 0\n", 351 | "Cabin 687\n", 352 | "Embarked 2\n", 353 | "dtype: int64" 354 | ] 355 | }, 356 | "execution_count": 12, 357 | "metadata": {}, 358 | "output_type": "execute_result" 359 | } 360 | ], 361 | "source": [ 362 | "trainDf.isna().sum()" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": 13, 368 | "metadata": { 369 | "scrolled": true 370 | }, 371 | "outputs": [ 372 | { 373 | "data": { 374 | "text/plain": [ 375 | "PassengerId 0\n", 376 | "Pclass 0\n", 377 | "Name 0\n", 378 | "Sex 0\n", 379 | "Age 86\n", 380 | "SibSp 0\n", 381 | "Parch 0\n", 382 | "Ticket 0\n", 383 | "Fare 1\n", 384 | "Cabin 327\n", 385 | "Embarked 0\n", 386 | "dtype: int64" 387 | ] 388 | }, 389 | "execution_count": 13, 390 | "metadata": {}, 391 | "output_type": "execute_result" 392 | } 393 | ], 394 | "source": [ 395 | "testDf.isnull().sum()" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": 14, 401 | "metadata": {}, 402 | "outputs": [ 403 | { 404 | "data": { 405 | "text/html": [ 406 | "
\n", 407 | "\n", 420 | "\n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
616211Icard, Miss. Ameliefemale38.00011357280.0B28NaN
82983011Stone, Mrs. George Nelson (Martha Evelyn)female62.00011357280.0B28NaN
\n", 471 | "
" 472 | ], 473 | "text/plain": [ 474 | " PassengerId Survived Pclass Name \\\n", 475 | "61 62 1 1 Icard, Miss. Amelie \n", 476 | "829 830 1 1 Stone, Mrs. George Nelson (Martha Evelyn) \n", 477 | "\n", 478 | " Sex Age SibSp Parch Ticket Fare Cabin Embarked \n", 479 | "61 female 38.0 0 0 113572 80.0 B28 NaN \n", 480 | "829 female 62.0 0 0 113572 80.0 B28 NaN " 481 | ] 482 | }, 483 | "execution_count": 14, 484 | "metadata": {}, 485 | "output_type": "execute_result" 486 | } 487 | ], 488 | "source": [ 489 | "trainDf[trainDf[\"Embarked\"].isnull()]" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": 15, 495 | "metadata": {}, 496 | "outputs": [ 497 | { 498 | "data": { 499 | "text/html": [ 500 | "
\n", 501 | "\n", 514 | "\n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
\n", 535 | "
" 536 | ], 537 | "text/plain": [ 538 | "Empty DataFrame\n", 539 | "Columns: [PassengerId, Survived, Pclass, Name, Sex, Age, SibSp, Parch, Ticket, Fare, Cabin, Embarked]\n", 540 | "Index: []" 541 | ] 542 | }, 543 | "execution_count": 15, 544 | "metadata": {}, 545 | "output_type": "execute_result" 546 | } 547 | ], 548 | "source": [ 549 | "trainDf[\"Embarked\"] = trainDf[\"Embarked\"].fillna(\"C\")\n", 550 | "trainDf[trainDf[\"Embarked\"].isnull()]" 551 | ] 552 | }, 553 | { 554 | "cell_type": "code", 555 | "execution_count": 16, 556 | "metadata": {}, 557 | "outputs": [ 558 | { 559 | "data": { 560 | "text/plain": [ 561 | "PassengerId 0\n", 562 | "Survived 0\n", 563 | "Pclass 0\n", 564 | "Name 0\n", 565 | "Sex 0\n", 566 | "Age 177\n", 567 | "SibSp 0\n", 568 | "Parch 0\n", 569 | "Ticket 0\n", 570 | "Fare 0\n", 571 | "Cabin 687\n", 572 | "Embarked 0\n", 573 | "dtype: int64" 574 | ] 575 | }, 576 | "execution_count": 16, 577 | "metadata": {}, 578 | "output_type": "execute_result" 579 | } 580 | ], 581 | "source": [ 582 | "trainDf.isnull().sum()" 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": 17, 588 | "metadata": {}, 589 | "outputs": [ 590 | { 591 | "data": { 592 | "text/html": [ 593 | "
\n", 594 | "\n", 607 | "\n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | "
PassengerIdPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
15210443Storey, Mr. Thomasmale60.5003701NaNNaNS
\n", 641 | "
" 642 | ], 643 | "text/plain": [ 644 | " PassengerId Pclass Name Sex Age SibSp Parch Ticket \\\n", 645 | "152 1044 3 Storey, Mr. Thomas male 60.5 0 0 3701 \n", 646 | "\n", 647 | " Fare Cabin Embarked \n", 648 | "152 NaN NaN S " 649 | ] 650 | }, 651 | "execution_count": 17, 652 | "metadata": {}, 653 | "output_type": "execute_result" 654 | } 655 | ], 656 | "source": [ 657 | "testDf[testDf[\"Fare\"].isnull()]" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": 18, 663 | "metadata": {}, 664 | "outputs": [ 665 | { 666 | "data": { 667 | "text/html": [ 668 | "
\n", 669 | "\n", 682 | "\n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | "
PassengerIdPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
\n", 702 | "
" 703 | ], 704 | "text/plain": [ 705 | "Empty DataFrame\n", 706 | "Columns: [PassengerId, Pclass, Name, Sex, Age, SibSp, Parch, Ticket, Fare, Cabin, Embarked]\n", 707 | "Index: []" 708 | ] 709 | }, 710 | "execution_count": 18, 711 | "metadata": {}, 712 | "output_type": "execute_result" 713 | } 714 | ], 715 | "source": [ 716 | "testDf[\"Fare\"] = testDf[\"Fare\"].fillna(np.mean(testDf[testDf[\"Pclass\"] == 3][\"Fare\"]))\n", 717 | "testDf[testDf[\"Fare\"].isnull()]" 718 | ] 719 | }, 720 | { 721 | "cell_type": "code", 722 | "execution_count": 19, 723 | "metadata": {}, 724 | "outputs": [ 725 | { 726 | "data": { 727 | "text/plain": [ 728 | "PassengerId 0\n", 729 | "Survived 0\n", 730 | "Pclass 0\n", 731 | "Name 0\n", 732 | "Sex 0\n", 733 | "SibSp 0\n", 734 | "Parch 0\n", 735 | "Ticket 0\n", 736 | "Fare 0\n", 737 | "Embarked 0\n", 738 | "dtype: int64" 739 | ] 740 | }, 741 | "execution_count": 19, 742 | "metadata": {}, 743 | "output_type": "execute_result" 744 | } 745 | ], 746 | "source": [ 747 | "trainDf.dropna(axis = 1, how = 'any', inplace = True)\n", 748 | "trainDf.isnull().sum()" 749 | ] 750 | }, 751 | { 752 | "cell_type": "code", 753 | "execution_count": 20, 754 | "metadata": {}, 755 | "outputs": [ 756 | { 757 | "data": { 758 | "text/plain": [ 759 | "PassengerId 0\n", 760 | "Pclass 0\n", 761 | "Name 0\n", 762 | "Sex 0\n", 763 | "SibSp 0\n", 764 | "Parch 0\n", 765 | "Ticket 0\n", 766 | "Fare 0\n", 767 | "Embarked 0\n", 768 | "dtype: int64" 769 | ] 770 | }, 771 | "execution_count": 20, 772 | "metadata": {}, 773 | "output_type": "execute_result" 774 | } 775 | ], 776 | "source": [ 777 | "testDf.dropna(axis = 1, how = 'any', inplace = True)\n", 778 | "testDf.isnull().sum()" 779 | ] 780 | }, 781 | { 782 | "cell_type": "code", 783 | "execution_count": 21, 784 | "metadata": {}, 785 | "outputs": [ 786 | { 787 | "data": { 788 | "text/html": [ 789 | "
\n", 790 | "\n", 803 | "\n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | "
PassengerIdSurvivedPclassNameSexSibSpParchTicketFareEmbarked
0103Braund, Mr. Owen Harrismale10A/5 211717.2500S
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female10PC 1759971.2833C
2313Heikkinen, Miss. Lainafemale00STON/O2. 31012827.9250S
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female1011380353.1000S
4503Allen, Mr. William Henrymale003734508.0500S
5603Moran, Mr. Jamesmale003308778.4583Q
6701McCarthy, Mr. Timothy Jmale001746351.8625S
7803Palsson, Master. Gosta Leonardmale3134990921.0750S
8913Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)female0234774211.1333S
91012Nasser, Mrs. Nicholas (Adele Achem)female1023773630.0708C
\n", 952 | "
" 953 | ], 954 | "text/plain": [ 955 | " PassengerId Survived Pclass \\\n", 956 | "0 1 0 3 \n", 957 | "1 2 1 1 \n", 958 | "2 3 1 3 \n", 959 | "3 4 1 1 \n", 960 | "4 5 0 3 \n", 961 | "5 6 0 3 \n", 962 | "6 7 0 1 \n", 963 | "7 8 0 3 \n", 964 | "8 9 1 3 \n", 965 | "9 10 1 2 \n", 966 | "\n", 967 | " Name Sex SibSp Parch \\\n", 968 | "0 Braund, Mr. Owen Harris male 1 0 \n", 969 | "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 1 0 \n", 970 | "2 Heikkinen, Miss. Laina female 0 0 \n", 971 | "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 1 0 \n", 972 | "4 Allen, Mr. William Henry male 0 0 \n", 973 | "5 Moran, Mr. James male 0 0 \n", 974 | "6 McCarthy, Mr. Timothy J male 0 0 \n", 975 | "7 Palsson, Master. Gosta Leonard male 3 1 \n", 976 | "8 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 0 2 \n", 977 | "9 Nasser, Mrs. Nicholas (Adele Achem) female 1 0 \n", 978 | "\n", 979 | " Ticket Fare Embarked \n", 980 | "0 A/5 21171 7.2500 S \n", 981 | "1 PC 17599 71.2833 C \n", 982 | "2 STON/O2. 
3101282 7.9250 S \n", 983 | "3 113803 53.1000 S \n", 984 | "4 373450 8.0500 S \n", 985 | "5 330877 8.4583 Q \n", 986 | "6 17463 51.8625 S \n", 987 | "7 349909 21.0750 S \n", 988 | "8 347742 11.1333 S \n", 989 | "9 237736 30.0708 C " 990 | ] 991 | }, 992 | "execution_count": 21, 993 | "metadata": {}, 994 | "output_type": "execute_result" 995 | } 996 | ], 997 | "source": [ 998 | "trainDf.head(10)" 999 | ] 1000 | }, 1001 | { 1002 | "cell_type": "code", 1003 | "execution_count": 22, 1004 | "metadata": {}, 1005 | "outputs": [], 1006 | "source": [ 1007 | "# Set PassengerId as the index and drop the Name and Ticket columns (identifier-like text, not used as features)\n", 1008 | "trainDf.set_index('PassengerId', inplace=True)\n", 1009 | "trainDf.drop(['Name', 'Ticket'], axis=1, inplace=True)" 1010 | ] 1011 | }, 1012 | { 1013 | "cell_type": "code", 1014 | "execution_count": 23, 1015 | "metadata": {}, 1016 | "outputs": [], 1017 | "source": [ 1018 | "# Set PassengerId as the index and drop the Name and Ticket columns (identifier-like text, not used as features)\n", 1019 | "testDf.set_index('PassengerId', inplace=True)\n", 1020 | "testDf.drop(['Name', 'Ticket'], axis=1, inplace=True)" 1021 | ] 1022 | }, 1023 | { 1024 | "cell_type": "code", 1025 | "execution_count": 24, 1026 | "metadata": {}, 1027 | "outputs": [ 1028 | { 1029 | "data": { 1030 | "text/html": [ 1031 | "
\n", 1032 | "\n", 1045 | "\n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | "
SurvivedPclassSexSibSpParchFareEmbarked
PassengerId
103male107.2500S
211female1071.2833C
313female007.9250S
411female1053.1000S
503male008.0500S
........................
88702male0013.0000S
88811female0030.0000S
88903female1223.4500S
89011male0030.0000C
89103male007.7500Q
\n", 1181 | "

891 rows × 7 columns

\n", 1182 | "
" 1183 | ], 1184 | "text/plain": [ 1185 | " Survived Pclass Sex SibSp Parch Fare Embarked\n", 1186 | "PassengerId \n", 1187 | "1 0 3 male 1 0 7.2500 S\n", 1188 | "2 1 1 female 1 0 71.2833 C\n", 1189 | "3 1 3 female 0 0 7.9250 S\n", 1190 | "4 1 1 female 1 0 53.1000 S\n", 1191 | "5 0 3 male 0 0 8.0500 S\n", 1192 | "... ... ... ... ... ... ... ...\n", 1193 | "887 0 2 male 0 0 13.0000 S\n", 1194 | "888 1 1 female 0 0 30.0000 S\n", 1195 | "889 0 3 female 1 2 23.4500 S\n", 1196 | "890 1 1 male 0 0 30.0000 C\n", 1197 | "891 0 3 male 0 0 7.7500 Q\n", 1198 | "\n", 1199 | "[891 rows x 7 columns]" 1200 | ] 1201 | }, 1202 | "execution_count": 24, 1203 | "metadata": {}, 1204 | "output_type": "execute_result" 1205 | } 1206 | ], 1207 | "source": [ 1208 | "trainDf" 1209 | ] 1210 | }, 1211 | { 1212 | "cell_type": "markdown", 1213 | "metadata": {}, 1214 | "source": [ 1215 | "# Exploratory data analysis (EDA)" 1216 | ] 1217 | }, 1218 | { 1219 | "cell_type": "code", 1220 | "execution_count": 25, 1221 | "metadata": { 1222 | "scrolled": true 1223 | }, 1224 | "outputs": [ 1225 | { 1226 | "data": { 1227 | "text/plain": [ 1228 | "" 1229 | ] 1230 | }, 1231 | "execution_count": 25, 1232 | "metadata": {}, 1233 | "output_type": "execute_result" 1234 | }, 1235 | { 1236 | "data": { 1237 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAEICAYAAACwDehOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAARfklEQVR4nO3de7DndV3H8ecLFjVRuchpo11sTRkduihwQkybMSkvVEKmaKksuNPWRGTZjWzKxkvZTQMrakfUxTGV0GRzLNsQb3nJs4EgoLGRxm4gxxtgjDjouz9+n/34Yzm7/Hbhe36Hc56Pmd/8vt/P9/P7/N5n9rv72u/nezmpKiRJAjhg2gVIkpYOQ0GS1BkKkqTOUJAkdYaCJKkzFCRJ3aChkOTQJBcn+UySa5M8McnhSbYmua69H9b6Jsl5SbYnuTLJcUPWJkm6uwx5n0KSzcCHq+oNSR4APBh4GfDlqnpNknOAw6rqt5OcDJwNnAw8ATi3qp6wt/GPOOKIWrdu3WD1S9JytG3bti9W1cxC2wYLhSSHAFcA31tjX5Lks8BTqurGJEcCH6iqxyT527b8tt377ek7Zmdna25ubpD6JWm5SrKtqmYX2jbk9NEjgXngTUkuT/KGJAcDq8f+ob8JWN2W1wA3jH1+R2u7iyQbk8wlmZufnx+wfElaeYYMhVXAccD5VXUs8H/AOeMd2hHEPh2qVNWmqpqtqtmZmQWPfiRJ+2nIUNgB7KiqT7T1ixmFxBfatBHt/ea2fSdw1Njn17Y2SdIiGSwUquom4IYkj2lNJwHXAFuA9a1tPXBJW94CnN6uQjoRuGVv5xMkSfe9VQOPfzbw1nbl0fXAmYyC6KIkG4DPA6e1vu9ldOXRduD21leStIgGDYWqugJY6Az3SQv0LeCsIeuRJO2ddzRLkjpDQZLUGQqSpG7oE81L3vG/eeG0S9AStO1PT592CdJUeKQgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkbNBSSfC7JVUmuSDLX2g5PsjXJde39sNaeJOcl2Z7kyiTHDVmbJOnuFuNI4Uer6vFVNdvWzwEuraqjgUvbOsAzgaPbayNw/iLUJkkaM43po1OAzW15M3DqWPuFNfJx4NAkR06hPklasYYOhQL+Jcm2JBtb2+qqurEt3wSsbstrgBvGPrujtd1Fko1J5pLMzc/PD1W3JK1IqwYe/8lVtTPJdwJbk3xmfGNVVZLalwGrahOwCWB2dnafPitJ2rtBjxSqamd7vxn4B+AE4Au7poXa+82t+07gqLGPr21tkqRFMlgoJDk4yUN3LQNPAz4NbAHWt27rgUva8hbg9HYV0onALWPTTJKkRTDk9NFq4B+S7Pqev6uqf07ySeCiJBuAzwOntf7vBU4GtgO3A2cOWJskaQGDhUJVXQ88boH2LwEnLdBewFlD1SNJumfe0SxJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSusFDIcmBSS5P8p62/sgkn0iyPck7kjygtT+wrW9v29cNXZsk6a4W40jhJcC1Y+t/DLyuqh4NfAXY0No3AF9p7a9r/SRJi2jQUEiyFvgJ4A1tPcBTgYtbl83AqW35lLZO235S6y9JWiRDHyn8BfBbwLfa+sOBr1bVnW19B7CmLa8BbgBo229p/e8iycYkc0nm5ufnByxdklaewUIhyU8CN1fVtvty3KraVFWzVTU7MzNzXw4tSSveqgHHfhLwrCQnAw8CHgacCxyaZFU7GlgL7Gz9dwJHATuSrAIOAb40YH2SpN0MdqR
QVb9TVWurah3wfOD9VfUC4DLgOa3beuCStrylrdO2v7+qaqj6JEl3N437FH4beGmS7YzOGVzQ2i8AHt7aXwqcM4XaJGlFG3L6qKuqDwAfaMvXAycs0OfrwHMXox5J0sK8o1mS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkbqJQSHLpJG2SpPu3vf6O5iQPAh4MHJHkMCBt08OANQPXJklaZHsNBeAXgF8FvhvYxrdD4VbgL4crS5I0DXsNhao6Fzg3ydlV9fpFqkmSNCX3dKQAQFW9PskPA+vGP1NVFw5UlyRpCiYKhSRvAR4FXAF8szUXYChI0jIyUSgAs8AxVVVDFiNJmq5JQ+HTwHcBNw5Yi6Qx//OKH5h2CVqCHvH7Vw06/qShcARwTZJ/B+7Y1VhVzxqkKknSVEwaCn+wrwO3exw+BDywfc/FVfXyJI8E3g48nNFlri+qqm8keSCjcxTHA18CnldVn9vX75Uk7b9Jrz764H6MfQfw1Kr6WpKDgI8k+SfgpcDrqurtSf4G2ACc396/UlWPTvJ84I+B5+3H90qS9tOkj7m4Lcmt7fX1JN9McuvePlMjX2urB7VXAU8FLm7tm4FT2/IpbZ22/aQku26WkyQtgolCoaoeWlUPq6qHAd8B/Azw1/f0uSQHJrkCuBnYCvwX8NWqurN12cG3H5exBrihfd+dwC2Mpph2H3Njkrkkc/Pz85OUL0ma0D4/JbUdAbwbePoEfb9ZVY8H1gInAI/d1+9bYMxNVTVbVbMzMzP3djhJ0phJb1579tjqAYzuW/j6pF9SVV9NchnwRODQJKva0cBaYGfrthM4CtiRZBVwCKMTzpKkRTLpkcJPjb2eDtzG6BzAHiWZSXJoW/4O4MeBa4HLgOe0buuBS9rylrZO2/5+b5aTpMU16dVHZ+7H2EcCm5McyCh8Lqqq9yS5Bnh7klcBlwMXtP4XAG9Jsh34MvD8/fhOSdK9MOn00Vrg9cCTWtOHgZdU1Y49faaqrgSOXaD9ekbnF3Zv/zrw3EnqkSQNY9Lpozcxmt757vb6x9YmSVpGJg2Fmap6U1Xd2V5vBrz0R5KWmUlD4UtJXtjuOzgwyQvxyiBJWnYmDYUXA6cBNzF6UupzgDMGqkmSNCWTPhDvFcD6qvoKQJLDgT9jFBaSpGVi0iOFH9wVCABV9WUWuLJIknT/NmkoHJDksF0r7Uhh0qMMSdL9xKT/sP858LEkf9/Wnwu8epiSJEnTMukdzRcmmWP02GuAZ1fVNcOVJUmahomngFoIGASStIzt86OzJUnLl6EgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSd1goZDkqCSXJbkmydVJXtLaD0+yNcl17f2w1p4k5yXZnuTKJMcNVZskaWFDHincCfx6VR0DnAicleQY4Bzg0qo6Gri0rQM8Ezi6vTYC5w9YmyRpAYOFQlXdWFX/0ZZvA64F1gCnAJtbt83AqW35FODCGvk4cGiSI4eqT5J0d4tyTiHJOuBY4BPA6qq6sW26CVjdltcAN4x9bEdr232sjUnmkszNz88PV7QkrUCDh0KShwDvBH61qm4d31ZVBdS+jFdVm6pqtqpmZ2Zm7sNKJUmDhkKSgxgFwlur6l2t+Qu7poXa+82tfSdw1NjH17Y2SdIiGfLqowAXANdW1WvHNm0B1rfl9cAlY+2nt6uQTgRuGZtmkiQtglUDjv0k4EXAVUmuaG0vA14DXJRkA/B54LS27b3AycB24HbgzAFrkyQtYLBQqKqPANnD5pMW6F/AWUPVI0m6Z97RLEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTM
UJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJK6wUIhyRuT3Jzk02NthyfZmuS69n5Ya0+S85JsT3JlkuOGqkuStGdDHim8GXjGbm3nAJdW1dHApW0d4JnA0e21ETh/wLokSXswWChU1YeAL+/WfAqwuS1vBk4da7+wRj4OHJrkyKFqkyQtbLHPKayuqhvb8k3A6ra8BrhhrN+O1nY3STYmmUsyNz8/P1ylkrQCTe1Ec1UVUPvxuU1VNVtVszMzMwNUJkkr12KHwhd2TQu195tb+07gqLF+a1ubJGkRLXYobAHWt+X1wCVj7ae3q5BOBG4Zm2aSJC2SVUMNnORtwFOAI5LsAF4OvAa4KMkG4PPAaa37e4GTge3A7cCZQ9UlSdqzwUKhqn52D5tOWqBvAWcNVYskaTLe0SxJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSuiUVCkmekeSzSbYnOWfa9UjSSrNkQiHJgcBfAc8EjgF+Nskx061KklaWJRMKwAnA9qq6vqq+AbwdOGXKNUnSirJq2gWMWQPcMLa+A3jC7p2SbAQ2ttWvJfnsItS2UhwBfHHaRSwF+bP10y5Bd+W+ucvLc1+M8j172rCUQmEiVbUJ2DTtOpajJHNVNTvtOqTduW8unqU0fbQTOGpsfW1rkyQtkqUUCp8Ejk7yyCQPAJ4PbJlyTZK0oiyZ6aOqujPJLwPvAw4E3lhVV0+5rJXGaTktVe6biyRVNe0aJElLxFKaPpIkTZmhIEnqDAUtKMlTkrxn2nVoeUjyK0muTfLWgcb/gyS/McTYK82SOdEsaVn7JeDHqmrHtAvR3nmksIwlWZfkM0nenOQ/k7w1yY8l+bck1yU5ob0+luTyJB9N8pgFxjk4yRuT/Hvr5+NHNLEkfwN8L/BPSX53oX0pyRlJ3p1ka5LPJfnlJC9tfT6e5PDW7+eTfDLJp5K8M8mDF/i+RyX55yTbknw4yWMX9ye+fzMUlr9HA38OPLa9fg54MvAbwMuAzwA/UlXHAr8P/OECY/wu8P6qOgH4UeBPkxy8CLVrGaiqXwT+l9G+czB73pe+H3g28EPAq4Hb2375MeD01uddVfVDVfU44FpgwwJfuQk4u6qOZ7Sf//UwP9ny5PTR8vffVXUVQJKrgUurqpJcBawDDgE2JzkaKOCgBcZ4GvCssTnbBwGPYPSXUtoXe9qXAC6rqtuA25LcAvxja78K+MG2/P1JXgUcCjyE0X1NXZKHAD8M/H3SnxH0wAF+jmXLUFj+7hhb/tbY+rcY/fm/ktFfxp9Osg74wAJjBPiZqvLhg7q3FtyXkjyBe95XAd4MnFpVn0pyBvCU3cY/APhqVT3+Pq16BXH6SIfw7WdMnbGHPu8Dzk77r1eSYxehLi1P93ZfeihwY5KDgBfsvrGqbgX+O8lz2/hJ8rh7WfOKYijoT4A/SnI5ez5yfCWjaaUr2xTUKxerOC0793Zf+j3gE8C/MToftpAXABuSfAq4Gn8vyz7xMReSpM4jBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoK0n9pzfK5OcmWSK9oNWNL9mnc0S/shyROBnwSOq6o7khwBPGDKZUn3mkcK0v45EvhiVd0BUFVfrKr/TXJ8kg+2J3S+L8mRSQ5J8tldT6BN8rYkPz/V6qU98OY1aT+0B699BHgw8K/AO4CPAh8ETqmq+STPA55eVS9O8uPAK4BzgTOq6hlTKl3aK6ePpP1QVV9LcjzwI4weAf0O4FWMHv+8tT3a50DgxtZ/a3sez18BPotHS5ZHCtJ9IMlzgLOAB1XVExfYfgCjo4h1wMm7HmcuLTWeU5D2Q5LHtN9BscvjGf1+iZl2Epo
kByX5vrb919r2nwPe1J7yKS05HilI+6FNHb2e0S97uRPYDmwE1gLnMXok+SrgL4APAe8GTqiq25K8Fritql6+6IVL98BQkCR1Th9JkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6v4fFZiFomQxq2MAAAAASUVORK5CYII=\n", 1238 | "text/plain": [ 1239 | "
" 1240 | ] 1241 | }, 1242 | "metadata": { 1243 | "needs_background": "light" 1244 | }, 1245 | "output_type": "display_data" 1246 | } 1247 | ], 1248 | "source": [ 1249 | "sns.countplot(trainDf['Sex'], data=trainDf, )" 1250 | ] 1251 | }, 1252 | { 1253 | "cell_type": "code", 1254 | "execution_count": 26, 1255 | "metadata": {}, 1256 | "outputs": [ 1257 | { 1258 | "data": { 1259 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAULklEQVR4nO3df7RV5X3n8fdXQEnEHxFuMsolXlJNEwlI6tVqGbOoaaNhHMxkkKtJCVRSMlFTOpl2xrGZaExsbZo2teoki7VMwIbFD7UTLasxy5hoWzXaew1KQK0kJuFSUgENEbPwB37nj7N5cosXOcDd91wu79daZ7H3s5/znO9Zbvi4fz0nMhNJkgAOa3UBkqShw1CQJBWGgiSpMBQkSYWhIEkqRra6gAMxbty47OjoaHUZknRQ6enp2ZKZbf1tO6hDoaOjg+7u7laXIUkHlYj48Z62efpIklQYCpKkwlCQJBUH9TUFSRpoL7/8Mr29vezYsaPVpRyw0aNH097ezqhRo5p+j6EgSX309vZy1FFH0dHRQUS0upz9lpls3bqV3t5eJk6c2PT7PH0kSX3s2LGDsWPHHtSBABARjB07dp+PeAwFSdrNwR4Iu+zP9zAUJEmFoSBJTbj22muZNGkSU6ZMYerUqTz00EMHPOadd97JddddNwDVwZgxYwZknEP+QvNpf3RLq0sYMnr+/COtLkEakh588EFWrVrFI488whFHHMGWLVt46aWXmnrvK6+8wsiR/f9TO3PmTGbOnDmQpR4wjxQkaS82bdrEuHHjOOKIIwAYN24cJ5xwAh0dHWzZsgWA7u5upk+fDsDVV1/NnDlzmDZtGnPmzOHMM89k7dq1Zbzp06fT3d3N4sWLufzyy9m2bRsnnngir776KgAvvPACEyZM4OWXX+YHP/gB5513Hqeddhpnn302TzzxBABPP/00Z511FpMnT+ZTn/rUgH1XQ0GS9uJ973sfGzZs4O1vfzuXXnop9913317fs27dOr71rW+xbNkyurq6WLlyJdAImE2bNtHZ2Vn6HnPMMUydOrWMu2rVKs4991xGjRrFggULuOGGG+jp6eELX/gCl156KQALFy7k4x//OGvWrOH4448fsO9qKEjSXowZM4aenh4WLVpEW1sbXV1dLF68+HXfM3PmTN7whjcAMHv2bG677TYAVq5cyaxZs17Tv6urixUrVgCwfPlyurq62L59Ow888AAXXnghU6dO5WMf+xibNm0C4P777+fiiy8GYM6cOQP1Vb2mIEnNGDFiBNOnT2f69OlMnjyZJUuWMHLkyHLKZ/fnAY488siyPH78eMaOHctjjz3GihUr+PKXv/ya8WfOnMmVV17Js88+S09PD+eccw4vvPACxx57LKtXr+63pjpunfVIQZL24sknn+Spp54q66tXr+bEE0+ko6ODnp4eAG6//fbXHaOrq4vPf/7zbNu2jSlTprxm+5gxYzj99NNZuHAh559/PiNGjODoo49m4sSJ3HrrrUDjKeVHH30UgGnTprF8+XIAli5dOiDfEwwFSdqr7du3M3fuXE455RSmTJnCunXruPrqq7nqqqtYuHAhnZ2djBgx4nXHmDVrFsuXL2f27Nl
77NPV1cXXvvY1urq6StvSpUu5+eabOfXUU5k0aRJ33HEHANdffz033XQTkydPZuPGjQPzRYHIzAEbbLB1dnbmgf7Ijrek/pK3pErw+OOP8853vrPVZQyY/r5PRPRkZmd//T1SkCQVhoIkqTAUJEmFoSBJKgwFSVJhKEiSCp9olqR9NNC3sjdzO/hdd93FwoUL2blzJx/96Ee54oorBrSGXTxSkKQhbufOnVx22WV84xvfYN26dSxbtox169bV8lmGgiQNcQ8//DAnnXQSb3vb2zj88MO56KKLypPNA81QkKQhbuPGjUyYMKGst7e3D+jUFn0ZCpKkwlCQpCFu/PjxbNiwoaz39vYyfvz4Wj7LUJCkIe7000/nqaee4umnn+all15i+fLltf22s7ekStI+GuwZhUeOHMmNN97Iueeey86dO7nkkkuYNGlSPZ9Vy6iSpAE1Y8YMZsyYUfvnePpIklQYCpKkwlCQJBWGgiSpqD0UImJERHwvIlZV6xMj4qGIWB8RKyLi8Kr9iGp9fbW9o+7aJEn/3mAcKSwEHu+z/mfAFzPzJOA5YH7VPh94rmr/YtVPkjSIar0lNSLagf8EXAt8MiICOAf4UNVlCXA18CXggmoZ4DbgxoiIzMw6a5SkffWTayYP6Hhv/fSavfa55JJLWLVqFW9+85v5/ve/P6Cf31fdRwp/BfxP4NVqfSzws8x8pVrvBXY9qz0e2ABQbd9W9f93ImJBRHRHRPfmzZtrLF2Sho558+Zx11131f45tYVCRJwPPJOZPQM5bmYuyszOzOxsa2sbyKElach6z3vew3HHHVf759R5+mgaMDMiZgCjgaOB64FjI2JkdTTQDuya/3UjMAHojYiRwDHA1hrrkyTtprYjhcz835nZnpkdwEXAtzPzw8B3gFlVt7nArl+KuLNap9r+ba8nSNLgasVzCv+LxkXn9TSuGdxctd8MjK3aPwnU8wOkkqQ9GpQJ8TLzXuDeavmHwBn99NkBXDgY9UiS+ucsqZK0j5q5hXSgXXzxxdx7771s2bKF9vZ2PvOZzzB//vy9v3EfGQqSdBBYtmzZoHyOcx9JkgpDQZJUGAqStJvhcjf8/nwPQ0GS+hg9ejRbt2496IMhM9m6dSujR4/ep/d5oVmS+mhvb6e3t5fhMLfa6NGjaW9v36f3GAqS1MeoUaOYOHFiq8toGU8fSZIKQ0GSVBgKkqTCUJAkFYaCJKkwFCRJhaEgSSoMBUlSYShIkgpDQZJUGAqSpMJQkCQVhoIkqTAUJEmFoSBJKgwFSVJhKEiSCkNBklQYCpKkwlCQJBWGgiSpMBQkSYWhIEkqDAVJUmEoSJIKQ0GSVBgKkqSitlCIiNER8XBEPBoRayPiM1X7xIh4KCLWR8SKiDi8aj+iWl9fbe+oqzZJUv/qPFJ4ETgnM08FpgLnRcSZwJ8BX8zMk4DngPlV//nAc1X7F6t+kqRBVFsoZMP2anVU9UrgHOC2qn0J8IFq+YJqnWr7eyMi6qpPkvRatV5TiIgREbEaeAa4G/gB8LPMfKXq0guMr5bHAxsAqu3bgLH9jLkgIrojonvz5s11li9Jh5xaQyEzd2bmVKAdOAN4xwCMuSgzOzOzs62t7UCHkyT1MSh3H2Xmz4DvAGcBx0bEyGpTO7CxWt4ITACoth8DbB2M+iRJDXXefdQWEcdWy28Afht4nEY4zKq6zQXuqJbvrNaptn87M7Ou+iRJrzVy71322/HAkogYQSN8VmbmqohYByyPiM8B3wNurvrfDPxNRKwHngUuqrE2SVI/aguFzHwMeHc/7T+kcX1h9/YdwIV11SNJ2jufaJYkFYaCJKkwFCRJhaEgSSoMBUlSYShIkgpDQZJUNBUKEXFPM22SpIPb6z68FhGjgTcC4yLiTcCuqayP5pezm0qShom9PdH8MeAPgBOAHn4ZCj8HbqyvLElSK7xuKGTm9cD1EfGJzLxhkGqSJLVIU3MfZeYNEfEbQEff92TmLTXVJUl
qgaZCISL+BvgVYDWws2pOwFCQpGGk2VlSO4FT/H0DSRremn1O4fvAf6izEElS6zV7pDAOWBcRDwMv7mrMzJm1VCVJaolmQ+HqOouQJA0Nzd59dF/dhUiSWq/Zu4+ep3G3EcDhwCjghcw8uq7CJEmDr9kjhaN2LUdEABcAZ9ZVlCSpNfZ5ltRs+Dpw7sCXI0lqpWZPH32wz+phNJ5b2FFLRZKklmn27qP/3Gf5FeBHNE4hSZKGkWavKfxu3YVIklqv2dNH7cANwLSq6R+BhZnZW1dhkrTLT66Z3OoShoy3fnpNreM3e6H5q8CdNH5X4QTg76o2SdIw0mwotGXmVzPzleq1GGirsS5JUgs0GwpbI+J3ImJE9fodYGudhUmSBl+zoXAJMBv4KbAJmAXMq6kmSVKLNHtL6jXA3Mx8DiAijgO+QCMsJEnDRLNHClN2BQJAZj4LvLuekiRJrdJsKBwWEW/atVIdKTR7lCFJOkg0+w/7XwAPRsSt1fqFwLX1lCRJapVmn2i+JSK6gXOqpg9m5rr6ypIktULTp4CqEDAIJGkY2+eps5sVERMi4jsRsS4i1kbEwqr9uIi4OyKeqv58U9UeEfHXEbE+Ih6LiF+rqzZJUv9qCwUas6n+j8w8hcYP8lwWEacAVwD3ZObJwD3VOsD7gZOr1wLgSzXWJknqR22hkJmbMvORavl54HFgPI0pt5dU3ZYAH6iWLwBuqX7E57vAsRFxfF31SZJeq84jhSIiOmg81/AQ8JbM3FRt+inwlmp5PLChz9t6q7bdx1oQEd0R0b158+b6ipakQ1DtoRARY4DbgT/IzJ/33ZaZCeS+jJeZizKzMzM729qck0+SBlKtoRARo2gEwtLM/Nuq+d92nRaq/nymat8ITOjz9vaqTZI0SOq8+yiAm4HHM/Mv+2y6E5hbLc8F7ujT/pHqLqQzgW19TjNJkgZBnVNVTAPmAGsiYnXVdiVwHbAyIuYDP6Yx+yrA3wMzgPXALwB/AlSSBlltoZCZ/wTEHja/t5/+CVxWVz2SpL0blLuPJEkHB0NBklQYCpKkwlCQJBWGgiSpMBQkSYWhIEkqDAVJUmEoSJIKQ0GSVBgKkqTCUJAkFYaCJKkwFCRJhaEgSSoMBUlSYShIkgpDQZJUGAqSpMJQkCQVhoIkqTAUJEmFoSBJKgwFSVJhKEiSCkNBklQYCpKkwlCQJBWGgiSpMBQkSYWhIEkqDAVJUmEoSJIKQ0GSVBgKkqRiZF0DR8RXgPOBZzLzXVXbccAKoAP4ETA7M5+LiACuB2YAvwDmZeYjddWm/v3kmsmtLmHIeOun17S6BKkl6jxSWAyct1vbFcA9mXkycE+1DvB+4OTqtQD4Uo11SZL2oLZQyMx/AJ7drfkCYEm1vAT4QJ/2W7Lhu8CxEXF8XbVJkvo32NcU3pKZm6rlnwJvqZbHAxv69Out2iRJg6hlF5ozM4Hc1/dFxIKI6I6I7s2bN9dQmSQdugY7FP5t12mh6s9nqvaNwIQ+/dqrttfIzEWZ2ZmZnW1tbbUWK0mHmsEOhTuBudXyXOCOPu0fiYYzgW19TjNJkgZJnbekLgOmA+Miohe4CrgOWBkR84EfA7Or7n9P43bU9TRuSf3duuqSJO1ZbaGQmRfvYdN7++mbwGV11SJJao5PNEuSCkNBklQYCpKkorZrCpIOzGl/dEurSxgy/t9Rra7g0OGRgiSpMBQkSYWhIEkqDAVJUmEoSJIKQ0GSVBgKkqTCUJAkFYaCJKkwFCRJhaEgSSoMBUlSYShIkgpDQZJUGAqSpMJQkCQVhoIkqTAUJEmFoSBJKgwFSVJhKEiSCkNBklQYCpKkwlCQJBWGgiSpMBQkSYWhIEkqDAVJUmEoSJIKQ0GSVBgKkqTCUJAkFUMqFCLivIh4MiLWR8QVra5Hkg41QyYUImIEcBPwfuAU4OKIOKW1VUnSoWXIhAJwBrA+M3+YmS8By4ELWlyTJB1SRra6gD7GAxv6rPcCv757p4h
YACyoVrdHxJODUNsh4UQYB2xpdR1DwlXR6grUh/tmHwOzb564pw1DKRSakpmLgEWtrmM4iojuzOxsdR3S7tw3B89QOn20EZjQZ729apMkDZKhFAr/DJwcERMj4nDgIuDOFtckSYeUIXP6KDNfiYjLgW8CI4CvZObaFpd1qPG0nIYq981BEpnZ6hokSUPEUDp9JElqMUNBklQYCupXREyPiFWtrkPDQ0T8fkQ8HhFLaxr/6oj4wzrGPtQMmQvNkoa1S4HfyszeVhei1+eRwjAWER0R8URELI6If4mIpRHxWxFxf0Q8FRFnVK8HI+J7EfFARPxqP+McGRFfiYiHq35OP6KmRcSXgbcB34iIP+5vX4qIeRHx9Yi4OyJ+FBGXR8Qnqz7fjYjjqn6/FxH/HBGPRsTtEfHGfj7vVyLirojoiYh/jIh3DO43PrgZCsPfScBfAO+oXh8C/iPwh8CVwBPA2Zn5buDTwJ/0M8YfA9/OzDOA3wT+PCKOHITaNQxk5n8D/pXGvnMke96X3gV8EDgduBb4RbVfPgh8pOrzt5l5emaeCjwOzO/nIxcBn8jM02js5/+3nm82PHn6aPh7OjPXAETEWuCezMyIWAN0AMcASyLiZCCBUf2M8T5gZp9ztqOBt9L4Syntiz3tSwDfyczngecjYhvwd1X7GmBKtfyuiPgccCwwhsZzTUVEjAF+A7g1oswRdEQN32PYMhSGvxf7LL/aZ/1VGv/9P0vjL+N/iYgO4N5+xgjgv2amkw/qQPW7L0XEr7P3fRVgMfCBzHw0IuYB03cb/zDgZ5k5dUCrPoR4+kjH8Ms5pubtoc83gU9E9b9eEfHuQahLw9OB7ktHAZsiYhTw4d03ZubPgacj4sJq/IiIUw+w5kOKoaDPA38aEd9jz0eOn6VxWumx6hTUZwerOA07B7ov/R/gIeB+GtfD+vNhYH5EPAqsxd9l2SdOcyFJKjxSkCQVhoIkqTAUJEmFoSBJKgwFSVJhKEj7qZrHZ21EPBYRq6sHsKSDmk80S/shIs4Czgd+LTNfjIhxwOEtLks6YB4pSPvneGBLZr4IkJlbMvNfI+K0iLivmqHzmxFxfEQcExFP7pqBNiKWRcTvtbR6aQ98eE3aD9XEa/8EvBH4FrACeAC4D7ggMzdHRBdwbmZeEhG/DVwDXA/My8zzWlS69Lo8fSTth8zcHhGnAWfTmAJ6BfA5GtM/311N7TMC2FT1v7uaj+cmwLl4NGR5pCANgIiYBVwGjM7Ms/rZfhiNo4gOYMau6cylocZrCtJ+iIhfrX6DYpepNH5foq26CE1EjIqISdX2/15t/xDw1WqWT2nI8UhB2g/VqaMbaPzYyyvAemAB0A78NY0pyUcCfwX8A/B14IzMfD4i/hJ4PjOvGvTCpb0wFCRJhaePJEmFoSBJKgwFSVJhKEiSCkNBklQYCpKkwlCQJBX/Hxg+KR5WewrDAAAAAElFTkSuQmCC\n", 1260 | "text/plain": [ 1261 | "
" 1262 | ] 1263 | }, 1264 | "metadata": { 1265 | "needs_background": "light" 1266 | }, 1267 | "output_type": "display_data" 1268 | } 1269 | ], 1270 | "source": [ 1271 | "sns.countplot('Sex',hue='Survived',data=trainDf)\n", 1272 | "plt.show()" 1273 | ] 1274 | }, 1275 | { 1276 | "cell_type": "code", 1277 | "execution_count": 27, 1278 | "metadata": {}, 1279 | "outputs": [ 1280 | { 1281 | "data": { 1282 | "text/plain": [ 1283 | "" 1284 | ] 1285 | }, 1286 | "execution_count": 27, 1287 | "metadata": {}, 1288 | "output_type": "execute_result" 1289 | }, 1290 | { 1291 | "data": { 1292 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAPZElEQVR4nO3dfazeZX3H8fcHCrKJ8mA7hm23stloWFTUM8SHZE72IMxZ4gQxOio26ZawReOcY1syH+IWzZwOp7I1Qy1kExDn6IxTCQ9zGlBPJ/I4Z8dgtII9PCo6nWXf/XGuc3Eop+Vu6e/cp5z3K7lzX7/rd/1+9/cmzflw/Z7uVBWSJAEcMO4CJEkLh6EgSeoMBUlSZyhIkjpDQZLULRl3AY/F0qVLa9WqVeMuQ5L2K5s3b76rqpbNtW6/DoVVq1YxOTk57jIkab+S5LZdrfPwkSSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKnbr+9o3hee9/vnj7sELUCb//yMcZcgjYUzBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpGzQUktya5Pok1yaZbH1HJrksyTfb+xGtP0k+kGRLkuuSPHfI2iRJjzQfM4VfrKrjqmqiLZ8NXF5Vq4HL2zLAScDq9loPnDsPtUmSZhnH4aM1wMbW3gicMqv//Jp2DXB4kqPHUJ8kLVpDh0IBn0+yOcn61ndUVd3R2ncCR7X2cuD2WdtubX0Pk2R9kskkk1NTU0PVLUmL0tA/x/niqtqW5CeAy5L8++yVVVVJak92WFUbgA0AExMTe7StJGn3Bp0pVNW29r4d+BRwPPDtmcNC7X17G74NWDlr8xWtT5I0TwYLhSRPTPKkmTbwK8ANwCZgbRu2Fri0tTcBZ7SrkE4A7p91mEmSNA+GPHx0FPCpJDOf8/dV9dkkXwUuTrIOuA04rY3/DHAysAX4PnDmgLVJkuYwWChU1S3As+fovxs4cY7+As4aqh5J0qPzjmZJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYOHQpIDk3wtyafb8jFJvpxkS5KLkhzc+p/Qlre09auGrk2S9HDzMVN4I3DzrOX3AO+vqqcB9wLrWv864N7W//42TpI0jwYNhSQrgF8D/rYtB3gpcEkbshE4pbXXtGXa+hPbeEnSPBl6pvCXwFuB/2vLTwHuq6odbXkrsLy1lwO3A7T197fxD
5NkfZLJJJNTU1MDli5Ji89goZDk5cD2qtq8L/dbVRuqaqKqJpYtW7Yvdy1Ji96SAff9IuAVSU4GDgGeDJwDHJ5kSZsNrAC2tfHbgJXA1iRLgMOAuwesT5K0k8FmClX1h1W1oqpWAacDV1TVa4ErgVe1YWuBS1t7U1umrb+iqmqo+iRJjzSO+xT+AHhzki1MnzM4r/WfBzyl9b8ZOHsMtUnSojbk4aOuqq4CrmrtW4Dj5xjzA+DU+ahHkjQ372iWJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpG5efmRH0p7773c+c9wlaAH6qT+5ftD9O1OQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqRupFBIcvkofZKk/dtu72hOcgjw48DSJEcAaaueDCwfuDZJ0jx7tMdc/BbwJuCpwGYeCoXvAB8crixJ0jjs9vBRVZ1TVccAb6mqn6mqY9rr2VW121BIckiSryT5epIbk7yj9R+T5MtJtiS5KMnBrf8JbXlLW79qX31JSdJoRnogXlX9VZIXAqtmb1NV5+9msx8CL62qB5IcBHwxyT8DbwbeX1UXJvlrYB1wbnu/t6qeluR04D3Aq/fmS0mS9s6oJ5ovAN4LvBj4+faa2N02Ne2BtnhQexXwUuCS1r8ROKW117Rl2voTk8wcrpIkzYNRH509ARxbVbUnO09yINPnIp4GfAj4T+C+qtrRhmzloRPWy4HbAapqR5L7gacAd+3JZ0qS9t6o9yncAPzknu68qh6squOAFcDxwDP2dB87S7I+yWSSyampqce6O0nSLKPOFJYCNyX5CtPnCgCoqleMsnFV3ZfkSuAFwOFJlrTZwgpgWxu2DVgJbE2yBDgMuHuOfW0ANgBMTEzs0cxFkrR7o4bC2/d0x0mWAT9qgfBjwC8zffL4SuBVwIXAWuDStsmmtnx1W3/Fnh6ukiQ9NqNeffQve7Hvo4GN7bzCAcDFVfXpJDcBFyZ5F/A14Lw2/jzggiRbgHuA0/fiMyVJj8FIoZDku0xfOQRwMNNXEn2vqp68q22q6jrgOXP038L0+YWd+38AnDpKPZKkYYw6U3jSTLtdJroGOGGooiRJ47HHT0lt9x/8I/Cr+74cSdI4jXr46JWzFg9g+r6FHwxSkSRpbEa9+ujXZ7V3ALcyfQhJkvQ4Muo5hTOHLkSSNH6jPvtoRZJPJdneXp9MsmLo4iRJ82vUE80fZfrmsqe21z+1PknS48ioobCsqj5aVTva62PAsgHrkiSNwaihcHeS1yU5sL1exxzPJZIk7d9GDYU3AKcBdwJ3MP1sotcPVJMkaUxGvST1ncDaqroXIMmRTP/ozhuGKkySNP9GnSk8ayYQAKrqHuZ4rpEkaf82aigckOSImYU2Uxh1liFJ2k+M+of9L4Crk3yiLZ8K/OkwJUmSxmXUO5rPTzIJvLR1vbKqbhquLEnSOIx8CKiFgEEgSY9je/zobEnS45ehIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUDRYKSVYmuTLJTUluTPLG1n9kksuSfLO9H9H6k+QDSbYkuS7Jc4eqTZI0tyFnCjuA36uqY4ETgLOSHAucDVxeVauBy9sywEnA6vZaD5w7YG2SpDkMFgpVdUdV/Vtrfxe4GVgOrAE2tmEbgVNaew1wfk27Bjg8ydFD1SdJeqR5OaeQZBXwHODLwFFVdUdbdSdwVGsvB26ftdnW1rfzvtYnmUwyOTU1NVzRkrQIDR4KSQ4FPgm8qaq+M3tdVRVQe7K/qtpQVRNVNbFs2bJ9WKkkadBQSHIQ04Hwd1X1D6372zOHhdr79ta/DVg5a/MVrU+SNE+GvPoowHnAzVX1vlmrNgFrW3stcOms/jPaVUgnAPfPOswkSZoHSwbc94uA3wSuT3Jt6/sj4
N3AxUnWAbcBp7V1nwFOBrYA3wfOHLA2SdIcBguFqvoikF2sPnGO8QWcNVQ9kqRH5x3NkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoGC4UkH0myPckNs/qOTHJZkm+29yNaf5J8IMmWJNclee5QdUmSdm3ImcLHgJft1Hc2cHlVrQYub8sAJwGr22s9cO6AdUmSdmGwUKiqLwD37NS9BtjY2huBU2b1n1/TrgEOT3L0ULVJkuY23+cUjqqqO1r7TuCo1l4O3D5r3NbW9whJ1ieZTDI5NTU1XKWStAiN7URzVRVQe7HdhqqaqKqJZcuWDVCZJC1e8x0K3545LNTet7f+bcDKWeNWtD5J0jya71DYBKxt7bXApbP6z2hXIZ0A3D/rMJMkaZ4sGWrHST4OvARYmmQr8Dbg3cDFSdYBtwGnteGfAU4GtgDfB84cqi5J0q4NFgpV9ZpdrDpxjrEFnDVULZKk0XhHsySpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJK6BRUKSV6W5BtJtiQ5e9z1SNJis2BCIcmBwIeAk4BjgdckOXa8VUnS4rJgQgE4HthSVbdU1f8CFwJrxlyTJC0qS8ZdwCzLgdtnLW8Fnr/zoCTrgfVt8YEk35iH2haLpcBd4y5iIch71467BD2c/zZnvC37Yi8/vasVCykURlJVG4AN467j8SjJZFVNjLsOaWf+25w/C+nw0TZg5azlFa1PkjRPFlIofBVYneSYJAcDpwObxlyTJC0qC+bwUVXtSPI7wOeAA4GPVNWNYy5rsfGwnBYq/23Ok1TVuGuQJC0QC+nwkSRpzAwFSVJnKMjHi2jBSvKRJNuT3DDuWhYLQ2GR8/EiWuA+Brxs3EUsJoaCfLyIFqyq+gJwz7jrWEwMBc31eJHlY6pF0pgZCpKkzlCQjxeR1BkK8vEikjpDYZGrqh3AzONFbgYu9vEiWiiSfBy4Gnh6kq1J1o27psc7H3MhSeqcKUiSOkNBktQZCpKkzlCQJHWGgiSpMxQkIMkfJ7kxyXVJrk3y/H2wz1fsq6fOJnlgX+xHejRekqpFL8kLgPcBL6mqHyZZChxcVd8aYdsl7V6PoWt8oKoOHfpzJGcKEhwN3FVVPwSoqruq6ltJbm0BQZKJJFe19tuTXJDkS8AFSa5J8nMzO0tyVRv/+iQfTHJYktuSHNDWPzHJ7UkOSvKzST6bZHOSf03yjDbmmCRXJ7k+ybvm+b+HFjFDQYLPAyuT/EeSDyf5hRG2ORb4pap6DXARcBpAkqOBo6tqcmZgVd0PXAvM7PflwOeq6kdM/yD971bV84C3AB9uY84Bzq2qZwJ3PNYvKI3KUNCiV1UPAM8D1gNTwEVJXv8om22qqv9p7YuBV7X2acAlc4y/CHh1a5/ePuNQ4IXAJ5JcC/wN07MWgBcBH2/tC/bk+0iPxZJxFyAtBFX1IHAVcFWS64G1wA4e+h+nQ3ba5Huztt2W5O4kz2L6D/9vz/ERm4A/S3Ik0wF0BfBE4L6qOm5XZe3dt5H2njMFLXpJnp5k9ayu44DbgFuZ/gMO8BuPspuLgLcCh1XVdTuvbLORrzJ9WOjTVfVgVX0H+K8kp7Y6kuTZbZMvMT2jAHjtHn8paS8ZChIcCmxMclOS65g+X/B24B3AOUkmgQcfZR+XMP1H/OLdjLkIeF17n/FaYF2SrwM38tBPob4ROKvNWvwlPM0bL0mVJHXOFCRJnaEgSeoMBUlSZyhIk
jpDQZLUGQqSpM5QkCR1/w/4gcjVwj04NgAAAABJRU5ErkJggg==\n", 1293 | "text/plain": [ 1294 | "
" 1295 | ] 1296 | }, 1297 | "metadata": { 1298 | "needs_background": "light" 1299 | }, 1300 | "output_type": "display_data" 1301 | } 1302 | ], 1303 | "source": [ 1304 | "sns.countplot('Survived', data=trainDf)" 1305 | ] 1306 | }, 1307 | { 1308 | "cell_type": "code", 1309 | "execution_count": 28, 1310 | "metadata": {}, 1311 | "outputs": [ 1312 | { 1313 | "data": { 1314 | "text/plain": [ 1315 | "" 1316 | ] 1317 | }, 1318 | "execution_count": 28, 1319 | "metadata": {}, 1320 | "output_type": "execute_result" 1321 | }, 1322 | { 1323 | "data": { 1324 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAPzklEQVR4nO3dfcyddX3H8ffHFsQHtDzcY9h21mnjgk5RG2SyLA7iBrgJMWA0CpV1VhM0GPcgM5lTo4tGNyZsMWmGUoxPKDI6Q9xIQVEj6F3lGY0dEWkD9OZRmToH++6P+9eft+UunEqvc9re71dycn7X9/qdi++dk/Dp9XhSVUiSBPCESTcgSdpzGAqSpM5QkCR1hoIkqTMUJEnd4kk38HgceuihtWLFikm3IUl7lU2bNt1dVVPzrdurQ2HFihVMT09Pug1J2qskuW1n6zx8JEnqDAVJUjdoKCT5YZIbklybZLrVDk5yeZIftPeDWj1Jzk2yOcn1SV48ZG+SpEcax57CH1bVkVW1qi2fDWysqpXAxrYMcAKwsr3WAh8bQ2+SpDkmcfjoJGB9G68HTp5Tv7BmXQ0sSXL4BPqTpAVr6FAo4D+TbEqyttUOq6o72vhO4LA2XgrcPuezW1rtVyRZm2Q6yfTMzMxQfUvSgjT0Jam/X1Vbk/wGcHmS781dWVWVZJce01pV64B1AKtWrfIRr5K0Gw26p1BVW9v7NuAS4Cjgru2Hhdr7tjZ9K7B8zseXtZokaUwGC4UkT0ly4PYx8EfAjcAGYHWbthq4tI03AKe3q5COBh6Yc5hJkjQGQx4+Ogy4JMn2/86nq+rLSb4NXJRkDXAb8Jo2/zLgRGAz8FPgjAF7kzQmx5x3zKRbWBC+8bZv7JbtDBYKVXUr8MJ56vcAx81TL+DMofqRJD0272iWJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYOHQpJFSb6b5Ett+VlJrkmyOcnnkuzf6k9sy5vb+hVD9yZJ+lXj2FM4C7hlzvKHgHOq6jnAfcCaVl8D3Nfq57R5kqQxGjQUkiwDXgn8a1sOcCzwhTZlPXByG5/Ulmnrj2vzJUljMvSewj8Bfw38X1s+BLi/qh5qy1uApW28FLgdoK1/oM3/FUnWJplOMj0zMzNg65K08AwWCkn+BNhWVZt253aral1VraqqVVNTU7tz05K04C0ecNvHAK9KciJwAPA04KPAkiSL297AMmBrm78VWA5sSbIYeDpwz4D9SZJ2MNieQlX9TVUtq6oVwGuBK6rq9cCVw
Clt2mrg0jbe0JZp66+oqhqqP0nSI03iPoV3Au9IspnZcwbnt/r5wCGt/g7g7An0JkkL2pCHj7qq+grwlTa+FThqnjk/B04dRz+SpPl5R7MkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpGywUkhyQ5FtJrktyU5L3tvqzklyTZHOSzyXZv9Wf2JY3t/UrhupNkjS/IfcU/gc4tqpeCBwJHJ/kaOBDwDlV9RzgPmBNm78GuK/Vz2nzJEljNFgo1KwH2+J+7VXAscAXWn09cHIbn9SWaeuPS5Kh+pMkPdKg5xSSLEpyLbANuBz4L+D+qnqoTdkCLG3jpcDtAG39A8Ah82xzbZLpJNMzMzNDti9JC86goVBVD1fVkcAy4Cjgd3bDNtdV1aqqWjU1NfV4NydJmmMsVx9V1f3AlcDvAUuSLG6rlgFb23grsBygrX86cM84+pMkzRry6qOpJEva+EnAK4BbmA2HU9q01cClbbyhLdPWX1FVNVR/kqRHWvzYU35thwPrkyxiNnwuqqovJbkZ+GyS9wPfBc5v888HPplkM3Av8NoBe5MkzWOwUKiq64EXzVO/ldnzCzvWfw6cOlQ/kqTH5h3NkqRupFBIsnGUmiRp7/aoh4+SHAA8GTg0yUHA9pvJnsYv7y+QJO0jHuucwpuBtwPPADbxy1D4MfDPw7UlSZqERw2Fqvoo8NEkb6uq88bUkyRpQka6+qiqzkvyMmDF3M9U1YUD9SVJmoCRQiHJJ4FnA9cCD7dyAYaCJO1DRr1PYRVwhHcYS9K+bdT7FG4EfnPIRiRJkzfqnsKhwM1JvsXsj+cAUFWvGqQrSdJEjBoK7xmyCUnSnmHUq4++OnQjkqTJG/Xqo58we7URwP7M/rTmf1fV04ZqTJI0fqPuKRy4fdx+N/kk4OihmpIkTcYuPyW1Zv0b8Me7vx1J0iSNevjo1XMWn8DsfQs/H6QjSdLEjHr10Z/OGT8E/JDZQ0iSpH3IqOcUzhi6EUnS5I36IzvLklySZFt7XZxk2dDNSZLGa9QTzZ8ANjD7uwrPAP691SRJ+5BRQ2Gqqj5RVQ+11wXA1IB9SZImYNRQuCfJG5Isaq83APcM2ZgkafxGDYU/A14D3AncAZwCvHGgniRJEzLqJanvA1ZX1X0ASQ4GPsJsWEiS9hGj7im8YHsgAFTVvcCLhmlJkjQpo4bCE5IctH2h7SmMupchSdpLjPo/9n8Avpnk8235VOADw7QkSZqUUe9ovjDJNHBsK726qm4eri1J0iSMfAiohYBBIEn7sF1+dLYkad+1YE4Wv+SvLpx0CwvCpg+fPukWJD0O7ilIkjpDQZLUDRYKSZYnuTLJzUluSnJWqx+c5PIkP2jvB7V6kpybZHOS65O8eKjeJEnzG3JP4SHgL6rqCOBo4MwkRwBnAxuraiWwsS0DnACsbK+1wMcG7E2SNI/BQqGq7qiq77TxT4BbgKXM/ozn+jZtPXByG58EXFizrgaWJDl8qP4kSY80lnMKSVYw+6yka4DDquqOtupO4LA2XgrcPudjW1ptx22tTTKdZHpmZma4piVpARo8FJI8FbgYeHtV/XjuuqoqoHZle1W1rqpWVdWqqSl/50eSdqdBQyHJfswGwqeq6outfNf2w0LtfVurbwWWz/n4slaTJI3JkFcfBTgfuKWq/nHOqg3A6jZeDVw6p356uwrpaOCBOYeZJEljMOQdzccApwE3JLm21d4FfBC4KMka4DZmf9EN4DLgRGAz8FPgjAF7kyTNY7BQqKqvA9nJ6uPmmV/AmUP1I0l6bN7RLEnqFswD8bR3+9H7fnfSLezzfuvdN0y6Be0B3FOQJHWGgiSpMxQkS
Z2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1A0WCkk+nmRbkhvn1A5OcnmSH7T3g1o9Sc5NsjnJ9UlePFRfkqSdG3JP4QLg+B1qZwMbq2olsLEtA5wArGyvtcDHBuxLkrQTg4VCVV0F3LtD+SRgfRuvB06eU7+wZl0NLEly+FC9SZLmN+5zCodV1R1tfCdwWBsvBW6fM29Lq0mSxmhiJ5qrqoDa1c8lWZtkOsn0zMzMAJ1J0sI17lC4a/thofa+rdW3AsvnzFvWao9QVeuqalVVrZqamhq0WUlaaMYdChuA1W28Grh0Tv30dhXS0cADcw4zSZLGZPFQG07yGeDlwKFJtgB/B3wQuCjJGuA24DVt+mXAicBm4KfAGUP1JUnaucFCoapet5NVx80zt4Azh+pFkjQa72iWJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKnbo0IhyfFJvp9kc5KzJ92PJC00e0woJFkE/AtwAnAE8LokR0y2K0laWPaYUACOAjZX1a1V9Qvgs8BJE+5JkhaUVNWkewAgySnA8VX15235NOClVfXWHeatBda2xecC3x9ro+N1KHD3pJvQr8Xvbu+2r39/z6yqqflWLB53J49XVa0D1k26j3FIMl1Vqybdh3ad393ebSF/f3vS4aOtwPI5y8taTZI0JntSKHwbWJnkWUn2B14LbJhwT5K0oOwxh4+q6qEkbwX+A1gEfLyqbppwW5O2IA6T7aP87vZuC/b722NONEuSJm9POnwkSZowQ0GS1BkKe6AkH0+yLcmNk+5FuybJ8iRXJrk5yU1Jzpp0TxpdkgOSfCvJde37e++kexo3zynsgZL8AfAgcGFVPX/S/Wh0SQ4HDq+q7yQ5ENgEnFxVN0+4NY0gSYCnVNWDSfYDvg6cVVVXT7i1sXFPYQ9UVVcB9066D+26qrqjqr7Txj8BbgGWTrYrjapmPdgW92uvBfUvZ0NBGkiSFcCLgGsm3Ip2QZJFSa4FtgGXV9WC+v4MBWkASZ4KXAy8vap+POl+NLqqeriqjmT2qQpHJVlQh3ANBWk3a8eiLwY+VVVfnHQ/+vVU1f3AlcDxE25lrAwFaTdqJyrPB26pqn+cdD/aNUmmkixp4ycBrwC+N9GmxsxQ2AMl+QzwTeC5SbYkWTPpnjSyY4DTgGOTXNteJ066KY3scODKJNcz+zy2y6vqSxPuaay8JFWS1LmnIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAeRZKH22WlNyb5fJInP8rc9yT5y3H2J+1uhoL06H5WVUe2p9X+AnjLpBuShmQoSKP7GvAcgCSnJ7m+PXf/kztOTPKmJN9u6y/evoeR5NS213Fdkqta7XntGf7Xtm2uHOtfJc3hzWvSo0jyYFU9NcliZp9n9GXgKuAS4GVVdXeSg6vq3iTvAR6sqo8kOaSq7mnbeD9wV1Wdl+QG4Piq2ppkSVXdn+Q84Oqq+lSS/YFFVfWzifzBWvDcU5Ae3ZPaY5SngR8x+1yjY4HPV9XdAFU1329fPD/J11oIvB54Xqt/A7ggyZuARa32TeBdSd4JPNNA0CQtnnQD0h7uZ+0xyt3sM+8e0wXM/uLadUneCLwcoKrekuSlwCuBTUleUlWfTnJNq12W5M1VdcXu+xOk0bmnIO26K4BTkxwCkOTgeeYcCNzRHqP9+u3FJM+uqmuq6t3ADLA8yW8Dt1bVucClwAsG/
wuknXBPQdpFVXVTkg8AX03yMPBd4I07TPtbZn9xbaa9H9jqH24nkgNsBK4D3gmcluR/gTuBvx/8j5B2whPNkqTOw0eSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSuv8HlYj/r/k4eJoAAAAASUVORK5CYII=\n", 1325 | "text/plain": [ 1326 | "
" 1327 | ] 1328 | }, 1329 | "metadata": { 1330 | "needs_background": "light" 1331 | }, 1332 | "output_type": "display_data" 1333 | } 1334 | ], 1335 | "source": [ 1336 | "sns.countplot(x='Pclass', data=trainDf)  # x by keyword: from seaborn 0.12 the only valid positional argument is `data`" 1337 | ] 1338 | }, 1339 | { 1340 | "cell_type": "code", 1341 | "execution_count": 29, 1342 | "metadata": {}, 1343 | "outputs": [ 1344 | { 1345 | "data": { 1346 | "image/png": "\n", 1347 | "text/plain": [ 1348 | "
" 1349 | ] 1350 | }, 1351 | "metadata": { 1352 | "needs_background": "light" 1353 | }, 1354 | "output_type": "display_data" 1355 | } 1356 | ], 1357 | "source": [ 1358 | "sns.countplot(x='Embarked', hue='Survived', data=trainDf)  # x by keyword: from seaborn 0.12 the only valid positional argument is `data`\n", 1359 | "plt.show()" 1360 | ] 1361 | }, 1362 | { 1363 | "cell_type": "code", 1364 | "execution_count": 30, 1365 | "metadata": {}, 1366 | "outputs": [ 1367 | { 1368 | "data": { 1369 | "image/png": "\n", 1370 | "text/plain": [ 1371 | "
" 1372 | ] 1373 | }, 1374 | "metadata": { 1375 | "needs_background": "light" 1376 | }, 1377 | "output_type": "display_data" 1378 | } 1379 | ], 1380 | "source": [ 1381 | "plt.figure(figsize=(16,9))\n", 1382 | "sns.heatmap(trainDf.select_dtypes(include=\"number\").corr(), annot=True, cmap=\"cubehelix\")  # numeric columns only: Sex/Embarked are still strings at this point and pandas >= 2.0 raises on them in corr()\n", 1383 | "plt.show()" 1384 | ] 1385 | }, 1386 | { 1387 | "cell_type": "code", 1388 | "execution_count": 31, 1389 | "metadata": {}, 1390 | "outputs": [ 1391 | { 1392 | "data": { 1393 | "text/plain": [ 1394 | "array([[,\n", 1395 | " ],\n", 1396 | " [,\n", 1397 | " ],\n", 1398 | " [, ]],\n", 1399 | " dtype=object)" 1400 | ] 1401 | }, 1402 | "execution_count": 31, 1403 | "metadata": {}, 1404 | "output_type": "execute_result" 1405 | }, 1406 | { 1407 | "data": { 1408 | "image/png": "\n", 1409 | "text/plain": [ 1410 | "
" 1411 | ] 1412 | }, 1413 | "metadata": { 1414 | "needs_background": "light" 1415 | }, 1416 | "output_type": "display_data" 1417 | } 1418 | ], 1419 | "source": [ 1420 | "trainDf.hist(figsize=(16,9))" 1421 | ] 1422 | }, 1423 | { 1424 | "cell_type": "markdown", 1425 | "metadata": {}, 1426 | "source": [ 1427 | "# Data type transformation" 1428 | ] 1429 | }, 1430 | { 1431 | "cell_type": "code", 1432 | "execution_count": 32, 1433 | "metadata": {}, 1434 | "outputs": [ 1435 | { 1436 | "data": { 1437 | "text/html": [ 1438 | "
\n", 1439 | "\n", 1452 | "\n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | " \n", 1515 | " \n", 1516 | " \n", 1517 | " \n", 1518 | " \n", 1519 | " \n", 1520 | " \n", 1521 | " \n", 1522 | " \n", 1523 | " \n", 1524 | " \n", 1525 | " \n", 1526 | " \n", 1527 | " \n", 1528 | " \n", 1529 | " \n", 1530 | " \n", 1531 | " \n", 1532 | " \n", 1533 | " \n", 1534 | " \n", 1535 | " \n", 1536 | " \n", 1537 | " \n", 1538 | " \n", 1539 | " \n", 1540 | " \n", 1541 | " \n", 1542 | " \n", 1543 | " \n", 1544 | " \n", 1545 | " \n", 1546 | " \n", 1547 | " \n", 1548 | " \n", 1549 | " \n", 1550 | " \n", 1551 | " \n", 1552 | " \n", 1553 | " \n", 1554 | " \n", 1555 | " \n", 1556 | " \n", 1557 | " \n", 1558 | " \n", 1559 | " \n", 1560 | " \n", 1561 | " \n", 1562 | " \n", 1563 | " \n", 1564 | " \n", 1565 | " \n", 1566 | " \n", 1567 | " \n", 1568 | " \n", 1569 | " \n", 1570 | " \n", 1571 | " \n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | "
SurvivedPclassSexSibSpParchFareEmbarked
PassengerId
103male107.2500S
211female1071.2833C
313female007.9250S
411female1053.1000S
503male008.0500S
603male008.4583Q
701male0051.8625S
803male3121.0750S
913female0211.1333S
1012female1030.0708C
\n", 1578 | "
" 1579 | ], 1580 | "text/plain": [ 1581 | " Survived Pclass Sex SibSp Parch Fare Embarked\n", 1582 | "PassengerId \n", 1583 | "1 0 3 male 1 0 7.2500 S\n", 1584 | "2 1 1 female 1 0 71.2833 C\n", 1585 | "3 1 3 female 0 0 7.9250 S\n", 1586 | "4 1 1 female 1 0 53.1000 S\n", 1587 | "5 0 3 male 0 0 8.0500 S\n", 1588 | "6 0 3 male 0 0 8.4583 Q\n", 1589 | "7 0 1 male 0 0 51.8625 S\n", 1590 | "8 0 3 male 3 1 21.0750 S\n", 1591 | "9 1 3 female 0 2 11.1333 S\n", 1592 | "10 1 2 female 1 0 30.0708 C" 1593 | ] 1594 | }, 1595 | "execution_count": 32, 1596 | "metadata": {}, 1597 | "output_type": "execute_result" 1598 | } 1599 | ], 1600 | "source": [ 1601 | "trainDf.head(10)" 1602 | ] 1603 | }, 1604 | { 1605 | "cell_type": "code", 1606 | "execution_count": 33, 1607 | "metadata": {}, 1608 | "outputs": [ 1609 | { 1610 | "data": { 1611 | "text/html": [ 1612 | "
\n", 1613 | "\n", 1626 | "\n", 1627 | " \n", 1628 | " \n", 1629 | " \n", 1630 | " \n", 1631 | " \n", 1632 | " \n", 1633 | " \n", 1634 | " \n", 1635 | " \n", 1636 | " \n", 1637 | " \n", 1638 | " \n", 1639 | " \n", 1640 | " \n", 1641 | " \n", 1642 | " \n", 1643 | " \n", 1644 | " \n", 1645 | " \n", 1646 | " \n", 1647 | " \n", 1648 | " \n", 1649 | " \n", 1650 | " \n", 1651 | " \n", 1652 | " \n", 1653 | " \n", 1654 | " \n", 1655 | " \n", 1656 | " \n", 1657 | " \n", 1658 | " \n", 1659 | " \n", 1660 | " \n", 1661 | " \n", 1662 | " \n", 1663 | " \n", 1664 | " \n", 1665 | " \n", 1666 | " \n", 1667 | " \n", 1668 | " \n", 1669 | " \n", 1670 | " \n", 1671 | " \n", 1672 | " \n", 1673 | " \n", 1674 | " \n", 1675 | " \n", 1676 | " \n", 1677 | " \n", 1678 | " \n", 1679 | " \n", 1680 | " \n", 1681 | " \n", 1682 | " \n", 1683 | " \n", 1684 | " \n", 1685 | " \n", 1686 | " \n", 1687 | " \n", 1688 | " \n", 1689 | " \n", 1690 | " \n", 1691 | " \n", 1692 | " \n", 1693 | " \n", 1694 | "
PclassSexSibSpParchFareEmbarked
PassengerId
8923male007.8292Q
8933female107.0000S
8942male009.6875Q
8953male008.6625S
8963female1112.2875S
\n", 1695 | "
" 1696 | ], 1697 | "text/plain": [ 1698 | " Pclass Sex SibSp Parch Fare Embarked\n", 1699 | "PassengerId \n", 1700 | "892 3 male 0 0 7.8292 Q\n", 1701 | "893 3 female 1 0 7.0000 S\n", 1702 | "894 2 male 0 0 9.6875 Q\n", 1703 | "895 3 male 0 0 8.6625 S\n", 1704 | "896 3 female 1 1 12.2875 S" 1705 | ] 1706 | }, 1707 | "execution_count": 33, 1708 | "metadata": {}, 1709 | "output_type": "execute_result" 1710 | } 1711 | ], 1712 | "source": [ 1713 | "testDf.head()" 1714 | ] 1715 | }, 1716 | { 1717 | "cell_type": "code", 1718 | "execution_count": 34, 1719 | "metadata": {}, 1720 | "outputs": [], 1721 | "source": [ 1722 | "sex_le, le = LabelEncoder(), LabelEncoder()  # one encoder per column; `le` keeps the Embarked mapping\n", 1723 | "# dtype transform of train dataset (the encoders are fitted on the training data only)\n", 1724 | "trainDf.Sex = sex_le.fit_transform(trainDf.Sex)\n", 1725 | "trainDf.Embarked = le.fit_transform(trainDf.Embarked)\n", 1726 | "\n", 1727 | "# dtype transform of test dataset: reuse the train-fitted encoders so the integer codes mean the same thing in both frames\n", 1728 | "testDf.Sex = sex_le.transform(testDf.Sex)\n", 1729 | "testDf.Embarked = le.transform(testDf.Embarked)" 1730 | ] 1731 | }, 1732 | { 1733 | "cell_type": "markdown", 1734 | "metadata": {}, 1735 | "source": [ 1736 | "# Feature Ranking" 1737 | ] 1738 | }, 1739 | { 1740 | "cell_type": "code", 1741 | "execution_count": 35, 1742 | "metadata": {}, 1743 | "outputs": [], 1744 | "source": [ 1745 | "# Extract the input variable and target variable\n", 1746 | "X = trainDf.drop('Survived', axis=1)\n", 1747 | "\n", 1748 | "Y = trainDf[['Survived']]\n", 1749 | "\n", 1750 | "# Store the column/feature names into a list \"colnames\"\n", 1751 | "colnames = list(trainDf.drop('Survived', axis=1))" 1752 | ] 1753 | }, 1754 | { 1755 | "cell_type": "code", 1756 | "execution_count": 36, 1757 | "metadata": {}, 1758 | "outputs": [], 1759 | "source": [ 1760 | "# Define dictionary to store our rankings\n", 1761 | "ranks = {}\n", 1762 | "# Helper that min-max scales order*scores, rounds to 2 decimals and returns a {feature name: score} dict (the caller stores it in `ranks`)\n", 1763 | "def ranking(ranks, names, order=1):\n", 1764 | " minmax = MinMaxScaler()\n", 1765 | " ranks =
minmax.fit_transform(order*np.array([ranks]).T).T[0]\n", 1766 | " ranks = map(lambda x: round(x,2), ranks)\n", 1767 | " return dict(zip(names, ranks))" 1768 | ] 1769 | }, 1770 | { 1771 | "cell_type": "code", 1772 | "execution_count": 37, 1773 | "metadata": {}, 1774 | "outputs": [], 1775 | "source": [ 1776 | "# Construct Recursive Feature Elimination ( RFE ) of the Logistic Regression model\n", 1777 | "lr = LogisticRegression(random_state= 42) #lr = LinearRegression(normalize=True)\n", 1778 | "lr.fit(X, Y.values.ravel())  # 1-D target: a column-vector y triggers DataConversionWarning\n", 1779 | "\n", 1780 | "#stop the search when only the last feature is left\n", 1781 | "rfe = RFE(lr, n_features_to_select=1, ) #verbose =3\n", 1782 | "rfe.fit(X, Y.values.ravel())\n", 1783 | "ranks[\"RFE\"] = ranking(list(map(float, rfe.ranking_)), colnames, order=-1)" 1784 | ] 1785 | }, 1786 | { 1787 | "cell_type": "code", 1788 | "execution_count": 38, 1789 | "metadata": {}, 1790 | "outputs": [ 1791 | { 1792 | "data": { 1793 | "text/plain": [ 1794 | "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", 1795 | " intercept_scaling=1, l1_ratio=None, max_iter=100,\n", 1796 | " multi_class='auto', n_jobs=None, penalty='l2',\n", 1797 | " random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n", 1798 | " warm_start=False)" 1799 | ] 1800 | }, 1801 | "execution_count": 38, 1802 | "metadata": {}, 1803 | "output_type": "execute_result" 1804 | } 1805 | ], 1806 | "source": [ 1807 | "# Using Logistic Regression\n", 1808 | "lr = LogisticRegression()\n", 1809 | "lr.fit(X, Y.values.ravel())\n", 1810 | "\n", 1811 | "#ranks[\"LogReg\"] = ranking(np.abs(lr.coef_), colnames)" 1812 | ] 1813 | }, 1814 | { 1815 | "cell_type": "code", 1816 | "execution_count": 39, 1817 | "metadata": {}, 1818 | "outputs": [], 1819 | "source": [ 1820 | "# Decision Tree Classifier (seeded: features are permuted at every split, so an unseeded tree can rank tied features differently on each run)\n", 1821 | "\n", 1822 | "dt = DecisionTreeClassifier(random_state=42)\n", 1823 | "dt.fit(X,Y)\n", 1824 | "ranks[\"DT\"] = ranking(dt.feature_importances_, colnames)" 1825 | ] 1826 | }, 1827 | { 1828 | "cell_type": "code", 1829 |
# %%
# Random Forest Classifier
# Seeded so the importances are reproducible across runs.
rf = RandomForestClassifier(n_jobs=-1, n_estimators=9, random_state=42)
# np.ravel flattens the single-column target to the 1-D shape expected for y
rf.fit(X, np.ravel(Y))
ranks["RF"] = ranking(rf.feature_importances_, colnames)

# %% [markdown]
# # Creating the Feature Ranking Matrix
# We combine the scores from the methods above and print them as a matrix for convenient viewing:

# %%
# Average only the real ranking methods. "Mean" is excluded explicitly so that
# re-running this cell does not fold the previous mean into the new one.
scoring_methods = [method for method in ranks if method != "Mean"]

# Mean score per feature across all ranking methods
r = {
    name: round(np.mean([ranks[method][name] for method in scoring_methods]), 2)
    for name in colnames
}

ranks["Mean"] = r
methods = sorted(scoring_methods)
methods.append("Mean")

print("\t%s" % "\t".join(methods))
for name in colnames:
    print("%s\t%s" % (name, "\t".join(map(str, [ranks[method][name] for method in methods]))))

# %%
# Put the mean scores into a Pandas dataframe
meanplot = pd.DataFrame(list(r.items()), columns=['Feature', 'Mean Ranking'])

# Sort the dataframe, best feature first
meanplot = meanplot.sort_values('Mean Ranking', ascending=False)
# %%
# Let's plot the ranking of the features.
# `catplot` is the current name of the deprecated `factorplot`, and `height`
# replaces its old `size` argument. The trailing ';' hides the FacetGrid repr.
sns.catplot(x="Mean Ranking", y="Feature", data=meanplot, kind="bar",
            height=5, aspect=1.9, palette='coolwarm');
\n", 1941 | "\n", 1954 | "\n", 1955 | " \n", 1956 | " \n", 1957 | " \n", 1958 | " \n", 1959 | " \n", 1960 | " \n", 1961 | " \n", 1962 | " \n", 1963 | " \n", 1964 | " \n", 1965 | " \n", 1966 | " \n", 1967 | " \n", 1968 | " \n", 1969 | " \n", 1970 | " \n", 1971 | " \n", 1972 | " \n", 1973 | " \n", 1974 | " \n", 1975 | " \n", 1976 | " \n", 1977 | " \n", 1978 | " \n", 1979 | " \n", 1980 | " \n", 1981 | " \n", 1982 | " \n", 1983 | " \n", 1984 | " \n", 1985 | " \n", 1986 | " \n", 1987 | " \n", 1988 | " \n", 1989 | " \n", 1990 | " \n", 1991 | " \n", 1992 | " \n", 1993 | " \n", 1994 | "
FeatureMean Ranking
1Sex0.90
4Fare0.64
0Pclass0.42
5Embarked0.20
2SibSp0.18
3Parch0.11
\n", 1995 | "
# %%
# Show the features ordered from highest to lowest mean ranking
meanplot = meanplot.sort_values(by='Mean Ranking', ascending=False)
meanplot

# %%
# Keep only the features whose mean ranking reaches the 0.20 cut-off
keep_mask = meanplot['Mean Ranking'] >= 0.20
columnName = meanplot.loc[keep_mask, 'Feature'].tolist()
columnName

# %% [markdown]
# # Predictive Modeling

# %%
# Restrict both datasets to the selected features; the target stays a
# single-column frame
X, y = trainDf[columnName], trainDf[['Survived']]

testDf = testDf[columnName]

# %%
# Hold out 30% of the labelled data for evaluation (70:30 split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# %% [markdown]
# ### Utility functions
# %%
def classifier_report(ModelName, model_object):
    """Fit a classifier on the training split and report its hold-out metrics.

    Reads the notebook-level ``X_train``, ``X_test``, ``y_train`` and
    ``y_test`` variables, so the train/test split cell must run first.

    Parameters
    ----------
    ModelName : str
        Label used in the printed header and in the plot title.
    model_object : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is fitted in place.

    Returns
    -------
    None
        The classification report is printed and the confusion matrix is
        drawn as an annotated heatmap.
    """
    # Flatten the single-column target to the 1-D shape estimators expect
    model_object.fit(X_train, np.ravel(y_train))
    y_test_pred = model_object.predict(X_test)

    print(ModelName, "Classifier Report:")
    print("\n", metrics.classification_report(y_test, y_test_pred))

    # Compute confusion matrix
    print("\n\nConfusion_matrix: \n")
    cnf_matrix = metrics.confusion_matrix(y_test, y_test_pred)

    ax = plt.subplot()
    # Draw on the axes created above; the cells hold integer counts, so 'd'
    # shows them without a spurious decimal.
    sns.heatmap(cnf_matrix, annot=True, ax=ax, fmt='d', cmap='Blues', linewidths=0.5)
    # confusion_matrix puts true labels on rows and predicted labels on columns
    ax.set(xlabel="Predicted label", ylabel="True label",
           title="%s confusion matrix" % ModelName)
"iVBORw0KGgoAAAANSUhEUgAAAWAAAAD4CAYAAADSIzzWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVj0lEQVR4nO3deXhV9bXG8e9KQsSAJCIkKDiA4ly8thRE1FpnqQq21qK1MmjjjFwr4gxXq7de+1ir1iGKc51qragdrA+tM4NUrSBgpaASCiQaSJgzrftHjhgJSU4OJ+eXvXk/PvvJ2fvsYUXjm1/WHo65OyIiknlZoQsQEdlWKYBFRAJRAIuIBKIAFhEJRAEsIhJITgaOocssRCRZtrU72P7gi5POnPXv3bXVx9samQhgtj/44kwcRiJi/Xt3AbCiqiZwJdKRFHXrFLqEjFMLQkTixbKSn1rbldmDZlZmZnMbLbvVzBaY2Qdm9gczK2j03lVmttDMPjKz41vbvwJYROIlKzv5qXUPAydstuwV4EB3HwD8C7gKwMz2B0YCByS2udvMWjyIAlhE4sUs+akV7v46ULHZsr+6e21idgbQJ/F6OPCUu29098XAQmBQS/tXAItIvLShBWFmxWY2u9FU3MajjQX+nHjdG1jS6L3SxLJmZeQknIhIxiQxsv2Su5cAJakdxq4BaoHfprI9KIBFJG6SOLm21YcwGw2cBBztXz3RbCmwa6PV+iSWNUstCBGJlzT2gLe8ezsBuAI4xd3XNXrrBWCkmW1nZn2B/sCslvalEbCIxEtyVzckxcyeBI4EephZKTCJhqsetgNesYYQn+Hu57v7h2b2DDCPhtbERe5e19L+FcAiEi9pbEG4+xlbWDylhfVvAm5Kdv8KYBGJlxRbCyEogEUkXjJwEi5dFMAiEi8KYBGRQLLTdxKuvSmARSRe1AMWEQlELQgRkUA0AhYRCUQjYBGRQDQCFhEJJI23Irc3BbCIxItaECIigagFISISiEbAIiKBKIBFRALRSTgRkUDUAxYRCUQtCBGRQDQCFhEJwxTAIiJhKIBFRAKxLAWwiEgQGgGLiASiABYRCUQBLCISSnTyVwEsIvGiEbCISCBZWboTTkQkCI2ARURCiU7+KoBFJF40AhYRCUQBLCISiG5FFhEJRCNgEZFAFMAiIoEogEVEAlEAi4iEEp38JTr37ImIJCErKyvpqTVm9qCZlZnZ3EbLupvZK2b2ceLrjonlZmZ3mNlCM/vAzL7Zaq1b9Z2KiHQwZpb0lISHgRM2W3YlMM3d+wPTEvMAJwL9E1MxcE9rO1cAi0i8WBumVrj760DFZouHA48kXj8CjGi0/FFvMAMoMLOdW9q/esApunfSjznxiAMpr1jNwB/eDMDN40cw7IgDqa6pY3Hp5xRPepzKNes5avC+3DjuFHI75VBdU8vVtz/Pa+/8q8k+d+yWx2O3jGX3Xbrz6X8qOOuKKaxavT7T35qkycaNG7mkeBQ1NdXU1dZx5NHHMva8i7l58jW8/95sunbpCsBVk26i/z77Ntn+zy9N5dEH7wPg7LHnceJJwzNaf1S15SScmRXTMFr9Uom7l7SyWZG7L0u8Xg4UJV73BpY0Wq80sWwZzVAAp+ixF2dw79Ov8cCNZ29aNm3GAq678wXq6ur5+bjhTBh7HNfeMZUvVq3htPH3say8kv333JkX776IPY+/tsk+Lx9zLK/O+ohfPvQKl485lsvHNGwv0ZSbm8vt9zxIXl4etbU1XHTu2Qw+9HAALhz3M448+rhmt62qrOTh++/h/kefxgzO/cmPOOyII9mhW36myo+stgRwImxbC9yWtncz81S3b7UFYWb7mtnERHP5jsTr/VI9YFy89e6/qahc97Vl02YsoK6uHoBZcxbTu6gAgH9+VMqy8koA5v17GZ2360Rup6a/+046cgCPvzgTgMdfnMnJ3x3Qjt+BtDczIy8vD4Da2lpqa2uTDodZM95i4OAhdMvPZ4du+Qw
cPISZ099qz3JjI8094C1Z8WVrIfG1LLF8KbBro/X6JJY1q8UANrOJwFM0dEtmJSYDnjSzK1vadlt39vAhvPzWvCbLTz3mv3h/wRKqa2qbvFe40w4s/7wKgOWfV1G40w7tXqe0r7q6Osae+QOGH3cEAwcPYf8DG36p3n/3HYw+41TuvO0Wqqurm2xXXraCwqJem+YLC4soL1uRsbqjzLIs6SlFLwCjEq9HAVMbLT87cTXEIUBlo1bFFrXWgjgHOMDdaxovNLPbgA+BX2xpo8Z9lfvuu6+VQ8TPFeccT11dPU/96Z2vLd+vXy9+Pm44J134m6T24yn/YSMdRXZ2Ng8+8XtWr67i2gmXsmjhxxRfPJ6ddupBTU0Nt948mScemcLon14QutTYSOeNGGb2JHAk0MPMSoFJNOTeM2Z2DvApcHpi9T8Bw4CFwDpgTGv7b60FUQ/ssoXlOyfe2yJ3L3H3ge4+sLi4uLnVYumskwcz7IgDGX3Nw19b3ruwgKdvK+bc6x5jcennW9y27IvV9OrRDYBePbpRXrG6vcuVDNlhh24c/K1BzJz+Jj169MTMyM3NZdjJI5g/b06T9XsWFlG2Yvmm+bKyFfQsLGqynjSVzhaEu5/h7ju7eyd37+PuU9z9C3c/2t37u/sx7l6RWNfd/SJ339Pdv+Hus1vbf2sBPB6YZmZ/NrOSxPQXGq59u7T1fxXblmMP3Y/LRh/DaePvY/2Gr/5oyO+6Pc/deT7X3TGV6f9c1Oz2f3xtDmedPBhoCPKXXv2g3WuW9rNqZQWrVze0lDZu2MDsWdPZfY++fP55OQDuzhuv/o2+/fo32XbQIUN5Z+bbrK6qZHVVJe/MfJtBhwzNaP1RZZb8FJp5K3/nmlkWMIiGyymgoan8jrvXJXkM3/7gi1OvsIN65H9Hc/i3+tOjoCtlFVXceO+fmDDmOLbLzeGLyrUAzJrzCeNueoqJ5x7PhLHHsfCz8k3bn3zBXZSvXMPd15/JA8++ybvzPqN7fhcev2Usu+68I58tq+CsKx5kZdW65kqIrPXv3QXAiqqaVtaMtn9//BE3T76Guvo6vN757jHHM/qnF3DpBWNZtXIluLPX3vvws6smkZeXx4J5c5n63DNMvPYGAP74wnM8/tD9APxkTDHDTjk15LfT7oq6dYI03Ejcf8Jfkm7efXzrCUFjuNUAToNYBrCkblsJYGmbdAXwPhNfTjrUPrrl+KABrOuARSRWOkJrIVkKYBGJlSx9JJGISBgaAYuIBKIHsouIBBKh/FUAi0i8JPOg9Y5CASwisaIRsIhIIOoBi4gEEqH8VQCLSLxoBCwiEkiE8lcBLCLxojvhREQCUQtCRCSQCOWvAlhE4kUjYBGRQCKUvwpgEYkXnYQTEQlELQgRkUAUwCIigUQofxXAIhIvGgGLiAQSofxVAItIvOgqCBGRQLIiNARWAItIrEQofxXAIhIvOgknIhJIhFrACmARiRedhBMRCcRQAIuIBBGhAbACWETiRSfhREQCiVD+KoBFJF50I4aISCBRugoiK3QBIiLpZJb81Pq+7L/N7EMzm2tmT5pZZzPra2YzzWyhmT1tZrmp1qoAFpFYyTJLemqJmfUGxgED3f1AIBsYCdwC/Mrd9wJWAuekXGuqG4qIdETWhikJOcD2ZpYD5AHLgKOAZxPvPwKMSLVWBbCIxIqZJT21xN2XAr8EPqMheCuBfwCr3L02sVop0DvVWhXAIhIrWZb8ZGbFZja70VT85X7MbEdgONAX2AXoApyQzlp1FYSIxEpbroJw9xKgpJm3jwEWu3s5gJk9BwwFCswsJzEK7gMsTbnWVDcUEemI0tWCoKH1cIiZ5VnDykcD84C/A6cl1hkFTE21VgWwiMRKW1oQLXH3mTScbHsXmENDXpYAE4HLzGwhsBMwJdVa1YIQkVhJ57Mg3H0SMGmzxYuAQenYvwJYRGIlOvfBKYBFJGayI3QrsgJYRGJFj6MUEQkkQvmrABaReNHjKDe
z/r27MnEYiZiibp1ClyAxFKH81QhYROJFPeDNbKhtfR3ZdnRO/NSNeGB22EKkQ3n+3IFp2U+2AlhEJIwIXYWmABaReFEAi4gEoh6wiEggGgGLiAQSoQGwAlhE4iUnQgmsABaRWIlQ/iqARSRedCuyiEggEcpfBbCIxIuughARCUQPZBcRCSRC+asAFpF4sQh9KpwCWERiRSNgEZFAFMAiIoHoYTwiIoFkZ4WuIHkKYBGJFd0JJyISiHrAIiKBRGgArAAWkXjJ0nXAIiJhaAQsIhJIToSawApgEYkVjYBFRALRZWgiIoFEKH8VwCISLxG6EU4BLCLxohaEiEggCmARkUCiE7/RapeIiLTKLPmp9X1ZgZk9a2YLzGy+mQ0xs+5m9oqZfZz4umOqtSqARSRWzCzpKQm/Bv7i7vsCBwHzgSuBae7eH5iWmE+JAlhEYiWrDVNLzCwfOAKYAuDu1e6+ChgOPJJY7RFgxNbUKiISG1lmSU9mVmxmsxtNxY121RcoBx4ys/fM7AEz6wIUufuyxDrLgaJUa9VJOBGJlbZ8JJG7lwAlzbydA3wTuMTdZ5rZr9ms3eDubmaeaq0aAYtIrKSrBQGUAqXuPjMx/ywNgbzCzHYGSHwt25paRURiI10n4dx9ObDEzPZJLDoamAe8AIxKLBsFTE21VrUgRCRW0nwd8CXAb80sF1gEjKFh4PqMmZ0DfAqcnurOFcAiEivZabwTzt3fBwZu4a2j07F/BbCIxEqE7kRWAItIvFiEbkZWAItIrGgELCISiD4VWUQkEI2ARUQC0fOARUQCidCn0iuARSRedBWEiEggEepAKIDTYfmyZVxz1RVUfPEFmHHaD0/nxz8ZxYL58/n5DZOo3riR7Jxsrr52Mt8YMKDJ9i88/wfuv+8eAH563gWcMuLUTH8L0k5OPrCIY/fpgTt8unIdd77+Cecduht79uiCGfyncgN3vPYJG2rrm2z7g4N6cczePah3uH/6Z7y/tCrAdxA9GgFvY7Jzsrn8iivZb/8DWLt2DSN/+AMOGTKUX912K+dfeBGHHf4d3nj9NW6/7VamPPzY17atXLWKe++5iyef/j1mxsjTv8+R3z2Kbvn5gb4bSZfueZ046YBCLnl2LtV1zoSj+nF4v+5MmbGE9TUNgTtmcB+G7V/Icx8s/9q2fQo6c1i/7lzy+w/pnteJG4btzYW/m0t9yg8+3HZEqQesp6GlQc+ehey3/wEAdOnSlX79+lFWtgLDWLNmLQBrVq+mZ8/CJtu+/dabHDJkKPkFBXTLz+eQIUN56803Mlq/tJ9sM3JzssgyyM3JomJdzabwhYZlW8rUwbsX8OaiCmrrnbI11Syr2kj/nl0yV3iEteWB7KFpBJxmS5eWsmD+fL4x4CCuuPJqLig+h9t+eQv19fU8+tunmqxfVraCXr16bZovKiqirGxFJkuWdlKxrobn5yzn/pEDqK6t5/2lVZvaCJccsQff2jWfJSvX89CM0ibbds/L5V/lazbNf7G2mu55ucDaTJUfWeFjNXkpj4DNbEwL7236mI+SkuYeNh8/69au5WfjxzHhyqvp2rUrzzz9JBMmXsVfp73GhIlXMfm6a0KXKBnUJTebQbsXcN7Tcxj7xAd0zsniO3t1B+DO1z9h7BP/pHTVBg7rl/KH6soWRGkEvDUtiP9p7g13L3H3ge4+sLi4uLnVYqWmpobLxo9j2PdO5phjjwPgxal/4OjE6+OOP5G5cz5osl1hYRHLl3/V/1uxYgWFhSl/xJR0IAf17kbZ6o1Ubailzp3pn6xi38Kum96vd3hjUQVD+jYN4Ip11fTokrtpfqcuuVSsq85I3VFnbZhCazGAzeyDZqY5bMUH0cWNuzP5+mvo168fZ4/+6g+DnoWFzH5nFgCzZs5gt933aLLtoUMPY/rbb1JVWUlVZSXT336TQ4celqnSpR2Vr6lm78Ku5GY3/G82YJcdKF21gV7dttu0zqDdCli6akOTbWd9uorD+nU
nJ8so7JrLzt0683G52g9JiVACt9YDLgKOB1ZuttyAt9ulogh6791/8NILU+m/996c/v3hAFwy/jKun3wj//eLm6mrrSV3u+24fvINAHw4dw6/e+YpJt9wE/kFBRSffyFn/ug0AM674CLyCwpCfSuSRh+Xr+XtxSu57dT9qKuHxV+s4+UF5dz4vX3I65QFGJ9UrOPetz4F4Nu75bNXjy48+e5/WLJqA28tXsldpx1AXT2UvP2proBIUkdoLSTL3Jv/r2pmU4CH3P3NLbz3hLufmcQxfEPtVlQosdM58Wt/xAOzwxYiHcrz5w6ENIxL31lUmfSvqm/3yw+a1i2OgN39nBbeSyZ8RUQyKzoDYF2GJiLxojvhREQCiVALWAEsIvESofxVAItIvFiEhsAKYBGJlQjlrwJYROIlQvmrABaRmIlQAiuARSRWdBmaiEgg6gGLiASiABYRCUQtCBGRQDQCFhEJJEL5qwAWkZiJUAIrgEUkVqL0QHYFsIjESnTiVwEsInEToQRWAItIrETpMrSt+Vh6EZEOxyz5Kbn9WbaZvWdmLyXm+5rZTDNbaGZPm1luqrUqgEUkVtrhU+kvBeY3mr8F+JW770XDJ8Y3+9mZrVEAi0ismFnSUxL76gN8D3ggMW/AUcCziVUeAUakWqsCWERipS0tCDMrNrPZjabizXZ3O3AFUJ+Y3wlY5e61iflSoHeqteoknIjESltOwbl7CVCyxf2YnQSUufs/zOzINJTWhAJYROIlfRdBDAVOMbNhQGegG/BroMDMchKj4D7A0lQPoBaEiMSKteGflrj7Ve7ex933AEYCf3P3HwN/B05LrDYKmJpqrQpgEYmVdF+GtgUTgcvMbCENPeEpqe5ILQgRiZWsdrgPw91fBV5NvF4EDErHfhXAIhIz0bkTTgEsIrESoYehKYBFJF4ilL8KYBGJF42ARUQCSeYW445CASwisRKd+FUAi0jMRGgArAAWkXiJ0gPZFcAiEi/RyV8FsIjES4TyVwEsIvGij6UXEQkkQvmrp6GJiISiEbCIxEqURsAKYBGJFV2GJiISiEbAIiKBKIBFRAJRC0JEJBCNgEVEAolQ/iqARSRmIpTA5u7tfYx2P4CIxMZWx+eG2uQzp3NO2LjORABLgpkVu3tJ6DqkY9HPxbZLtyJnVnHoAqRD0s/FNkoBLCISiAJYRCQQBXBmqc8nW6Kfi22UTsKJiASiEbCISCAKYBGRQBTAGWJmJ5jZR2a20MyuDF2PhGdmD5pZmZnNDV2LhKEAzgAzywZ+A5wI7A+cYWb7h61KOoCHgRNCFyHhKIAzYxCw0N0XuXs18BQwPHBNEpi7vw5UhK5DwlEAZ0ZvYEmj+dLEMhHZhimARUQCUQBnxlJg10bzfRLLRGQbpgDOjHeA/mbW18xygZHAC4FrEpHAFMAZ4O61wMXAy8B84Bl3/zBsVRKamT0JTAf2MbNSMzsndE2SWboVWUQkEI2ARUQCUQCLiASiABYRCUQBLCISiAJYRCQQBbCISCAKYBGRQP4foAFULroBnqEAAAAASUVORK5CYII=\n", 2123 | "text/plain": [ 2124 | "
# %%
# Logistic Regression
# Fit a default logistic-regression model and print its hold-out report
lr = LogisticRegression()
classifier_report(ModelName="Logistic Regression", model_object=lr)
dbqoBFJFZSeR2wmT0GnAR0MLPlwHgqr3poDrwcnGuWu4909w/MbCqwkMqhidHuvqOu9hXAIhIvKbwPw90vqGXzfXUcfyNwY6LtK4BFJFaidCecAlhEYkUBLCISkkRuMW4qFMAiEiuqgEVEQqIAFhEJiQJYRCQkCmARkbBEJ38VwCISL8neYhwGBbCIxIqGIEREwhKd/FUAJ+ue8RdyxglHsGZ9Gb1/+HsArh11Fmef2IMKd9asL6Nw/COsXFNKbpuW/HXCj+la0IGt27ZzyYTJLPxkZY02u+zfnocnDqddzr7MX/Q5F1/9ENvL65zLQ5qwa68ex2v/epV27dozbfpzAHy4aBG/u34827ZuJTMrk6uunsCRPXrUeO0zTz/F3/56NwA/u+RSvjfk+2nte5RFqQKOzmBJE/Pws7MYPPovO227bdIM+p5/E/2GTuSF1xcwrvAMAK4ccTrvfrScvuffxIhrHubW3/yg1jZvvGIwd0x+hSMGX8eGss1c9P3+jf4+pPEMHnIud//13p223fbHWxg5ajRTp01n1GVX8Kc/3lLjdaUlJdxz95088thUJk95gnvuvpONpaXp6nbkpfmJGHuk3gA2s8PMbIyZ/TlYxpjZd9LRuabszXc+YX3pVzttK/tyS9X3rVo25+t5mg87uCP/mrMYgMX/WU2X/duR165NjTZP7HMI0/5vPgCTn53NOScd1VjdlzTo1bsP2Tk5O20zjE2bvgRgU1kZ++2XV+N1M998g379B5CTm0t2Tg79+g/gzTdeT0uf4yBKAVznEISZjQEuAKbwzcTCBcBjZjbF3Sc2cv8iZ8Loc7jw7L6UbtrMoMI/A/D+4hUMPuUo3pz/Cb0P78KBndrROT+X4vVlVa9rn7svpWWb2bGjAoAVqzewf15OreeQ6Lpy7FVcWjiCP956MxUVFTw0eUqNY4qLV9OxY8eq9fz8fIqLV6ezm5EWpbkg6quARwB93H2iuz8SLBOpfNLniN29qPpzloqKGvy0j0ib8Jdn6XbGNUx5YS4jzz8BgFsfeJmcNq2YNWUslw49kXc/Wl4VtLJ3mfr4Y/xmzDhemvEvfjNmHBOu+W3YXYqdKFXA9QVwBbB/Lds7Bftq5e5F7t7b3XsXFib8jLtYefz5OQwZeDRQOTRxyYRH6Dd0IiOueYgObVvz2Yp1Ox2/ruRLctq0JDOz8j9J5/y2fFGscb+4eXb6Uwz87mkAnHb6GSx4/70ax+Tl5bNq1aqq9dWrV5OXl5+2PkZdnAL4F8AMM3vBzIqC5UVgBnBFo/cuYr514H5V3599Ug8W/6fyz8ac1i3ZJysTgOHfP5Y33lmy03jx116bu5hzT+0JwIXn/BfPvVrzf06Jtv3y8pg7p3I07+3Zsziwy0E1jjl2wHG8NfMNNpaWsrG0lLdmvsGxA45Lc0+jyyzxJWx1jgG7+4tmdgiVQw5fP99+BTCnvmcdxd2kmy7i+F7d6JDbmiUv3sAN9zzPoOMOp1uXPCoqnM9XrufnN1aO7x12cEf+dv1PcHcWfbKSkdd98xTrp+64lFHXP8rKNaX89vbpPDxxOONHnc27Hy3jwaffCuvtSQqM+Z9fMXfO25SUbOC7p5zApaMv59oJN/CHib9nR3k5zZo359oJ1wPwwYL3eWLqFCZcfyM5ubkUjhzFj86vvFrmkktHk5ObG+I7iZamUNkmyr55onKj8ZY9L2vsc0iEbJ5/JwBbykPuiDQpLSrLwT1Oz0PH/CPhUPvo5tNDTWvdiCEisRKhAlgBLCLxkhGhy9AUwCISK6qARURCEqUP4RTAIhIrEcpfBbCIxIsmZBcRCUmUKuDo/KoQEUlAKm9FNrP7zazYzBZU29bOzF42s4+Dr22D7RbMGLnEzN4zs2Pqa18BLCKxkuJbkR8EBu2ybSwww927UTktw9hg+xlAt2ApBO6ur3EFsIjESiorYHd/DVi/y+bBwKTg+0nAkGrbH/JKs4BcM+tUV/sKYBGJlYZUwNWnzg
2WRKZvzHf3r58ptgr4eqq6zsCyasct55s5dGqlD+FEJFYaciecuxcBSU9a7u5uZklPqKMAFpFYScONGKvNrJO7rwyGGIqD7SuAA6odVxBs2y0NQYhIrKRhPuBngGHB98OA6dW2/zS4GqIfUFptqKJWqoBFJFZSWQGb2WPASUAHM1sOjAcmAlPNbASwFDgvOPx54ExgCfAVMLy+9hXAIhIrqRyBcPcLdrNrYC3HOjC6Ie0rgEUkVjQdpYhISDQbmohISBTAIiIhiVD+KoBFJF5UAYuIhCRC+asAFpF40VUQIiIhyYhQCawAFpFYiVD+KoBFJF70IZyISEgiNASsABaReNGHcCIiITEUwCIioYhQAawAFpF40YdwIiIhiVD+KoBFJF50I4aISEh0FYSISEgiVAArgEUkXjQEISISkujErwJYRGJGl6GJiIQkQp/BKYBFJF50FYSISEg0BCEiEpIIFcAKYBGJlyhVwBlhd0BEJJWsAUu9bZn90sw+MLMFZvaYmbUws65mNtvMlpjZ42bWLNm+KoBFJFYyMyzhpS5m1hn4OdDb3Y8AMoGhwM3Abe7+bWADMCLZviqARSRWzCzhJQFZQEszywJaASuBU4Ang/2TgCHJ9lUBLCKxYtaQxQrNbG61pfDrdtx9BXAr8DmVwVsKzANK3L08OGw50DnZvupDOBGJlYbMBeHuRUBRbfvMrC0wGOgKlABPAIP2vIffSEsAb55/ZzpOIxHTQr/+pRGk8CKIU4HP3H1NZbs2DRgA5JpZVlAFFwArkj2BhiBEJFZSOAb8OdDPzFpZ5cEDgYXAK8APgmOGAdOT7WtaapCl67am4zQSEV3aNwdg1LSFIfdEmpK7zu2eknYyU1QCu/tsM3sSeAcoB+ZTOVzxv8AUM/tdsO2+ZM+hPwJFJFZSeSecu48Hxu+y+VOgbyraVwCLSKzoVmQRkZBE6VZkBbCIxIoqYBGRkESoAFYAi0i8ZEUogRXAIhIrEcpfBbCIxIseSy8iEpII5a8CWETiRVdBiIiEpL6J1psSBbCIxEqE8lcBLCLxYgk97a1pUACLSKyoAhYRCYkCWEQkJJqMR0QkJJkRes6PAlhEYkV3womIhERjwCIiIYlQAawAFpF4ydB1wCIi4VAFLCISkqwIDQIrgEUkVlQBi4iERJehiYiEJEL5qwAWkXiJ0I1wCmARiZcoDUFE6ZeFiEi9MswSXupjZrlm9qSZfWhmi8ysv5m1M7OXzezj4GvbpPua7AtFRJoia8CSgNuBF939MOAoYBEwFpjh7t2AGcF6UhTAIhIrZokvdbdjOcAJwH0A7r7N3UuAwcCk4LBJwJBk+6oAFpFYMbOGLIVmNrfaUlitqa7AGuABM5tvZvea2b5AvruvDI5ZBeQn21d9CCcisdKQqtLdi4Ci3ezOAo4BLnf32WZ2O7sMN7i7m5kn11NVwCISMyn8EG45sNzdZwfrT1IZyKvNrBNA8LU46b4m+0IRkaaoIUMQdXH3VcAyMzs02DQQWAg8AwwLtg0DpifbVw1BiEispLiqvByYbGbNgE+B4cEppprZCGApcF6yjSuARSRWUvlQTnf/N9C7ll0DU9G+AlhEYiU698EpgEUkZjIjdCuyAlhEYiVC+asAFpF4sQgNQiiARSRWVAGLiIRET0UWEQmJKmARkZBEaUJ2BbCIxEqEnkqvABaReNFVECIiIYnQCIQCOBW2bd3Kr0cNZ/v2bezYsYPjTz6Vn/73aG6aMJaPP/yAzMwsDut+JFeMuYasrH1qvP6l56fz6IN/A+BHF/2M084cnO63II0gr3UzRvQtqFrvsO8+PLdwDYvXfskFR3eieVYG67/azgNzVrClvKLG67vn78sPe3TEzJj5nw28tHhdOrsfWaqA9zL7NGvGH+64l5atWlFevp1fjhxGn37HMfC0sxg7/iYAbho/hheemcY5556/02s3bizlkfvv4c77p2AYoy8+n/7HnUyb7Oww3oqkUPGmbdz0z0
+ByvkJfn/mIbz7RRk/61fAtPdX8/Har+jfJZdTD2nPcwvX7PRaA84/qhN/fmMpJZu3M+bkg3lvZRmryral/41ETJTGgDUfcAqYGS1btQKgvLycHeXlYEbfY4+vmnf00O5HsrZ4dY3Xzpv1Jsf06U92dg5tsrM5pk9/5s56I91vQRrZYXn7svbLbazfvJ281s34eO1XAHxYvIme+9f8ZXtQu5as+XIb677azg6HectLOapTm3R3O5JS+VTkRu9r2B2Iix07djBy2A8576yTOKZPf75zeI+qfeXl25nx4rP07jegxuvWri1mv7yOVesd8vJZuzbpCfaliepVkM3cZaUArNy4tSpMe3bOpm3Lmn+I5rbIYsPm7VXrGzaXk9Oy5vCV1JTipyI3qqQD2MyG17Gv6kF3RUW7e9xSvGRmZnLPpCd49OmX+WjRAj775OOqfXfcciNHHt2LI4/uFWIPJSyZBj06teGdFRsBeHjeF5xwcFvGntyVFlkZlFck/UgxqUWUKuA9GQO+Dnigth27POjOl67bugeniZbWbbI56pg+zJ39Jl2/1Y2H77ubkpINjB9zba3Hd+iQx7vz51atry1ezVE9a5v/WaLq8I6tWVayhbKtOwBYvWkbd7z5OVD5Qd0RHWsOLZRsKadttYq3bcssSqtVxLJ74cdq4uqsgM3svd0s77MHj2KOm5IN69lUVlndbN26hXfmvMUBXbrywjN/Z97smVx1/c1kZNT+r7pXvwHMe3smZRs3UrZxI/PenkmvWoYqJLp6F+QwZ3lp1Xrr5plAZVCccWgHXv9sQ43XLN2wmbzWzWjfah8yDXoV5PDeyk3p6nK0RWgMor4KOB84Hdj1J8SAmY3Sowhav24tt9xwNRUVO6ioqODEgafTb8CJDDq+J/n5nbii8CcAHHfiQH588UgWL/qA556eyq/GXUd2dg4XDr+Ey0dcAMCPh48kOzsnzLcjKdQs0zgsb18enb+yalufghxOOLgtAP/+ooy3lpYAkNMiiwuP6cRdM5dR4fD4v1dx2YADyTDjraUlrCzbe/6S3BNNYWghUea++/EnM7sPeMDda3wsb2aPuvuPEjjHXjUEIfXr0r45AKOmLQy5J9KU3HVud0hBXTrn09KEB9X7HJwTalrXWQG7+4g69iUSviIi6RWdAlg3YohIvOhOOBGRkERoCFgBLCLxEqH8VQCLSLxYhEpgBbCIxEqE8lcBLCLxEqH81WQ8IhIzKb4TzswyzWy+mT0XrHc1s9lmtsTMHjezZsl2VQEsIrFiDfgnQVcAi6qt3wzc5u7fpvIu4d3eL1EfBbCIxIpZ4kv9bVkBcBZwb7BuwCnAk8Ehk4AhyfZVASwisdKQAK4+dW6wFO7S3J+AK4GvnxnVHihx9/JgfTnQOdm+6kM4EYmVhtwJt8vUuTu3Y3Y2UOzu88zspJR0bhcKYBGJlRRehjYA+J6ZnQm0ALKB24FcM8sKquACYEWyJ9AQhIjESqougnD3ce5e4O4HAUOBf7r7hcArwA+Cw4YB05PtqwJYROKl8SdkHwP8ysyWUDkmfF+yDWkIQkRipTEmZHf3V4FXg+8/Bfqmol0FsIjESpTuhFMAi0i8RCiBFcAiEiuakF1EJCSaDU1EJCQRyl8FsIjEiyZkFxEJSYTyVwEsIvESofxVAItIzEQogRXAIhIrugxNRCQkGgMWEQlJhgJYRCQs0UlgBbCIxIqGIEREQhKh/FUAi0i8qAIWEQmJbkUWEQlJdOJXASwiMROhAlgBLCLxojvhRETCEp38VQCLSLxEKH8VwCISL43xWPrGogAWkViJUP6SEXYHRET2VqqARSRWolQBK4BFJFaidBmahiBEJFbMEl/qbscOMLNXzGyhmX1gZlcE29uZ2ctm9nHwtW2yfVUAi0ispCqAgXLg1+7eHegHjDaz7sBYYIa7dwNmBOtJUQCLSKxYA/6pi7uvdPd3gu/LgEVAZ2AwMCk4bBIwJNm+KoBFJFYaUgGbWa
GZza22FNbeph0E9ARmA/nuvjLYtQrIT7av+hBORGKlIR/BuXsRUFRne2atgb8Dv3D3jdWnu3R3NzNPqqOoAhaRuLEGLPU1ZbYPleE72d2nBZtXm1mnYH8noDjprronHd6JavQTiEhs7PE1ZFvKE8+cFlm7P59VlrqTgPXu/otq228B1rn7RDMbC7Rz9yuT6Ws6AlgCZlYY/MkjUkU/F02TmR0HvA68D1QEm6+ichx4KnAgsBQ4z93XJ3UOBXD6mNlcd+8ddj+kadHPxd5LY8AiIiFRAIuIhEQBnF4a55Pa6OdiL6UxYBGRkKgCFhEJiQJYRCQkCuA0MbNBZvaRmS0JLt6WvZyZ3W9mxWa2IOy+SDgUwGlgZpnAX4AzgO7ABcG0drJ3exAYFHYnJDwK4PToCyxx90/dfRswhcop7WQv5u6vAUndQSXxoABOj87Asmrry4NtIrIXUwCLiIREAZweK4ADqq0XBNtEZC+mAE6POUA3M+tqZs2AocAzIfdJREKmAE4Ddy8HLgP+QeVzpaa6+wfh9krCZmaPAW8Bh5rZcjMbEXafJL10K7KISEhUAYuIhEQBLCISEgWwiEhIFMAiIiFRAIuIhEQBLCISEgWwiEhI/h9aX05XSn1/XwAAAABJRU5ErkJggg==\n", 2169 | "text/plain": [ 2170 | "
# %%
# Decision Tree Classifier
# Seeded so the reported metrics are the same on every run.
dt = DecisionTreeClassifier(random_state=42)
classifier_report("Decision Tree", dt)
8BLKlQqjEGXCkGsKRiyU/+GsCSisUOWJIyUlOTn4u7DGBJhWIHLElZyU/+GsCSisUOWJIyYgBLUkY6eotxFgxgSYViByxJGTGAJSkjBrAkZcQAlqSs5Cd/DWBJxeKtyJKUEYcgJCkr+clf3wnXUTdOOIsXZl7NrDsv37js3z5zIk/eMZ7Hp1zGjO+cz579+r5jm785ZB9WPnUtp4x4f5v7POzgvXlq6uXMnTaBr3/x1M4sX1Ww5JVXGHvO/+WU0Sdwykkn8qMfTALg+uu+xamnjOa0fxjDP5/3KZqaGtvcfvq99zD6+GMZffyxTL/3nmqWnmvR8rr5sqasGcAd9IMZjzPm/G+/Y9k3J81k2OlXc8QZ1/DAr+YyftzxGz+rqQm+etEYHn78mS3u87rLT+f8r9zOoWOuZP99+nHskYd0Wv3qfLV1tXz+i5dxz4z7+eHkO5gy+XaeXbCAcz71T9x1zwym3j2Njxx1NN+94dubbbti+XJuvOF6fjh5Kj+acic33nA9r69YkcG3yJ9CBXBEvDsiLo2I60rTpRFxcDWK68p+/ftnWbZi9TuWrXxj7cafe/XcibffVg2fOeMo7p35B5YuW9nm/vbYfRf67NyDJ+c8D8Dt9z3J6KPfW/nCVTX9+vXn4EOGArDzzr0ZPHgwTU2N9O7de+M6a9esaTMIfvPrxzjig0fSt76eXfr25YgPHsmvH/tV1WrPszwF8FbHgCPiUuBMYApvv155IDA5IqaklK7p5Ppy54rzR3PWqGGsWLWGkeOuA2Cvfn056Zj3cdx51/HdoWe1ud1e/etZ3LR84/zixuXs1b++ChWrGhYvXsQz8+fznve+D4D/vvabzJh+L7179+H7t9y22fpNTY3sscceG+cbGhq2OFShd8rTsyDa64DHAn+bUrompfTD0nQNMKz0WZsiYlxEzIqIWRMnTqxkvV3eFd+ewZDj/5UpD8zi06d/BID//MI/8uVrp72jI9aOY/Ubb/D/Lv4sX7js8o3d74UXfY4HZ/6CE0eNZsrtP8y4wmLJUwfcXgBvAPZqY/mepc/alFKamFI6PKV0+Lhx47anvty64/6nOHn4+wH4wCH7cNs15/LMT67klBGH8a3xp282vPBy03IGtOp4BzTU83Krjlj59NZbb3HJxZ/lhBNHM+Jjx272+Qknjubhhx7cbHn//g0sWbJk43xjYyP9+zd0aq1FkacAbu8ytIuBmRHxF+Cl0rJ9gAOACzqxrlzaf59+PPviUgBGHf1e/vx8yz8ZDx51xcZ1Jl75CR741VxmPPrHd2y75NXXWfnGWoa9Zz+enPM8Hx81jBum/KJqtavyUkpc8W9fYvDgwXzynHM3Ln/hhefZd9/9AHjkkZkMGjR4s23//sgPcd2139h44u23v3mMiy6+pCp1510XyNWybTWAU0o/jYgDaRlyGFBavBh4KqW0vrOL68omXX0OH/6bIexe35sFP/0KX7nxfkZ+aChD9u3Phg2JF19ZxmevmtLufh6fchlHnNEylH7R1VOZeOUn6LlTNx789Tx+9ti8zv4a6kSzf/877ps+jSEHHshp/zAGgAsvvoR7fnwXzz//HDU1wZ57DuDLE64E4Om5c7hz6hSu+Per6Ftfz7hPf4aPn95yOeI//8v59K2vz+qr5EpX6GzLFVUYl0w9D7NZ1tvWzL4egLXNGReiLqVHSzu43el50KU/KzvU/vS14zJNa++Ek1QoOWqADWBJxVKTo8vQDGBJhWIHLEkZydNJOJ8FIalQIsqf2t9X3BwRTRExt9Wyd0XEQxHxl9Kvu5aWR+lxDQsi4o8R8YH29m8ASyqUmpqasqcy3AqM3GTZZcDMlNIQYGZpHuB4YEhpGgfc0G6tZX4nScqFSnbAKaVfAss2WTwGmFT6eRJwcqvlt6UWjwP1EbHn1vZvAEsqlG25Fbn1c2tKUznPTmhIKb1S+nkJ8Nd7xAfw9h3DAIt4+wa2NnkSTlKhbMs5uJTSRKDDTwxLKa
WI6PDdbAawpEKpwlUQjRGxZ0rpldIQQ1Np+WJg71brDSwt2yKHICQVSiXHgLdgOnB26eezgWmtln+ydDXEEcCKVkMVbbIDllQolbwTLiImA0cDu0fEImACcA0wNSLGAi8Ap5VWvx84AVgArAbO3WyHmzCAJRVKJYcgUkpnbuGj4W2sm4Dzt2X/BrCkQsnRjXAGsKRiydOtyAawpELJUf4awJKKxcdRSlJGHIKQpIwYwJKUkRzlrwEsqVjsgCUpIznKXwNYUrF4FYQkZaQmRy2wASypUHKUvwawpGLxJJwkZSRHQ8AGsKRi8SScJGUkMIAlKRM5aoANYEnF4kk4ScpIjvLXAJZULN6IIUkZ8SoIScpIjhpgA1hSsTgEIUkZyU/8GsCSCsbL0CQpIzk6B2cASyoWr4KQpIw4BCFJGclRA2wASyqWPHXANVkXIEmVFNswtbuviM9FxNMRMTciJkdEj4gYFBFPRMSCiLgjIrp3tFYDWFKh1NZE2dPWRMQA4LPA4SmlQ4Fa4Azga8A3U0oHAK8BYztaqwEsqVAiouypDHVAz4ioA3oBrwDHAHeVPp8EnNzRWg1gSYUSsS1TjIuIWa2mcX/dT0ppMfBfwIu0BO8K4HfA8pRSc2m1RcCAjtbqSThJhbItz4JIKU0EJrb1WUTsCowBBgHLgTuBkdtf4duqEsBrZl9fjcMoZ3r41786QQUvghgBPJdSWtqy37gbOBKoj4i6Uhc8EFjc0QM4BCGpUCo4BvwicERE9IqWlYcD84BHgFNL65wNTOtorVXpQZauam5/Je0w+vVu+W133tS5GVeiruR7px1akf3UVqgFTik9ERF3Ab8HmoHZtAxX/ASYEhFfLS27qaPH8B+BkgqlknfCpZQmABM2WbwQGFaJ/RvAkgrFW5ElKSN5uhXZAJZUKHbAkpSRHDXABrCkYqnLUQIbwJIKJUf5awBLKhZfSy9JGclR/hrAkorFqyAkKSPtPWi9KzGAJRVKjvLXAJZULFHW2966BgNYUqHYAUtSRgxgScqID+ORpIzU5ug9PwawpELxTjhJyohjwJKUkRw1wAawpGKp8TpgScqGHbAkZaQuR4PABrCkQrEDlqSMeBmaJGUkR/lrAEsqlhzdCGcASyoWhyAkKSMGsCRlJD/xawBLKpgcNcC5Gq+WpHZFRNlTGfuqj4i7IuKZiJgfER+MiHdFxEMR8ZfSr7t2tFYDWFKh1GzDVIZrgZ+mlN4NvA+YD1wGzEwpDQFmluY7XKskFUZNRNnT1kREX+AjwE0AKaV1KaXlwBhgUmm1ScDJHa61oxtKUle0LUMQETEuIma1msa12tUgYClwS0TMjojvR8TOQENK6ZXSOkuAho7W6kk4SYWyLV1lSmkiMHELH9cBHwAuTCk9ERHXsslwQ0opRUTqWKV2wJIKpoIn4RYBi1JKT5Tm76IlkBsjYs/SsfYEmjpaqwEsqVBiG6atSSktAV6KiINKi4YD84DpwNmlZWcD0zpaq0MQkgqltrIXAl8I/CgiugMLgXNpaVynRsRY4AXgtI7u3ACWVCiVzN+U0v8HDm/jo+GV2L8BLKlQIkc3IxvAkgolT7ciG8CSCsW3IktSRuyAJSkjPg9YkjKSo7fSG8CSisWrICQpIzkagTCAK+HNN9/kgvM+ybp161i/fj0fHX4sYz99AS8vXsSE8Z/n9RXLOejgofzrV66mW7fum23/g5u/x33TfkxNbS0Xf348f/f3H8rgW6gzjDhwNz48aFcSsHjFWm55cjEfHrwrI4bsRv8+O/G5e+ezat36Nrf94L71nHhIPwB+Mm8pv31hefUKz7E8dcA+C6ICunfvzrU33sykKfdw6+0/5vHfPMbcOX/ghuu+welnfZI7pv2UPrvswn333r3Zts8tXMDDD97PD+6cztf/+7t8/Zqvsn59238glS/1PesYfsBufPXhZ7niZwuoiWDYPn1Z8OpqvvGL53n1jXVb3LZX91pGD+3Pf8xcyH88/C
yjh/anVzf/uJajJsqfsub/0QqICHr12hmA5uZm1jc3EwS/f+oJjh5+LADHjxrDrx6dudm2jz36CCOOPYHu3buz14CBDNx7b+Y/Paeq9avz1NQE3WprqAnoXhssX9PMS8vX8j+r39rqdoc29GZe4ypWr1vP6rc2MK9xFYfu0adKVedbpR7IXg0OQVTI+vXrGfuJ/8Pil17klNPOZMDAvendpw91dS3/ifv1b2Dp0s2fWrd0aSND3/O+jfP9GvZgaVNj1epW51m+ppkH//QqXzvxQN5an5jXuIp5javK2ra+Vx2vtQrp19a8RX0v/7iWI/tYLV+HO+CIOHcrn218yvzEiVt61nGx1NbWcuvku7n7gZ8zf+4cXnh+YdYlKWO9utXw/r36MP7+P/OFGc/Qva6Gv9unb9ZlFd6O0gFfCdzS1gebPGU+LV3VvB2HyZc+fXbhA4cP4+k5f2DVypU0NzdTV1fH0qZG+vXrv9n6/fo10LRkycb5pY1L6Ne/w284URdycENvXn3jLVa92TKmP3vR6+y/ey+eeHFFu9suX93Mgf133ji/a89u/LnpjU6rtUiyj9XybbUDjog/bmGaw3a8B6loXnttGStXvg7Am2vX8tQTv2Xf/QZz2OHDeHTmgwA8cN80PnTUMZtte+RRH+XhB+9n3bp1vLx4ES+99CIHD31PVetX51i2+i0G79aT7rUtkfDuhp1Z8vqbZW07t3EVQxt606tbDb261TC0oTdzyxy+2OFV6onsVdBeB9wAHAe8tsnyAH7TKRXl0P+8upSrJlzOhvUb2JA2cMyI4zjyI0ez3+D9ueLyz/O971zHkIMOZtTJ/wjAY7/4Oc/Me5p/+pcLGbz/ARzzsZF84tSTqK2r5ZJLv0xtbW3G30iV8NyyNfxu0et8+WMHsCElXnxtLb9c+BrHDHkXIw/qxy496phw3AHMeWUlt816mX137cFR+7+L22a9zOp167lvfhNfGrE/ADPmNbF6C5er6Z26wtBCuSKlLb9PLiJuAm5JKT3Wxme3p5Q+XsYxdqghCLWvX++Wv/fPmzo340rUlXzvtEOhAn3pUwtXlP2SzL8d3DfTtN5qB5xSGruVz8oJX0mqrvw0wF6GJqlY8nQnnAEsqVByNARsAEsqlhzlrwEsqVgiRy2wASypUHKUvwawpGLJUf4awJIKJkcJbABLKhQvQ5OkjDgGLEkZMYAlKSMOQUhSRvLUAftOOEmFUunHAUdEbUTMjoj7SvODIuKJiFgQEXdExOavOi+TASypWCr/QPaLgPmt5r8GfDOldAAtz0rf4lMj22MASyqUSr4TLiIGAicC3y/NB3AMcFdplUnAyR2utaMbSlJXtC0NcOsXCJemcZvs7lvAF4ENpfndgOUppb++ZWIRMKCjtXoSTlKxbMNJuE1eIPzO3USMAppSSr+LiKMrUdqmDGBJhVLBy9COBE6KiBOAHsAuwLVAfUTUlbrggcDijh7AIQhJhRJR/rQ1KaXxKaWBKaX9gDOAn6eUzgIeAU4trXY2MK2jtRrAkgqlCm+lvxS4JCIW0DImfFNHd+QQhKRC6YwHsqeUHgUeLf28EBhWif0awJIKJU93whnAkgolR/lrAEsqmBwlsAEsqVB8GpokZcQxYEnKSI0BLElZyU8CG8CSCsUhCEnKSI7y1wCWVCx2wJKUkc64FbmzGMCSCiU/8WsASyqYHDXABrCkYvFOOEnKSn7y1wCWVCw5yl8DWFKxlPO6+a7CAJZUKDnKX98JJ0lZsQOWVCh56oANYEmF4mVokpQRO2BJyogBLEkZcQhCkjJiByxJGclR/hrAkgomRwkcKaXOPkanH0BSYWx3fK5tLj9zetRlG9fVCGCVRMS4lNLErOtQ1+Lvix2XtyJX17isC1CX5O+LHZQBLEkZMYAlKSMGcHU5zqe2+PtiB+VJOEnKiB2wJGXEAJakjBjAVRIRIyPiTxGxICIuy7oeZS8ibo6IpoiYm3UtyoYBXAURUQt8Gz
geOAQ4MyIOybYqdQG3AiOzLkLZMYCrYxiwIKW0MKW0DpgCjMm4JmUspfRLYFnWdSg7BnB1DABeajW/qLRM0g7MAJakjBjA1bEY2LvV/MDSMkk7MAO4Op4ChkTEoIjoDpwBTM+4JkkZM4CrIKXUDFwA/AyYD0xNKT2dbVXKWkRMBn4LHBQRiyJibNY1qbq8FVmSMmIHLEkZMYAlKSMGsCRlxACWpIwYwJKUEQNYkjJiAEtSRv4X8InjEaexLjsAAAAASUVORK5CYII=\n", 2213 | "text/plain": [ 2214 | "
# %%
# Random Forest Classifier
# Seeded so the reported metrics are the same on every run.
rf = RandomForestClassifier(n_estimators=51, random_state=42)
classifier_report("Random Forest", rf)
oxExA3iM5qGJc1JKG7d2fANYUql0ZgCnlD7bxuKpW9n+68DXqz2+ASypVGoxBtxZDGBJ5VKc/DWAJZWLHbAkZVJXV5ybuwxgSaViByxJuRQnfw1gSeViByxJmRjAkpRJRx8xzsEAllQqdsCSlIkBLEmZGMCSlIkBLEm5FCd/DWBJ5eKjyJKUiUMQkpRLcfLXb8J11A2Xn8rzc69i0e1fbFn25bNP4OHbLmX+9AnM+fY5DOzb6z37/NnB+/LawkmcfMxH2jzmoQftw8IZX2TJrMv5Pxd/uivLVw2sWrmSM0//a04eezwnf+oEbv73aQBMvvabfPrksXzmL8fx93/3BZqaVre5/+wf3sHY40Yz9rjRzP7hHbUsvdCi+XPzVU25GcAd9O9z5jPunOves+yaaXMZccpVHDF+Ij/5xRIubTyuZV1dXfC1C8bxs/lPbPGY137xFM756i18aNyV7LdvX0aPPLjL6lfXq+9Rzz9fPIE75tzJTbfexvRbb+HppUs5/Qt/y8w75jDjP2Zx5CdH8Z3rr9ts33Vr13LD9ZO56dYZ3Dz9dm64fjKvrluX4bconlIFcET8SURcEhHXVqZLIuKgWhTXnT3466d5Zd369yx77fU3W37ebdf38e7XquHs8Z/kh3N/y0uvvNbm8Qb0eT977L4LD//uOQBu+dHDjB11SOcXrprp27cfBx38QQB2370nw4YNo6lpNT179mzZ5s033mgzCB56cB5HfGwkvRoaeH+vXhzxsZE8OO8XNau9yIoUwFsdA46IS4DPAtN59/PKg4FbI2J6SmliF9dXOFecM5ZTTxzBuj++wbGN1wKwd99efOqoDzPm767lOx88tc399u7XwIqmtS3zK1avZe9+DTWoWLWwYsVynnj8cf70kA8D8K1J1zBn9g/p2XMPvvv9H2y2fVPTagYMGNAy379//y0OVei9ivQuiG11wGcCH00pTUwp3VSZJgIjKuvaFBGNEbEoIhZNmTKlM+vt9q64bg7Dj/tXpv9kEWedciQAV//LX/GlSbPe0xFrx7H+9df5pwvP518mfLGl+z3vgn/k7rn3c8KJY5l+y02ZKyyXInXA2wrgd4C921g+sLKuTSmlKSmlw1NKhzc2Nm5PfYV1250LOenojwBw2MH78oOJZ/DEj6/k5GMO5ZuXnrLZ8MKLTWsZ1KrjHdS/gRdbdcQqprfffpuLLjyf408YyzH/ffRm648/YSw/u+fuzZb369efVatWtcyvXr2afv36d2mtZVGkAN7WbWgXAnMj4inghcqyfYH9gXO7sK5C2m/fvjy97CUAThx1CE8+1/xPxoNOvKJlmylXfp6f/GIJc+575D37rlrzKq+9/iYj/vQDPPy75/jciSO4fvr9NatdnS+lxBVfvoxhw4bxN6ef0bL8+eefY8iQDwDw85/PZejQYZvt+/GRn+DaSf+35cLbLx+axwUXXlSTuouuG+Rq1bYawCmln0bEATQPOQyqLF4BLEwpbezq4rqzaVedzl/82XD6NPRk6U+/yldvuJNjP/FBhg/pxzvvJJatfIXzvz59m8eZP30CR4xvHkq/4KoZTLny8+z6vp24+8HHuGveY139a6gLLf71r/jR7FkMP+AAPvOX4wA478KLuOP/zeS5556lri4YOHAQX7r8SgAeXfI7bp8xnSu+8nV6NTTQeNbZfO6U5tsR//4fzqFXQ0OuX6VQukNnW62owbhk2vVQm2W9643FkwF4c0PmQtSt7NLcDm53eh54yV1Vh9rvvzEma1r7JJykUilQA2wASyqXugLdhmYASyoVO2BJyqRIF+F8F4SkUomoftr2seJ7EdEUEUtaLdsrIu6JiKcqf+5ZWR6V1zUsjYhHIuKwbR3fAJZUKnV1dVVPVbgROHaTZROAuSml4cDcyjzAccDwytQIXL/NWqv8nSSpEDqzA04pPQC8ssniccC0ys/TgJNaLf9BajYfaIiIgVs7vgEsqVTa8yhy6/fWVKZq3p3QP6W0
svLzKuC/nhEfxLtPDAMs590H2NrkRThJpdKea3AppSlAh98YllJKEdHhp9kMYEmlUoO7IFZHxMCU0srKEENTZfkKYJ9W2w2uLNsihyAklUpnjgFvwWzgtMrPpwGzWi3/m8rdEEcA61oNVbTJDlhSqXTmk3ARcSswCugTEcuBy4GJwIyIOBN4HvhMZfM7geOBpcB64IzNDrgJA1hSqXTmEERK6bNbWHV0G9sm4Jz2HN8AllQqBXoQzgCWVC5FehTZAJZUKgXKXwNYUrn4OkpJysQhCEnKxACWpEwKlL8GsKRysQOWpEwKlL8GsKRy8S4IScqkrkAtsAEsqVQKlL8GsKRy8SKcJGVSoCFgA1hSuXgRTpIyCQxgScqiQA2wASypXLwIJ0mZFCh/DWBJ5eKDGJKUiXdBSFImBWqADWBJ5eIQhCRlUpz4NYAllYy3oUlSJgW6BmcASyoX74KQpEwcgpCkTArUABvAksqlSB1wXe4CJKkzRTumbR4r4h8j4tGIWBIRt0bELhExNCIWRMTSiLgtInbuaK0GsKRSqa+LqqetiYhBwPnA4SmlDwH1wHjgG8A1KaX9gT8AZ3a0VgNYUqlERNVTFXoAu0ZED2A3YCVwFDCzsn4acFJHazWAJZVKRHumaIyIRa2mxv86TkppBfC/gWU0B+864FfA2pTShspmy4FBHa3Vi3CSSqU974JIKU0BprS1LiL2BMYBQ4G1wO3Asdtf4btqEsBvLJ5ci9OoYHbxf//qAp14E8QxwLMppZeajxv/AYwEGiKiR6ULHgys6OgJHIKQVCqdOAa8DDgiInaL5o2PBh4Dfg58urLNacCsjtZakx7kyVXra3EaFcQBA3YDYOK9T2euRN3JhKP265Tj1HdSC5xSWhARM4FfAxuAxTQPV/wYmB4RX6ssm9rRc/iPQEml0plPwqWULgcu32TxM8CIzji+ASypVHwUWZIyKdKjyAawpFKxA5akTArUABvAksqlR4ES2ACWVCoFyl8DWFK5+Fl6ScqkQPlrAEsqF++CkKRMtvWi9e7EAJZUKgXKXwNYUrlEVV976x4MYEmlYgcsSZkYwJKUiS/jkaRM6gv0nR8DWFKp+CScJGXiGLAkZVKgBtgAllQudd4HLEl52AFLUiY9CjQIbABLKhU7YEnKxNvQJCmTAuWvASypXAr0IJwBLKlcHIKQpEwMYEnKpDjxawBLKpkCNcCFGq+WpG2KiKqnKo7VEBEzI+KJiHg8Ij4WEXtFxD0R8VTlzz07WqsBLKlU6toxVWES8NOU0p8AHwYeByYAc1NKw4G5lfkO1ypJpVEXUfW0NRHRCzgSmAqQUnorpbQWGAdMq2w2DTipw7V2dEdJ6o7aMwQREY0RsajV1NjqUEOBl4DvR8TiiPhuROwO9E8praxsswro39FavQgnqVTa01WmlKYAU7awugdwGHBeSmlBRExik+GGlFKKiNSxSu2AJZVMJ16EWw4sTyktqMzPpDmQV0fEwMq5BgJNHa3VAJZUKtGOaWtSSquAFyLiwMqio4HHgNnAaZVlpwGzOlqrQxCSSqW+c28EPg+4OSJ2Bp4BzqC5cZ0REWcCzwOf6ejBDWBJpdKZ+ZtS+g1weBurju6M4xvAkkolCvQwsgEsqVSK9CiyASypVPwqsiRlYgcsSZn4PmBJyqRAX6U3gCWVi3dBSFImBRqBMIA708aNG7mo8VT26tuPyydey6qVK7j6ygm89uo69jvgIC667GvstNNOm+13+01TuefOWdTV1dF4/sUcNuLjGapXZ1u3ajn3TZ3YMv/ampUceuJfs1tDb37z45tZu+oFxl5yDX2GHNDm/ssfXcSCGd8hpXc4YOQYDhnT4QeudihF6oB9F0QnmjPzFgYPGdoyf+MNkxj3P05lyi2z6bnHHtzz4zs222fZc0/zwL13cd2NM7ni6uu4/pqr2LhxYy3LVhfpNWAw4y6bzLjLJjP20kn02HkXhnzkY+y59xCOavwSA/b/0Bb3
feedjcyf/m1Gn/sVTv7yDTyz8H7WrlxWw+qLqy6qn3IzgDvJmqbVLJw/j9EnngxASolHFi9k5CePAeDoMWOZP+++zfZbMO8+jjxqDDvtvDMDBg5i4KB9eOrxJbUsXTWw8onfskefAfTs3Z+GgfvSa8DgrW6/5rkn2aPv3uzRdyD1PXZi2OFHsuy3v6xRtcXWWS9kr0mtuQsoi3+bfDVnnHUBddH8n/TVdWvp2XMP6ns0j/L07tefl9ds/ta6l9e8RJ9+A1rm+/Tt1+Z2KrZnF93P0I+Oqnr79WtfZvc9+7TM77ZnH15f+3IXVFY+nfU2tFrocABHxBlbWdfylvkpU7b0ruPyePihB+jVsBf7H3hw7lLUDW3c8DbLHlnA0MM+kbuUHUKROuDtuQh3JfD9tlZs8pb59OSq9dtxmu7v8SW/4eGH7udXC+bx1ltvsf711/m3b13NH//4Ghs3bKC+Rw9eblpN7z79Ntu3d5++rGla1TK/5qWmNrdTcS1/dBG9992PXd9f/cdzd2vozet/WNMyv/4Pa9i9oXdXlFc6+WO1elvtgCPikS1Mv2M7voNUNqc1ns+NM+9i6m13cvGXJ3LIYR/ln//1f3LIRw7nwft/BsDcu+bw5yNHbbbviJGjeODeu3j7rbdYtXIFLy5fxvCDtnxxRsXz7ML7GXb4J9u1T58hB/Bq04u8tmYVGze8zTOLHmCfQ47oogpLpkBjENvqgPsDY4A/bLI8gIe6pKISOf2sC/hfV07gpqnfZtj+BzL6hJMAWPDgfTz1xGN8/syzGTJ0Pz7x30Zz9ml/RX19PWddOIH6+vq8havTvP2fb/LiE4v5+KnntSx7/jcPMf+263nzj+u457or2GvwMMac/zXWr32ZeTdNYvS5X6Guvp4jxv8Dd3/rS6R33mH4x0ez595DMv4mxdEdhhaqFSlt+XtyETEV+H5KaV4b625JKX2uinOUfghC7XPAgN0AmHjv05krUXcy4aj9oBP60oXPrKv6I5kfHdYra1pvtQNOKZ25lXXVhK8k1VZxGmCfhJNULkV6Es4AllQqBRoCNoAllUuB8tcAllQuUaAW2ACWVCoFyl8DWFK5FCh/DWBJJVOgBDaAJZWKt6FJUiaOAUtSJgawJGXiEIQkZVKkDthPEkkqlc5+HXBE1EfE4oj4UWV+aEQsiIilEXFbROzc0VoNYEnl0vkvZL8AeLzV/DeAa1JK+9P8rvQtvjVyWwxgSaXSmd+Ei4jBwAnAdyvzARwFzKxsMg04qcO1dnRHSeqO2tMAt/6AcGVq3ORw3wQuBt6pzPcG1qaUNlTmlwODOlqrF+EklUs7LsJt8gHh9x4m4kSgKaX0q4gY1RmlbcoAllQqnXgb2kjgUxFxPLAL8H5gEtAQET0qXfBgYEVHT+AQhKRSiah+2pqU0qUppcEppQ8A44F7U0qnAj8HPl3Z7DRgVkdrNYAllUoNvkp/CXBRRCyleUx4akcP5BCEpFLpiheyp5TuA+6r/PwMMKIzjmsASyqVIj0JZwBLKpUC5a8BLKlkCpTABrCkUvFtaJKUiWPAkpRJnQEsSbkUJ4ENYEml4hCEJGVSoPw1gCWVix2wJGXSFY8idxUDWFKpFCd+DWBJJVOgBtgAllQuPgknSbkUJ38NYEnlUqD8NYAllUs1n5vvLgxgSaVSoPz1m3CSlIsdsKRSKVIHbABLKhVvQ5OkTOyAJSkTA1iSMnEIQpIysQOWpEwKlL8GsKSSKVACR0qpq8/R5SeQVBrbHZ9vbqg+c3bpkTeuaxHAqoiIxpTSlNx1qHvx78WOy0eRa6sxdwHqlvx7sYMygCUpEwNYkjIxgGvLcT61xb8XOygvwklSJnbAkpSJASxJmRjANRIRx0bE7yNiaURMyF2P8ouI70VEU0QsyV2L8jCAayAi6oHrgOOAg4HPRsTBeatSN3AjcGzuIpSPAVwbI4ClKaVnUkpvAdOBcZlrUmYppQeAV3LXoXwM4NoYBLzQan55
ZZmkHZgBLEmZGMC1sQLYp9X84MoySTswA7g2FgLDI2JoROwMjAdmZ65JUmYGcA2klDYA5wJ3AY8DM1JKj+atSrlFxK3AL4EDI2J5RJyZuybVlo8iS1ImdsCSlIkBLEmZGMCSlIkBLEmZGMCSlIkBLEmZGMCSlMn/B06XsN/LOI8cAAAAAElFTkSuQmCC\n", 2258 | "text/plain": [ 2259 | "
" 2260 | ] 2261 | }, 2262 | "metadata": { 2263 | "needs_background": "light" 2264 | }, 2265 | "output_type": "display_data" 2266 | } 2267 | ], 2268 | "source": [ 2269 | "# K-Neighbors Classifier\n", 2270 | "knn = KNeighborsClassifier(n_neighbors=3)\n", 2271 | "classifier_report(\"K-Neighbors\", knn)" 2272 | ] 2273 | }, 2274 | { 2275 | "cell_type": "markdown", 2276 | "metadata": {}, 2277 | "source": [ 2278 | "# Model selection\n", 2279 | "\n", 2280 | "The decision tree model performs well compared to the other models (logistic regression, random forest and K-Neighbors classifier). The decision tree model has therefore been selected for further prediction and analysis.\n" 2281 | ] 2282 | }, 2283 | { 2284 | "cell_type": "code", 2285 | "execution_count": 90, 2286 | "metadata": {}, 2287 | "outputs": [ 2288 | { 2289 | "data": { 2290 | "text/html": [ 2291 | "
\n", 2292 | "\n", 2305 | "\n", 2306 | " \n", 2307 | " \n", 2308 | " \n", 2309 | " \n", 2310 | " \n", 2311 | " \n", 2312 | " \n", 2313 | " \n", 2314 | " \n", 2315 | " \n", 2316 | " \n", 2317 | " \n", 2318 | " \n", 2319 | " \n", 2320 | " \n", 2321 | " \n", 2322 | " \n", 2323 | " \n", 2324 | " \n", 2325 | " \n", 2326 | " \n", 2327 | " \n", 2328 | " \n", 2329 | " \n", 2330 | " \n", 2331 | " \n", 2332 | " \n", 2333 | " \n", 2334 | " \n", 2335 | " \n", 2336 | " \n", 2337 | " \n", 2338 | " \n", 2339 | " \n", 2340 | " \n", 2341 | " \n", 2342 | " \n", 2343 | " \n", 2344 | " \n", 2345 | " \n", 2346 | " \n", 2347 | " \n", 2348 | " \n", 2349 | " \n", 2350 | " \n", 2351 | " \n", 2352 | " \n", 2353 | " \n", 2354 | " \n", 2355 | " \n", 2356 | " \n", 2357 | " \n", 2358 | " \n", 2359 | " \n", 2360 | " \n", 2361 | " \n", 2362 | "
Survived
PassengerId
8920
8931
8940
8950
8961
......
13050
13061
13070
13080
13090
\n", 2363 | "

418 rows × 1 columns

\n", 2364 | "
" 2365 | ], 2366 | "text/plain": [ 2367 | " Survived\n", 2368 | "PassengerId \n", 2369 | "892 0\n", 2370 | "893 1\n", 2371 | "894 0\n", 2372 | "895 0\n", 2373 | "896 1\n", 2374 | "... ...\n", 2375 | "1305 0\n", 2376 | "1306 1\n", 2377 | "1307 0\n", 2378 | "1308 0\n", 2379 | "1309 0\n", 2380 | "\n", 2381 | "[418 rows x 1 columns]" 2382 | ] 2383 | }, 2384 | "execution_count": 90, 2385 | "metadata": {}, 2386 | "output_type": "execute_result" 2387 | } 2388 | ], 2389 | "source": [ 2390 | "submission = pd.read_csv(\"gender_submission.csv\", index_col='PassengerId')\n", 2391 | "submission" 2392 | ] 2393 | }, 2394 | { 2395 | "cell_type": "code", 2396 | "execution_count": 91, 2397 | "metadata": {}, 2398 | "outputs": [], 2399 | "source": [ 2400 | "rf = DecisionTreeClassifier(random_state=41)\n", 2401 | "rf.fit(X_train, y_train)\n", 2402 | "y_test_pred = rf.predict(X_test)" 2403 | ] 2404 | }, 2405 | { 2406 | "cell_type": "code", 2407 | "execution_count": 92, 2408 | "metadata": {}, 2409 | "outputs": [ 2410 | { 2411 | "data": { 2412 | "text/plain": [ 2413 | "0 0\n", 2414 | "1 1\n", 2415 | "2 0\n", 2416 | "3 0\n", 2417 | "4 1\n", 2418 | " ..\n", 2419 | "413 0\n", 2420 | "414 1\n", 2421 | "415 0\n", 2422 | "416 0\n", 2423 | "417 0\n", 2424 | "Name: Survived, Length: 418, dtype: int32" 2425 | ] 2426 | }, 2427 | "execution_count": 92, 2428 | "metadata": {}, 2429 | "output_type": "execute_result" 2430 | } 2431 | ], 2432 | "source": [ 2433 | "test_survived = pd.Series(rf.predict(testDf), name = \"Survived\").astype(int)\n", 2434 | "test_survived" 2435 | ] 2436 | }, 2437 | { 2438 | "cell_type": "code", 2439 | "execution_count": 93, 2440 | "metadata": {}, 2441 | "outputs": [ 2442 | { 2443 | "name": "stdout", 2444 | "output_type": "stream", 2445 | "text": [ 2446 | "\n", 2447 | " precision recall f1-score support\n", 2448 | "\n", 2449 | " 0 0.87 0.92 0.90 266\n", 2450 | " 1 0.85 0.76 0.80 152\n", 2451 | "\n", 2452 | " accuracy 0.86 418\n", 2453 | " macro avg 0.86 0.84 0.85 418\n", 2454 | 
"weighted avg 0.86 0.86 0.86 418\n", 2455 | "\n", 2456 | "\n", 2457 | "\n", 2458 | "Confusion_matrix: \n", 2459 | "\n" 2460 | ] 2461 | }, 2462 | { 2463 | "data": { 2464 | "image/png": "\n", 2465 | "text/plain": [ 2466 | "
" 2467 | ] 2468 | }, 2469 | "metadata": { 2470 | "needs_background": "light" 2471 | }, 2472 | "output_type": "display_data" 2473 | } 2474 | ], 2475 | "source": [ 2476 | "print(\"\\n\", metrics.classification_report(submission, test_survived))\n", 2477 | "# Compute confusion matrix\n", 2478 | "print(\"\\n\\nConfusion_matrix: \\n\")\n", 2479 | "cnf_matrix = metrics.confusion_matrix(submission, test_survived)\n", 2480 | "ax= plt.subplot()\n", 2481 | "sns.heatmap(cnf_matrix, annot=True, ax = ax, fmt= '.1f' , cmap= 'Blues', linewidths=0.5); #annot=True to annotate cells " 2482 | ] 2483 | }, 2484 | { 2485 | "cell_type": "code", 2486 | "execution_count": 107, 2487 | "metadata": {}, 2488 | "outputs": [], 2489 | "source": [ 2490 | "results = pd.DataFrame({\"PassengerId\": submission.index, \"Survived\": test_survived.to_numpy()})  # model predictions keyed by PassengerId; assumes testDf rows follow gender_submission.csv order (TODO confirm)\n", 2491 | "results.to_csv(\"titanic.csv\", index = False)  # write the model's predictions, not the gender baseline" 2492 | ] 2493 | }, 2494 | { 2495 | "cell_type": "code", 2496 | "execution_count": 104, 2497 | "metadata": {}, 2498 | "outputs": [], 2499 | "source": [ 2500 | "submission['test_survived'] = test_survived.to_numpy()  # assign by position: test_survived has a 0..417 index, so aligning it against the PassengerId index yields all NaN" 2501 | ] 2502 | }, 2503 | { 2504 | "cell_type": "code", 2505 | "execution_count": 105, 2506 | "metadata": {}, 2507 | "outputs": [ 2508 | { 2509 | "data": { 2510 | "text/html": [ 2511 | "
\n", 2512 | "\n", 2525 | "\n", 2526 | " \n", 2527 | " \n", 2528 | " \n", 2529 | " \n", 2530 | " \n", 2531 | " \n", 2532 | " \n", 2533 | " \n", 2534 | " \n", 2535 | " \n", 2536 | " \n", 2537 | " \n", 2538 | " \n", 2539 | " \n", 2540 | " \n", 2541 | " \n", 2542 | " \n", 2543 | " \n", 2544 | " \n", 2545 | " \n", 2546 | " \n", 2547 | " \n", 2548 | " \n", 2549 | " \n", 2550 | " \n", 2551 | " \n", 2552 | " \n", 2553 | " \n", 2554 | " \n", 2555 | " \n", 2556 | " \n", 2557 | " \n", 2558 | " \n", 2559 | " \n", 2560 | " \n", 2561 | " \n", 2562 | " \n", 2563 | " \n", 2564 | " \n", 2565 | " \n", 2566 | " \n", 2567 | " \n", 2568 | " \n", 2569 | " \n", 2570 | " \n", 2571 | " \n", 2572 | " \n", 2573 | " \n", 2574 | " \n", 2575 | " \n", 2576 | " \n", 2577 | " \n", 2578 | " \n", 2579 | " \n", 2580 | " \n", 2581 | " \n", 2582 | " \n", 2583 | " \n", 2584 | " \n", 2585 | " \n", 2586 | " \n", 2587 | " \n", 2588 | " \n", 2589 | " \n", 2590 | " \n", 2591 | " \n", 2592 | " \n", 2593 | " \n", 2594 | " \n", 2595 | "
Survivedtest_survived
PassengerId
8920NaN
8931NaN
8940NaN
8950NaN
8961NaN
.........
13050NaN
13061NaN
13070NaN
13080NaN
13090NaN
\n", 2596 | "

418 rows × 2 columns

\n", 2597 | "
" 2598 | ], 2599 | "text/plain": [ 2600 | " Survived test_survived\n", 2601 | "PassengerId \n", 2602 | "892 0 NaN\n", 2603 | "893 1 NaN\n", 2604 | "894 0 NaN\n", 2605 | "895 0 NaN\n", 2606 | "896 1 NaN\n", 2607 | "... ... ...\n", 2608 | "1305 0 NaN\n", 2609 | "1306 1 NaN\n", 2610 | "1307 0 NaN\n", 2611 | "1308 0 NaN\n", 2612 | "1309 0 NaN\n", 2613 | "\n", 2614 | "[418 rows x 2 columns]" 2615 | ] 2616 | }, 2617 | "execution_count": 105, 2618 | "metadata": {}, 2619 | "output_type": "execute_result" 2620 | } 2621 | ], 2622 | "source": [ 2623 | "submission" 2624 | ] 2625 | }, 2626 | { 2627 | "cell_type": "code", 2628 | "execution_count": 106, 2629 | "metadata": {}, 2630 | "outputs": [], 2631 | "source": [ 2632 | "test_survived.to_csv(\"test_survived.csv\", index = False)" 2633 | ] 2634 | }, 2635 | { 2636 | "cell_type": "code", 2637 | "execution_count": 98, 2638 | "metadata": {}, 2639 | "outputs": [ 2640 | { 2641 | "data": { 2642 | "text/plain": [ 2643 | "pandas.core.series.Series" 2644 | ] 2645 | }, 2646 | "execution_count": 98, 2647 | "metadata": {}, 2648 | "output_type": "execute_result" 2649 | } 2650 | ], 2651 | "source": [ 2652 | "type(test_survived)" 2653 | ] 2654 | }, 2655 | { 2656 | "cell_type": "code", 2657 | "execution_count": 99, 2658 | "metadata": {}, 2659 | "outputs": [ 2660 | { 2661 | "data": { 2662 | "text/plain": [ 2663 | "pandas.core.frame.DataFrame" 2664 | ] 2665 | }, 2666 | "execution_count": 99, 2667 | "metadata": {}, 2668 | "output_type": "execute_result" 2669 | } 2670 | ], 2671 | "source": [ 2672 | "type(submission)" 2673 | ] 2674 | }, 2675 | { 2676 | "cell_type": "code", 2677 | "execution_count": 88, 2678 | "metadata": {}, 2679 | "outputs": [ 2680 | { 2681 | "data": { 2682 | "text/plain": [ 2683 | "836" 2684 | ] 2685 | }, 2686 | "execution_count": 88, 2687 | "metadata": {}, 2688 | "output_type": "execute_result" 2689 | } 2690 | ], 2691 | "source": [ 2692 | "len(results)" 2693 | ] 2694 | }, 2695 | { 2696 | "cell_type": "code", 2697 | 
"execution_count": 67, 2698 | "metadata": {}, 2699 | "outputs": [ 2700 | { 2701 | "data": { 2702 | "text/plain": [ 2703 | "418" 2704 | ] 2705 | }, 2706 | "execution_count": 67, 2707 | "metadata": {}, 2708 | "output_type": "execute_result" 2709 | } 2710 | ], 2711 | "source": [ 2712 | "len(submission)" 2713 | ] 2714 | }, 2715 | { 2716 | "cell_type": "code", 2717 | "execution_count": 64, 2718 | "metadata": {}, 2719 | "outputs": [ 2720 | { 2721 | "data": { 2722 | "text/plain": [ 2723 | "418" 2724 | ] 2725 | }, 2726 | "execution_count": 64, 2727 | "metadata": {}, 2728 | "output_type": "execute_result" 2729 | } 2730 | ], 2731 | "source": [ 2732 | "len(test_survived)" 2733 | ] 2734 | }, 2735 | { 2736 | "cell_type": "code", 2737 | "execution_count": 66, 2738 | "metadata": {}, 2739 | "outputs": [ 2740 | { 2741 | "data": { 2742 | "text/plain": [ 2743 | "418" 2744 | ] 2745 | }, 2746 | "execution_count": 66, 2747 | "metadata": {}, 2748 | "output_type": "execute_result" 2749 | } 2750 | ], 2751 | "source": [ 2752 | "len(testDf)" 2753 | ] 2754 | }, 2755 | { 2756 | "cell_type": "code", 2757 | "execution_count": 48, 2758 | "metadata": {}, 2759 | "outputs": [], 2760 | "source": [ 2761 | "rf = DecisionTreeClassifier(random_state=41)\n", 2762 | "rf.fit(X_train, y_train)\n", 2763 | "y_test_pred = rf.predict(testDf)" 2764 | ] 2765 | }, 2766 | { 2767 | "cell_type": "code", 2768 | "execution_count": 49, 2769 | "metadata": {}, 2770 | "outputs": [ 2771 | { 2772 | "data": { 2773 | "text/plain": [ 2774 | "array([0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0,\n", 2775 | " 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1,\n", 2776 | " 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,\n", 2777 | " 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,\n", 2778 | " 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,\n", 2779 | " 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,\n", 2780 | " 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1,\n", 2781 | " 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,\n", 2782 | " 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 2783 | " 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,\n", 2784 | " 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1,\n", 2785 | " 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,\n", 2786 | " 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0,\n", 2787 | " 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,\n", 2788 | " 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,\n", 2789 | " 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0,\n", 2790 | " 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0,\n", 2791 | " 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1,\n", 2792 | " 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0],\n", 2793 | " dtype=int64)" 2794 | ] 2795 | }, 2796 | "execution_count": 49, 2797 | "metadata": {}, 2798 | "output_type": "execute_result" 2799 | } 2800 | ], 2801 | "source": [ 2802 | "y_test_pred" 2803 | ] 2804 | }, 2805 | { 2806 | "cell_type": "code", 2807 | "execution_count": 50, 2808 | "metadata": {}, 2809 | "outputs": [], 2810 | "source": [ 2811 | "submission = pd.read_csv(\"gender_submission.csv\", index_col='PassengerId')\n", 2812 | "submission" 2813 | ] 2814 | }, 2815 | { 2816 | "cell_type": "code", 2817 | "execution_count": 52, 2818 | "metadata": {}, 2819 | "outputs": [ 2820 | { 2821 | "name": "stdout", 2822 | "output_type": "stream", 2823 | "text": [ 2824 | "\n", 2825 | " precision recall f1-score support\n", 2826 | "\n", 2827 | " 0 0.87 0.92 0.90 266\n", 2828 | " 1 0.85 0.76 0.80 152\n", 2829 | "\n", 2830 | " accuracy 0.86 418\n", 2831 | " macro avg 0.86 0.84 0.85 418\n", 2832 | "weighted avg 0.86 0.86 0.86 418\n", 2833 | "\n", 2834 | "\n", 2835 | "\n", 2836 | 
"Confusion_matrix: \n", 2837 | "\n" 2838 | ] 2839 | }, 2840 | { 2841 | "data": { 2842 | "image/png": "\n", 2843 | "text/plain": [ 2844 | "
" 2845 | ] 2846 | }, 2847 | "metadata": { 2848 | "needs_background": "light" 2849 | }, 2850 | "output_type": "display_data" 2851 | } 2852 | ], 2853 | "source": [ 2854 | "print(\"\\n\", metrics.classification_report(submission, y_test_pred))\n", 2855 | "# Compute confusion matrix\n", 2856 | "print(\"\\n\\nConfusion_matrix: \\n\")\n", 2857 | "cnf_matrix = metrics.confusion_matrix(submission, y_test_pred)\n", 2858 | "ax= plt.subplot()\n", 2859 | "sns.heatmap(cnf_matrix, annot=True, ax = None, fmt= '.1f' , cmap= 'Blues', linewidths=0.5); #annot=True to annotate cells " 2860 | ] 2861 | }, 2862 | { 2863 | "cell_type": "markdown", 2864 | "metadata": {}, 2865 | "source": [ 2866 | "# Conclusion\n", 2867 | "\n", 2868 | "The decision tree model agrees with `gender_submission.csv` on 86% of the test passengers. Note that this file is a baseline prediction (all and only female passengers survive), not the ground truth, so the 86% figure measures agreement with that baseline rather than true accuracy.\n", 2869 | "\n", 2870 | "The training set should be used to build machine learning models. The test set should be used to see how well the model performs on unseen data.\n", 2871 | "\n", 2872 | "For the test set, the ground truth is not provided for each passenger; the challenge is to predict these outcomes. For each passenger in the test set, the trained model is used to predict whether or not they survived the sinking of the Titanic.\n" 2873 | ] 2874 | } 2875 | ], 2876 | "metadata": { 2877 | "kernelspec": { 2878 | "display_name": "Python 3 (ipykernel)", 2879 | "language": "python", 2880 | "name": "python3" 2881 | }, 2882 | "language_info": { 2883 | "codemirror_mode": { 2884 | "name": "ipython", 2885 | "version": 3 2886 | }, 2887 | "file_extension": ".py", 2888 | "mimetype": "text/x-python", 2889 | "name": "python", 2890 | "nbconvert_exporter": "python", 2891 | "pygments_lexer": "ipython3", 2892 | "version": "3.11.5" 2893 | } 2894 | }, 2895 | "nbformat": 4, 2896 | "nbformat_minor": 4 2897 | } 2898 | --------------------------------------------------------------------------------