├── README.md
└── Predict_survival.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # predict_survival
2 |
3 | The data has been split into two groups:
4 | - training set (`train.csv`)
5 | - test set (`test.csv`)
6 |
7 | The training set should be used to build your machine learning models. For the training set, we provide the outcome (also known as the “ground truth”) for each passenger. Your model will be based on “features” like passengers’ gender and class. You can also use feature engineering to create new features.
8 |
9 | The test set should be used to see how well your model performs on unseen data. For the test set, we do not provide the ground truth for each passenger. It is your job to predict these outcomes. For each passenger in the test set, use the model you trained to predict whether or not they survived the sinking of the Titanic.
10 |
11 | We also include gender_submission.csv, a set of predictions that assume all and only female passengers survive, as an example of what a submission file should look like.
12 |
--------------------------------------------------------------------------------
/Predict_survival.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Import data wrangling and analytics libraries"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 7,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import os\n",
17 | "import warnings\n",
18 | "\n",
19 | "import matplotlib.pyplot as plt\n",
20 | "import numpy as np\n",
21 | "import pandas as pd\n",
22 | "import seaborn as sns\n",
23 | "from sklearn import metrics\n",
24 | "from sklearn.ensemble import RandomForestClassifier\n",
25 | "from sklearn.exceptions import DataConversionWarning\n",
26 | "from sklearn.feature_selection import RFE\n",
27 | "from sklearn.linear_model import LogisticRegression\n",
28 | "from sklearn.model_selection import train_test_split\n",
29 | "from sklearn.naive_bayes import GaussianNB\n",
30 | "from sklearn.neighbors import KNeighborsClassifier\n",
31 | "from sklearn.preprocessing import LabelEncoder, MinMaxScaler\n",
32 | "from sklearn.tree import DecisionTreeClassifier\n",
33 | "\n",
34 | "# Suppress UserWarning, DataConversionWarning and FutureWarning for the rest of the session.\n",
35 | "warnings.filterwarnings(action='ignore', category=UserWarning)\n",
36 | "warnings.filterwarnings(action='ignore', category=DataConversionWarning)\n",
37 | "warnings.simplefilter(action='ignore', category=FutureWarning)\n"
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {},
56 | "source": [
57 | "# Data Loading and Initial Inspection"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 8,
63 | "metadata": {},
64 | "outputs": [],
65 | "source": [
66 | "# Load the training and test splits; paths are relative, so both CSVs must be in the working directory.\n",
67 | "trainDf, testDf = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))"
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": 9,
73 | "metadata": {},
74 | "outputs": [
75 | {
76 | "data": {
77 | "text/html": [
78 | "
\n",
79 | "\n",
92 | "
\n",
93 | " \n",
94 | " \n",
95 | " \n",
96 | " PassengerId \n",
97 | " Survived \n",
98 | " Pclass \n",
99 | " Name \n",
100 | " Sex \n",
101 | " Age \n",
102 | " SibSp \n",
103 | " Parch \n",
104 | " Ticket \n",
105 | " Fare \n",
106 | " Cabin \n",
107 | " Embarked \n",
108 | " \n",
109 | " \n",
110 | " \n",
111 | " \n",
112 | " 0 \n",
113 | " 1 \n",
114 | " 0 \n",
115 | " 3 \n",
116 | " Braund, Mr. Owen Harris \n",
117 | " male \n",
118 | " 22.0 \n",
119 | " 1 \n",
120 | " 0 \n",
121 | " A/5 21171 \n",
122 | " 7.2500 \n",
123 | " NaN \n",
124 | " S \n",
125 | " \n",
126 | " \n",
127 | " 1 \n",
128 | " 2 \n",
129 | " 1 \n",
130 | " 1 \n",
131 | " Cumings, Mrs. John Bradley (Florence Briggs Th... \n",
132 | " female \n",
133 | " 38.0 \n",
134 | " 1 \n",
135 | " 0 \n",
136 | " PC 17599 \n",
137 | " 71.2833 \n",
138 | " C85 \n",
139 | " C \n",
140 | " \n",
141 | " \n",
142 | " 2 \n",
143 | " 3 \n",
144 | " 1 \n",
145 | " 3 \n",
146 | " Heikkinen, Miss. Laina \n",
147 | " female \n",
148 | " 26.0 \n",
149 | " 0 \n",
150 | " 0 \n",
151 | " STON/O2. 3101282 \n",
152 | " 7.9250 \n",
153 | " NaN \n",
154 | " S \n",
155 | " \n",
156 | " \n",
157 | " 3 \n",
158 | " 4 \n",
159 | " 1 \n",
160 | " 1 \n",
161 | " Futrelle, Mrs. Jacques Heath (Lily May Peel) \n",
162 | " female \n",
163 | " 35.0 \n",
164 | " 1 \n",
165 | " 0 \n",
166 | " 113803 \n",
167 | " 53.1000 \n",
168 | " C123 \n",
169 | " S \n",
170 | " \n",
171 | " \n",
172 | " 4 \n",
173 | " 5 \n",
174 | " 0 \n",
175 | " 3 \n",
176 | " Allen, Mr. William Henry \n",
177 | " male \n",
178 | " 35.0 \n",
179 | " 0 \n",
180 | " 0 \n",
181 | " 373450 \n",
182 | " 8.0500 \n",
183 | " NaN \n",
184 | " S \n",
185 | " \n",
186 | " \n",
187 | "
\n",
188 | "
"
189 | ],
190 | "text/plain": [
191 | " PassengerId Survived Pclass \\\n",
192 | "0 1 0 3 \n",
193 | "1 2 1 1 \n",
194 | "2 3 1 3 \n",
195 | "3 4 1 1 \n",
196 | "4 5 0 3 \n",
197 | "\n",
198 | " Name Sex Age SibSp \\\n",
199 | "0 Braund, Mr. Owen Harris male 22.0 1 \n",
200 | "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
201 | "2 Heikkinen, Miss. Laina female 26.0 0 \n",
202 | "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
203 | "4 Allen, Mr. William Henry male 35.0 0 \n",
204 | "\n",
205 | " Parch Ticket Fare Cabin Embarked \n",
206 | "0 0 A/5 21171 7.2500 NaN S \n",
207 | "1 0 PC 17599 71.2833 C85 C \n",
208 | "2 0 STON/O2. 3101282 7.9250 NaN S \n",
209 | "3 0 113803 53.1000 C123 S \n",
210 | "4 0 373450 8.0500 NaN S "
211 | ]
212 | },
213 | "execution_count": 9,
214 | "metadata": {},
215 | "output_type": "execute_result"
216 | }
217 | ],
218 | "source": [
219 | "trainDf.head(n=5)"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": 10,
225 | "metadata": {},
226 | "outputs": [
227 | {
228 | "name": "stdout",
229 | "output_type": "stream",
230 | "text": [
231 | "\n",
232 | "RangeIndex: 891 entries, 0 to 890\n",
233 | "Data columns (total 12 columns):\n",
234 | " # Column Non-Null Count Dtype \n",
235 | "--- ------ -------------- ----- \n",
236 | " 0 PassengerId 891 non-null int64 \n",
237 | " 1 Survived 891 non-null int64 \n",
238 | " 2 Pclass 891 non-null int64 \n",
239 | " 3 Name 891 non-null object \n",
240 | " 4 Sex 891 non-null object \n",
241 | " 5 Age 714 non-null float64\n",
242 | " 6 SibSp 891 non-null int64 \n",
243 | " 7 Parch 891 non-null int64 \n",
244 | " 8 Ticket 891 non-null object \n",
245 | " 9 Fare 891 non-null float64\n",
246 | " 10 Cabin 204 non-null object \n",
247 | " 11 Embarked 889 non-null object \n",
248 | "dtypes: float64(2), int64(5), object(5)\n",
249 | "memory usage: 83.7+ KB\n"
250 | ]
251 | }
252 | ],
253 | "source": [
254 | "trainDf.info()"
255 | ]
256 | },
257 | {
258 | "cell_type": "code",
259 | "execution_count": 11,
260 | "metadata": {},
261 | "outputs": [
262 | {
263 | "name": "stdout",
264 | "output_type": "stream",
265 | "text": [
266 | "\n",
267 | "RangeIndex: 418 entries, 0 to 417\n",
268 | "Data columns (total 11 columns):\n",
269 | " # Column Non-Null Count Dtype \n",
270 | "--- ------ -------------- ----- \n",
271 | " 0 PassengerId 418 non-null int64 \n",
272 | " 1 Pclass 418 non-null int64 \n",
273 | " 2 Name 418 non-null object \n",
274 | " 3 Sex 418 non-null object \n",
275 | " 4 Age 332 non-null float64\n",
276 | " 5 SibSp 418 non-null int64 \n",
277 | " 6 Parch 418 non-null int64 \n",
278 | " 7 Ticket 418 non-null object \n",
279 | " 8 Fare 417 non-null float64\n",
280 | " 9 Cabin 91 non-null object \n",
281 | " 10 Embarked 418 non-null object \n",
282 | "dtypes: float64(2), int64(4), object(5)\n",
283 | "memory usage: 36.0+ KB\n"
284 | ]
285 | }
286 | ],
287 | "source": [
288 | "testDf.info()"
289 | ]
290 | },
291 | {
292 | "cell_type": "code",
293 | "execution_count": 6,
294 | "metadata": {
295 | "scrolled": true
296 | },
297 | "outputs": [
298 | {
299 | "ename": "KeyboardInterrupt",
300 | "evalue": "",
301 | "output_type": "error",
302 | "traceback": [
303 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
304 | "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
305 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas_profiling\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mProfileReport\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mProfileReport\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrainDf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
306 | "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\pandas_profiling\\__init__.py\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas_profiling\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconfig\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mConfig\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mconfig\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas_profiling\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcontroller\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mpandas_decorator\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 8\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas_profiling\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprofile_report\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mProfileReport\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas_profiling\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mversion\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0m__version__\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
307 | "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\pandas_profiling\\controller\\pandas_decorator.py\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas_profiling\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprofile_report\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mProfileReport\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
308 | "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\pandas_profiling\\profile_report.py\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 18\u001b[0m )\n\u001b[0;32m 19\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas_profiling\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mserialize_report\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mSerializeReport\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 20\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas_profiling\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mutils\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdataframe\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mhash_dataframe\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrename_index\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 21\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas_profiling\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mutils\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpaths\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mget_config\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 22\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
309 | "\u001b[1;32m~\\anaconda3\\lib\\importlib\\_bootstrap.py\u001b[0m in \u001b[0;36m_find_and_load\u001b[1;34m(name, import_)\u001b[0m\n",
310 | "\u001b[1;32m~\\anaconda3\\lib\\importlib\\_bootstrap.py\u001b[0m in \u001b[0;36m_find_and_load_unlocked\u001b[1;34m(name, import_)\u001b[0m\n",
311 | "\u001b[1;32m~\\anaconda3\\lib\\importlib\\_bootstrap.py\u001b[0m in \u001b[0;36m_load_unlocked\u001b[1;34m(spec)\u001b[0m\n",
312 | "\u001b[1;32m~\\anaconda3\\lib\\importlib\\_bootstrap_external.py\u001b[0m in \u001b[0;36mexec_module\u001b[1;34m(self, module)\u001b[0m\n",
313 | "\u001b[1;32m~\\anaconda3\\lib\\importlib\\_bootstrap_external.py\u001b[0m in \u001b[0;36mget_code\u001b[1;34m(self, fullname)\u001b[0m\n",
314 | "\u001b[1;32m~\\anaconda3\\lib\\importlib\\_bootstrap_external.py\u001b[0m in \u001b[0;36mget_data\u001b[1;34m(self, path)\u001b[0m\n",
315 | "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
316 | ]
317 | }
318 | ],
319 | "source": [
320 | "from pandas_profiling import ProfileReport\n",
321 | "ProfileReport(trainDf)"
322 | ]
323 | },
324 | {
325 | "cell_type": "markdown",
326 | "metadata": {},
327 | "source": [
328 | "# Data Cleaning"
329 | ]
330 | },
331 | {
332 | "cell_type": "code",
333 | "execution_count": 12,
334 | "metadata": {
335 | "scrolled": true
336 | },
337 | "outputs": [
338 | {
339 | "data": {
340 | "text/plain": [
341 | "PassengerId 0\n",
342 | "Survived 0\n",
343 | "Pclass 0\n",
344 | "Name 0\n",
345 | "Sex 0\n",
346 | "Age 177\n",
347 | "SibSp 0\n",
348 | "Parch 0\n",
349 | "Ticket 0\n",
350 | "Fare 0\n",
351 | "Cabin 687\n",
352 | "Embarked 2\n",
353 | "dtype: int64"
354 | ]
355 | },
356 | "execution_count": 12,
357 | "metadata": {},
358 | "output_type": "execute_result"
359 | }
360 | ],
361 | "source": [
362 | "trainDf.isna().sum(axis=0)"
363 | ]
364 | },
365 | {
366 | "cell_type": "code",
367 | "execution_count": 13,
368 | "metadata": {
369 | "scrolled": true
370 | },
371 | "outputs": [
372 | {
373 | "data": {
374 | "text/plain": [
375 | "PassengerId 0\n",
376 | "Pclass 0\n",
377 | "Name 0\n",
378 | "Sex 0\n",
379 | "Age 86\n",
380 | "SibSp 0\n",
381 | "Parch 0\n",
382 | "Ticket 0\n",
383 | "Fare 1\n",
384 | "Cabin 327\n",
385 | "Embarked 0\n",
386 | "dtype: int64"
387 | ]
388 | },
389 | "execution_count": 13,
390 | "metadata": {},
391 | "output_type": "execute_result"
392 | }
393 | ],
394 | "source": [
395 | "testDf.isna().sum(axis=0)"
396 | ]
397 | },
398 | {
399 | "cell_type": "code",
400 | "execution_count": 14,
401 | "metadata": {},
402 | "outputs": [
403 | {
404 | "data": {
405 | "text/html": [
406 | "\n",
407 | "\n",
420 | "
\n",
421 | " \n",
422 | " \n",
423 | " \n",
424 | " PassengerId \n",
425 | " Survived \n",
426 | " Pclass \n",
427 | " Name \n",
428 | " Sex \n",
429 | " Age \n",
430 | " SibSp \n",
431 | " Parch \n",
432 | " Ticket \n",
433 | " Fare \n",
434 | " Cabin \n",
435 | " Embarked \n",
436 | " \n",
437 | " \n",
438 | " \n",
439 | " \n",
440 | " 61 \n",
441 | " 62 \n",
442 | " 1 \n",
443 | " 1 \n",
444 | " Icard, Miss. Amelie \n",
445 | " female \n",
446 | " 38.0 \n",
447 | " 0 \n",
448 | " 0 \n",
449 | " 113572 \n",
450 | " 80.0 \n",
451 | " B28 \n",
452 | " NaN \n",
453 | " \n",
454 | " \n",
455 | " 829 \n",
456 | " 830 \n",
457 | " 1 \n",
458 | " 1 \n",
459 | " Stone, Mrs. George Nelson (Martha Evelyn) \n",
460 | " female \n",
461 | " 62.0 \n",
462 | " 0 \n",
463 | " 0 \n",
464 | " 113572 \n",
465 | " 80.0 \n",
466 | " B28 \n",
467 | " NaN \n",
468 | " \n",
469 | " \n",
470 | "
\n",
471 | "
"
472 | ],
473 | "text/plain": [
474 | " PassengerId Survived Pclass Name \\\n",
475 | "61 62 1 1 Icard, Miss. Amelie \n",
476 | "829 830 1 1 Stone, Mrs. George Nelson (Martha Evelyn) \n",
477 | "\n",
478 | " Sex Age SibSp Parch Ticket Fare Cabin Embarked \n",
479 | "61 female 38.0 0 0 113572 80.0 B28 NaN \n",
480 | "829 female 62.0 0 0 113572 80.0 B28 NaN "
481 | ]
482 | },
483 | "execution_count": 14,
484 | "metadata": {},
485 | "output_type": "execute_result"
486 | }
487 | ],
488 | "source": [
489 | "trainDf.loc[trainDf[\"Embarked\"].isna()]"
490 | ]
491 | },
492 | {
493 | "cell_type": "code",
494 | "execution_count": 15,
495 | "metadata": {},
496 | "outputs": [
497 | {
498 | "data": {
499 | "text/html": [
500 | "\n",
501 | "\n",
514 | "
\n",
515 | " \n",
516 | " \n",
517 | " \n",
518 | " PassengerId \n",
519 | " Survived \n",
520 | " Pclass \n",
521 | " Name \n",
522 | " Sex \n",
523 | " Age \n",
524 | " SibSp \n",
525 | " Parch \n",
526 | " Ticket \n",
527 | " Fare \n",
528 | " Cabin \n",
529 | " Embarked \n",
530 | " \n",
531 | " \n",
532 | " \n",
533 | " \n",
534 | "
\n",
535 | "
"
536 | ],
537 | "text/plain": [
538 | "Empty DataFrame\n",
539 | "Columns: [PassengerId, Survived, Pclass, Name, Sex, Age, SibSp, Parch, Ticket, Fare, Cabin, Embarked]\n",
540 | "Index: []"
541 | ]
542 | },
543 | "execution_count": 15,
544 | "metadata": {},
545 | "output_type": "execute_result"
546 | }
547 | ],
548 | "source": [
549 | "# Impute the two missing embarkation ports with \"C\", then confirm no gaps remain.\n",
550 | "# NOTE(review): the notebook does not say why \"C\" was chosen - confirm the rationale.\n",
551 | "trainDf.loc[trainDf[\"Embarked\"].isna(), \"Embarked\"] = \"C\"\n",
552 | "trainDf.loc[trainDf[\"Embarked\"].isna()]"
551 | ]
552 | },
553 | {
554 | "cell_type": "code",
555 | "execution_count": 16,
556 | "metadata": {},
557 | "outputs": [
558 | {
559 | "data": {
560 | "text/plain": [
561 | "PassengerId 0\n",
562 | "Survived 0\n",
563 | "Pclass 0\n",
564 | "Name 0\n",
565 | "Sex 0\n",
566 | "Age 177\n",
567 | "SibSp 0\n",
568 | "Parch 0\n",
569 | "Ticket 0\n",
570 | "Fare 0\n",
571 | "Cabin 687\n",
572 | "Embarked 0\n",
573 | "dtype: int64"
574 | ]
575 | },
576 | "execution_count": 16,
577 | "metadata": {},
578 | "output_type": "execute_result"
579 | }
580 | ],
581 | "source": [
582 | "trainDf.isna().sum(axis=0)"
583 | ]
584 | },
585 | {
586 | "cell_type": "code",
587 | "execution_count": 17,
588 | "metadata": {},
589 | "outputs": [
590 | {
591 | "data": {
592 | "text/html": [
593 | "\n",
594 | "\n",
607 | "
\n",
608 | " \n",
609 | " \n",
610 | " \n",
611 | " PassengerId \n",
612 | " Pclass \n",
613 | " Name \n",
614 | " Sex \n",
615 | " Age \n",
616 | " SibSp \n",
617 | " Parch \n",
618 | " Ticket \n",
619 | " Fare \n",
620 | " Cabin \n",
621 | " Embarked \n",
622 | " \n",
623 | " \n",
624 | " \n",
625 | " \n",
626 | " 152 \n",
627 | " 1044 \n",
628 | " 3 \n",
629 | " Storey, Mr. Thomas \n",
630 | " male \n",
631 | " 60.5 \n",
632 | " 0 \n",
633 | " 0 \n",
634 | " 3701 \n",
635 | " NaN \n",
636 | " NaN \n",
637 | " S \n",
638 | " \n",
639 | " \n",
640 | "
\n",
641 | "
"
642 | ],
643 | "text/plain": [
644 | " PassengerId Pclass Name Sex Age SibSp Parch Ticket \\\n",
645 | "152 1044 3 Storey, Mr. Thomas male 60.5 0 0 3701 \n",
646 | "\n",
647 | " Fare Cabin Embarked \n",
648 | "152 NaN NaN S "
649 | ]
650 | },
651 | "execution_count": 17,
652 | "metadata": {},
653 | "output_type": "execute_result"
654 | }
655 | ],
656 | "source": [
657 | "testDf.loc[testDf[\"Fare\"].isna()]"
658 | ]
659 | },
660 | {
661 | "cell_type": "code",
662 | "execution_count": 18,
663 | "metadata": {},
664 | "outputs": [
665 | {
666 | "data": {
667 | "text/html": [
668 | "\n",
669 | "\n",
682 | "
\n",
683 | " \n",
684 | " \n",
685 | " \n",
686 | " PassengerId \n",
687 | " Pclass \n",
688 | " Name \n",
689 | " Sex \n",
690 | " Age \n",
691 | " SibSp \n",
692 | " Parch \n",
693 | " Ticket \n",
694 | " Fare \n",
695 | " Cabin \n",
696 | " Embarked \n",
697 | " \n",
698 | " \n",
699 | " \n",
700 | " \n",
701 | "
\n",
702 | "
"
703 | ],
704 | "text/plain": [
705 | "Empty DataFrame\n",
706 | "Columns: [PassengerId, Pclass, Name, Sex, Age, SibSp, Parch, Ticket, Fare, Cabin, Embarked]\n",
707 | "Index: []"
708 | ]
709 | },
710 | "execution_count": 18,
711 | "metadata": {},
712 | "output_type": "execute_result"
713 | }
714 | ],
715 | "source": [
716 | "# Impute each missing fare with the mean fare of that passenger's own class instead of\n",
717 | "# hard-coding Pclass == 3. For the single third-class row that lacks a fare this gives\n",
718 | "# the same value as before, and it stays correct if a row of another class is missing one.\n",
719 | "testDf[\"Fare\"] = testDf[\"Fare\"].fillna(testDf.groupby(\"Pclass\")[\"Fare\"].transform(\"mean\"))\n",
720 | "testDf[testDf[\"Fare\"].isnull()]"
718 | ]
719 | },
720 | {
721 | "cell_type": "code",
722 | "execution_count": 19,
723 | "metadata": {},
724 | "outputs": [
725 | {
726 | "data": {
727 | "text/plain": [
728 | "PassengerId 0\n",
729 | "Survived 0\n",
730 | "Pclass 0\n",
731 | "Name 0\n",
732 | "Sex 0\n",
733 | "SibSp 0\n",
734 | "Parch 0\n",
735 | "Ticket 0\n",
736 | "Fare 0\n",
737 | "Embarked 0\n",
738 | "dtype: int64"
739 | ]
740 | },
741 | "execution_count": 19,
742 | "metadata": {},
743 | "output_type": "execute_result"
744 | }
745 | ],
746 | "source": [
747 | "# Drop the two columns that still contain missing values (Age, Cabin) by name rather\n",
748 | "# than with dropna(axis=1), so the dropped set cannot silently change if the\n",
749 | "# missing-value pattern of the data changes. errors='ignore' keeps the cell re-runnable.\n",
750 | "trainDf.drop(columns=['Age', 'Cabin'], inplace=True, errors='ignore')\n",
751 | "assert not trainDf.isnull().values.any(), 'unexpected missing values remain in trainDf'\n",
752 | "trainDf.isnull().sum()"
749 | ]
750 | },
751 | {
752 | "cell_type": "code",
753 | "execution_count": 20,
754 | "metadata": {},
755 | "outputs": [
756 | {
757 | "data": {
758 | "text/plain": [
759 | "PassengerId 0\n",
760 | "Pclass 0\n",
761 | "Name 0\n",
762 | "Sex 0\n",
763 | "SibSp 0\n",
764 | "Parch 0\n",
765 | "Ticket 0\n",
766 | "Fare 0\n",
767 | "Embarked 0\n",
768 | "dtype: int64"
769 | ]
770 | },
771 | "execution_count": 20,
772 | "metadata": {},
773 | "output_type": "execute_result"
774 | }
775 | ],
776 | "source": [
777 | "# Drop the two columns that still contain missing values (Age, Cabin) by name rather\n",
778 | "# than with dropna(axis=1), so the dropped set cannot silently change if the\n",
779 | "# missing-value pattern of the data changes. errors='ignore' keeps the cell re-runnable.\n",
780 | "testDf.drop(columns=['Age', 'Cabin'], inplace=True, errors='ignore')\n",
781 | "assert not testDf.isnull().values.any(), 'unexpected missing values remain in testDf'\n",
782 | "testDf.isnull().sum()"
779 | ]
780 | },
781 | {
782 | "cell_type": "code",
783 | "execution_count": 21,
784 | "metadata": {},
785 | "outputs": [
786 | {
787 | "data": {
788 | "text/html": [
789 | "\n",
790 | "\n",
803 | "
\n",
804 | " \n",
805 | " \n",
806 | " \n",
807 | " PassengerId \n",
808 | " Survived \n",
809 | " Pclass \n",
810 | " Name \n",
811 | " Sex \n",
812 | " SibSp \n",
813 | " Parch \n",
814 | " Ticket \n",
815 | " Fare \n",
816 | " Embarked \n",
817 | " \n",
818 | " \n",
819 | " \n",
820 | " \n",
821 | " 0 \n",
822 | " 1 \n",
823 | " 0 \n",
824 | " 3 \n",
825 | " Braund, Mr. Owen Harris \n",
826 | " male \n",
827 | " 1 \n",
828 | " 0 \n",
829 | " A/5 21171 \n",
830 | " 7.2500 \n",
831 | " S \n",
832 | " \n",
833 | " \n",
834 | " 1 \n",
835 | " 2 \n",
836 | " 1 \n",
837 | " 1 \n",
838 | " Cumings, Mrs. John Bradley (Florence Briggs Th... \n",
839 | " female \n",
840 | " 1 \n",
841 | " 0 \n",
842 | " PC 17599 \n",
843 | " 71.2833 \n",
844 | " C \n",
845 | " \n",
846 | " \n",
847 | " 2 \n",
848 | " 3 \n",
849 | " 1 \n",
850 | " 3 \n",
851 | " Heikkinen, Miss. Laina \n",
852 | " female \n",
853 | " 0 \n",
854 | " 0 \n",
855 | " STON/O2. 3101282 \n",
856 | " 7.9250 \n",
857 | " S \n",
858 | " \n",
859 | " \n",
860 | " 3 \n",
861 | " 4 \n",
862 | " 1 \n",
863 | " 1 \n",
864 | " Futrelle, Mrs. Jacques Heath (Lily May Peel) \n",
865 | " female \n",
866 | " 1 \n",
867 | " 0 \n",
868 | " 113803 \n",
869 | " 53.1000 \n",
870 | " S \n",
871 | " \n",
872 | " \n",
873 | " 4 \n",
874 | " 5 \n",
875 | " 0 \n",
876 | " 3 \n",
877 | " Allen, Mr. William Henry \n",
878 | " male \n",
879 | " 0 \n",
880 | " 0 \n",
881 | " 373450 \n",
882 | " 8.0500 \n",
883 | " S \n",
884 | " \n",
885 | " \n",
886 | " 5 \n",
887 | " 6 \n",
888 | " 0 \n",
889 | " 3 \n",
890 | " Moran, Mr. James \n",
891 | " male \n",
892 | " 0 \n",
893 | " 0 \n",
894 | " 330877 \n",
895 | " 8.4583 \n",
896 | " Q \n",
897 | " \n",
898 | " \n",
899 | " 6 \n",
900 | " 7 \n",
901 | " 0 \n",
902 | " 1 \n",
903 | " McCarthy, Mr. Timothy J \n",
904 | " male \n",
905 | " 0 \n",
906 | " 0 \n",
907 | " 17463 \n",
908 | " 51.8625 \n",
909 | " S \n",
910 | " \n",
911 | " \n",
912 | " 7 \n",
913 | " 8 \n",
914 | " 0 \n",
915 | " 3 \n",
916 | " Palsson, Master. Gosta Leonard \n",
917 | " male \n",
918 | " 3 \n",
919 | " 1 \n",
920 | " 349909 \n",
921 | " 21.0750 \n",
922 | " S \n",
923 | " \n",
924 | " \n",
925 | " 8 \n",
926 | " 9 \n",
927 | " 1 \n",
928 | " 3 \n",
929 | " Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) \n",
930 | " female \n",
931 | " 0 \n",
932 | " 2 \n",
933 | " 347742 \n",
934 | " 11.1333 \n",
935 | " S \n",
936 | " \n",
937 | " \n",
938 | " 9 \n",
939 | " 10 \n",
940 | " 1 \n",
941 | " 2 \n",
942 | " Nasser, Mrs. Nicholas (Adele Achem) \n",
943 | " female \n",
944 | " 1 \n",
945 | " 0 \n",
946 | " 237736 \n",
947 | " 30.0708 \n",
948 | " C \n",
949 | " \n",
950 | " \n",
951 | "
\n",
952 | "
"
953 | ],
954 | "text/plain": [
955 | " PassengerId Survived Pclass \\\n",
956 | "0 1 0 3 \n",
957 | "1 2 1 1 \n",
958 | "2 3 1 3 \n",
959 | "3 4 1 1 \n",
960 | "4 5 0 3 \n",
961 | "5 6 0 3 \n",
962 | "6 7 0 1 \n",
963 | "7 8 0 3 \n",
964 | "8 9 1 3 \n",
965 | "9 10 1 2 \n",
966 | "\n",
967 | " Name Sex SibSp Parch \\\n",
968 | "0 Braund, Mr. Owen Harris male 1 0 \n",
969 | "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 1 0 \n",
970 | "2 Heikkinen, Miss. Laina female 0 0 \n",
971 | "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 1 0 \n",
972 | "4 Allen, Mr. William Henry male 0 0 \n",
973 | "5 Moran, Mr. James male 0 0 \n",
974 | "6 McCarthy, Mr. Timothy J male 0 0 \n",
975 | "7 Palsson, Master. Gosta Leonard male 3 1 \n",
976 | "8 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 0 2 \n",
977 | "9 Nasser, Mrs. Nicholas (Adele Achem) female 1 0 \n",
978 | "\n",
979 | " Ticket Fare Embarked \n",
980 | "0 A/5 21171 7.2500 S \n",
981 | "1 PC 17599 71.2833 C \n",
982 | "2 STON/O2. 3101282 7.9250 S \n",
983 | "3 113803 53.1000 S \n",
984 | "4 373450 8.0500 S \n",
985 | "5 330877 8.4583 Q \n",
986 | "6 17463 51.8625 S \n",
987 | "7 349909 21.0750 S \n",
988 | "8 347742 11.1333 S \n",
989 | "9 237736 30.0708 C "
990 | ]
991 | },
992 | "execution_count": 21,
993 | "metadata": {},
994 | "output_type": "execute_result"
995 | }
996 | ],
997 | "source": [
998 | "trainDf.head(n=10)"
999 | ]
1000 | },
1001 | {
1002 | "cell_type": "code",
1003 | "execution_count": 22,
1004 | "metadata": {},
1005 | "outputs": [],
1006 | "source": [
1007 | "# Index the training rows by PassengerId and discard the Name and Ticket text columns.\n",
1008 | "trainDf.set_index(keys='PassengerId', inplace=True)\n",
1009 | "trainDf.drop(columns=['Name', 'Ticket'], inplace=True)"
1010 | ]
1011 | },
1012 | {
1013 | "cell_type": "code",
1014 | "execution_count": 23,
1015 | "metadata": {},
1016 | "outputs": [],
1017 | "source": [
1018 | "# Index the test rows by PassengerId and discard the Name and Ticket text columns.\n",
1019 | "testDf.set_index(keys='PassengerId', inplace=True)\n",
1020 | "testDf.drop(columns=['Name', 'Ticket'], inplace=True)"
1021 | ]
1022 | },
1023 | {
1024 | "cell_type": "code",
1025 | "execution_count": 24,
1026 | "metadata": {},
1027 | "outputs": [
1028 | {
1029 | "data": {
1030 | "text/html": [
1031 | "\n",
1032 | "\n",
1045 | "
\n",
1046 | " \n",
1047 | " \n",
1048 | " \n",
1049 | " Survived \n",
1050 | " Pclass \n",
1051 | " Sex \n",
1052 | " SibSp \n",
1053 | " Parch \n",
1054 | " Fare \n",
1055 | " Embarked \n",
1056 | " \n",
1057 | " \n",
1058 | " PassengerId \n",
1059 | " \n",
1060 | " \n",
1061 | " \n",
1062 | " \n",
1063 | " \n",
1064 | " \n",
1065 | " \n",
1066 | " \n",
1067 | " \n",
1068 | " \n",
1069 | " \n",
1070 | " 1 \n",
1071 | " 0 \n",
1072 | " 3 \n",
1073 | " male \n",
1074 | " 1 \n",
1075 | " 0 \n",
1076 | " 7.2500 \n",
1077 | " S \n",
1078 | " \n",
1079 | " \n",
1080 | " 2 \n",
1081 | " 1 \n",
1082 | " 1 \n",
1083 | " female \n",
1084 | " 1 \n",
1085 | " 0 \n",
1086 | " 71.2833 \n",
1087 | " C \n",
1088 | " \n",
1089 | " \n",
1090 | " 3 \n",
1091 | " 1 \n",
1092 | " 3 \n",
1093 | " female \n",
1094 | " 0 \n",
1095 | " 0 \n",
1096 | " 7.9250 \n",
1097 | " S \n",
1098 | " \n",
1099 | " \n",
1100 | " 4 \n",
1101 | " 1 \n",
1102 | " 1 \n",
1103 | " female \n",
1104 | " 1 \n",
1105 | " 0 \n",
1106 | " 53.1000 \n",
1107 | " S \n",
1108 | " \n",
1109 | " \n",
1110 | " 5 \n",
1111 | " 0 \n",
1112 | " 3 \n",
1113 | " male \n",
1114 | " 0 \n",
1115 | " 0 \n",
1116 | " 8.0500 \n",
1117 | " S \n",
1118 | " \n",
1119 | " \n",
1120 | " ... \n",
1121 | " ... \n",
1122 | " ... \n",
1123 | " ... \n",
1124 | " ... \n",
1125 | " ... \n",
1126 | " ... \n",
1127 | " ... \n",
1128 | " \n",
1129 | " \n",
1130 | " 887 \n",
1131 | " 0 \n",
1132 | " 2 \n",
1133 | " male \n",
1134 | " 0 \n",
1135 | " 0 \n",
1136 | " 13.0000 \n",
1137 | " S \n",
1138 | " \n",
1139 | " \n",
1140 | " 888 \n",
1141 | " 1 \n",
1142 | " 1 \n",
1143 | " female \n",
1144 | " 0 \n",
1145 | " 0 \n",
1146 | " 30.0000 \n",
1147 | " S \n",
1148 | " \n",
1149 | " \n",
1150 | " 889 \n",
1151 | " 0 \n",
1152 | " 3 \n",
1153 | " female \n",
1154 | " 1 \n",
1155 | " 2 \n",
1156 | " 23.4500 \n",
1157 | " S \n",
1158 | " \n",
1159 | " \n",
1160 | " 890 \n",
1161 | " 1 \n",
1162 | " 1 \n",
1163 | " male \n",
1164 | " 0 \n",
1165 | " 0 \n",
1166 | " 30.0000 \n",
1167 | " C \n",
1168 | " \n",
1169 | " \n",
1170 | " 891 \n",
1171 | " 0 \n",
1172 | " 3 \n",
1173 | " male \n",
1174 | " 0 \n",
1175 | " 0 \n",
1176 | " 7.7500 \n",
1177 | " Q \n",
1178 | " \n",
1179 | " \n",
1180 | "
\n",
1181 | "
891 rows × 7 columns
\n",
1182 | "
"
1183 | ],
1184 | "text/plain": [
1185 | " Survived Pclass Sex SibSp Parch Fare Embarked\n",
1186 | "PassengerId \n",
1187 | "1 0 3 male 1 0 7.2500 S\n",
1188 | "2 1 1 female 1 0 71.2833 C\n",
1189 | "3 1 3 female 0 0 7.9250 S\n",
1190 | "4 1 1 female 1 0 53.1000 S\n",
1191 | "5 0 3 male 0 0 8.0500 S\n",
1192 | "... ... ... ... ... ... ... ...\n",
1193 | "887 0 2 male 0 0 13.0000 S\n",
1194 | "888 1 1 female 0 0 30.0000 S\n",
1195 | "889 0 3 female 1 2 23.4500 S\n",
1196 | "890 1 1 male 0 0 30.0000 C\n",
1197 | "891 0 3 male 0 0 7.7500 Q\n",
1198 | "\n",
1199 | "[891 rows x 7 columns]"
1200 | ]
1201 | },
1202 | "execution_count": 24,
1203 | "metadata": {},
1204 | "output_type": "execute_result"
1205 | }
1206 | ],
1207 | "source": [
1208 | "trainDf"
1209 | ]
1210 | },
1211 | {
1212 | "cell_type": "markdown",
1213 | "metadata": {},
1214 | "source": [
1215 | "# Exploratory data analysis (EDA)"
1216 | ]
1217 | },
1218 | {
1219 | "cell_type": "code",
1220 | "execution_count": 25,
1221 | "metadata": {
1222 | "scrolled": true
1223 | },
1224 | "outputs": [
1225 | {
1226 | "data": {
1227 | "text/plain": [
1228 | ""
1229 | ]
1230 | },
1231 | "execution_count": 25,
1232 | "metadata": {},
1233 | "output_type": "execute_result"
1234 | },
1235 | {
1236 | "data": {
1237 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEICAYAAACwDehOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAARfklEQVR4nO3de7DndV3H8ecLFjVRuchpo11sTRkduihwQkybMSkvVEKmaKksuNPWRGTZjWzKxkvZTQMrakfUxTGV0GRzLNsQb3nJs4EgoLGRxm4gxxtgjDjouz9+n/34Yzm7/Hbhe36Hc56Pmd/8vt/P9/P7/N5n9rv72u/nezmpKiRJAjhg2gVIkpYOQ0GS1BkKkqTOUJAkdYaCJKkzFCRJ3aChkOTQJBcn+UySa5M8McnhSbYmua69H9b6Jsl5SbYnuTLJcUPWJkm6uwx5n0KSzcCHq+oNSR4APBh4GfDlqnpNknOAw6rqt5OcDJwNnAw8ATi3qp6wt/GPOOKIWrdu3WD1S9JytG3bti9W1cxC2wYLhSSHAFcA31tjX5Lks8BTqurGJEcCH6iqxyT527b8tt377ek7Zmdna25ubpD6JWm5SrKtqmYX2jbk9NEjgXngTUkuT/KGJAcDq8f+ob8JWN2W1wA3jH1+R2u7iyQbk8wlmZufnx+wfElaeYYMhVXAccD5VXUs8H/AOeMd2hHEPh2qVNWmqpqtqtmZmQWPfiRJ+2nIUNgB7KiqT7T1ixmFxBfatBHt/ea2fSdw1Njn17Y2SdIiGSwUquom4IYkj2lNJwHXAFuA9a1tPXBJW94CnN6uQjoRuGVv5xMkSfe9VQOPfzbw1nbl0fXAmYyC6KIkG4DPA6e1vu9ldOXRduD21leStIgGDYWqugJY6Az3SQv0LeCsIeuRJO2ddzRLkjpDQZLUGQqSpG7oE81L3vG/eeG0S9AStO1PT592CdJUeKQgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkbNBSSfC7JVUmuSDLX2g5PsjXJde39sNaeJOcl2Z7kyiTHDVmbJOnuFuNI4Uer6vFVNdvWzwEuraqjgUvbOsAzgaPbayNw/iLUJkkaM43po1OAzW15M3DqWPuFNfJx4NAkR06hPklasYYOhQL+Jcm2JBtb2+qqurEt3wSsbstrgBvGPrujtd1Fko1J5pLMzc/PD1W3JK1IqwYe/8lVtTPJdwJbk3xmfGNVVZLalwGrahOwCWB2dnafPitJ2rtBjxSqamd7vxn4B+AE4Au7poXa+82t+07gqLGPr21tkqRFMlgoJDk4yUN3LQNPAz4NbAHWt27rgUva8hbg9HYV0onALWPTTJKkRTDk9NFq4B+S7Pqev6uqf07ySeCiJBuAzwOntf7vBU4GtgO3A2cOWJskaQGDhUJVXQ88boH2LwEnLdBewFlD1SNJumfe0SxJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSusFDIcmBSS5P8p62/sgkn0iyPck7kjygtT+wrW9v29cNXZsk6a4W40jhJcC1Y+t/DLyuqh4NfAXY0No3AF9p7a9r/SRJi2jQUEiyFvgJ4A1tPcBTgYtbl83AqW35lLZO235S6y9JWiRDHyn8BfBbwLfa+sOBr1bVnW19B7CmLa8BbgBo229p/e8iycYkc0nm5ufnByxdklaewUIhyU8CN1fVtvty3KraVFWzVTU7MzNzXw4tSSveqgHHfhLwrCQnAw8CHgacCxyaZFU7GlgL7Gz9dwJHATu
SrAIOAb40YH2SpN0MdqRQVb9TVWurah3wfOD9VfUC4DLgOa3beuCStrylrdO2v7+qaqj6JEl3N437FH4beGmS7YzOGVzQ2i8AHt7aXwqcM4XaJGlFG3L6qKuqDwAfaMvXAycs0OfrwHMXox5J0sK8o1mS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkbqJQSHLpJG2SpPu3vf6O5iQPAh4MHJHkMCBt08OANQPXJklaZHsNBeAXgF8FvhvYxrdD4VbgL4crS5I0DXsNhao6Fzg3ydlV9fpFqkmSNCX3dKQAQFW9PskPA+vGP1NVFw5UlyRpCiYKhSRvAR4FXAF8szUXYChI0jIyUSgAs8AxVVVDFiNJmq5JQ+HTwHcBNw5Yi6Qx//OKH5h2CVqCHvH7Vw06/qShcARwTZJ/B+7Y1VhVzxqkKknSVEwaCn+wrwO3exw+BDywfc/FVfXyJI8E3g48nNFlri+qqm8keSCjcxTHA18CnldVn9vX75Uk7b9Jrz764H6MfQfw1Kr6WpKDgI8k+SfgpcDrqurtSf4G2ACc396/UlWPTvJ84I+B5+3H90qS9tOkj7m4Lcmt7fX1JN9McuvePlMjX2urB7VXAU8FLm7tm4FT2/IpbZ22/aQku26WkyQtgolCoaoeWlUPq6qHAd8B/Azw1/f0uSQHJrkCuBnYCvwX8NWqurN12cG3H5exBrihfd+dwC2Mpph2H3Njkrkkc/Pz85OUL0ma0D4/JbUdAbwbePoEfb9ZVY8H1gInAI/d1+9bYMxNVTVbVbMzMzP3djhJ0phJb1579tjqAYzuW/j6pF9SVV9NchnwRODQJKva0cBaYGfrthM4CtiRZBVwCKMTzpKkRTLpkcJPjb2eDtzG6BzAHiWZSXJoW/4O4MeBa4HLgOe0buuBS9rylrZO2/5+b5aTpMU16dVHZ+7H2EcCm5McyCh8Lqqq9yS5Bnh7klcBlwMXtP4XAG9Jsh34MvD8/fhOSdK9MOn00Vrg9cCTWtOHgZdU1Y49faaqrgSOXaD9ekbnF3Zv/zrw3EnqkSQNY9Lpozcxmt757vb6x9YmSVpGJg2Fmap6U1Xd2V5vBrz0R5KWmUlD4UtJXtjuOzgwyQvxyiBJWnYmDYUXA6cBNzF6UupzgDMGqkmSNCWTPhDvFcD6qvoKQJLDgT9jFBaSpGVi0iOFH9wVCABV9WUWuLJIknT/NmkoHJDksF0r7Uhh0qMMSdL9xKT/sP858LEkf9/Wnwu8epiSJEnTMukdzRcmmWP02GuAZ1fVNcOVJUmahomngFoIGASStIzt86OzJUnLl6EgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSd1goZDkqCSXJbkmydVJXtLaD0+yNcl17f2w1p4k5yXZnuTKJMcNVZskaWFDHincCfx6VR0DnAicleQY4Bzg0qo6Gri0rQM8Ezi6vTYC5w9YmyRpAYOFQlXdWFX/0ZZvA64F1gCnAJtbt83AqW35FODCGvk4cGiSI4eqT5J0d4tyTiHJOuBY4BPA6qq6sW26CVjdltcAN4x9bEdr232sjUnmkszNz88PV7QkrUCDh0KShwDvBH61qm4d31ZVBdS+jFdVm6pqtqpmZ2Zm7sNKJUmDhkKSgxgFwlur6l2t+Qu7poXa+82tfSdw1NjH17Y2SdIiGfLqowAXANdW1WvHNm0B1rfl9cAlY+2nt6uQTgRuGZtmkiQtglUDjv0k4EXAVUmuaG0vA14DXJRkA/B54LS27b3AycB24HbgzAFrkyQtYLBQqKqPANnD5pMW6F/AWUPVI0m6Z97RLEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZL
UGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJK6wUIhyRuT3Jzk02NthyfZmuS69n5Ya0+S85JsT3JlkuOGqkuStGdDHim8GXjGbm3nAJdW1dHApW0d4JnA0e21ETh/wLokSXswWChU1YeAL+/WfAqwuS1vBk4da7+wRj4OHJrkyKFqkyQtbLHPKayuqhvb8k3A6ra8BrhhrN+O1nY3STYmmUsyNz8/P1ylkrQCTe1Ec1UVUPvxuU1VNVtVszMzMwNUJkkr12KHwhd2TQu195tb+07gqLF+a1ubJGkRLXYobAHWt+X1wCVj7ae3q5BOBG4Zm2aSJC2SVUMNnORtwFOAI5LsAF4OvAa4KMkG4PPAaa37e4GTge3A7cCZQ9UlSdqzwUKhqn52D5tOWqBvAWcNVYskaTLe0SxJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSuiUVCkmekeSzSbYnOWfa9UjSSrNkQiHJgcBfAc8EjgF+Nskx061KklaWJRMKwAnA9qq6vqq+AbwdOGXKNUnSirJq2gWMWQPcMLa+A3jC7p2SbAQ2ttWvJfnsItS2UhwBfHHaRSwF+bP10y5Bd+W+ucvLc1+M8j172rCUQmEiVbUJ2DTtOpajJHNVNTvtOqTduW8unqU0fbQTOGpsfW1rkyQtkqUUCp8Ejk7yyCQPAJ4PbJlyTZK0oiyZ6aOqujPJLwPvAw4E3lhVV0+5rJXGaTktVe6biyRVNe0aJElLxFKaPpIkTZmhIEnqDAUtKMlTkrxn2nVoeUjyK0muTfLWgcb/gyS/McTYK82SOdEsaVn7JeDHqmrHtAvR3nmksIwlWZfkM0nenOQ/k7w1yY8l+bck1yU5ob0+luTyJB9N8pgFxjk4yRuT/Hvr5+NHNLEkfwN8L/BPSX53oX0pyRlJ3p1ka5LPJfnlJC9tfT6e5PDW7+eTfDLJp5K8M8mDF/i+RyX55yTbknw4yWMX9ye+fzMUlr9HA38OPLa9fg54MvAbwMuAzwA/UlXHAr8P/OECY/wu8P6qOgH4UeBPkxy8CLVrGaiqXwT+l9G+czB73pe+H3g28EPAq4Hb2375MeD01uddVfVDVfU44FpgwwJfuQk4u6qOZ7Sf//UwP9ny5PTR8vffVXUVQJKrgUurqpJcBawDDgE2JzkaKOCgBcZ4GvCssTnbBwGPYPSXUtoXe9qXAC6rqtuA25LcAvxja78K+MG2/P1JXgUcCjyE0X1NXZKHAD8M/H3SnxH0wAF+jmXLUFj+7hhb/tbY+rcY/fm/ktFfxp9Osg74wAJjBPiZqvLhg7q3FtyXkjyBe95XAd4MnFpVn0pyBvCU3cY/APhqVT3+Pq16BXH6SIfw7WdMnbGHPu8Dzk77r1eSYxehLi1P93ZfeihwY5KDgBfsvrGqbgX+O8lz2/hJ8rh7WfOKYijoT4A/SnI5ez5yfCWjaaUr2xTUKxerOC0793Zf+j3gE8C/MToftpAXABuSfAq4Gn8vyz7xMReSpM4jBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoK0n9pzfK5OcmWSK9oNWNL9mnc0S/shyROBnwSOq6o7khwBPGDKZUn3mkcK0v45EvhiVd0BUFVfrKr/TXJ8kg+2J3S+L8mRSQ5J8tldT6BN8rYkPz/V6qU98OY1aT+0B699BHgw8K/AO4CPAh8ETqmq+STPA55eVS9O8uPAK4BzgTOq6hlTKl3aK6ePpP1QVV9LcjzwI4weAf0O4FWMHv+8tT3a50DgxtZ/a3sez18BPotHS5ZHCtJ9IMlzgLOAB1XVExfYfgCjo4h1wMm7HmcuLTWeU5D2Q5L
HtN9BscvjGf1+iZl2EpokByX5vrb919r2nwPe1J7yKS05HilI+6FNHb2e0S97uRPYDmwE1gLnMXok+SrgL4APAe8GTqiq25K8Fritql6+6IVL98BQkCR1Th9JkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6v4fFZiFomQxq2MAAAAASUVORK5CYII=\n",
1238 | "text/plain": [
1239 | ""
1240 | ]
1241 | },
1242 | "metadata": {
1243 | "needs_background": "light"
1244 | },
1245 | "output_type": "display_data"
1246 | }
1247 | ],
1248 | "source": [
1249 | "sns.countplot(x='Sex', data=trainDf)  # passengers per sex; x= keyword is required by seaborn>=0.12"
1250 | ]
1251 | },
1252 | {
1253 | "cell_type": "code",
1254 | "execution_count": 26,
1255 | "metadata": {},
1256 | "outputs": [
1257 | {
1258 | "data": {
1259 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAULklEQVR4nO3df7RV5X3n8fdXQEnEHxFuMsolXlJNEwlI6tVqGbOoaaNhHMxkkKtJCVRSMlFTOpl2xrGZaExsbZo2teoki7VMwIbFD7UTLasxy5hoWzXaew1KQK0kJuFSUgENEbPwB37nj7N5cosXOcDd91wu79daZ7H3s5/znO9Zbvi4fz0nMhNJkgAOa3UBkqShw1CQJBWGgiSpMBQkSYWhIEkqRra6gAMxbty47OjoaHUZknRQ6enp2ZKZbf1tO6hDoaOjg+7u7laXIUkHlYj48Z62efpIklQYCpKkwlCQJBUH9TUFSRpoL7/8Mr29vezYsaPVpRyw0aNH097ezqhRo5p+j6EgSX309vZy1FFH0dHRQUS0upz9lpls3bqV3t5eJk6c2PT7PH0kSX3s2LGDsWPHHtSBABARjB07dp+PeAwFSdrNwR4Iu+zP9zAUJEmFoSBJTbj22muZNGkSU6ZMYerUqTz00EMHPOadd97JddddNwDVwZgxYwZknEP+QvNpf3RLq0sYMnr+/COtLkEakh588EFWrVrFI488whFHHMGWLVt46aWXmnrvK6+8wsiR/f9TO3PmTGbOnDmQpR4wjxQkaS82bdrEuHHjOOKIIwAYN24cJ5xwAh0dHWzZsgWA7u5upk+fDsDVV1/NnDlzmDZtGnPmzOHMM89k7dq1Zbzp06fT3d3N4sWLufzyy9m2bRsnnngir776KgAvvPACEyZM4OWXX+YHP/gB5513Hqeddhpnn302TzzxBABPP/00Z511FpMnT+ZTn/rUgH1XQ0GS9uJ973sfGzZs4O1vfzuXXnop9913317fs27dOr71rW+xbNkyurq6WLlyJdAImE2bNtHZ2Vn6HnPMMUydOrWMu2rVKs4991xGjRrFggULuOGGG+jp6eELX/gCl156KQALFy7k4x//OGvWrOH4448fsO9qKEjSXowZM4aenh4WLVpEW1sbXV1dLF68+HXfM3PmTN7whjcAMHv2bG677TYAVq5cyaxZs17Tv6urixUrVgCwfPlyurq62L59Ow888AAXXnghU6dO5WMf+xibNm0C4P777+fiiy8GYM6cOQP1Vb2mIEnNGDFiBNOnT2f69OlMnjyZJUuWMHLkyHLKZ/fnAY488siyPH78eMaOHctjjz3GihUr+PKXv/ya8WfOnMmVV17Js88+S09PD+eccw4vvPACxx57LKtXr+63pjpunfVIQZL24sknn+Spp54q66tXr+bEE0+ko6ODnp4eAG6//fbXHaOrq4vPf/7zbNu2jSlTprxm+5gxYzj99NNZuHAh559/PiNGjODoo49m4sSJ3HrrrUDjKeVHH30UgGnTprF8+XIAli5dOiDfEwwFSdqr7du3M3fuXE455RSmTJnCunXruPrqq7nqqqtYuHAhnZ2djBgx4nXHmDVrFsuXL2f27Nl77NPV1cXXvvY1urq6StvSpUu5+eabOfXUU5k0aRJ33HEHANdffz033XQTkydPZuPGjQPzRYHIzAEbbLB1dnbmgf7Ijrek/pK3pErw+OOP8853vrPVZQyY/r5PRPRkZmd//T1SkCQVhoIkqTAUJEmFoSBJKgwFSVJhKEiSCp9olqR9NNC3sjdzO/hdd93FwoUL2blzJx/96Ee54oorBrSGXTxSkKQhbufOnVx22WV84xvfYN26dSxbtox169bV8lmGgiQNcQ8//DAnnXQSb3vb2zj88MO56KKLypPNA81QkKQhbuPGjUyYMKGst7e3D+jUFn0ZCpKkwlCQpCFu/PjxbNiwoaz39vYyfvz4Wj7LUJCkIe7000/
nqaee4umnn+all15i+fLltf22s7ekStI+GuwZhUeOHMmNN97Iueeey86dO7nkkkuYNGlSPZ9Vy6iSpAE1Y8YMZsyYUfvnePpIklQYCpKkwlCQJBWGgiSpqD0UImJERHwvIlZV6xMj4qGIWB8RKyLi8Kr9iGp9fbW9o+7aJEn/3mAcKSwEHu+z/mfAFzPzJOA5YH7VPh94rmr/YtVPkjSIar0lNSLagf8EXAt8MiICOAf4UNVlCXA18CXggmoZ4DbgxoiIzMw6a5SkffWTayYP6Hhv/fSavfa55JJLWLVqFW9+85v5/ve/P6Cf31fdRwp/BfxP4NVqfSzws8x8pVrvBXY9qz0e2ABQbd9W9f93ImJBRHRHRPfmzZtrLF2Sho558+Zx11131f45tYVCRJwPPJOZPQM5bmYuyszOzOxsa2sbyKElach6z3vew3HHHVf759R5+mgaMDMiZgCjgaOB64FjI2JkdTTQDuya/3UjMAHojYiRwDHA1hrrkyTtprYjhcz835nZnpkdwEXAtzPzw8B3gFlVt7nArl+KuLNap9r+ba8nSNLgasVzCv+LxkXn9TSuGdxctd8MjK3aPwnU8wOkkqQ9GpQJ8TLzXuDeavmHwBn99NkBXDgY9UiS+ucsqZK0j5q5hXSgXXzxxdx7771s2bKF9vZ2PvOZzzB//vy9v3EfGQqSdBBYtmzZoHyOcx9JkgpDQZJUGAqStJvhcjf8/nwPQ0GS+hg9ejRbt2496IMhM9m6dSujR4/ep/d5oVmS+mhvb6e3t5fhMLfa6NGjaW9v36f3GAqS1MeoUaOYOHFiq8toGU8fSZIKQ0GSVBgKkqTCUJAkFYaCJKkwFCRJhaEgSSoMBUlSYShIkgpDQZJUGAqSpMJQkCQVhoIkqTAUJEmFoSBJKgwFSVJhKEiSCkNBklQYCpKkwlCQJBWGgiSpMBQkSYWhIEkqDAVJUmEoSJIKQ0GSVBgKkqSitlCIiNER8XBEPBoRayPiM1X7xIh4KCLWR8SKiDi8aj+iWl9fbe+oqzZJUv/qPFJ4ETgnM08FpgLnRcSZwJ8BX8zMk4DngPlV//nAc1X7F6t+kqRBVFsoZMP2anVU9UrgHOC2qn0J8IFq+YJqnWr7eyMi6qpPkvRatV5TiIgREbEaeAa4G/gB8LPMfKXq0guMr5bHAxsAqu3bgLH9jLkgIrojonvz5s11li9Jh5xaQyEzd2bmVKAdOAN4xwCMuSgzOzOzs62t7UCHkyT1MSh3H2Xmz4DvAGcBx0bEyGpTO7CxWt4ITACoth8DbB2M+iRJDXXefdQWEcdWy28Afht4nEY4zKq6zQXuqJbvrNaptn87M7Ou+iRJrzVy71322/HAkogYQSN8VmbmqohYByyPiM8B3wNurvrfDPxNRKwHngUuqrE2SVI/aguFzHwMeHc/7T+kcX1h9/YdwIV11SNJ2jufaJYkFYaCJKkwFCRJhaEgSSoMBUlSYShIkgpDQZJUNBUKEXFPM22SpIPb6z68FhGjgTcC4yLiTcCuqayP5pezm0qShom9PdH8MeAPgBOAHn4ZCj8HbqyvLElSK7xuKGTm9cD1EfGJzLxhkGqSJLVIU3MfZeYNEfEbQEff92TmLTXVJUlqgaZCISL+BvgVYDWws2pOwFCQpGGk2VlSO4FT/H0DSRremn1O4fvAf6izEElS6zV7pDAOWBcRDwMv7mrMzJm1VCVJaolmQ+HqOouQJA0Nzd59dF/dhUiSWq/Zu4+ep3G3EcDhwCjghcw8uq7CJEmDr9kjhaN2LUdEABcAZ9ZVlCSpNfZ5ltRs+Dpw7sCXI0lqpWZPH32wz+phNJ5b2FFLRZKklmn27qP/3Gf5FeBHNE4hSZKGkWavKfxu3YVIklqv2dNH7cANwLSq6R+BhZnZW1dhkrTLT66Z3OoShoy3fnpNreM3e6H5q8CdNH5X4QTg76o2SdIw0mwotGXmVzPzleq1GGirsS5JUgs0GwpbI+J3ImJE9fo
dYGudhUmSBl+zoXAJMBv4KbAJmAXMq6kmSVKLNHtL6jXA3Mx8DiAijgO+QCMsJEnDRLNHClN2BQJAZj4LvLuekiRJrdJsKBwWEW/atVIdKTR7lCFJOkg0+w/7XwAPRsSt1fqFwLX1lCRJapVmn2i+JSK6gXOqpg9m5rr6ypIktULTp4CqEDAIJGkY2+eps5sVERMi4jsRsS4i1kbEwqr9uIi4OyKeqv58U9UeEfHXEbE+Ih6LiF+rqzZJUv9qCwUas6n+j8w8hcYP8lwWEacAVwD3ZObJwD3VOsD7gZOr1wLgSzXWJknqR22hkJmbMvORavl54HFgPI0pt5dU3ZYAH6iWLwBuqX7E57vAsRFxfF31SZJeq84jhSIiOmg81/AQ8JbM3FRt+inwlmp5PLChz9t6q7bdx1oQEd0R0b158+b6ipakQ1DtoRARY4DbgT/IzJ/33ZaZCeS+jJeZizKzMzM729qck0+SBlKtoRARo2gEwtLM/Nuq+d92nRaq/nymat8ITOjz9vaqTZI0SOq8+yiAm4HHM/Mv+2y6E5hbLc8F7ujT/pHqLqQzgW19TjNJkgZBnVNVTAPmAGsiYnXVdiVwHbAyIuYDP6Yx+yrA3wMzgPXALwB/AlSSBlltoZCZ/wTEHja/t5/+CVxWVz2SpL0blLuPJEkHB0NBklQYCpKkwlCQJBWGgiSpMBQkSYWhIEkqDAVJUmEoSJIKQ0GSVBgKkqTCUJAkFYaCJKkwFCRJhaEgSSoMBUlSYShIkgpDQZJUGAqSpMJQkCQVhoIkqTAUJEmFoSBJKgwFSVJhKEiSCkNBklQYCpKkwlCQJBWGgiSpMBQkSYWhIEkqDAVJUmEoSJIKQ0GSVBgKkqRiZF0DR8RXgPOBZzLzXVXbccAKoAP4ETA7M5+LiACuB2YAvwDmZeYjddWm/v3kmsmtLmHIeOun17S6BKkl6jxSWAyct1vbFcA9mXkycE+1DvB+4OTqtQD4Uo11SZL2oLZQyMx/AJ7drfkCYEm1vAT4QJ/2W7Lhu8CxEXF8XbVJkvo32NcU3pKZm6rlnwJvqZbHAxv69Out2iRJg6hlF5ozM4Hc1/dFxIKI6I6I7s2bN9dQmSQdugY7FP5t12mh6s9nqvaNwIQ+/dqrttfIzEWZ2ZmZnW1tbbUWK0mHmsEOhTuBudXyXOCOPu0fiYYzgW19TjNJkgZJnbekLgOmA+Miohe4CrgOWBkR84EfA7Or7n9P43bU9TRuSf3duuqSJO1ZbaGQmRfvYdN7++mbwGV11SJJao5PNEuSCkNBklQYCpKkorZrCpIOzGl/dEurSxgy/t9Rra7g0OGRgiSpMBQkSYWhIEkqDAVJUmEoSJIKQ0GSVBgKkqTCUJAkFYaCJKkwFCRJhaEgSSoMBUlSYShIkgpDQZJUGAqSpMJQkCQVhoIkqTAUJEmFoSBJKgwFSVJhKEiSCkNBklQYCpKkwlCQJBWGgiSpMBQkSYWhIEkqDAVJUmEoSJIKQ0GSVBgKkqTCUJAkFUMqFCLivIh4MiLWR8QVra5Hkg41QyYUImIEcBPwfuAU4OKIOKW1VUnSoWXIhAJwBrA+M3+YmS8By4ELWlyTJB1SRra6gD7GAxv6rPcCv757p4hYACyoVrdHxJODUNsh4UQYB2xpdR1DwlXR6grUh/tmHwOzb564pw1DKRSakpmLgEWtrmM4iojuzOxsdR3S7tw3B89QOn20EZjQZ729apMkDZKhFAr/DJwcERMj4nDgIuDOFtckSYeUIXP6KDNfiYjLgW8CI4CvZObaFpd1qPG0nIYq981BEpnZ6hokSUPEUDp9JElqMUNBklQYCupXREyPiFWtrkPDQ0T8fkQ8HhFLaxr/6oj4wzrGPtQMmQvNkoa1S4HfyszeVhei1+eRwjAWER0R8URELI6If4mIpRHxWxFxf0Q8FRFnVK8HI+J7EfFARPxqP+McGRFfiYiHq35OP6KmRcSXgbcB34iIP+5vX4qIeRHx9Yi
4OyJ+FBGXR8Qnqz7fjYjjqn6/FxH/HBGPRsTtEfHGfj7vVyLirojoiYh/jIh3DO43PrgZCsPfScBfAO+oXh8C/iPwh8CVwBPA2Zn5buDTwJ/0M8YfA9/OzDOA3wT+PCKOHITaNQxk5n8D/pXGvnMke96X3gV8EDgduBb4RbVfPgh8pOrzt5l5emaeCjwOzO/nIxcBn8jM02js5/+3nm82PHn6aPh7OjPXAETEWuCezMyIWAN0AMcASyLiZCCBUf2M8T5gZp9ztqOBt9L4Syntiz3tSwDfyczngecjYhvwd1X7GmBKtfyuiPgccCwwhsZzTUVEjAF+A7g1oswRdEQN32PYMhSGvxf7LL/aZ/1VGv/9P0vjL+N/iYgO4N5+xgjgv2amkw/qQPW7L0XEr7P3fRVgMfCBzHw0IuYB03cb/zDgZ5k5dUCrPoR4+kjH8Ms5pubtoc83gU9E9b9eEfHuQahLw9OB7ktHAZsiYhTw4d03ZubPgacj4sJq/IiIUw+w5kOKoaDPA38aEd9jz0eOn6VxWumx6hTUZwerOA07B7ov/R/gIeB+GtfD+vNhYH5EPAqsxd9l2SdOcyFJKjxSkCQVhoIkqTAUJEmFoSBJKgwFSVJhKEj7qZrHZ21EPBYRq6sHsKSDmk80S/shIs4Czgd+LTNfjIhxwOEtLks6YB4pSPvneGBLZr4IkJlbMvNfI+K0iLivmqHzmxFxfEQcExFP7pqBNiKWRcTvtbR6aQ98eE3aD9XEa/8EvBH4FrACeAC4D7ggMzdHRBdwbmZeEhG/DVwDXA/My8zzWlS69Lo8fSTth8zcHhGnAWfTmAJ6BfA5GtM/311N7TMC2FT1v7uaj+cmwLl4NGR5pCANgIiYBVwGjM7Ms/rZfhiNo4gOYMau6cylocZrCtJ+iIhfrX6DYpepNH5foq26CE1EjIqISdX2/15t/xDw1WqWT2nI8UhB2g/VqaMbaPzYyyvAemAB0A78NY0pyUcCfwX8A/B14IzMfD4i/hJ4PjOvGvTCpb0wFCRJhaePJEmFoSBJKgwFSVJhKEiSCkNBklQYCpKkwlCQJBX/Hxg+KR5WewrDAAAAAElFTkSuQmCC\n",
1260 | "text/plain": [
1261 | ""
1262 | ]
1263 | },
1264 | "metadata": {
1265 | "needs_background": "light"
1266 | },
1267 | "output_type": "display_data"
1268 | }
1269 | ],
1270 | "source": [
1271 | "sns.countplot(x='Sex', hue='Survived', data=trainDf)  # passengers per sex, split by survival\n",
1272 | "plt.show()"
1273 | ]
1274 | },
1275 | {
1276 | "cell_type": "code",
1277 | "execution_count": 27,
1278 | "metadata": {},
1279 | "outputs": [
1280 | {
1281 | "data": {
1282 | "text/plain": [
1283 | ""
1284 | ]
1285 | },
1286 | "execution_count": 27,
1287 | "metadata": {},
1288 | "output_type": "execute_result"
1289 | },
1290 | {
1291 | "data": {
1292 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAPZElEQVR4nO3dfazeZX3H8fcHCrKJ8mA7hm23stloWFTUM8SHZE72IMxZ4gQxOio26ZawReOcY1syH+IWzZwOp7I1Qy1kExDn6IxTCQ9zGlBPJ/I4Z8dgtII9PCo6nWXf/XGuc3Eop+Vu6e/cp5z3K7lzX7/rd/1+9/cmzflw/Z7uVBWSJAEcMO4CJEkLh6EgSeoMBUlSZyhIkjpDQZLULRl3AY/F0qVLa9WqVeMuQ5L2K5s3b76rqpbNtW6/DoVVq1YxOTk57jIkab+S5LZdrfPwkSSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKnbr+9o3hee9/vnj7sELUCb//yMcZcgjYUzBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpGzQUktya5Pok1yaZbH1HJrksyTfb+xGtP0k+kGRLkuuSPHfI2iRJjzQfM4VfrKrjqmqiLZ8NXF5Vq4HL2zLAScDq9loPnDsPtUmSZhnH4aM1wMbW3gicMqv//Jp2DXB4kqPHUJ8kLVpDh0IBn0+yOcn61ndUVd3R2ncCR7X2cuD2WdtubX0Pk2R9kskkk1NTU0PVLUmL0tA/x/niqtqW5CeAy5L8++yVVVVJak92WFUbgA0AExMTe7StJGn3Bp0pVNW29r4d+BRwPPDtmcNC7X17G74NWDlr8xWtT5I0TwYLhSRPTPKkmTbwK8ANwCZgbRu2Fri0tTcBZ7SrkE4A7p91mEmSNA+GPHx0FPCpJDOf8/dV9dkkXwUuTrIOuA04rY3/DHAysAX4PnDmgLVJkuYwWChU1S3As+fovxs4cY7+As4aqh5J0qPzjmZJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYOHQpIDk3wtyafb8jFJvpxkS5KLkhzc+p/Qlre09auGrk2S9HDzMVN4I3DzrOX3AO+vqqcB9wLrWv864N7W//42TpI0jwYNhSQrgF8D/rYtB3gpcEkbshE4pbXXtGXa+hPbeEnSPBl6pvCXwFuB/2vLTwHuq6odbXkrsLy1lwO3A7T197fxD5NkfZLJJJNTU1MDli5Ji89goZDk5cD2qtq8L/dbVRuqaqKqJpYtW7Yvdy1Ji96SAff9IuAVSU4GDgGeDJwDHJ5kSZsNrAC2tfHbgJXA1iRLgMOAuwesT5K0k8FmClX1h1W1oqpWAacDV1TVa4ErgVe1YWuBS1t7U1umrb+iqmqo+iRJjzSO+xT+AHhzki1MnzM4r/WfBzyl9b8ZOHsMtUnSojbk4aOuqq4CrmrtW4Dj5xjzA+DU+ahHkjQ372iWJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpG5efmRH0p7773c+c9wlaAH6qT+5ftD9O1OQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqRupFBIcvkofZKk/dtu72hOcgjw48DSJEcAaaueDCwfuDZJ0jx7tMdc/BbwJuCpwGYeCoXvAB8crixJ0jjs9vBRVZ1TVccAb6mqn6mqY9rr2VW121BIckiSryT5epIbk7yj9R+T5MtJtiS5KMnBrf8JbXlLW79
qX31JSdJoRnogXlX9VZIXAqtmb1NV5+9msx8CL62qB5IcBHwxyT8DbwbeX1UXJvlrYB1wbnu/t6qeluR04D3Aq/fmS0mS9s6oJ5ovAN4LvBj4+faa2N02Ne2BtnhQexXwUuCS1r8ROKW117Rl2voTk8wcrpIkzYNRH509ARxbVbUnO09yINPnIp4GfAj4T+C+qtrRhmzloRPWy4HbAapqR5L7gacAd+3JZ0qS9t6o9yncAPzknu68qh6squOAFcDxwDP2dB87S7I+yWSSyampqce6O0nSLKPOFJYCNyX5CtPnCgCoqleMsnFV3ZfkSuAFwOFJlrTZwgpgWxu2DVgJbE2yBDgMuHuOfW0ANgBMTEzs0cxFkrR7o4bC2/d0x0mWAT9qgfBjwC8zffL4SuBVwIXAWuDStsmmtnx1W3/Fnh6ukiQ9NqNeffQve7Hvo4GN7bzCAcDFVfXpJDcBFyZ5F/A14Lw2/jzggiRbgHuA0/fiMyVJj8FIoZDku0xfOQRwMNNXEn2vqp68q22q6jrgOXP038L0+YWd+38AnDpKPZKkYYw6U3jSTLtdJroGOGGooiRJ47HHT0lt9x/8I/Cr+74cSdI4jXr46JWzFg9g+r6FHwxSkSRpbEa9+ujXZ7V3ALcyfQhJkvQ4Muo5hTOHLkSSNH6jPvtoRZJPJdneXp9MsmLo4iRJ82vUE80fZfrmsqe21z+1PknS48ioobCsqj5aVTva62PAsgHrkiSNwaihcHeS1yU5sL1exxzPJZIk7d9GDYU3AKcBdwJ3MP1sotcPVJMkaUxGvST1ncDaqroXIMmRTP/ozhuGKkySNP9GnSk8ayYQAKrqHuZ4rpEkaf82aigckOSImYU2Uxh1liFJ2k+M+of9L4Crk3yiLZ8K/OkwJUmSxmXUO5rPTzIJvLR1vbKqbhquLEnSOIx8CKiFgEEgSY9je/zobEnS45ehIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUDRYKSVYmuTLJTUluTPLG1n9kksuSfLO9H9H6k+QDSbYkuS7Jc4eqTZI0tyFnCjuA36uqY4ETgLOSHAucDVxeVauBy9sywEnA6vZaD5w7YG2SpDkMFgpVdUdV/Vtrfxe4GVgOrAE2tmEbgVNaew1wfk27Bjg8ydFD1SdJeqR5OaeQZBXwHODLwFFVdUdbdSdwVGsvB26ftdnW1rfzvtYnmUwyOTU1NVzRkrQIDR4KSQ4FPgm8qaq+M3tdVRVQe7K/qtpQVRNVNbFs2bJ9WKkkadBQSHIQ04Hwd1X1D6372zOHhdr79ta/DVg5a/MVrU+SNE+GvPoowHnAzVX1vlmrNgFrW3stcOms/jPaVUgnAPfPOswkSZoHSwbc94uA3wSuT3Jt6/sj4N3AxUnWAbcBp7V1nwFOBrYA3wfOHLA2SdIcBguFqvoikF2sPnGO8QWcNVQ9kqRH5x3NkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoGC4UkH0myPckNs/qOTHJZkm+29yNaf5J8IMmWJNclee5QdUmSdm3ImcLHgJft1Hc2cHlVrQYub8sAJwGr22s9cO6AdUmSdmGwUKiqLwD37NS9BtjY2huBU2b1n1/TrgEOT3L0ULVJkuY23+cUjqqqO1r7TuCo1l4O3D5r3NbW9whJ1ieZTDI5NTU1XKWStAiN7URzVRVQe7HdhqqaqKqJZcuWDVCZJC1e8x0K3545LNTet7f+bcDKWeNWtD5J0jya71DYBKxt7bXApbP6z2hXIZ0A3D/rMJMkaZ4sGWrHST4OvARYmmQr8Dbg3cDFSdYBtwGnteGfAU4GtgDfB84cqi5
J0q4NFgpV9ZpdrDpxjrEFnDVULZKk0XhHsySpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJK6BRUKSV6W5BtJtiQ5e9z1SNJis2BCIcmBwIeAk4BjgdckOXa8VUnS4rJgQgE4HthSVbdU1f8CFwJrxlyTJC0qS8ZdwCzLgdtnLW8Fnr/zoCTrgfVt8YEk35iH2haLpcBd4y5iIch71467BD2c/zZnvC37Yi8/vasVCykURlJVG4AN467j8SjJZFVNjLsOaWf+25w/C+nw0TZg5azlFa1PkjRPFlIofBVYneSYJAcDpwObxlyTJC0qC+bwUVXtSPI7wOeAA4GPVNWNYy5rsfGwnBYq/23Ok1TVuGuQJC0QC+nwkSRpzAwFSVJnKMjHi2jBSvKRJNuT3DDuWhYLQ2GR8/EiWuA+Brxs3EUsJoaCfLyIFqyq+gJwz7jrWEwMBc31eJHlY6pF0pgZCpKkzlCQjxeR1BkK8vEikjpDYZGrqh3AzONFbgYu9vEiWiiSfBy4Gnh6kq1J1o27psc7H3MhSeqcKUiSOkNBktQZCpKkzlCQJHWGgiSpMxQkIMkfJ7kxyXVJrk3y/H2wz1fsq6fOJnlgX+xHejRekqpFL8kLgPcBL6mqHyZZChxcVd8aYdsl7V6PoWt8oKoOHfpzJGcKEhwN3FVVPwSoqruq6ltJbm0BQZKJJFe19tuTXJDkS8AFSa5J8nMzO0tyVRv/+iQfTHJYktuSHNDWPzHJ7UkOSvKzST6bZHOSf03yjDbmmCRXJ7k+ybvm+b+HFjFDQYLPAyuT/EeSDyf5hRG2ORb4pap6DXARcBpAkqOBo6tqcmZgVd0PXAvM7PflwOeq6kdM/yD971bV84C3AB9uY84Bzq2qZwJ3PNYvKI3KUNCiV1UPAM8D1gNTwEVJXv8om22qqv9p7YuBV7X2acAlc4y/CHh1a5/ePuNQ4IXAJ5JcC/wN07MWgBcBH2/tC/bk+0iPxZJxFyAtBFX1IHAVcFWS64G1wA4e+h+nQ3ba5Huztt2W5O4kz2L6D/9vz/ERm4A/S3Ik0wF0BfBE4L6qOm5XZe3dt5H2njMFLXpJnp5k9ayu44DbgFuZ/gMO8BuPspuLgLcCh1XVdTuvbLORrzJ9WOjTVfVgVX0H+K8kp7Y6kuTZbZMvMT2jAHjtHn8paS8ZChIcCmxMclOS65g+X/B24B3AOUkmgQcfZR+XMP1H/OLdjLkIeF17n/FaYF2SrwM38tBPob4ROKvNWvwlPM0bL0mVJHXOFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1/w/4gcjVwj04NgAAAABJRU5ErkJggg==\n",
1293 | "text/plain": [
1294 | ""
1295 | ]
1296 | },
1297 | "metadata": {
1298 | "needs_background": "light"
1299 | },
1300 | "output_type": "display_data"
1301 | }
1302 | ],
1303 | "source": [
1304 | "sns.countplot(x='Survived', data=trainDf)  # class balance of the target"
1305 | ]
1306 | },
1307 | {
1308 | "cell_type": "code",
1309 | "execution_count": 28,
1310 | "metadata": {},
1311 | "outputs": [
1312 | {
1313 | "data": {
1314 | "text/plain": [
1315 | ""
1316 | ]
1317 | },
1318 | "execution_count": 28,
1319 | "metadata": {},
1320 | "output_type": "execute_result"
1321 | },
1322 | {
1323 | "data": {
1324 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAPzklEQVR4nO3dfcyddX3H8ffHFsQHtDzcY9h21mnjgk5RG2SyLA7iBrgJMWA0CpV1VhM0GPcgM5lTo4tGNyZsMWmGUoxPKDI6Q9xIQVEj6F3lGY0dEWkD9OZRmToH++6P+9eft+UunEqvc9re71dycn7X9/qdi++dk/Dp9XhSVUiSBPCESTcgSdpzGAqSpM5QkCR1hoIkqTMUJEnd4kk38HgceuihtWLFikm3IUl7lU2bNt1dVVPzrdurQ2HFihVMT09Pug1J2qskuW1n6zx8JEnqDAVJUjdoKCT5YZIbklybZLrVDk5yeZIftPeDWj1Jzk2yOcn1SV48ZG+SpEcax57CH1bVkVW1qi2fDWysqpXAxrYMcAKwsr3WAh8bQ2+SpDkmcfjoJGB9G68HTp5Tv7BmXQ0sSXL4BPqTpAVr6FAo4D+TbEqyttUOq6o72vhO4LA2XgrcPuezW1rtVyRZm2Q6yfTMzMxQfUvSgjT0Jam/X1Vbk/wGcHmS781dWVWVZJce01pV64B1AKtWrfIRr5K0Gw26p1BVW9v7NuAS4Cjgru2Hhdr7tjZ9K7B8zseXtZokaUwGC4UkT0ly4PYx8EfAjcAGYHWbthq4tI03AKe3q5COBh6Yc5hJkjQGQx4+Ogy4JMn2/86nq+rLSb4NXJRkDXAb8Jo2/zLgRGAz8FPgjAF7kzQmx5x3zKRbWBC+8bZv7JbtDBYKVXUr8MJ56vcAx81TL+DMofqRJD0272iWJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYOHQpJFSb6b5Ett+VlJrkmyOcnnkuzf6k9sy5vb+hVD9yZJ+lXj2FM4C7hlzvKHgHOq6jnAfcCaVl8D3Nfq57R5kqQxGjQUkiwDXgn8a1sOcCzwhTZlPXByG5/Ulmnrj2vzJUljMvSewj8Bfw38X1s+BLi/qh5qy1uApW28FLgdoK1/oM3/FUnWJplOMj0zMzNg65K08AwWCkn+BNhWVZt253aral1VraqqVVNTU7tz05K04C0ecNvHAK9KciJwAPA04KPAkiSL297AMmBrm78VWA5sSbIYeDpwz4D9SZJ2MNieQlX9TVUtq6oVwGuBK6rq9cCVwClt2mrg0jbe0JZp66+oqhqqP0nSI03iPoV3Au9IspnZcwbnt/r5wCGt/g7g7An0JkkL2pCHj7qq+grwlTa+FThqnjk/B04dRz+SpPl5R7MkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpGywUkhyQ5FtJrktyU5L3tvqzklyTZHOSzyXZv9Wf2JY3t/UrhupNkjS/IfcU/gc4tqpeCBwJHJ/kaOBDwDlV9RzgPmBNm78GuK/Vz2nzJEljNFgo1KwH2+J+7VXAscAXWn09cHIbn9SWaeuPS5Kh+pMkPdKg5xSSLEpyLbANuBz4L+D+qnqoTdkCLG3jpcDtAG39A8Ah82xzbZLpJNMzMzNDti9JC86goVBVD1fVkcAy4Cjgd3bDNtdV1aq
qWjU1NfV4NydJmmMsVx9V1f3AlcDvAUuSLG6rlgFb23grsBygrX86cM84+pMkzRry6qOpJEva+EnAK4BbmA2HU9q01cClbbyhLdPWX1FVNVR/kqRHWvzYU35thwPrkyxiNnwuqqovJbkZ+GyS9wPfBc5v888HPplkM3Av8NoBe5MkzWOwUKiq64EXzVO/ldnzCzvWfw6cOlQ/kqTH5h3NkqRupFBIsnGUmiRp7/aoh4+SHAA8GTg0yUHA9pvJnsYv7y+QJO0jHuucwpuBtwPPADbxy1D4MfDPw7UlSZqERw2Fqvoo8NEkb6uq88bUkyRpQka6+qiqzkvyMmDF3M9U1YUD9SVJmoCRQiHJJ4FnA9cCD7dyAYaCJO1DRr1PYRVwhHcYS9K+bdT7FG4EfnPIRiRJkzfqnsKhwM1JvsXsj+cAUFWvGqQrSdJEjBoK7xmyCUnSnmHUq4++OnQjkqTJG/Xqo58we7URwP7M/rTmf1fV04ZqTJI0fqPuKRy4fdx+N/kk4OihmpIkTcYuPyW1Zv0b8Me7vx1J0iSNevjo1XMWn8DsfQs/H6QjSdLEjHr10Z/OGT8E/JDZQ0iSpH3IqOcUzhi6EUnS5I36IzvLklySZFt7XZxk2dDNSZLGa9QTzZ8ANjD7uwrPAP691SRJ+5BRQ2Gqqj5RVQ+11wXA1IB9SZImYNRQuCfJG5Isaq83APcM2ZgkafxGDYU/A14D3AncAZwCvHGgniRJEzLqJanvA1ZX1X0ASQ4GPsJsWEiS9hGj7im8YHsgAFTVvcCLhmlJkjQpo4bCE5IctH2h7SmMupchSdpLjPo/9n8Avpnk8235VOADw7QkSZqUUe9ovjDJNHBsK726qm4eri1J0iSMfAiohYBBIEn7sF1+dLYkad+1YE4Wv+SvLpx0CwvCpg+fPukWJD0O7ilIkjpDQZLUDRYKSZYnuTLJzUluSnJWqx+c5PIkP2jvB7V6kpybZHOS65O8eKjeJEnzG3JP4SHgL6rqCOBo4MwkRwBnAxuraiWwsS0DnACsbK+1wMcG7E2SNI/BQqGq7qiq77TxT4BbgKXM/ozn+jZtPXByG58EXFizrgaWJDl8qP4kSY80lnMKSVYw+6yka4DDquqOtupO4LA2XgrcPudjW1ptx22tTTKdZHpmZma4piVpARo8FJI8FbgYeHtV/XjuuqoqoHZle1W1rqpWVdWqqSl/50eSdqdBQyHJfswGwqeq6outfNf2w0LtfVurbwWWz/n4slaTJI3JkFcfBTgfuKWq/nHOqg3A6jZeDVw6p356uwrpaOCBOYeZJEljMOQdzccApwE3JLm21d4FfBC4KMka4DZmf9EN4DLgRGAz8FPgjAF7kyTNY7BQqKqvA9nJ6uPmmV/AmUP1I0l6bN7RLEnqFswD8bR3+9H7fnfSLezzfuvdN0y6Be0B3FOQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1A0WCkk+nmRbkhvn1A5OcnmSH7T3g1o9Sc5NsjnJ9UlePFRfkqSdG3JP4QLg+B1qZwMbq2olsLEtA5wArGyvtcDHBuxLkrQTg4VCVV0F3LtD+SRgfRuvB06eU7+wZl0NLEly+FC9SZLmN+5zCodV1R1tfCdwWBsvBW6fM29Lq0mSxmhiJ5qrqoDa1c8lWZtkOsn0zMzMAJ1J0sI17lC4a/thofa+rdW3AsvnzFvWao9QVeuqalVVrZqamhq0WUlaaMYdChuA1W28Grh0Tv30dhXS0cADcw4zSZLGZPFQG07yGeDlwKFJtgB/B3wQuCjJGuA24DVt+mXAicBm4KfAGUP1JUnaucFCoapet5NVx80zt4Azh+pFkjQa72iWJHW
GgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKnbo0IhyfFJvp9kc5KzJ92PJC00e0woJFkE/AtwAnAE8LokR0y2K0laWPaYUACOAjZX1a1V9Qvgs8BJE+5JkhaUVNWkewAgySnA8VX15235NOClVfXWHeatBda2xecC3x9ro+N1KHD3pJvQr8Xvbu+2r39/z6yqqflWLB53J49XVa0D1k26j3FIMl1Vqybdh3ad393ebSF/f3vS4aOtwPI5y8taTZI0JntSKHwbWJnkWUn2B14LbJhwT5K0oOwxh4+q6qEkbwX+A1gEfLyqbppwW5O2IA6T7aP87vZuC/b722NONEuSJm9POnwkSZowQ0GS1BkKe6AkH0+yLcmNk+5FuybJ8iRXJrk5yU1Jzpp0TxpdkgOSfCvJde37e++kexo3zynsgZL8AfAgcGFVPX/S/Wh0SQ4HDq+q7yQ5ENgEnFxVN0+4NY0gSYCnVNWDSfYDvg6cVVVXT7i1sXFPYQ9UVVcB9066D+26qrqjqr7Txj8BbgGWTrYrjapmPdgW92uvBfUvZ0NBGkiSFcCLgGsm3Ip2QZJFSa4FtgGXV9WC+v4MBWkASZ4KXAy8vap+POl+NLqqeriqjmT2qQpHJVlQh3ANBWk3a8eiLwY+VVVfnHQ/+vVU1f3AlcDxE25lrAwFaTdqJyrPB26pqn+cdD/aNUmmkixp4ycBrwC+N9GmxsxQ2AMl+QzwTeC5SbYkWTPpnjSyY4DTgGOTXNteJ066KY3scODKJNcz+zy2y6vqSxPuaay8JFWS1LmnIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAeRZKH22WlNyb5fJInP8rc9yT5y3H2J+1uhoL06H5WVUe2p9X+AnjLpBuShmQoSKP7GvAcgCSnJ7m+PXf/kztOTPKmJN9u6y/evoeR5NS213Fdkqta7XntGf7Xtm2uHOtfJc3hzWvSo0jyYFU9NcliZp9n9GXgKuAS4GVVdXeSg6vq3iTvAR6sqo8kOaSq7mnbeD9wV1Wdl+QG4Piq2ppkSVXdn+Q84Oqq+lSS/YFFVfWzifzBWvDcU5Ae3ZPaY5SngR8x+1yjY4HPV9XdAFU1329fPD/J11oIvB54Xqt/A7ggyZuARa32TeBdSd4JPNNA0CQtnnQD0h7uZ+0xyt3sM+8e0wXM/uLadUneCLwcoKrekuSlwCuBTUleUlWfTnJNq12W5M1VdcXu+xOk0bmnIO26K4BTkxwCkOTgeeYcCNzRHqP9+u3FJM+uqmuq6t3ADLA8yW8Dt1bVucClwAsG/wuknXBPQdpFVXVTkg8AX03yMPBd4I07TPtbZn9xbaa9H9jqH24nkgNsBK4D3gmcluR/gTuBvx/8j5B2whPNkqTOw0eSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSuv8HlYj/r/k4eJoAAAAASUVORK5CYII=\n",
1325 | "text/plain": [
1326 | ""
1327 | ]
1328 | },
1329 | "metadata": {
1330 | "needs_background": "light"
1331 | },
1332 | "output_type": "display_data"
1333 | }
1334 | ],
1335 | "source": [
1336 | "sns.countplot(x='Pclass', data=trainDf)  # passengers per ticket class"
1337 | ]
1338 | },
1339 | {
1340 | "cell_type": "code",
1341 | "execution_count": 29,
1342 | "metadata": {},
1343 | "outputs": [
1344 | {
1345 | "data": {
1346 | "image/png": "\n",
1347 | "text/plain": [
1348 | ""
1349 | ]
1350 | },
1351 | "metadata": {
1352 | "needs_background": "light"
1353 | },
1354 | "output_type": "display_data"
1355 | }
1356 | ],
1357 | "source": [
1358 | "sns.countplot(x='Embarked', hue='Survived', data=trainDf)  # passengers per embarkation port, split by survival\n",
1359 | "plt.show()"
1360 | ]
1361 | },
1362 | {
1363 | "cell_type": "code",
1364 | "execution_count": 30,
1365 | "metadata": {},
1366 | "outputs": [
1367 | {
1368 | "data": {
1369 | "image/png": "\n",
1370 | "text/plain": [
1371 | ""
1372 | ]
1373 | },
1374 | "metadata": {
1375 | "needs_background": "light"
1376 | },
1377 | "output_type": "display_data"
1378 | }
1379 | ],
1380 | "source": [
1381 | "plt.figure(figsize=(16,9))  # correlation heatmap of the numeric columns only (Sex/Embarked are still strings here)\n",
1382 | "sns.heatmap(trainDf.select_dtypes(include='number').corr(), annot=True, cmap=\"cubehelix\")\n",
1383 | "plt.show()"
1384 | ]
1385 | },
1386 | {
1387 | "cell_type": "code",
1388 | "execution_count": 31,
1389 | "metadata": {},
1390 | "outputs": [
1391 | {
1392 | "data": {
1393 | "text/plain": [
1394 | "array([[,\n",
1395 | " ],\n",
1396 | " [,\n",
1397 | " ],\n",
1398 | " [, ]],\n",
1399 | " dtype=object)"
1400 | ]
1401 | },
1402 | "execution_count": 31,
1403 | "metadata": {},
1404 | "output_type": "execute_result"
1405 | },
1406 | {
1407 | "data": {
1408 | "image/png": "\n",
1409 | "text/plain": [
1410 | ""
1411 | ]
1412 | },
1413 | "metadata": {
1414 | "needs_background": "light"
1415 | },
1416 | "output_type": "display_data"
1417 | }
1418 | ],
1419 | "source": [
1420 | "trainDf.hist(figsize=(16,9));  # one histogram per numeric column; ';' hides the array-of-Axes repr"
1421 | ]
1422 | },
1423 | {
1424 | "cell_type": "markdown",
1425 | "metadata": {},
1426 | "source": [
1427 | "# Data type transformation"
1428 | ]
1429 | },
1430 | {
1431 | "cell_type": "code",
1432 | "execution_count": 32,
1433 | "metadata": {},
1434 | "outputs": [
1435 | {
1436 | "data": {
1437 | "text/html": [
1438 | "\n",
1439 | "\n",
1452 | "
\n",
1453 | " \n",
1454 | " \n",
1455 | " \n",
1456 | " Survived \n",
1457 | " Pclass \n",
1458 | " Sex \n",
1459 | " SibSp \n",
1460 | " Parch \n",
1461 | " Fare \n",
1462 | " Embarked \n",
1463 | " \n",
1464 | " \n",
1465 | " PassengerId \n",
1466 | " \n",
1467 | " \n",
1468 | " \n",
1469 | " \n",
1470 | " \n",
1471 | " \n",
1472 | " \n",
1473 | " \n",
1474 | " \n",
1475 | " \n",
1476 | " \n",
1477 | " 1 \n",
1478 | " 0 \n",
1479 | " 3 \n",
1480 | " male \n",
1481 | " 1 \n",
1482 | " 0 \n",
1483 | " 7.2500 \n",
1484 | " S \n",
1485 | " \n",
1486 | " \n",
1487 | " 2 \n",
1488 | " 1 \n",
1489 | " 1 \n",
1490 | " female \n",
1491 | " 1 \n",
1492 | " 0 \n",
1493 | " 71.2833 \n",
1494 | " C \n",
1495 | " \n",
1496 | " \n",
1497 | " 3 \n",
1498 | " 1 \n",
1499 | " 3 \n",
1500 | " female \n",
1501 | " 0 \n",
1502 | " 0 \n",
1503 | " 7.9250 \n",
1504 | " S \n",
1505 | " \n",
1506 | " \n",
1507 | " 4 \n",
1508 | " 1 \n",
1509 | " 1 \n",
1510 | " female \n",
1511 | " 1 \n",
1512 | " 0 \n",
1513 | " 53.1000 \n",
1514 | " S \n",
1515 | " \n",
1516 | " \n",
1517 | " 5 \n",
1518 | " 0 \n",
1519 | " 3 \n",
1520 | " male \n",
1521 | " 0 \n",
1522 | " 0 \n",
1523 | " 8.0500 \n",
1524 | " S \n",
1525 | " \n",
1526 | " \n",
1527 | " 6 \n",
1528 | " 0 \n",
1529 | " 3 \n",
1530 | " male \n",
1531 | " 0 \n",
1532 | " 0 \n",
1533 | " 8.4583 \n",
1534 | " Q \n",
1535 | " \n",
1536 | " \n",
1537 | " 7 \n",
1538 | " 0 \n",
1539 | " 1 \n",
1540 | " male \n",
1541 | " 0 \n",
1542 | " 0 \n",
1543 | " 51.8625 \n",
1544 | " S \n",
1545 | " \n",
1546 | " \n",
1547 | " 8 \n",
1548 | " 0 \n",
1549 | " 3 \n",
1550 | " male \n",
1551 | " 3 \n",
1552 | " 1 \n",
1553 | " 21.0750 \n",
1554 | " S \n",
1555 | " \n",
1556 | " \n",
1557 | " 9 \n",
1558 | " 1 \n",
1559 | " 3 \n",
1560 | " female \n",
1561 | " 0 \n",
1562 | " 2 \n",
1563 | " 11.1333 \n",
1564 | " S \n",
1565 | " \n",
1566 | " \n",
1567 | " 10 \n",
1568 | " 1 \n",
1569 | " 2 \n",
1570 | " female \n",
1571 | " 1 \n",
1572 | " 0 \n",
1573 | " 30.0708 \n",
1574 | " C \n",
1575 | " \n",
1576 | " \n",
1577 | "
\n",
1578 | "
"
1579 | ],
1580 | "text/plain": [
1581 | " Survived Pclass Sex SibSp Parch Fare Embarked\n",
1582 | "PassengerId \n",
1583 | "1 0 3 male 1 0 7.2500 S\n",
1584 | "2 1 1 female 1 0 71.2833 C\n",
1585 | "3 1 3 female 0 0 7.9250 S\n",
1586 | "4 1 1 female 1 0 53.1000 S\n",
1587 | "5 0 3 male 0 0 8.0500 S\n",
1588 | "6 0 3 male 0 0 8.4583 Q\n",
1589 | "7 0 1 male 0 0 51.8625 S\n",
1590 | "8 0 3 male 3 1 21.0750 S\n",
1591 | "9 1 3 female 0 2 11.1333 S\n",
1592 | "10 1 2 female 1 0 30.0708 C"
1593 | ]
1594 | },
1595 | "execution_count": 32,
1596 | "metadata": {},
1597 | "output_type": "execute_result"
1598 | }
1599 | ],
1600 | "source": [
1601 | "trainDf.head(n=10)  # first ten training rows, before the categorical columns are encoded"
1602 | ]
1603 | },
1604 | {
1605 | "cell_type": "code",
1606 | "execution_count": 33,
1607 | "metadata": {},
1608 | "outputs": [
1609 | {
1610 | "data": {
1611 | "text/html": [
1612 | "\n",
1613 | "\n",
1626 | "
\n",
1627 | " \n",
1628 | " \n",
1629 | " \n",
1630 | " Pclass \n",
1631 | " Sex \n",
1632 | " SibSp \n",
1633 | " Parch \n",
1634 | " Fare \n",
1635 | " Embarked \n",
1636 | " \n",
1637 | " \n",
1638 | " PassengerId \n",
1639 | " \n",
1640 | " \n",
1641 | " \n",
1642 | " \n",
1643 | " \n",
1644 | " \n",
1645 | " \n",
1646 | " \n",
1647 | " \n",
1648 | " \n",
1649 | " 892 \n",
1650 | " 3 \n",
1651 | " male \n",
1652 | " 0 \n",
1653 | " 0 \n",
1654 | " 7.8292 \n",
1655 | " Q \n",
1656 | " \n",
1657 | " \n",
1658 | " 893 \n",
1659 | " 3 \n",
1660 | " female \n",
1661 | " 1 \n",
1662 | " 0 \n",
1663 | " 7.0000 \n",
1664 | " S \n",
1665 | " \n",
1666 | " \n",
1667 | " 894 \n",
1668 | " 2 \n",
1669 | " male \n",
1670 | " 0 \n",
1671 | " 0 \n",
1672 | " 9.6875 \n",
1673 | " Q \n",
1674 | " \n",
1675 | " \n",
1676 | " 895 \n",
1677 | " 3 \n",
1678 | " male \n",
1679 | " 0 \n",
1680 | " 0 \n",
1681 | " 8.6625 \n",
1682 | " S \n",
1683 | " \n",
1684 | " \n",
1685 | " 896 \n",
1686 | " 3 \n",
1687 | " female \n",
1688 | " 1 \n",
1689 | " 1 \n",
1690 | " 12.2875 \n",
1691 | " S \n",
1692 | " \n",
1693 | " \n",
1694 | "
\n",
1695 | "
"
1696 | ],
1697 | "text/plain": [
1698 | " Pclass Sex SibSp Parch Fare Embarked\n",
1699 | "PassengerId \n",
1700 | "892 3 male 0 0 7.8292 Q\n",
1701 | "893 3 female 1 0 7.0000 S\n",
1702 | "894 2 male 0 0 9.6875 Q\n",
1703 | "895 3 male 0 0 8.6625 S\n",
1704 | "896 3 female 1 1 12.2875 S"
1705 | ]
1706 | },
1707 | "execution_count": 33,
1708 | "metadata": {},
1709 | "output_type": "execute_result"
1710 | }
1711 | ],
1712 | "source": [
1713 | "testDf.head(n=5)  # first five test rows (no Survived column)"
1714 | ]
1715 | },
1716 | {
1717 | "cell_type": "code",
1718 | "execution_count": 34,
1719 | "metadata": {},
1720 | "outputs": [],
1721 | "source": [
1722 | "# Encode the categorical columns as integer codes.\n",
1723 | "# Each encoder is fitted on the training data only and then reused on the test\n",
1724 | "# data, so a given category always maps to the same code in both frames.\n",
1725 | "for col in ('Sex', 'Embarked'):\n",
1726 | "    le = LabelEncoder()\n",
1727 | "    trainDf[col] = le.fit_transform(trainDf[col])\n",
1728 | "    # transform() raises ValueError if the test data has a category unseen in training\n",
1729 | "    testDf[col] = le.transform(testDf[col])"
1730 | ]
1731 | },
1732 | {
1733 | "cell_type": "markdown",
1734 | "metadata": {},
1735 | "source": [
1736 | "# Feature Ranking"
1737 | ]
1738 | },
1739 | {
1740 | "cell_type": "code",
1741 | "execution_count": 35,
1742 | "metadata": {},
1743 | "outputs": [],
1744 | "source": [
1745 | "# Split the training frame into the predictors (X) and the target (Y)\n",
1746 | "X = trainDf.drop(columns='Survived')\n",
1747 | "\n",
1748 | "Y = trainDf[['Survived']]  # kept as a one-column DataFrame\n",
1749 | "\n",
1750 | "# Names of the predictor columns, in the same order as the columns of X\n",
1751 | "colnames = X.columns.tolist()"
1752 | ]
1753 | },
1754 | {
1755 | "cell_type": "code",
1756 | "execution_count": 36,
1757 | "metadata": {},
1758 | "outputs": [],
1759 | "source": [
1760 | "# Feature scores produced by each method, keyed by method name\n",
1761 | "ranks = {}\n",
1762 | "# Helper that normalises one method's raw scores; callers store its result in `ranks`\n",
1763 | "def ranking(ranks, names, order=1):\n",
1764 | "    \"\"\"Min-max scale `order * ranks` to [0, 1], round to 2 places, return {name: score}.\"\"\"\n",
1765 | "    column = order * np.array([ranks]).T  # MinMaxScaler works on a 2-D column\n",
1766 | "    scaled = MinMaxScaler().fit_transform(column).T[0]\n",
1767 | "    return dict(zip(names, (round(score, 2) for score in scaled)))"
1768 | ]
1769 | },
1770 | {
1771 | "cell_type": "code",
1772 | "execution_count": 37,
1773 | "metadata": {},
1774 | "outputs": [],
1775 | "source": [
1776 | "# Rank features with Recursive Feature Elimination (RFE) around a Logistic Regression\n",
1777 | "lr = LogisticRegression(random_state=42)\n",
1778 | "lr.fit(X, np.ravel(Y))  # sklearn expects a 1-D target; Y is a one-column DataFrame\n",
1779 | "\n",
1780 | "# Eliminate down to one feature so each feature gets its own rank (1 = kept longest);\n",
1781 | "# order=-1 below flips the scale so that rank 1 maps to the top score after scaling\n",
1782 | "rfe = RFE(lr, n_features_to_select=1).fit(X, np.ravel(Y))\n",
1783 | "ranks[\"RFE\"] = ranking(list(map(float, rfe.ranking_)), colnames, order=-1)"
1784 | ]
1785 | },
1786 | {
1787 | "cell_type": "code",
1788 | "execution_count": 38,
1789 | "metadata": {},
1790 | "outputs": [
1791 | {
1792 | "data": {
1793 | "text/plain": [
1794 | "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
1795 | " intercept_scaling=1, l1_ratio=None, max_iter=100,\n",
1796 | " multi_class='auto', n_jobs=None, penalty='l2',\n",
1797 | " random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n",
1798 | " warm_start=False)"
1799 | ]
1800 | },
1801 | "execution_count": 38,
1802 | "metadata": {},
1803 | "output_type": "execute_result"
1804 | }
1805 | ],
1806 | "source": [
1807 | "# Logistic Regression fitted on all features; the bare `lr` echoes the fitted estimator\n",
1808 | "lr = LogisticRegression().fit(X, Y)\n",
1809 | "lr\n",
1810 | "\n",
1811 | "#ranks[\"LogReg\"] = ranking(np.abs(lr.coef_), colnames)"
1812 | ]
1813 | },
1814 | {
1815 | "cell_type": "code",
1816 | "execution_count": 39,
1817 | "metadata": {},
1818 | "outputs": [],
1819 | "source": [
1820 | "# Decision Tree importances. random_state pins the tree's random tie-breaking so the\n",
1821 | "# ranking (and the features selected from it) is the same on every run.\n",
1822 | "dt = DecisionTreeClassifier(random_state=42)\n",
1823 | "dt.fit(X, Y)\n",
1824 | "ranks[\"DT\"] = ranking(dt.feature_importances_, colnames)"
1825 | ]
1826 | },
1827 | {
1828 | "cell_type": "code",
1829 | "execution_count": 40,
1830 | "metadata": {},
1831 | "outputs": [],
1832 | "source": [
1833 | "# Random Forest importances. Bootstrapping and feature sub-sampling are random, so fix\n",
1834 | "# random_state for a reproducible ranking; np.ravel gives sklearn the 1-D target it expects.\n",
1835 | "rf = RandomForestClassifier(n_jobs=-1, n_estimators=9, random_state=42)\n",
1836 | "rf.fit(X, np.ravel(Y))\n",
1837 | "ranks[\"RF\"] = ranking(rf.feature_importances_, colnames)"
1838 | ]
1839 | },
1840 | {
1841 | "cell_type": "markdown",
1842 | "metadata": {},
1843 | "source": [
1844 | "# Creating the Feature Ranking Matrix\n",
1845 | "We combine the scores from the various methods above and output them in matrix form for convenient viewing:"
1846 | ]
1847 | },
1848 | {
1849 | "cell_type": "code",
1850 | "execution_count": 41,
1851 | "metadata": {},
1852 | "outputs": [
1853 | {
1854 | "name": "stdout",
1855 | "output_type": "stream",
1856 | "text": [
1857 | "\tDT\tRF\tRFE\tMean\n",
1858 | "Pclass\t0.22\t0.25\t0.8\t0.42\n",
1859 | "Sex\t1.0\t0.69\t1.0\t0.9\n",
1860 | "SibSp\t0.1\t0.03\t0.4\t0.18\n",
1861 | "Parch\t0.06\t0.07\t0.2\t0.11\n",
1862 | "Fare\t0.92\t1.0\t0.0\t0.64\n",
1863 | "Embarked\t0.0\t0.0\t0.6\t0.2\n"
1864 | ]
1865 | }
1866 | ],
1867 | "source": [
1868 | "# Average each feature's score across the ranking methods. \"Mean\" itself is excluded so\n",
1869 | "# that re-running this cell does not fold the previous average into the new one.\n",
1870 | "methods = sorted(method for method in ranks if method != \"Mean\")\n",
1871 | "r = {}\n",
1872 | "for name in colnames:\n",
1873 | "    r[name] = round(np.mean([ranks[method][name] for method in methods]), 2)\n",
1874 | "ranks[\"Mean\"] = r\n",
1875 | "methods.append(\"Mean\")\n",
1876 | "\n",
1877 | "# Print a tab-separated matrix: one row per feature, one column per method (Mean last)\n",
1878 | "print(\"\\t%s\" % \"\\t\".join(methods))\n",
1879 | "for name in colnames:\n",
1880 | "    print(\"%s\\t%s\" % (name, \"\\t\".join(map(str, [ranks[method][name] for method in methods]))))"
1881 | ]
1882 | },
1883 | {
1884 | "cell_type": "code",
1885 | "execution_count": 42,
1886 | "metadata": {},
1887 | "outputs": [],
1888 | "source": [
1889 | "# Tabulate the mean scores, one (Feature, Mean Ranking) row per feature,\n",
1890 | "# ordered from the highest-scoring feature down to the lowest\n",
1891 | "meanplot = (\n",
1892 | "    pd.DataFrame(list(r.items()), columns=['Feature', 'Mean Ranking'])\n",
1893 | "    .sort_values(by='Mean Ranking', ascending=False))"
1894 | ]
1895 | },
1896 | {
1897 | "cell_type": "code",
1898 | "execution_count": 43,
1899 | "metadata": {
1900 | "scrolled": true
1901 | },
1902 | "outputs": [
1903 | {
1904 | "data": {
1905 | "text/plain": [
1906 | ""
1907 | ]
1908 | },
1909 | "execution_count": 43,
1910 | "metadata": {},
1911 | "output_type": "execute_result"
1912 | },
1913 | {
1914 | "data": {
1915 | "image/png": "\n",
1916 | "text/plain": [
1917 | ""
1918 | ]
1919 | },
1920 | "metadata": {
1921 | "needs_background": "light"
1922 | },
1923 | "output_type": "display_data"
1924 | }
1925 | ],
1926 | "source": [
1927 | "# Bar chart of the mean ranking per feature (catplot/height supersede the deprecated factorplot/size)\n",
1928 | "sns.catplot(x=\"Mean Ranking\", y=\"Feature\", data=meanplot, kind=\"bar\",\n",
1929 | "            height=5, aspect=1.9, palette='coolwarm')"
1930 | ]
1931 | },
1932 | {
1933 | "cell_type": "code",
1934 | "execution_count": 44,
1935 | "metadata": {},
1936 | "outputs": [
1937 | {
1938 | "data": {
1939 | "text/html": [
1940 | "\n",
1941 | "\n",
1954 | "
\n",
1955 | " \n",
1956 | " \n",
1957 | " \n",
1958 | " Feature \n",
1959 | " Mean Ranking \n",
1960 | " \n",
1961 | " \n",
1962 | " \n",
1963 | " \n",
1964 | " 1 \n",
1965 | " Sex \n",
1966 | " 0.90 \n",
1967 | " \n",
1968 | " \n",
1969 | " 4 \n",
1970 | " Fare \n",
1971 | " 0.64 \n",
1972 | " \n",
1973 | " \n",
1974 | " 0 \n",
1975 | " Pclass \n",
1976 | " 0.42 \n",
1977 | " \n",
1978 | " \n",
1979 | " 5 \n",
1980 | " Embarked \n",
1981 | " 0.20 \n",
1982 | " \n",
1983 | " \n",
1984 | " 2 \n",
1985 | " SibSp \n",
1986 | " 0.18 \n",
1987 | " \n",
1988 | " \n",
1989 | " 3 \n",
1990 | " Parch \n",
1991 | " 0.11 \n",
1992 | " \n",
1993 | " \n",
1994 | "
\n",
1995 | "
"
1996 | ],
1997 | "text/plain": [
1998 | " Feature Mean Ranking\n",
1999 | "1 Sex 0.90\n",
2000 | "4 Fare 0.64\n",
2001 | "0 Pclass 0.42\n",
2002 | "5 Embarked 0.20\n",
2003 | "2 SibSp 0.18\n",
2004 | "3 Parch 0.11"
2005 | ]
2006 | },
2007 | "execution_count": 44,
2008 | "metadata": {},
2009 | "output_type": "execute_result"
2010 | }
2011 | ],
2012 | "source": [
2013 | "meanplot = meanplot.sort_values(by='Mean Ranking', ascending=False)  # best feature first\n",
2014 | "meanplot"
2015 | ]
2016 | },
2017 | {
2018 | "cell_type": "code",
2019 | "execution_count": 45,
2020 | "metadata": {},
2021 | "outputs": [
2022 | {
2023 | "data": {
2024 | "text/plain": [
2025 | "['Sex', 'Fare', 'Pclass', 'Embarked']"
2026 | ]
2027 | },
2028 | "execution_count": 45,
2029 | "metadata": {},
2030 | "output_type": "execute_result"
2031 | }
2032 | ],
2033 | "source": [
2034 | "# Select the features whose mean ranking is at least 0.20\n",
2035 | "columnName = meanplot.loc[meanplot['Mean Ranking'].ge(0.20), 'Feature'].tolist()\n",
2036 | "columnName"
2037 | ]
2038 | },
2039 | {
2040 | "cell_type": "markdown",
2041 | "metadata": {},
2042 | "source": [
2043 | "# Predictive Modeling"
2044 | ]
2045 | },
2046 | {
2047 | "cell_type": "code",
2048 | "execution_count": 46,
2049 | "metadata": {},
2050 | "outputs": [],
2051 | "source": [
2052 | "# Model inputs: only the selected features; target: 'Survived' as a one-column frame\n",
2053 | "X = trainDf.loc[:, columnName]\n",
2054 | "y = trainDf.loc[:, ['Survived']]\n",
2055 | "# Keep the same feature columns in the test frame\n",
2056 | "testDf = testDf.loc[:, columnName]\n"
2057 | ]
2058 | },
2059 | {
2060 | "cell_type": "code",
2061 | "execution_count": 47,
2062 | "metadata": {},
2063 | "outputs": [],
2064 | "source": [
2065 | "# Hold out 30% of the labelled rows for evaluation (70:30 split); random_state fixes the shuffle\n",
2066 | "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.3)"
2067 | ]
2068 | },
2069 | {
2070 | "cell_type": "markdown",
2071 | "metadata": {},
2072 | "source": [
2073 | "### Utility functions"
2074 | ]
2075 | },
2076 | {
2077 | "cell_type": "code",
2078 | "execution_count": 48,
2079 | "metadata": {},
2080 | "outputs": [],
2081 | "source": [
2082 | "def classifier_report(ModelName, model_object):\n",
2083 | "    \"\"\"Fit model_object on the global train split, print its test-split report, plot the confusion matrix.\"\"\"\n",
2084 | "    model_object.fit(X_train, np.ravel(y_train))  # 1-D target: y_train is a one-column DataFrame\n",
2085 | "    y_test_pred = model_object.predict(X_test)\n",
2086 | "    print(ModelName, \"Classifier Report:\")\n",
2087 | "    print(\"\\n\", metrics.classification_report(y_test, y_test_pred))\n",
2088 | "    print(\"\\n\\nConfusion_matrix: \\n\")\n",
2089 | "    cnf_matrix = metrics.confusion_matrix(y_test, y_test_pred)\n",
2090 | "    _, ax = plt.subplots()  # fresh figure per call, so repeated reports never draw over each other\n",
2091 | "    sns.heatmap(cnf_matrix, annot=True, ax=ax, fmt='d', cmap='Blues', linewidths=0.5)  # fmt='d': cells are integer counts"
2092 | ]
2093 | },
2094 | {
2095 | "cell_type": "code",
2096 | "execution_count": 49,
2097 | "metadata": {},
2098 | "outputs": [
2099 | {
2100 | "name": "stdout",
2101 | "output_type": "stream",
2102 | "text": [
2103 | "Logistic Regression Classifier Report:\n",
2104 | "\n",
2105 | " precision recall f1-score support\n",
2106 | "\n",
2107 | " 0 0.81 0.78 0.79 157\n",
2108 | " 1 0.70 0.75 0.72 111\n",
2109 | "\n",
2110 | " accuracy 0.76 268\n",
2111 | " macro avg 0.76 0.76 0.76 268\n",
2112 | "weighted avg 0.77 0.76 0.77 268\n",
2113 | "\n",
2114 | "\n",
2115 | "\n",
2116 | "Confusion_matrix: \n",
2117 | "\n"
2118 | ]
2119 | },
2120 | {
2121 | "data": {
2122 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAD4CAYAAADSIzzWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVj0lEQVR4nO3deXhV9bXG8e9KQsSAJCIkKDiA4ly8thRE1FpnqQq21qK1MmjjjFwr4gxXq7de+1ir1iGKc51qragdrA+tM4NUrSBgpaASCiQaSJgzrftHjhgJSU4OJ+eXvXk/PvvJ2fvsYUXjm1/WHo65OyIiknlZoQsQEdlWKYBFRAJRAIuIBKIAFhEJRAEsIhJITgaOocssRCRZtrU72P7gi5POnPXv3bXVx9samQhgtj/44kwcRiJi/Xt3AbCiqiZwJdKRFHXrFLqEjFMLQkTixbKSn1rbldmDZlZmZnMbLbvVzBaY2Qdm9gczK2j03lVmttDMPjKz41vbvwJYROIlKzv5qXUPAydstuwV4EB3HwD8C7gKwMz2B0YCByS2udvMWjyIAlhE4sUs+akV7v46ULHZsr+6e21idgbQJ/F6OPCUu29098XAQmBQS/tXAItIvLShBWFmxWY2u9FU3MajjQX+nHjdG1jS6L3SxLJmZeQknIhIxiQxsv2Su5cAJakdxq4BaoHfprI9KIBFJG6SOLm21YcwGw2cBBztXz3RbCmwa6PV+iSWNUstCBGJlzT2gLe8ezsBuAI4xd3XNXrrBWCkmW1nZn2B/sCslvalEbCIxEtyVzckxcyeBI4EephZKTCJhqsetgNesYYQn+Hu57v7h2b2DDCPhtbERe5e19L+FcAiEi9pbEG4+xlbWDylhfVvAm5Kdv8KYBGJlxRbCyEogEUkXjJwEi5dFMAiEi8KYBGRQLLTdxKuvSmARSRe1AMWEQlELQgRkUA0AhYRCUQjYBGRQDQCFhEJJI23Irc3BbCIxItaECIigagFISISiEbAIiKBKIBFRALRSTgRkUDUAxYRCUQtCBGRQDQCFhEJwxTAIiJhKIBFRAKxLAWwiEgQGgGLiASiABYRCUQBLCISSnTyVwEsIvGiEbCISCBZWboTTkQkCI2ARURCiU7+KoBFJF40AhYRCUQBLCISiG5FFhEJRCNgEZFAFMAiIoEogEVEAlEAi4iEEp38JTr37ImIJCErKyvpqTVm9qCZlZnZ3EbLupvZK2b2ceLrjonlZmZ3mNlCM/vAzL7Zaq1b9Z2KiHQwZpb0lISHgRM2W3YlMM3d+wPTEvMAJwL9E1MxcE9rO1cAi0i8WBumVrj760DFZouHA48kXj8CjGi0/FFvMAMoMLOdW9q/esApunfSjznxiAMpr1jNwB/eDMDN40cw7IgDqa6pY3Hp5xRPepzKNes5avC+3DjuFHI75VBdU8vVtz/Pa+/8q8k+d+yWx2O3jGX3Xbrz6X8qOOuKKaxavT7T35qkycaNG7mkeBQ1NdXU1dZx5NHHMva8i7l58jW8/95sunbpCsBVk26i/z77Ntn+zy9N5dEH7wPg7LHnceJJwzNaf1S15SScmRXTMFr9Uom7l7SyWZG7L0u8Xg4UJV73BpY0Wq80sWwZzVAAp+ixF2dw79Ov8cCNZ29aNm3GAq678wXq6ur5+bjhTBh7HNfeMZUvVq3htPH3say8kv333JkX776IPY+/tsk+Lx9zLK/O+ohfPvQKl485lsvHNGwv0ZSbm8vt9zxIXl4etbU1XHTu2Qw+9HAALhz3M448+rhmt62qrOTh++/h/kefxgzO/cmPOOyII9mhW36myo+stgRwImxbC9yWtncz81S3b7UFYWb7mtnERHP5jsTr/VI9YFy89e6/qahc97Vl02YsoK6uHoBZcxbTu6gAgH9+VMqy8koA5v17GZ2360Rup6a/+046cgCPvzgTgMdfnMnJ3x3Qjt+BtDczIy8vD4Da2lpqa2uTDod
ZM95i4OAhdMvPZ4du+QwcPISZ099qz3JjI8094C1Z8WVrIfG1LLF8KbBro/X6JJY1q8UANrOJwFM0dEtmJSYDnjSzK1vadlt39vAhvPzWvCbLTz3mv3h/wRKqa2qbvFe40w4s/7wKgOWfV1G40w7tXqe0r7q6Osae+QOGH3cEAwcPYf8DG36p3n/3HYw+41TuvO0Wqqurm2xXXraCwqJem+YLC4soL1uRsbqjzLIs6SlFLwCjEq9HAVMbLT87cTXEIUBlo1bFFrXWgjgHOMDdaxovNLPbgA+BX2xpo8Z9lfvuu6+VQ8TPFeccT11dPU/96Z2vLd+vXy9+Pm44J134m6T24yn/YSMdRXZ2Ng8+8XtWr67i2gmXsmjhxxRfPJ6ddupBTU0Nt948mScemcLon14QutTYSOeNGGb2JHAk0MPMSoFJNOTeM2Z2DvApcHpi9T8Bw4CFwDpgTGv7b60FUQ/ssoXlOyfe2yJ3L3H3ge4+sLi4uLnVYumskwcz7IgDGX3Nw19b3ruwgKdvK+bc6x5jcennW9y27IvV9OrRDYBePbpRXrG6vcuVDNlhh24c/K1BzJz+Jj169MTMyM3NZdjJI5g/b06T9XsWFlG2Yvmm+bKyFfQsLGqynjSVzhaEu5/h7ju7eyd37+PuU9z9C3c/2t37u/sx7l6RWNfd/SJ339Pdv+Hus1vbf2sBPB6YZmZ/NrOSxPQXGq59u7T1fxXblmMP3Y/LRh/DaePvY/2Gr/5oyO+6Pc/deT7X3TGV6f9c1Oz2f3xtDmedPBhoCPKXXv2g3WuW9rNqZQWrVze0lDZu2MDsWdPZfY++fP55OQDuzhuv/o2+/fo32XbQIUN5Z+bbrK6qZHVVJe/MfJtBhwzNaP1RZZb8FJp5K3/nmlkWMIiGyymgoan8jrvXJXkM3/7gi1OvsIN65H9Hc/i3+tOjoCtlFVXceO+fmDDmOLbLzeGLyrUAzJrzCeNueoqJ5x7PhLHHsfCz8k3bn3zBXZSvXMPd15/JA8++ybvzPqN7fhcev2Usu+68I58tq+CsKx5kZdW65kqIrPXv3QXAiqqaVtaMtn9//BE3T76Guvo6vN757jHHM/qnF3DpBWNZtXIluLPX3vvws6smkZeXx4J5c5n63DNMvPYGAP74wnM8/tD9APxkTDHDTjk15LfT7oq6dYI03Ejcf8Jfkm7efXzrCUFjuNUAToNYBrCkblsJYGmbdAXwPhNfTjrUPrrl+KABrOuARSRWOkJrIVkKYBGJlSx9JJGISBgaAYuIBKIHsouIBBKh/FUAi0i8JPOg9Y5CASwisaIRsIhIIOoBi4gEEqH8VQCLSLxoBCwiEkiE8lcBLCLxojvhREQCUQtCRCSQCOWvAlhE4kUjYBGRQCKUvwpgEYkXnYQTEQlELQgRkUAUwCIigUQofxXAIhIvGgGLiAQSofxVAItIvOgqCBGRQLIiNARWAItIrEQofxXAIhIvOgknIhJIhFrACmARiRedhBMRCcRQAIuIBBGhAbACWETiRSfhREQCiVD+KoBFJF50I4aISCBRugoiK3QBIiLpZJb81Pq+7L/N7EMzm2tmT5pZZzPra2YzzWyhmT1tZrmp1qoAFpFYyTJLemqJmfUGxgED3f1AIBsYCdwC/Mrd9wJWAuekXGuqG4qIdETWhikJOcD2ZpYD5AHLgKOAZxPvPwKMSLVWBbCIxIqZJT21xN2XAr8EPqMheCuBfwCr3L02sVop0DvVWhXAIhIrWZb8ZGbFZja70VT85X7MbEdgONAX2AXoApyQzlp1FYSIxEpbroJw9xKgpJm3jwEWu3s5gJk9BwwFCswsJzEK7gMsTbnWVDcUEemI0tWCoKH1cIiZ5VnDykcD84C/A6cl1hkFTE21VgWwiMRKW1oQLXH3mTScbHsXmENDXpYAE4HLzGwhsBMwJdVa1YIQkVhJ57Mg3H0SMGmzxYuAQenYvwJYRGIlOvfBKYBFJGayI3QrsgJYRGJFj6M
UEQkkQvmrABaReNHjKDez/r27MnEYiZiibp1ClyAxFKH81QhYROJFPeDNbKhtfR3ZdnRO/NSNeGB22EKkQ3n+3IFp2U+2AlhEJIwIXYWmABaReFEAi4gEoh6wiEggGgGLiAQSoQGwAlhE4iUnQgmsABaRWIlQ/iqARSRedCuyiEggEcpfBbCIxIuughARCUQPZBcRCSRC+asAFpF4sQh9KpwCWERiRSNgEZFAFMAiIoHoYTwiIoFkZ4WuIHkKYBGJFd0JJyISiHrAIiKBRGgArAAWkXjJ0nXAIiJhaAQsIhJIToSawApgEYkVjYBFRALRZWgiIoFEKH8VwCISLxG6EU4BLCLxohaEiEggCmARkUCiE7/RapeIiLTKLPmp9X1ZgZk9a2YLzGy+mQ0xs+5m9oqZfZz4umOqtSqARSRWzCzpKQm/Bv7i7vsCBwHzgSuBae7eH5iWmE+JAlhEYiWrDVNLzCwfOAKYAuDu1e6+ChgOPJJY7RFgxNbUKiISG1lmSU9mVmxmsxtNxY121RcoBx4ys/fM7AEz6wIUufuyxDrLgaJUa9VJOBGJlbZ8JJG7lwAlzbydA3wTuMTdZ5rZr9ms3eDubmaeaq0aAYtIrKSrBQGUAqXuPjMx/ywNgbzCzHYGSHwt25paRURiI10n4dx9ObDEzPZJLDoamAe8AIxKLBsFTE21VrUgRCRW0nwd8CXAb80sF1gEjKFh4PqMmZ0DfAqcnurOFcAiEivZabwTzt3fBwZu4a2j07F/BbCIxEqE7kRWAItIvFiEbkZWAItIrGgELCISiD4VWUQkEI2ARUQC0fOARUQCidCn0iuARSRedBWEiEggEepAKIDTYfmyZVxz1RVUfPEFmHHaD0/nxz8ZxYL58/n5DZOo3riR7Jxsrr52Mt8YMKDJ9i88/wfuv+8eAH563gWcMuLUTH8L0k5OPrCIY/fpgTt8unIdd77+Cecduht79uiCGfyncgN3vPYJG2rrm2z7g4N6cczePah3uH/6Z7y/tCrAdxA9GgFvY7Jzsrn8iivZb/8DWLt2DSN/+AMOGTKUX912K+dfeBGHHf4d3nj9NW6/7VamPPzY17atXLWKe++5iyef/j1mxsjTv8+R3z2Kbvn5gb4bSZfueZ046YBCLnl2LtV1zoSj+nF4v+5MmbGE9TUNgTtmcB+G7V/Icx8s/9q2fQo6c1i/7lzy+w/pnteJG4btzYW/m0t9yg8+3HZEqQesp6GlQc+ehey3/wEAdOnSlX79+lFWtgLDWLNmLQBrVq+mZ8/CJtu+/dabHDJkKPkFBXTLz+eQIUN56803Mlq/tJ9sM3JzssgyyM3JomJdzabwhYZlW8rUwbsX8OaiCmrrnbI11Syr2kj/nl0yV3iEteWB7KFpBJxmS5eWsmD+fL4x4CCuuPJqLig+h9t+eQv19fU8+tunmqxfVraCXr16bZovKiqirGxFJkuWdlKxrobn5yzn/pEDqK6t5/2lVZvaCJccsQff2jWfJSvX89CM0ibbds/L5V/lazbNf7G2mu55ucDaTJUfWeFjNXkpj4DNbEwL7236mI+SkuYeNh8/69au5WfjxzHhyqvp2rUrzzz9JBMmXsVfp73GhIlXMfm6a0KXKBnUJTebQbsXcN7Tcxj7xAd0zsniO3t1B+DO1z9h7BP/pHTVBg7rl/KH6soWRGkEvDUtiP9p7g13L3H3ge4+sLi4uLnVYqWmpobLxo9j2PdO5phjjwPgxal/4OjE6+OOP5G5cz5osl1hYRHLl3/V/1uxYgWFhSl/xJR0IAf17kbZ6o1Ubailzp3pn6xi38Kum96vd3hjUQVD+jYN4Ip11fTokrtpfqcuuVSsq85I3VFnbZhCazGAzeyDZqY5bMUH0cWNuzP5+mvo168fZ4/+6g+DnoWFzH5nFgCzZs5gt933aLLtoUMPY/rbb1JVWUlVZSXT336TQ4celqnSpR2Vr6lm78Ku5GY3/G82YJcdKF21gV7dttu0zqD
dCli6akOTbWd9uorD+nUnJ8so7JrLzt0683G52g9JiVACt9YDLgKOB1ZuttyAt9ulogh6791/8NILU+m/996c/v3hAFwy/jKun3wj//eLm6mrrSV3u+24fvINAHw4dw6/e+YpJt9wE/kFBRSffyFn/ug0AM674CLyCwpCfSuSRh+Xr+XtxSu57dT9qKuHxV+s4+UF5dz4vX3I65QFGJ9UrOPetz4F4Nu75bNXjy48+e5/WLJqA28tXsldpx1AXT2UvP2proBIUkdoLSTL3Jv/r2pmU4CH3P3NLbz3hLufmcQxfEPtVlQosdM58Wt/xAOzwxYiHcrz5w6ENIxL31lUmfSvqm/3yw+a1i2OgN39nBbeSyZ8RUQyKzoDYF2GJiLxojvhREQCiVALWAEsIvESofxVAItIvFiEhsAKYBGJlQjlrwJYROIlQvmrABaRmIlQAiuARSRWdBmaiEgg6gGLiASiABYRCUQtCBGRQDQCFhEJJEL5qwAWkZiJUAIrgEUkVqL0QHYFsIjESnTiVwEsInEToQRWAItIrETpMrSt+Vh6EZEOxyz5Kbn9WbaZvWdmLyXm+5rZTDNbaGZPm1luqrUqgEUkVtrhU+kvBeY3mr8F+JW770XDJ8Y3+9mZrVEAi0ismFnSUxL76gN8D3ggMW/AUcCziVUeAUakWqsCWERipS0tCDMrNrPZjabizXZ3O3AFUJ+Y3wlY5e61iflSoHeqteoknIjESltOwbl7CVCyxf2YnQSUufs/zOzINJTWhAJYROIlfRdBDAVOMbNhQGegG/BroMDMchKj4D7A0lQPoBaEiMSKteGflrj7Ve7ex933AEYCf3P3HwN/B05LrDYKmJpqrQpgEYmVdF+GtgUTgcvMbCENPeEpqe5ILQgRiZWsdrgPw91fBV5NvF4EDErHfhXAIhIz0bkTTgEsIrESoYehKYBFJF4ilL8KYBGJF42ARUQCSeYW445CASwisRKd+FUAi0jMRGgArAAWkXiJ0gPZFcAiEi/RyV8FsIjES4TyVwEsIvGij6UXEQkkQvmrp6GJiISiEbCIxEqURsAKYBGJFV2GJiISiEbAIiKBKIBFRAJRC0JEJBCNgEVEAolQ/iqARSRmIpTA5u7tfYx2P4CIxMZWx+eG2uQzp3NO2LjORABLgpkVu3tJ6DqkY9HPxbZLtyJnVnHoAqRD0s/FNkoBLCISiAJYRCQQBXBmqc8nW6Kfi22UTsKJiASiEbCISCAKYBGRQBTAGWJmJ5jZR2a20MyuDF2PhGdmD5pZmZnNDV2LhKEAzgAzywZ+A5wI7A+cYWb7h61KOoCHgRNCFyHhKIAzYxCw0N0XuXs18BQwPHBNEpi7vw5UhK5DwlEAZ0ZvYEmj+dLEMhHZhimARUQCUQBnxlJg10bzfRLLRGQbpgDOjHeA/mbW18xygZHAC4FrEpHAFMAZ4O61wMXAy8B84Bl3/zBsVRKamT0JTAf2MbNSMzsndE2SWboVWUQkEI2ARUQCUQCLiASiABYRCUQBLCISiAJYRCQQBbCISCAKYBGRQP4foAFULroBnqEAAAAASUVORK5CYII=\n",
2123 | "text/plain": [
2124 | ""
2125 | ]
2126 | },
2127 | "metadata": {
2128 | "needs_background": "light"
2129 | },
2130 | "output_type": "display_data"
2131 | }
2132 | ],
2133 | "source": [
2134 | "# Baseline: Logistic Regression with default hyper-parameters\n",
2135 | "lr = LogisticRegression()\n",
2136 | "\n",
2137 | "classifier_report(ModelName=\"Logistic Regression\", model_object=lr)"
2138 | ]
2139 | },
2140 | {
2141 | "cell_type": "code",
2142 | "execution_count": 50,
2143 | "metadata": {},
2144 | "outputs": [
2145 | {
2146 | "name": "stdout",
2147 | "output_type": "stream",
2148 | "text": [
2149 | "Decision Tree Classifier Report:\n",
2150 | "\n",
2151 | " precision recall f1-score support\n",
2152 | "\n",
2153 | " 0 0.81 0.89 0.85 157\n",
2154 | " 1 0.81 0.71 0.76 111\n",
2155 | "\n",
2156 | " accuracy 0.81 268\n",
2157 | " macro avg 0.81 0.80 0.80 268\n",
2158 | "weighted avg 0.81 0.81 0.81 268\n",
2159 | "\n",
2160 | "\n",
2161 | "\n",
2162 | "Confusion_matrix: \n",
2163 | "\n"
2164 | ]
2165 | },
2166 | {
2167 | "data": {
2168 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAD4CAYAAADSIzzWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAV6ElEQVR4nO3deXwV5b3H8c8viWxCEhYTkChii1pUFFkuiDtWcSvUV6tY2yJyGxG0drkVsCqotWL1VWu1alM3VBTRoqhXrV6qdUEQkKoIiqhFQCBsCUHZQn73j4wxkJCcHE7OZIbv29e8klnOM8/R+M0vz5l5xtwdERFJv4ywOyAisrdSAIuIhEQBLCISEgWwiEhIFMAiIiHJSsM5dJmFiCTK9rSBlj0vSzhzNs+/c4/PtyfSEcC07HlZOk4jEbF5/p0AbCkPuSPSpLRISxo1LXvhWxaRWLPojKwqgEUkXjIyw+5BwhTAIhIvFuqwboMogEUkXjQEISISElXAIiIhUQUsIhISVcAiIiHRVRAiIiHREISISEg0BCEiEhJVwCIiIVEAi4iEJFMfwomIhENjwCIiIdEQhIhISFQBi4iERBWwiEhIVAGLiIREtyKLiIREQxAiIiHREISISEhUAYuIhEQBLCISEn0IJyISEo0Bi4iEREMQIiIhUQUsIhIOUwCLiIQjSgEcncESEZEEWIYlvNTbltn9ZlZsZguqbbvFzD40s/fM7Ckzy622b5yZLTGzj8zs9PraVwCLSKyYWcJLAh4EBu2y7WXgCHfvASwGxgXn7Q4MBQ4PXnOXmdV5TZwCWERiJZUB7O6vAet32faSu5cHq7OAguD7wcAUd9/q7p8BS4C+dbWvABaRWElxBVyfi4EXgu87A8uq7VsebNstBbCIxIslvphZoZnNrbYUJnwas98C5cDkZLuqqyBEJFYaUtm6exFQlMQ5LgLOBga6uwebVwAHVDusINi2W6qARSRWMjIyEl6SYWaDgCuB77n7V9V2PQMMNbPmZtYV6Aa8XVdbqoBFJFZSeR2wmT0GnAR0MLPlwHgqr3poDrwcnGuWu4909w/MbCqwkMqhidHuvqOu9hXAIhIvKbwPw90vqGXzfXUcfyNwY6LtK4BFJFaidCecAlhEYkUBLCISkkRuMW4qFMAiEiuqgEVEQqIAFhEJiQJYRCQkCmARkbBEJ38VwCISL8neYhwGBbCIxIqGIEREwhKd/FUAJ+ue8RdyxglHsGZ9Gb1/+HsArh11Fmef2IMKd9asL6Nw/COsXFNKbpuW/HXCj+la0IGt27ZzyYTJLPxkZY02u+zfnocnDqddzr7MX/Q5F1/9ENvL65zLQ5qwa68ex2v/epV27dozbfpzAHy4aBG/u34827ZuJTMrk6uunsCRPXrUeO0zTz/F3/56NwA/u+RSvjfk+2nte5RFqQKOzmBJE/Pws7MYPPovO227bdIM+p5/E/2GTuSF1xcwrvAMAK4ccTrvfrScvuffxIhrHubW3/yg1jZvvGIwd0x+hSMGX8eGss1c9P3+jf4+pPEMHnIud//13p223fbHWxg5ajRTp01n1GVX8Kc/3lLjdaUlJdxz95088thUJk95gnvuvpONpaXp6nbkpfmJGHuk3gA2s8PMbIyZ/TlYxpjZd9LRuabszXc+YX3pVzttK/tyS9X3rVo25+t5mg87uCP/mrMYgMX/WU2X/duR165NjTZP7HMI0/5vPgCTn53NOScd1VjdlzTo1bsP2Tk5O20zjE2bvgRgU1kZ++2XV+N1M998g379B5CTm0t2Tg79+g/gzTdeT0uf4yBKAVznEISZjQEuAKbwzcTCBcBjZjbF3Sc2cv8iZ8Loc7jw7L6UbtrMoMI/A/D+4hUMPuUo3pz/Cb0P78KBndrROT+X4vVlVa9rn7svpWWb2bGjAoAVqzewf15OreeQ6Lpy7FVcWjiCP956MxUVFTw0eUqNY4qLV9OxY8eq9fz8fIq
LV6ezm5EWpbkg6quARwB93H2iuz8SLBOpfNLniN29qPpzloqKGvy0j0ib8Jdn6XbGNUx5YS4jzz8BgFsfeJmcNq2YNWUslw49kXc/Wl4VtLJ3mfr4Y/xmzDhemvEvfjNmHBOu+W3YXYqdKFXA9QVwBbB/Lds7Bftq5e5F7t7b3XsXFib8jLtYefz5OQwZeDRQOTRxyYRH6Dd0IiOueYgObVvz2Yp1Ox2/ruRLctq0JDOz8j9J5/y2fFGscb+4eXb6Uwz87mkAnHb6GSx4/70ax+Tl5bNq1aqq9dWrV5OXl5+2PkZdnAL4F8AMM3vBzIqC5UVgBnBFo/cuYr514H5V3599Ug8W/6fyz8ac1i3ZJysTgOHfP5Y33lmy03jx116bu5hzT+0JwIXn/BfPvVrzf06Jtv3y8pg7p3I07+3Zsziwy0E1jjl2wHG8NfMNNpaWsrG0lLdmvsGxA45Lc0+jyyzxJWx1jgG7+4tmdgiVQw5fP99+BTCnvmcdxd2kmy7i+F7d6JDbmiUv3sAN9zzPoOMOp1uXPCoqnM9XrufnN1aO7x12cEf+dv1PcHcWfbKSkdd98xTrp+64lFHXP8rKNaX89vbpPDxxOONHnc27Hy3jwaffCuvtSQqM+Z9fMXfO25SUbOC7p5zApaMv59oJN/CHib9nR3k5zZo359oJ1wPwwYL3eWLqFCZcfyM5ubkUjhzFj86vvFrmkktHk5ObG+I7iZamUNkmyr55onKj8ZY9L2vsc0iEbJ5/JwBbykPuiDQpLSrLwT1Oz0PH/CPhUPvo5tNDTWvdiCEisRKhAlgBLCLxkhGhy9AUwCISK6qARURCEqUP4RTAIhIrEcpfBbCIxIsmZBcRCUmUKuDo/KoQEUlAKm9FNrP7zazYzBZU29bOzF42s4+Dr22D7RbMGLnEzN4zs2Pqa18BLCKxkuJbkR8EBu2ybSwww927UTktw9hg+xlAt2ApBO6ur3EFsIjESiorYHd/DVi/y+bBwKTg+0nAkGrbH/JKs4BcM+tUV/sKYBGJlYZUwNWnzg2WRKZvzHf3r58ptgr4eqq6zsCyasct55s5dGqlD+FEJFYaciecuxcBSU9a7u5uZklPqKMAFpFYScONGKvNrJO7rwyGGIqD7SuAA6odVxBs2y0NQYhIrKRhPuBngGHB98OA6dW2/zS4GqIfUFptqKJWqoBFJFZSWQGb2WPASUAHM1sOjAcmAlPNbASwFDgvOPx54ExgCfAVMLy+9hXAIhIrqRyBcPcLdrNrYC3HOjC6Ie0rgEUkVjQdpYhISDQbmohISBTAIiIhiVD+KoBFJF5UAYuIhCRC+asAFpF40VUQIiIhyYhQCawAFpFYiVD+KoBFJF70IZyISEgiNASsABaReNGHcCIiITEUwCIioYhQAawAFpF40YdwIiIhiVD+KoBFJF50I4aISEh0FYSISEgiVAArgEUkXjQEISISkujErwJYRGJGl6GJiIQkQp/BKYBFJF50FYSISEg0BCEiEpIIFcAKYBGJlyhVwBlhd0BEJJWsAUu9bZn90sw+MLMFZvaYmbUws65mNtvMlpjZ42bWLNm+KoBFJFYyMyzhpS5m1hn4OdDb3Y8AMoGhwM3Abe7+bWADMCLZviqARSRWzCzhJQFZQEszywJaASuBU4Ang/2TgCHJ9lUBLCKxYtaQxQrNbG61pfDrdtx9BXAr8DmVwVsKzANK3L08OGw50DnZvupDOBGJlYbMBeHuRUBRbfvMrC0wGOgKlABPAIP2vIffSEsAb55/ZzpOIxHTQr/+pRGk8CKIU4HP3H1NZbs2DRgA5JpZVlAFFwArkj2BhiBEJFZSOAb8OdDPzFpZ5cEDgYXAK8APgmOGAdOT7WtaapCl67am4zQSEV3aNwdg1LSFIfdEmpK7zu2eknYyU1QCu/tsM3sSeAcoB+ZTOVzxv8AUM/tdsO2+ZM+hPwJFJFZSeSecu48Hxu+y+VOgbyraVwCLSKzoVmQRkZBE6VZ
kBbCIxIoqYBGRkESoAFYAi0i8ZEUogRXAIhIrEcpfBbCIxIseSy8iEpII5a8CWETiRVdBiIiEpL6J1psSBbCIxEqE8lcBLCLxYgk97a1pUACLSKyoAhYRCYkCWEQkJJqMR0QkJJkRes6PAlhEYkV3womIhERjwCIiIYlQAawAFpF4ydB1wCIi4VAFLCISkqwIDQIrgEUkVlQBi4iERJehiYiEJEL5qwAWkXiJ0I1wCmARiZcoDUFE6ZeFiEi9MswSXupjZrlm9qSZfWhmi8ysv5m1M7OXzezj4GvbpPua7AtFRJoia8CSgNuBF939MOAoYBEwFpjh7t2AGcF6UhTAIhIrZokvdbdjOcAJwH0A7r7N3UuAwcCk4LBJwJBk+6oAFpFYMbOGLIVmNrfaUlitqa7AGuABM5tvZvea2b5AvruvDI5ZBeQn21d9CCcisdKQqtLdi4Ci3ezOAo4BLnf32WZ2O7sMN7i7m5kn11NVwCISMyn8EG45sNzdZwfrT1IZyKvNrBNA8LU46b4m+0IRkaaoIUMQdXH3VcAyMzs02DQQWAg8AwwLtg0DpifbVw1BiEispLiqvByYbGbNgE+B4cEppprZCGApcF6yjSuARSRWUvlQTnf/N9C7ll0DU9G+AlhEYiU698EpgEUkZjIjdCuyAlhEYiVC+asAFpF4sQgNQiiARSRWVAGLiIRET0UWEQmJKmARkZBEaUJ2BbCIxEqEnkqvABaReNFVECIiIYnQCIQCOBW2bd3Kr0cNZ/v2bezYsYPjTz6Vn/73aG6aMJaPP/yAzMwsDut+JFeMuYasrH1qvP6l56fz6IN/A+BHF/2M084cnO63II0gr3UzRvQtqFrvsO8+PLdwDYvXfskFR3eieVYG67/azgNzVrClvKLG67vn78sPe3TEzJj5nw28tHhdOrsfWaqA9zL7NGvGH+64l5atWlFevp1fjhxGn37HMfC0sxg7/iYAbho/hheemcY5556/02s3bizlkfvv4c77p2AYoy8+n/7HnUyb7Oww3oqkUPGmbdz0z0+ByvkJfn/mIbz7RRk/61fAtPdX8/Har+jfJZdTD2nPcwvX7PRaA84/qhN/fmMpJZu3M+bkg3lvZRmryral/41ETJTGgDUfcAqYGS1btQKgvLycHeXlYEbfY4+vmnf00O5HsrZ4dY3Xzpv1Jsf06U92dg5tsrM5pk9/5s56I91vQRrZYXn7svbLbazfvJ281s34eO1XAHxYvIme+9f8ZXtQu5as+XIb677azg6HectLOapTm3R3O5JS+VTkRu9r2B2Iix07djBy2A8576yTOKZPf75zeI+qfeXl25nx4rP07jegxuvWri1mv7yOVesd8vJZuzbpCfaliepVkM3cZaUArNy4tSpMe3bOpm3Lmn+I5rbIYsPm7VXrGzaXk9Oy5vCV1JTipyI3qqQD2MyG17Gv6kF3RUW7e9xSvGRmZnLPpCd49OmX+WjRAj775OOqfXfcciNHHt2LI4/uFWIPJSyZBj06teGdFRsBeHjeF5xwcFvGntyVFlkZlFck/UgxqUWUKuA9GQO+Dnigth27POjOl67bugeniZbWbbI56pg+zJ39Jl2/1Y2H77ubkpINjB9zba3Hd+iQx7vz51atry1ezVE9a5v/WaLq8I6tWVayhbKtOwBYvWkbd7z5OVD5Qd0RHWsOLZRsKadttYq3bcssSqtVxLJ74cdq4uqsgM3svd0s77MHj2KOm5IN69lUVlndbN26hXfmvMUBXbrywjN/Z97smVx1/c1kZNT+r7pXvwHMe3smZRs3UrZxI/PenkmvWoYqJLp6F+QwZ3lp1Xrr5plAZVCccWgHXv9sQ43XLN2wmbzWzWjfah8yDXoV5PDeyk3p6nK0RWgMor4KOB84Hdj1J8SAmY3Sowhav24tt9xwNRUVO6ioqODEgafTb8CJDDq+J/n5nbii8CcAHHfiQH588UgWL/qA556eyq/GXUd2dg4XDr+Ey0dcAMC
Ph48kOzsnzLcjKdQs0zgsb18enb+yalufghxOOLgtAP/+ooy3lpYAkNMiiwuP6cRdM5dR4fD4v1dx2YADyTDjraUlrCzbe/6S3BNNYWghUea++/EnM7sPeMDda3wsb2aPuvuPEjjHXjUEIfXr0r45AKOmLQy5J9KU3HVud0hBXTrn09KEB9X7HJwTalrXWQG7+4g69iUSviIi6RWdAlg3YohIvOhOOBGRkERoCFgBLCLxEqH8VQCLSLxYhEpgBbCIxEqE8lcBLCLxEqH81WQ8IhIzKb4TzswyzWy+mT0XrHc1s9lmtsTMHjezZsl2VQEsIrFiDfgnQVcAi6qt3wzc5u7fpvIu4d3eL1EfBbCIxIpZ4kv9bVkBcBZwb7BuwCnAk8Ehk4AhyfZVASwisdKQAK4+dW6wFO7S3J+AK4GvnxnVHihx9/JgfTnQOdm+6kM4EYmVhtwJt8vUuTu3Y3Y2UOzu88zspJR0bhcKYBGJlRRehjYA+J6ZnQm0ALKB24FcM8sKquACYEWyJ9AQhIjESqougnD3ce5e4O4HAUOBf7r7hcArwA+Cw4YB05PtqwJYROKl8SdkHwP8ysyWUDkmfF+yDWkIQkRipTEmZHf3V4FXg+8/Bfqmol0FsIjESpTuhFMAi0i8RCiBFcAiEiuakF1EJCSaDU1EJCQRyl8FsIjEiyZkFxEJSYTyVwEsIvESofxVAItIzEQogRXAIhIrugxNRCQkGgMWEQlJhgJYRCQs0UlgBbCIxIqGIEREQhKh/FUAi0i8qAIWEQmJbkUWEQlJdOJXASwiMROhAlgBLCLxojvhRETCEp38VQCLSLxEKH8VwCISL43xWPrGogAWkViJUP6SEXYHRET2VqqARSRWolQBK4BFJFaidBmahiBEJFbMEl/qbscOMLNXzGyhmX1gZlcE29uZ2ctm9nHwtW2yfVUAi0ispCqAgXLg1+7eHegHjDaz7sBYYIa7dwNmBOtJUQCLSKxYA/6pi7uvdPd3gu/LgEVAZ2AwMCk4bBIwJNm+KoBFJFYaUgGbWaGZza22FNbeph0E9ARmA/nuvjLYtQrIT7av+hBORGKlIR/BuXsRUFRne2atgb8Dv3D3jdWnu3R3NzNPqqOoAhaRuLEGLPU1ZbYPleE72d2nBZtXm1mnYH8noDjprronHd6JavQTiEhs7PE1ZFvKE8+cFlm7P59VlrqTgPXu/otq228B1rn7RDMbC7Rz9yuT6Ws6AlgCZlYY/MkjUkU/F02TmR0HvA68D1QEm6+ichx4KnAgsBQ4z93XJ3UOBXD6mNlcd+8ddj+kadHPxd5LY8AiIiFRAIuIhEQBnF4a55Pa6OdiL6UxYBGRkKgCFhEJiQJYRCQkCuA0MbNBZvaRmS0JLt6WvZyZ3W9mxWa2IOy+SDgUwGlgZpnAX4AzgO7ABcG0drJ3exAYFHYnJDwK4PToCyxx90/dfRswhcop7WQv5u6vAUndQSXxoABOj87Asmrry4NtIrIXUwCLiIREAZweK4ADqq0XBNtEZC+mAE6POUA3M+tqZs2AocAzIfdJREKmAE4Ddy8HLgP+QeVzpaa6+wfh9krCZmaPAW8Bh5rZcjMbEXafJL10K7KISEhUAYuIhEQBLCISEgWwiEhIFMAiIiFRAIuIhEQBLCISEgWwiEhI/h9aX05XSn1/XwAAAABJRU5ErkJggg==\n",
2169 | "text/plain": [
2170 | ""
2171 | ]
2172 | },
2173 | "metadata": {
2174 | "needs_background": "light"
2175 | },
2176 | "output_type": "display_data"
2177 | }
2178 | ],
2179 | "source": [
2180 | "dt = DecisionTreeClassifier(random_state=42)  # fixed seed so the reported scores are reproducible\n",
2181 | "classifier_report(\"Decision Tree\", dt)"
2182 | ]
2183 | },
2184 | {
2185 | "cell_type": "code",
2186 | "execution_count": 51,
2187 | "metadata": {},
2188 | "outputs": [
2189 | {
2190 | "name": "stdout",
2191 | "output_type": "stream",
2192 | "text": [
2193 | "Random Forest Classifier Report:\n",
2194 | "\n",
2195 | " precision recall f1-score support\n",
2196 | "\n",
2197 | " 0 0.82 0.85 0.83 157\n",
2198 | " 1 0.78 0.73 0.75 111\n",
2199 | "\n",
2200 | " accuracy 0.80 268\n",
2201 | " macro avg 0.80 0.79 0.79 268\n",
2202 | "weighted avg 0.80 0.80 0.80 268\n",
2203 | "\n",
2204 | "\n",
2205 | "\n",
2206 | "Confusion_matrix: \n",
2207 | "\n"
2208 | ]
2209 | },
2210 | {
2211 | "data": {
2212 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAD4CAYAAADSIzzWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAU7UlEQVR4nO3de5yWdZ3/8ddnZkBAkDGFUcEDKJpiB1uXtbXShBQVRHf9ech+qbGybWqav0qxdtEtV3vsdtC1NMoDVoJoKmBaKmll5YHiVyBYIZ5AmcFFEATEge/+MXc4wsDcDPfc11wXr+fjcT2Y67qvw+dWePPhe50ipYQkqfpqsi5AknZUBrAkZcQAlqSMGMCSlBEDWJIyUleFY3iZhaRyxfbuoOdhF5SdOWtmX7/dx9se1Qhgeh52QTUOo5xYM/t6ANY2Z1yIupQeVUmjrmUH/MqSCi3yM7JqAEsqlprarCsomwEsqVgi02HdbWIASyoWhyAkKSN2wJKUETtgScqIHbAkZcSrICQpIw5BSFJGHIKQpIzYAUtSRgxgScpIrSfhJCkbjgFLUkYcgpCkjNgBS1JG7IAlKSN2wJKUEW9FlqSMOAQhSRlxCEKSMmIHLEkZyVEA56dSSSpHTW35Uzsi4uaIaIqIua2W/WdEPBMRf4yIeyKivtVn4yNiQUT8KSKOa7fUjn5HSeqSIsqf2ncrMHKTZQ8Bh6aU3gv8GRjfctg4BDgDGFra5jsRsdWUN4AlFUvUlD+1I6X0S2DZJsseTCk1l2YfBwaWfh4DTEkpvZlSeg5YAAzb2v4NYEnFsg0dcESMi4hZraZx23i0TwEPlH4eALzU6rNFpWVb5Ek4SYUS23AZWkppIjCxg8f5EtAM/Kgj24MBLKlgtiWAt+MY5wCjgOEppVRavBjYu9VqA0vLtsghCEmFEjVR9tSh/UeMBL4InJRSWt3qo+nAGRGxU0QMAoYAT25tX3bAkgqlkh1wREwGjgZ2j4hFwARarnrYCXiodKzHU0qfTik9HRFTgXm0DE2cn1Jav7X9G8CSCqWSAZxSOrONxTdtZf2rgKvK3b8BLKlQqjEGXCkGsKRiyU/+GsCSisUOWJIyUlOTn4u7DGBJhWIHLElZyU/+GsCSisUOWJIyYgBLUkY6eotxFgxgSYViByxJGTGAJSkjBrAkZcQAlqSs5Cd/DWBJxeKtyJKUEYcgJCkr+clf3wnXUTdOOIsXZl7NrDsv37js3z5zIk/eMZ7Hp1zGjO+cz579+r5jm785ZB9WPnUtp4x4f5v7POzgvXlq6uXMnTaBr3/x1M4sX1Ww5JVXGHvO/+WU0Sdwykkn8qMfTALg+uu+xamnjOa0fxjDP5/3KZqaGtvcfvq99zD6+GMZffyxTL/3nmqWnmvR8rr5sqasGcAd9IMZjzPm/G+/Y9k3J81k2OlXc8QZ1/DAr+YyftzxGz+rqQm+etEYHn78mS3u87rLT+f8r9zOoWOuZP99+nHskYd0Wv3qfLV1tXz+i5dxz4z7+eHkO5gy+XaeXbCAcz71T9x1zwym3j2Njxx1NN+94dubbbti+XJuvOF6fjh5Kj+acic33nA9r69YkcG3yJ9CBXBEvDsiLo2I60rTpRFxcDWK68p+/ftnWbZi9TuWrXxj7cafe/XcibffVg2fOeMo7p35B5YuW9nm/vbYfRf67NyDJ+c8D8Dt9z3J6KPfW/nCVTX9+vXn4EOGArDzzr0ZPHgwTU2N9O7de+M6a9esaTMIfvPrxzjig0fSt76eXfr25YgPHsmvH/tV1WrPszwF8FbHgCPiUuBMYApvv155IDA5IqaklK7p5Ppy54rzR3PWqGGsWLWGkeOuA2Cvfn056Zj3cdx51/HdoWe1ud1e/etZ3LR84/zixuXs1b++ChWrGhYvXsQz8+fznve+D4D/vvabzJh+L7179+H7t9y22fpNTY3sscceG+cbGhq2OFShd8rTsyDa64DHAn+
bUrompfTD0nQNMKz0WZsiYlxEzIqIWRMnTqxkvV3eFd+ewZDj/5UpD8zi06d/BID//MI/8uVrp72jI9aOY/Ubb/D/Lv4sX7js8o3d74UXfY4HZ/6CE0eNZsrtP8y4wmLJUwfcXgBvAPZqY/mepc/alFKamFI6PKV0+Lhx47anvty64/6nOHn4+wH4wCH7cNs15/LMT67klBGH8a3xp282vPBy03IGtOp4BzTU83Krjlj59NZbb3HJxZ/lhBNHM+Jjx272+Qknjubhhx7cbHn//g0sWbJk43xjYyP9+zd0aq1FkacAbu8ytIuBmRHxF+Cl0rJ9gAOACzqxrlzaf59+PPviUgBGHf1e/vx8yz8ZDx51xcZ1Jl75CR741VxmPPrHd2y75NXXWfnGWoa9Zz+enPM8Hx81jBum/KJqtavyUkpc8W9fYvDgwXzynHM3Ln/hhefZd9/9AHjkkZkMGjR4s23//sgPcd2139h44u23v3mMiy6+pCp1510XyNWybTWAU0o/jYgDaRlyGFBavBh4KqW0vrOL68omXX0OH/6bIexe35sFP/0KX7nxfkZ+aChD9u3Phg2JF19ZxmevmtLufh6fchlHnNEylH7R1VOZeOUn6LlTNx789Tx+9ti8zv4a6kSzf/877ps+jSEHHshp/zAGgAsvvoR7fnwXzz//HDU1wZ57DuDLE64E4Om5c7hz6hSu+Per6Ftfz7hPf4aPn95yOeI//8v59K2vz+qr5EpX6GzLFVUYl0w9D7NZ1tvWzL4egLXNGReiLqVHSzu43el50KU/KzvU/vS14zJNa++Ek1QoOWqADWBJxVKTo8vQDGBJhWIHLEkZydNJOJ8FIalQIsqf2t9X3BwRTRExt9Wyd0XEQxHxl9Kvu5aWR+lxDQsi4o8R8YH29m8ASyqUmpqasqcy3AqM3GTZZcDMlNIQYGZpHuB4YEhpGgfc0G6tZX4nScqFSnbAKaVfAss2WTwGmFT6eRJwcqvlt6UWjwP1EbHn1vZvAEsqlG25Fbn1c2tKUznPTmhIKb1S+nkJ8Nd7xAfw9h3DAIt4+wa2NnkSTlKhbMs5uJTSRKDDTwxLKaWI6PDdbAawpEKpwlUQjRGxZ0rpldIQQ1Np+WJg71brDSwt2yKHICQVSiXHgLdgOnB26eezgWmtln+ydDXEEcCKVkMVbbIDllQolbwTLiImA0cDu0fEImACcA0wNSLGAi8Ap5VWvx84AVgArAbO3WyHmzCAJRVKJYcgUkpnbuGj4W2sm4Dzt2X/BrCkQsnRjXAGsKRiydOtyAawpELJUf4awJKKxcdRSlJGHIKQpIwYwJKUkRzlrwEsqVjsgCUpIznKXwNYUrF4FYQkZaQmRy2wASypUHKUvwawpGLxJJwkZSRHQ8AGsKRi8SScJGUkMIAlKRM5aoANYEnF4kk4ScpIjvLXAJZULN6IIUkZ8SoIScpIjhpgA1hSsTgEIUkZyU/8GsCSCsbL0CQpIzk6B2cASyoWr4KQpIw4BCFJGclRA2wASyqWPHXANVkXIEmVFNswtbuviM9FxNMRMTciJkdEj4gYFBFPRMSCiLgjIrp3tFYDWFKh1NZE2dPWRMQA4LPA4SmlQ4Fa4Azga8A3U0oHAK8BYztaqwEsqVAiouypDHVAz4ioA3oBrwDHAHeVPp8EnNzRWg1gSYUSsS1TjIuIWa2mcX/dT0ppMfBfwIu0BO8K4HfA8pRSc2m1RcCAjtbqSThJhbItz4JIKU0EJrb1WUTsCowBBgHLgTuBkdtf4duqEsBrZl9fjcMoZ3r41786QQUvghgBPJdSWtqy37gbOBKoj4i6Uhc8EFjc0QM4BCGpUCo4BvwicERE9IqWlYcD84BHgFNL65wNTOtorVXpQZauam5/Je0w+vVu+W133tS5GVeiruR7px1akf3UVqgFTik9ERF3Ab8HmoHZtAxX/ASYEhFfLS27qaPH8B+BkgqlknfCpZQmABM2WbwQGFaJ/RvAkgrFW5E
lKSN5uhXZAJZUKHbAkpSRHDXABrCkYqnLUQIbwJIKJUf5awBLKhZfSy9JGclR/hrAkorFqyAkKSPtPWi9KzGAJRVKjvLXAJZULFHW2966BgNYUqHYAUtSRgxgScqID+ORpIzU5ug9PwawpELxTjhJyohjwJKUkRw1wAawpGKp8TpgScqGHbAkZaQuR4PABrCkQrEDlqSMeBmaJGUkR/lrAEsqlhzdCGcASyoWhyAkKSMGsCRlJD/xawBLKpgcNcC5Gq+WpHZFRNlTGfuqj4i7IuKZiJgfER+MiHdFxEMR8ZfSr7t2tFYDWFKh1GzDVIZrgZ+mlN4NvA+YD1wGzEwpDQFmluY7XKskFUZNRNnT1kREX+AjwE0AKaV1KaXlwBhgUmm1ScDJHa61oxtKUle0LUMQETEuIma1msa12tUgYClwS0TMjojvR8TOQENK6ZXSOkuAho7W6kk4SYWyLV1lSmkiMHELH9cBHwAuTCk9ERHXsslwQ0opRUTqWKV2wJIKpoIn4RYBi1JKT5Tm76IlkBsjYs/SsfYEmjpaqwEsqVBiG6atSSktAV6KiINKi4YD84DpwNmlZWcD0zpaq0MQkgqltrIXAl8I/CgiugMLgXNpaVynRsRY4AXgtI7u3ACWVCiVzN+U0v8HDm/jo+GV2L8BLKlQIkc3IxvAkgolT7ciG8CSCsW3IktSRuyAJSkjPg9YkjKSo7fSG8CSisWrICQpIzkagTCAK+HNN9/kgvM+ybp161i/fj0fHX4sYz99AS8vXsSE8Z/n9RXLOejgofzrV66mW7fum23/g5u/x33TfkxNbS0Xf348f/f3H8rgW6gzjDhwNz48aFcSsHjFWm55cjEfHrwrI4bsRv8+O/G5e+ezat36Nrf94L71nHhIPwB+Mm8pv31hefUKz7E8dcA+C6ICunfvzrU33sykKfdw6+0/5vHfPMbcOX/ghuu+welnfZI7pv2UPrvswn333r3Zts8tXMDDD97PD+6cztf/+7t8/Zqvsn59238glS/1PesYfsBufPXhZ7niZwuoiWDYPn1Z8OpqvvGL53n1jXVb3LZX91pGD+3Pf8xcyH88/Cyjh/anVzf/uJajJsqfsub/0QqICHr12hmA5uZm1jc3EwS/f+oJjh5+LADHjxrDrx6dudm2jz36CCOOPYHu3buz14CBDNx7b+Y/Paeq9avz1NQE3WprqAnoXhssX9PMS8vX8j+r39rqdoc29GZe4ypWr1vP6rc2MK9xFYfu0adKVedbpR7IXg0OQVTI+vXrGfuJ/8Pil17klNPOZMDAvendpw91dS3/ifv1b2Dp0s2fWrd0aSND3/O+jfP9GvZgaVNj1epW51m+ppkH//QqXzvxQN5an5jXuIp5javK2ra+Vx2vtQrp19a8RX0v/7iWI/tYLV+HO+CIOHcrn218yvzEiVt61nGx1NbWcuvku7n7gZ8zf+4cXnh+YdYlKWO9utXw/r36MP7+P/OFGc/Qva6Gv9unb9ZlFd6O0gFfCdzS1gebPGU+LV3VvB2HyZc+fXbhA4cP4+k5f2DVypU0NzdTV1fH0qZG+vXrv9n6/fo10LRkycb5pY1L6Ne/w284URdycENvXn3jLVa92TKmP3vR6+y/ey+eeHFFu9suX93Mgf133ji/a89u/LnpjU6rtUiyj9XybbUDjog/bmGaw3a8B6loXnttGStXvg7Am2vX8tQTv2Xf/QZz2OHDeHTmgwA8cN80PnTUMZtte+RRH+XhB+9n3bp1vLx4ES+99CIHD31PVetX51i2+i0G79aT7rUtkfDuhp1Z8vqbZW07t3EVQxt606tbDb261TC0oTdzyxy+2OFV6onsVdBeB9wAHAe8tsnyAH7TKRXl0P+8upSrJlzOhvUb2JA2cMyI4zjyI0ez3+D9ueLyz/O971zHkIMOZtTJ/wjAY7/4Oc/Me5p/+pcLGbz/ARzzsZF84tSTqK2r5ZJLv0xtbW3G30iV8NyyNfxu0et8+WM
HsCElXnxtLb9c+BrHDHkXIw/qxy496phw3AHMeWUlt816mX137cFR+7+L22a9zOp167lvfhNfGrE/ADPmNbF6C5er6Z26wtBCuSKlLb9PLiJuAm5JKT3Wxme3p5Q+XsYxdqghCLWvX++Wv/fPmzo340rUlXzvtEOhAn3pUwtXlP2SzL8d3DfTtN5qB5xSGruVz8oJX0mqrvw0wF6GJqlY8nQnnAEsqVByNARsAEsqlhzlrwEsqVgiRy2wASypUHKUvwawpGLJUf4awJIKJkcJbABLKhQvQ5OkjDgGLEkZMYAlKSMOQUhSRvLUAftOOEmFUunHAUdEbUTMjoj7SvODIuKJiFgQEXdExOavOi+TASypWCr/QPaLgPmt5r8GfDOldAAtz0rf4lMj22MASyqUSr4TLiIGAicC3y/NB3AMcFdplUnAyR2utaMbSlJXtC0NcOsXCJemcZvs7lvAF4ENpfndgOUppb++ZWIRMKCjtXoSTlKxbMNJuE1eIPzO3USMAppSSr+LiKMrUdqmDGBJhVLBy9COBE6KiBOAHsAuwLVAfUTUlbrggcDijh7AIQhJhRJR/rQ1KaXxKaWBKaX9gDOAn6eUzgIeAU4trXY2MK2jtRrAkgqlCm+lvxS4JCIW0DImfFNHd+QQhKRC6YwHsqeUHgUeLf28EBhWif0awJIKJU93whnAkgolR/lrAEsqmBwlsAEsqVB8GpokZcQxYEnKSI0BLElZyU8CG8CSCsUhCEnKSI7y1wCWVCx2wJKUkc64FbmzGMCSCiU/8WsASyqYHDXABrCkYvFOOEnKSn7y1wCWVCw5yl8DWFKxlPO6+a7CAJZUKDnKX98JJ0lZsQOWVCh56oANYEmF4mVokpQRO2BJyogBLEkZcQhCkjJiByxJGclR/hrAkgomRwkcKaXOPkanH0BSYWx3fK5tLj9zetRlG9fVCGCVRMS4lNLErOtQ1+Lvix2XtyJX17isC1CX5O+LHZQBLEkZMYAlKSMGcHU5zqe2+PtiB+VJOEnKiB2wJGXEAJakjBjAVRIRIyPiTxGxICIuy7oeZS8ibo6IpoiYm3UtyoYBXAURUQt8GzgeOAQ4MyIOybYqdQG3AiOzLkLZMYCrYxiwIKW0MKW0DpgCjMm4JmUspfRLYFnWdSg7BnB1DABeajW/qLRM0g7MAJakjBjA1bEY2LvV/MDSMkk7MAO4Op4ChkTEoIjoDpwBTM+4JkkZM4CrIKXUDFwA/AyYD0xNKT2dbVXKWkRMBn4LHBQRiyJibNY1qbq8FVmSMmIHLEkZMYAlKSMGsCRlxACWpIwYwJKUEQNYkjJiAEtSRv4X8InjEaexLjsAAAAASUVORK5CYII=\n",
2213 | "text/plain": [
2214 | ""
2215 | ]
2216 | },
2217 | "metadata": {
2218 | "needs_background": "light"
2219 | },
2220 | "output_type": "display_data"
2221 | }
2222 | ],
2223 | "source": [
2224 | "# Random Forest Classifier (seeded so the reported metrics are reproducible on re-run)\n",
2225 | "rf = RandomForestClassifier(n_estimators = 51, random_state = 41)\n",
2226 | "classifier_report(\"Random Forest\", rf)"
2227 | ]
2228 | },
2229 | {
2230 | "cell_type": "code",
2231 | "execution_count": 52,
2232 | "metadata": {},
2233 | "outputs": [
2234 | {
2235 | "name": "stdout",
2236 | "output_type": "stream",
2237 | "text": [
2238 | "K-Neighbors Classifier Report:\n",
2239 | "\n",
2240 | " precision recall f1-score support\n",
2241 | "\n",
2242 | " 0 0.77 0.85 0.81 157\n",
2243 | " 1 0.76 0.64 0.69 111\n",
2244 | "\n",
2245 | " accuracy 0.76 268\n",
2246 | " macro avg 0.76 0.75 0.75 268\n",
2247 | "weighted avg 0.76 0.76 0.76 268\n",
2248 | "\n",
2249 | "\n",
2250 | "\n",
2251 | "Confusion_matrix: \n",
2252 | "\n"
2253 | ]
2254 | },
2255 | {
2256 | "data": {
2257 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAD4CAYAAADSIzzWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAUiElEQVR4nO3de5jXdZ338ed7Bs0DxihnQQkUXa21dI21aI1bvcETobvdSdmumruzrud1dxWzTe1wS7f3fRuGaWyUtB4QudeAstQwNTQQijI8pHhCEBjRQBNdBT/3H/PbcYQBfjPM/D7z/fJ8XNf3Yr7n93hxvXzz+Z4ipYQkqfbqchcgSTsqA1iSMjGAJSkTA1iSMjGAJSmTHjU4h7dZSKpWbO8Bdj303Koz543Fk7f7fNujFgHMroeeW4vTqCDeWDwZgDc3ZC5E3couNUmj7mUH/JUllVoUZ2TVAJZULnX1uSuomgEsqVwi67BuuxjAksrFIQhJysQOWJIysQOWpEzsgCUpE++CkKRMHIKQpEwcgpCkTOyAJSkTA1iSMqn3Ipwk5eEYsCRl4hCEJGViByxJmdgBS1ImdsCSlImPIktSJg5BSFImDkFIUiZ2wJKUSYECuDiVSlI16uqrn7YhIr4XEU0RsaTVsqsj4omIeCQi7oiIhlbrLo2IpRHx+4gYs81SO/o7SlK3FFH9tG03Asdusuwe4EMppUOAJ4FLm08bBwPjgQ9W9vl2RGw15Q1gSeUSddVP25BSegB4ZZNld6eUNlRm5wODKz+PA6anlP4zpfQssBQYsbXjG8CSyqUdHXBENEbEolZTYzvP9gXgJ5WfBwEvtFq3vLJsi7wIJ6lUoh23oaWUpgBTOniey4ANwM0d2R8MYEkl054A3o5znA6cCBydUkqVxSuAfVptNriybIscgpBUKlEXVU8dOn7EscDFwKdSSutbrZoNjI+I90XEUGA48PDWjmUHLKlUOrMDjohbgVFAn4hYDlxO810P7wPuqZxrfkrprJTSoxExA3iM5qGJc1JKG7d2fANYUql0ZgCnlD7bxuKpW9n+68DXqz2+ASypVGoxBtxZDGBJ5VKc/DWAJZWLHbAkZVJXV5ybuwxgSaViByxJuRQnfw1gSeViByxJmRjAkpRJRx8xzsEAllQqdsCSlIkBLEmZGMCSlIkBLEm5FCd/DWBJ5eKjyJKUiUMQkpRLcfLXb8J11A2Xn8rzc69i0e1fbFn25bNP4OHbLmX+9AnM+fY5DOzb6z37/NnB+/LawkmcfMxH2jzmoQftw8IZX2TJrMv5Pxd/uivLVw2sWrmSM0//a04eezwnf+oEbv73aQBMvvabfPrksXzmL8fx93/3BZqaVre5/+wf3sHY40Yz9rjRzP7hHbUsvdCi+XPzVU25GcAd9O9z5jPunOves+yaaXMZccpVHDF+Ij/5xRIubTyuZV1dXfC1C8bxs/lPbPGY137xFM756i18aNyV7LdvX0aPPLjL6lfXq+9Rzz9fPIE75tzJTbfexvRbb+HppUs5/Qt/y8w75jDjP2Zx5CdH8Z3rr9ts33Vr13LD9ZO56dYZ3Dz9dm64fjKvrluX4bconlIFcET8SURcEhHXVqZLIuKgWhTXnT3466d5Zd369yx77fU3W37ebdf38e7XquHs8Z/kh3N/y0uvvNbm8Qb0eT977L4LD//uOQBu+dHDjB11SOcXrprp27cfBx38QQB2370nw4YNo6lpNT179mzZ5s033mgzCB56cB5HfGwkvRoaeH+vXhzxsZE8OO8XNau9yIoUwFsdA46IS4DPAtN59/PKg4FbI2J6SmliF9dXOFecM5ZTTxzBuj++wbGN1wKwd99efOqoDzPm767lOx88tc399u7XwIqmtS3zK1avZe9+DTWoWLWwYsVynnj8cf70kA8D8K1J1zBn9g/p2XMPvvv9H2y2fVPTagYMGNAy379//y0OVei9ivQuiG11wGcCH00
pTUwp3VSZJgIjKuvaFBGNEbEoIhZNmTKlM+vt9q64bg7Dj/tXpv9kEWedciQAV//LX/GlSbPe0xFrx7H+9df5pwvP518mfLGl+z3vgn/k7rn3c8KJY5l+y02ZKyyXInXA2wrgd4C921g+sLKuTSmlKSmlw1NKhzc2Nm5PfYV1250LOenojwBw2MH78oOJZ/DEj6/k5GMO5ZuXnrLZ8MKLTWsZ1KrjHdS/gRdbdcQqprfffpuLLjyf408YyzH/ffRm648/YSw/u+fuzZb369efVatWtcyvXr2afv36d2mtZVGkAN7WbWgXAnMj4inghcqyfYH9gXO7sK5C2m/fvjy97CUAThx1CE8+1/xPxoNOvKJlmylXfp6f/GIJc+575D37rlrzKq+9/iYj/vQDPPy75/jciSO4fvr9NatdnS+lxBVfvoxhw4bxN6ef0bL8+eefY8iQDwDw85/PZejQYZvt+/GRn+DaSf+35cLbLx+axwUXXlSTuouuG+Rq1bYawCmln0bEATQPOQyqLF4BLEwpbezq4rqzaVedzl/82XD6NPRk6U+/yldvuJNjP/FBhg/pxzvvJJatfIXzvz59m8eZP30CR4xvHkq/4KoZTLny8+z6vp24+8HHuGveY139a6gLLf71r/jR7FkMP+AAPvOX4wA478KLuOP/zeS5556lri4YOHAQX7r8SgAeXfI7bp8xnSu+8nV6NTTQeNbZfO6U5tsR//4fzqFXQ0OuX6VQukNnW62owbhk2vVQm2W9643FkwF4c0PmQtSt7NLcDm53eh54yV1Vh9rvvzEma1r7JJykUilQA2wASyqXugLdhmYASyoVO2BJyqRIF+F8F4SkUomoftr2seJ7EdEUEUtaLdsrIu6JiKcqf+5ZWR6V1zUsjYhHIuKwbR3fAJZUKnV1dVVPVbgROHaTZROAuSml4cDcyjzAccDwytQIXL/NWqv8nSSpEDqzA04pPQC8ssniccC0ys/TgJNaLf9BajYfaIiIgVs7vgEsqVTa8yhy6/fWVKZq3p3QP6W0svLzKuC/nhEfxLtPDAMs590H2NrkRThJpdKea3AppSlAh98YllJKEdHhp9kMYEmlUoO7IFZHxMCU0srKEENTZfkKYJ9W2w2uLNsihyAklUpnjgFvwWzgtMrPpwGzWi3/m8rdEEcA61oNVbTJDlhSqXTmk3ARcSswCugTEcuBy4GJwIyIOBN4HvhMZfM7geOBpcB64IzNDrgJA1hSqXTmEERK6bNbWHV0G9sm4Jz2HN8AllQqBXoQzgCWVC5FehTZAJZUKgXKXwNYUrn4OkpJysQhCEnKxACWpEwKlL8GsKRysQOWpEwKlL8GsKRy8S4IScqkrkAtsAEsqVQKlL8GsKRy8SKcJGVSoCFgA1hSuXgRTpIyCQxgScqiQA2wASypXLwIJ0mZFCh/DWBJ5eKDGJKUiXdBSFImBWqADWBJ5eIQhCRlUpz4NYAllYy3oUlSJgW6BmcASyoX74KQpEwcgpCkTArUABvAksqlSB1wXe4CJKkzRTumbR4r4h8j4tGIWBIRt0bELhExNCIWRMTSiLgtInbuaK0GsKRSqa+LqqetiYhBwPnA4SmlDwH1wHjgG8A1KaX9gT8AZ3a0VgNYUqlERNVTFXoAu0ZED2A3YCVwFDCzsn4acFJHazWAJZVKRHumaIyIRa2mxv86TkppBfC/gWU0B+864FfA2pTShspmy4FBHa3Vi3CSSqU974JIKU0BprS1LiL2BMYBQ4G1wO3Asdtf4btqEsBvLJ5ci9OoYHbxf//qAp14E8QxwLMppZeajxv/AYwEGiKiR6ULHgys6OgJHIKQVCqdOAa8DDgiInaL5o2PBh4Dfg58urLNacCsjtZakx7kyVXra3EaFcQBA3YDYOK9T2euRN3JhKP265Tj1HdSC5xSWhARM4FfAxuAxTQPV/wYmB4RX6ssm9rRc/iPQEml0plPwqWULgcu32TxM8CIzji+ASypVHwUWZI
yKdKjyAawpFKxA5akTArUABvAksqlR4ES2ACWVCoFyl8DWFK5+Fl6ScqkQPlrAEsqF++CkKRMtvWi9e7EAJZUKgXKXwNYUrlEVV976x4MYEmlYgcsSZkYwJKUiS/jkaRM6gv0nR8DWFKp+CScJGXiGLAkZVKgBtgAllQudd4HLEl52AFLUiY9CjQIbABLKhU7YEnKxNvQJCmTAuWvASypXAr0IJwBLKlcHIKQpEwMYEnKpDjxawBLKpkCNcCFGq+WpG2KiKqnKo7VEBEzI+KJiHg8Ij4WEXtFxD0R8VTlzz07WqsBLKlU6toxVWES8NOU0p8AHwYeByYAc1NKw4G5lfkO1ypJpVEXUfW0NRHRCzgSmAqQUnorpbQWGAdMq2w2DTipw7V2dEdJ6o7aMwQREY0RsajV1NjqUEOBl4DvR8TiiPhuROwO9E8praxsswro39FavQgnqVTa01WmlKYAU7awugdwGHBeSmlBRExik+GGlFKKiNSxSu2AJZVMJ16EWw4sTyktqMzPpDmQV0fEwMq5BgJNHa3VAJZUKtGOaWtSSquAFyLiwMqio4HHgNnAaZVlpwGzOlqrQxCSSqW+c28EPg+4OSJ2Bp4BzqC5cZ0REWcCzwOf6ejBDWBJpdKZ+ZtS+g1weBurju6M4xvAkkolCvQwsgEsqVSK9CiyASypVPwqsiRlYgcsSZn4PmBJyqRAX6U3gCWVi3dBSFImBRqBMIA708aNG7mo8VT26tuPyydey6qVK7j6ygm89uo69jvgIC667GvstNNOm+13+01TuefOWdTV1dF4/sUcNuLjGapXZ1u3ajn3TZ3YMv/ampUceuJfs1tDb37z45tZu+oFxl5yDX2GHNDm/ssfXcSCGd8hpXc4YOQYDhnT4QeudihF6oB9F0QnmjPzFgYPGdoyf+MNkxj3P05lyi2z6bnHHtzz4zs222fZc0/zwL13cd2NM7ni6uu4/pqr2LhxYy3LVhfpNWAw4y6bzLjLJjP20kn02HkXhnzkY+y59xCOavwSA/b/0Bb3feedjcyf/m1Gn/sVTv7yDTyz8H7WrlxWw+qLqy6qn3IzgDvJmqbVLJw/j9EnngxASolHFi9k5CePAeDoMWOZP+++zfZbMO8+jjxqDDvtvDMDBg5i4KB9eOrxJbUsXTWw8onfskefAfTs3Z+GgfvSa8DgrW6/5rkn2aPv3uzRdyD1PXZi2OFHsuy3v6xRtcXWWS9kr0mtuQsoi3+bfDVnnHUBddH8n/TVdWvp2XMP6ns0j/L07tefl9ds/ta6l9e8RJ9+A1rm+/Tt1+Z2KrZnF93P0I+Oqnr79WtfZvc9+7TM77ZnH15f+3IXVFY+nfU2tFrocABHxBlbWdfylvkpU7b0ruPyePihB+jVsBf7H3hw7lLUDW3c8DbLHlnA0MM+kbuUHUKROuDtuQh3JfD9tlZs8pb59OSq9dtxmu7v8SW/4eGH7udXC+bx1ltvsf711/m3b13NH//4Ghs3bKC+Rw9eblpN7z79Ntu3d5++rGla1TK/5qWmNrdTcS1/dBG9992PXd9f/cdzd2vozet/WNMyv/4Pa9i9oXdXlFc6+WO1elvtgCPikS1Mv2M7voNUNqc1ns+NM+9i6m13cvGXJ3LIYR/ln//1f3LIRw7nwft/BsDcu+bw5yNHbbbviJGjeODeu3j7rbdYtXIFLy5fxvCDtnxxRsXz7ML7GXb4J9u1T58hB/Bq04u8tmYVGze8zTOLHmCfQ47oogpLpkBjENvqgPsDY4A/bLI8gIe6pKISOf2sC/hfV07gpqnfZtj+BzL6hJMAWPDgfTz1xGN8/syzGTJ0Pz7x30Zz9ml/RX19PWddOIH6+vq8havTvP2fb/LiE4v5+KnntSx7/jcPMf+263nzj+u457or2GvwMMac/zXWr32ZeTdNYvS5X6Guvp4jxv8Dd3/rS6R33mH4x0ez595DMv4mxdEdhhaqFSlt+XtyETEV+H5KaV4b625JKX2
uinOUfghC7XPAgN0AmHjv05krUXcy4aj9oBP60oXPrKv6I5kfHdYra1pvtQNOKZ25lXXVhK8k1VZxGmCfhJNULkV6Es4AllQqBRoCNoAllUuB8tcAllQuUaAW2ACWVCoFyl8DWFK5FCh/DWBJJVOgBDaAJZWKt6FJUiaOAUtSJgawJGXiEIQkZVKkDthPEkkqlc5+HXBE1EfE4oj4UWV+aEQsiIilEXFbROzc0VoNYEnl0vkvZL8AeLzV/DeAa1JK+9P8rvQtvjVyWwxgSaXSmd+Ei4jBwAnAdyvzARwFzKxsMg04qcO1dnRHSeqO2tMAt/6AcGVq3ORw3wQuBt6pzPcG1qaUNlTmlwODOlqrF+EklUs7LsJt8gHh9x4m4kSgKaX0q4gY1RmlbcoAllQqnXgb2kjgUxFxPLAL8H5gEtAQET0qXfBgYEVHT+AQhKRSiah+2pqU0qUppcEppQ8A44F7U0qnAj8HPl3Z7DRgVkdrNYAllUoNvkp/CXBRRCyleUx4akcP5BCEpFLpiheyp5TuA+6r/PwMMKIzjmsASyqVIj0JZwBLKpUC5a8BLKlkCpTABrCkUvFtaJKUiWPAkpRJnQEsSbkUJ4ENYEml4hCEJGVSoPw1gCWVix2wJGXSFY8idxUDWFKpFCd+DWBJJVOgBtgAllQuPgknSbkUJ38NYEnlUqD8NYAllUs1n5vvLgxgSaVSoPz1m3CSlIsdsKRSKVIHbABLKhVvQ5OkTOyAJSkTA1iSMnEIQpIysQOWpEwKlL8GsKSSKVACR0qpq8/R5SeQVBrbHZ9vbqg+c3bpkTeuaxHAqoiIxpTSlNx1qHvx78WOy0eRa6sxdwHqlvx7sYMygCUpEwNYkjIxgGvLcT61xb8XOygvwklSJnbAkpSJASxJmRjANRIRx0bE7yNiaURMyF2P8ouI70VEU0QsyV2L8jCAayAi6oHrgOOAg4HPRsTBeatSN3AjcGzuIpSPAVwbI4ClKaVnUkpvAdOBcZlrUmYppQeAV3LXoXwM4NoYBLzQan55ZZmkHZgBLEmZGMC1sQLYp9X84MoySTswA7g2FgLDI2JoROwMjAdmZ65JUmYGcA2klDYA5wJ3AY8DM1JKj+atSrlFxK3AL4EDI2J5RJyZuybVlo8iS1ImdsCSlIkBLEmZGMCSlIkBLEmZGMCSlIkBLEmZGMCSlMn/B06XsN/LOI8cAAAAAElFTkSuQmCC\n",
2258 | "text/plain": [
2259 | ""
2260 | ]
2261 | },
2262 | "metadata": {
2263 | "needs_background": "light"
2264 | },
2265 | "output_type": "display_data"
2266 | }
2267 | ],
2268 | "source": [
2269 | "# K-Neighbors Classifier\n",
2270 | "knn = KNeighborsClassifier(n_neighbors=3)\n",
2271 | "classifier_report(\"K-Neighbors\", knn)"
2272 | ]
2273 | },
2274 | {
2275 | "cell_type": "markdown",
2276 | "metadata": {},
2277 | "source": [
2278 | "# Model selection\n",
2279 | "\n",
2280 | "The Decision Tree model performs well compared to the other models (Logistic Regression, Random Forest and K-Neighbors Classifier). The Decision Tree model has therefore been selected for further prediction and analysis.\n"
2281 | ]
2282 | },
2283 | {
2284 | "cell_type": "code",
2285 | "execution_count": 90,
2286 | "metadata": {},
2287 | "outputs": [
2288 | {
2289 | "data": {
2290 | "text/html": [
2291 | "\n",
2292 | "\n",
2305 | "
\n",
2306 | " \n",
2307 | " \n",
2308 | " \n",
2309 | " Survived \n",
2310 | " \n",
2311 | " \n",
2312 | " PassengerId \n",
2313 | " \n",
2314 | " \n",
2315 | " \n",
2316 | " \n",
2317 | " \n",
2318 | " 892 \n",
2319 | " 0 \n",
2320 | " \n",
2321 | " \n",
2322 | " 893 \n",
2323 | " 1 \n",
2324 | " \n",
2325 | " \n",
2326 | " 894 \n",
2327 | " 0 \n",
2328 | " \n",
2329 | " \n",
2330 | " 895 \n",
2331 | " 0 \n",
2332 | " \n",
2333 | " \n",
2334 | " 896 \n",
2335 | " 1 \n",
2336 | " \n",
2337 | " \n",
2338 | " ... \n",
2339 | " ... \n",
2340 | " \n",
2341 | " \n",
2342 | " 1305 \n",
2343 | " 0 \n",
2344 | " \n",
2345 | " \n",
2346 | " 1306 \n",
2347 | " 1 \n",
2348 | " \n",
2349 | " \n",
2350 | " 1307 \n",
2351 | " 0 \n",
2352 | " \n",
2353 | " \n",
2354 | " 1308 \n",
2355 | " 0 \n",
2356 | " \n",
2357 | " \n",
2358 | " 1309 \n",
2359 | " 0 \n",
2360 | " \n",
2361 | " \n",
2362 | "
\n",
2363 | "
418 rows × 1 columns
\n",
2364 | "
"
2365 | ],
2366 | "text/plain": [
2367 | " Survived\n",
2368 | "PassengerId \n",
2369 | "892 0\n",
2370 | "893 1\n",
2371 | "894 0\n",
2372 | "895 0\n",
2373 | "896 1\n",
2374 | "... ...\n",
2375 | "1305 0\n",
2376 | "1306 1\n",
2377 | "1307 0\n",
2378 | "1308 0\n",
2379 | "1309 0\n",
2380 | "\n",
2381 | "[418 rows x 1 columns]"
2382 | ]
2383 | },
2384 | "execution_count": 90,
2385 | "metadata": {},
2386 | "output_type": "execute_result"
2387 | }
2388 | ],
2389 | "source": [
2390 | "submission = pd.read_csv(\"gender_submission.csv\", index_col='PassengerId')\n",
2391 | "submission"
2392 | ]
2393 | },
2394 | {
2395 | "cell_type": "code",
2396 | "execution_count": 91,
2397 | "metadata": {},
2398 | "outputs": [],
2399 | "source": [
2400 | "rf = DecisionTreeClassifier(random_state=41)\n",
2401 | "rf.fit(X_train, y_train)\n",
2402 | "y_test_pred = rf.predict(X_test)"
2403 | ]
2404 | },
2405 | {
2406 | "cell_type": "code",
2407 | "execution_count": 92,
2408 | "metadata": {},
2409 | "outputs": [
2410 | {
2411 | "data": {
2412 | "text/plain": [
2413 | "0 0\n",
2414 | "1 1\n",
2415 | "2 0\n",
2416 | "3 0\n",
2417 | "4 1\n",
2418 | " ..\n",
2419 | "413 0\n",
2420 | "414 1\n",
2421 | "415 0\n",
2422 | "416 0\n",
2423 | "417 0\n",
2424 | "Name: Survived, Length: 418, dtype: int32"
2425 | ]
2426 | },
2427 | "execution_count": 92,
2428 | "metadata": {},
2429 | "output_type": "execute_result"
2430 | }
2431 | ],
2432 | "source": [
2433 | "test_survived = pd.Series(rf.predict(testDf), name = \"Survived\").astype(int)\n",
2434 | "test_survived"
2435 | ]
2436 | },
2437 | {
2438 | "cell_type": "code",
2439 | "execution_count": 93,
2440 | "metadata": {},
2441 | "outputs": [
2442 | {
2443 | "name": "stdout",
2444 | "output_type": "stream",
2445 | "text": [
2446 | "\n",
2447 | " precision recall f1-score support\n",
2448 | "\n",
2449 | " 0 0.87 0.92 0.90 266\n",
2450 | " 1 0.85 0.76 0.80 152\n",
2451 | "\n",
2452 | " accuracy 0.86 418\n",
2453 | " macro avg 0.86 0.84 0.85 418\n",
2454 | "weighted avg 0.86 0.86 0.86 418\n",
2455 | "\n",
2456 | "\n",
2457 | "\n",
2458 | "Confusion_matrix: \n",
2459 | "\n"
2460 | ]
2461 | },
2462 | {
2463 | "data": {
2464 | "image/png": "\n",
2465 | "text/plain": [
2466 | ""
2467 | ]
2468 | },
2469 | "metadata": {
2470 | "needs_background": "light"
2471 | },
2472 | "output_type": "display_data"
2473 | }
2474 | ],
2475 | "source": [
2476 | "y_baseline = submission[\"Survived\"]  # gender_submission.csv labels are a baseline, not ground truth; select the column explicitly so extra columns on `submission` cannot break the metrics\n",
2477 | "print(\"\\n\", metrics.classification_report(y_baseline, test_survived))\n",
2478 | "print(\"\\n\\nConfusion_matrix: \\n\")\n",
2479 | "cnf_matrix = metrics.confusion_matrix(y_baseline, test_survived)  # rows: baseline label, columns: model prediction\n",
2480 | "fig, ax = plt.subplots()\n",
2481 | "sns.heatmap(cnf_matrix, annot=True, ax=ax, fmt='d', cmap='Blues', linewidths=0.5);  # draw on the axes created above; cells are integer counts"
2482 | ]
2483 | },
2484 | {
2485 | "cell_type": "code",
2486 | "execution_count": 107,
2487 | "metadata": {},
2488 | "outputs": [],
2489 | "source": [
2490 | "results = pd.DataFrame({\"Survived\": test_survived.to_numpy()}, index=submission.index)  # model predictions keyed by PassengerId, one row per test passenger; assumes testDf rows are in the same order as gender_submission.csv\n",
2491 | "results.to_csv(\"titanic.csv\")  # keep the PassengerId index so the file has the same PassengerId,Survived layout as gender_submission.csv"
2492 | ]
2493 | },
2494 | {
2495 | "cell_type": "code",
2496 | "execution_count": 104,
2497 | "metadata": {},
2498 | "outputs": [],
2499 | "source": [
2500 | "submission['test_survived'] = test_survived.to_numpy()  # assign by position: test_survived carries a 0..n-1 index, so index-aligned assignment against the PassengerId index yields all NaN"
2501 | ]
2502 | },
2503 | {
2504 | "cell_type": "code",
2505 | "execution_count": 105,
2506 | "metadata": {},
2507 | "outputs": [
2508 | {
2509 | "data": {
2510 | "text/html": [
2511 | "\n",
2512 | "\n",
2525 | "
\n",
2526 | " \n",
2527 | " \n",
2528 | " \n",
2529 | " Survived \n",
2530 | " test_survived \n",
2531 | " \n",
2532 | " \n",
2533 | " PassengerId \n",
2534 | " \n",
2535 | " \n",
2536 | " \n",
2537 | " \n",
2538 | " \n",
2539 | " \n",
2540 | " 892 \n",
2541 | " 0 \n",
2542 | " NaN \n",
2543 | " \n",
2544 | " \n",
2545 | " 893 \n",
2546 | " 1 \n",
2547 | " NaN \n",
2548 | " \n",
2549 | " \n",
2550 | " 894 \n",
2551 | " 0 \n",
2552 | " NaN \n",
2553 | " \n",
2554 | " \n",
2555 | " 895 \n",
2556 | " 0 \n",
2557 | " NaN \n",
2558 | " \n",
2559 | " \n",
2560 | " 896 \n",
2561 | " 1 \n",
2562 | " NaN \n",
2563 | " \n",
2564 | " \n",
2565 | " ... \n",
2566 | " ... \n",
2567 | " ... \n",
2568 | " \n",
2569 | " \n",
2570 | " 1305 \n",
2571 | " 0 \n",
2572 | " NaN \n",
2573 | " \n",
2574 | " \n",
2575 | " 1306 \n",
2576 | " 1 \n",
2577 | " NaN \n",
2578 | " \n",
2579 | " \n",
2580 | " 1307 \n",
2581 | " 0 \n",
2582 | " NaN \n",
2583 | " \n",
2584 | " \n",
2585 | " 1308 \n",
2586 | " 0 \n",
2587 | " NaN \n",
2588 | " \n",
2589 | " \n",
2590 | " 1309 \n",
2591 | " 0 \n",
2592 | " NaN \n",
2593 | " \n",
2594 | " \n",
2595 | "
\n",
2596 | "
418 rows × 2 columns
\n",
2597 | "
"
2598 | ],
2599 | "text/plain": [
2600 | " Survived test_survived\n",
2601 | "PassengerId \n",
2602 | "892 0 NaN\n",
2603 | "893 1 NaN\n",
2604 | "894 0 NaN\n",
2605 | "895 0 NaN\n",
2606 | "896 1 NaN\n",
2607 | "... ... ...\n",
2608 | "1305 0 NaN\n",
2609 | "1306 1 NaN\n",
2610 | "1307 0 NaN\n",
2611 | "1308 0 NaN\n",
2612 | "1309 0 NaN\n",
2613 | "\n",
2614 | "[418 rows x 2 columns]"
2615 | ]
2616 | },
2617 | "execution_count": 105,
2618 | "metadata": {},
2619 | "output_type": "execute_result"
2620 | }
2621 | ],
2622 | "source": [
2623 | "submission"
2624 | ]
2625 | },
2626 | {
2627 | "cell_type": "code",
2628 | "execution_count": 106,
2629 | "metadata": {},
2630 | "outputs": [],
2631 | "source": [
2632 | "test_survived.to_csv(\"test_survived.csv\", index = False)"
2633 | ]
2634 | },
2635 | {
2636 | "cell_type": "code",
2637 | "execution_count": 98,
2638 | "metadata": {},
2639 | "outputs": [
2640 | {
2641 | "data": {
2642 | "text/plain": [
2643 | "pandas.core.series.Series"
2644 | ]
2645 | },
2646 | "execution_count": 98,
2647 | "metadata": {},
2648 | "output_type": "execute_result"
2649 | }
2650 | ],
2651 | "source": [
2652 | "type(test_survived)"
2653 | ]
2654 | },
2655 | {
2656 | "cell_type": "code",
2657 | "execution_count": 99,
2658 | "metadata": {},
2659 | "outputs": [
2660 | {
2661 | "data": {
2662 | "text/plain": [
2663 | "pandas.core.frame.DataFrame"
2664 | ]
2665 | },
2666 | "execution_count": 99,
2667 | "metadata": {},
2668 | "output_type": "execute_result"
2669 | }
2670 | ],
2671 | "source": [
2672 | "type(submission)"
2673 | ]
2674 | },
2675 | {
2676 | "cell_type": "code",
2677 | "execution_count": 88,
2678 | "metadata": {},
2679 | "outputs": [
2680 | {
2681 | "data": {
2682 | "text/plain": [
2683 | "836"
2684 | ]
2685 | },
2686 | "execution_count": 88,
2687 | "metadata": {},
2688 | "output_type": "execute_result"
2689 | }
2690 | ],
2691 | "source": [
2692 | "len(results)"
2693 | ]
2694 | },
2695 | {
2696 | "cell_type": "code",
2697 | "execution_count": 67,
2698 | "metadata": {},
2699 | "outputs": [
2700 | {
2701 | "data": {
2702 | "text/plain": [
2703 | "418"
2704 | ]
2705 | },
2706 | "execution_count": 67,
2707 | "metadata": {},
2708 | "output_type": "execute_result"
2709 | }
2710 | ],
2711 | "source": [
2712 | "len(submission)"
2713 | ]
2714 | },
2715 | {
2716 | "cell_type": "code",
2717 | "execution_count": 64,
2718 | "metadata": {},
2719 | "outputs": [
2720 | {
2721 | "data": {
2722 | "text/plain": [
2723 | "418"
2724 | ]
2725 | },
2726 | "execution_count": 64,
2727 | "metadata": {},
2728 | "output_type": "execute_result"
2729 | }
2730 | ],
2731 | "source": [
2732 | "len(test_survived)"
2733 | ]
2734 | },
2735 | {
2736 | "cell_type": "code",
2737 | "execution_count": 66,
2738 | "metadata": {},
2739 | "outputs": [
2740 | {
2741 | "data": {
2742 | "text/plain": [
2743 | "418"
2744 | ]
2745 | },
2746 | "execution_count": 66,
2747 | "metadata": {},
2748 | "output_type": "execute_result"
2749 | }
2750 | ],
2751 | "source": [
2752 | "len(testDf)"
2753 | ]
2754 | },
2755 | {
2756 | "cell_type": "code",
2757 | "execution_count": 48,
2758 | "metadata": {},
2759 | "outputs": [],
2760 | "source": [
2761 | "rf = DecisionTreeClassifier(random_state=41)\n",
2762 | "rf.fit(X_train, y_train)\n",
2763 | "y_test_pred = rf.predict(testDf)"
2764 | ]
2765 | },
2766 | {
2767 | "cell_type": "code",
2768 | "execution_count": 49,
2769 | "metadata": {},
2770 | "outputs": [
2771 | {
2772 | "data": {
2773 | "text/plain": [
2774 | "array([0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0,\n",
2775 | " 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1,\n",
2776 | " 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,\n",
2777 | " 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,\n",
2778 | " 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,\n",
2779 | " 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,\n",
2780 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1,\n",
2781 | " 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,\n",
2782 | " 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
2783 | " 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,\n",
2784 | " 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1,\n",
2785 | " 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,\n",
2786 | " 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0,\n",
2787 | " 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,\n",
2788 | " 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,\n",
2789 | " 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0,\n",
2790 | " 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0,\n",
2791 | " 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1,\n",
2792 | " 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0],\n",
2793 | " dtype=int64)"
2794 | ]
2795 | },
2796 | "execution_count": 49,
2797 | "metadata": {},
2798 | "output_type": "execute_result"
2799 | }
2800 | ],
2801 | "source": [
2802 | "y_test_pred"
2803 | ]
2804 | },
2805 | {
2806 | "cell_type": "code",
2807 | "execution_count": 50,
2808 | "metadata": {},
2809 | "outputs": [],
2810 | "source": [
2811 | "submission = pd.read_csv(\"gender_submission.csv\", index_col='PassengerId')\n",
2812 | "submission"
2813 | ]
2814 | },
2815 | {
2816 | "cell_type": "code",
2817 | "execution_count": 52,
2818 | "metadata": {},
2819 | "outputs": [
2820 | {
2821 | "name": "stdout",
2822 | "output_type": "stream",
2823 | "text": [
2824 | "\n",
2825 | " precision recall f1-score support\n",
2826 | "\n",
2827 | " 0 0.87 0.92 0.90 266\n",
2828 | " 1 0.85 0.76 0.80 152\n",
2829 | "\n",
2830 | " accuracy 0.86 418\n",
2831 | " macro avg 0.86 0.84 0.85 418\n",
2832 | "weighted avg 0.86 0.86 0.86 418\n",
2833 | "\n",
2834 | "\n",
2835 | "\n",
2836 | "Confusion_matrix: \n",
2837 | "\n"
2838 | ]
2839 | },
2840 | {
2841 | "data": {
2842 | "image/png": "\n",
2843 | "text/plain": [
2844 | ""
2845 | ]
2846 | },
2847 | "metadata": {
2848 | "needs_background": "light"
2849 | },
2850 | "output_type": "display_data"
2851 | }
2852 | ],
2853 | "source": [
2854 | "y_baseline = submission[\"Survived\"]  # gender_submission.csv labels are a baseline, not ground truth; select the column explicitly so extra columns on `submission` cannot break the metrics\n",
2855 | "print(\"\\n\", metrics.classification_report(y_baseline, y_test_pred))\n",
2856 | "print(\"\\n\\nConfusion_matrix: \\n\")\n",
2857 | "cnf_matrix = metrics.confusion_matrix(y_baseline, y_test_pred)  # rows: baseline label, columns: model prediction\n",
2858 | "fig, ax = plt.subplots()\n",
2859 | "sns.heatmap(cnf_matrix, annot=True, ax=ax, fmt='d', cmap='Blues', linewidths=0.5);  # draw on the axes created above; cells are integer counts"
2860 | ]
2861 | },
2862 | {
2863 | "cell_type": "markdown",
2864 | "metadata": {},
2865 | "source": [
2866 | "# Conclusion\n",
2867 | "\n",
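2868 | "The Decision Tree predictions agree with the labels in gender_submission.csv for 86% of the test passengers. That file is an example submission that assumes all and only female passengers survive, not the ground truth, so this figure measures agreement with the gender baseline rather than true accuracy on the test set.\n",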
2869 | "\n",
2870 | "The training set should be used to build machine learning models. The test set should be used to see how well the model performs on unseen data.\n",
2871 | "\n",
2872 | "For the test set, the ground truth is not provided for each passenger; the challenge is to predict these outcomes. For each passenger in the test set, the trained model is used to predict whether or not they survived the sinking of the Titanic.\n"
2873 | ]
2874 | }
2875 | ],
2876 | "metadata": {
2877 | "kernelspec": {
2878 | "display_name": "Python 3 (ipykernel)",
2879 | "language": "python",
2880 | "name": "python3"
2881 | },
2882 | "language_info": {
2883 | "codemirror_mode": {
2884 | "name": "ipython",
2885 | "version": 3
2886 | },
2887 | "file_extension": ".py",
2888 | "mimetype": "text/x-python",
2889 | "name": "python",
2890 | "nbconvert_exporter": "python",
2891 | "pygments_lexer": "ipython3",
2892 | "version": "3.11.5"
2893 | }
2894 | },
2895 | "nbformat": 4,
2896 | "nbformat_minor": 4
2897 | }
2898 |
--------------------------------------------------------------------------------