├── README
├── titanic_video1.ipynb
├── titanic_video2.ipynb
├── titanic_video3_4_validacao.ipynb
├── titanic_video5.ipynb
└── titanic_video6_final.ipynb
/README:
--------------------------------------------------------------------------------
1 | Notebooks Jupyter de apoio à playlist de vídeos tutoriais sobre Machine Learning usando os dados do Titanic:
2 |
3 | https://www.youtube.com/playlist?list=PLwnip85KhroW8Q1JSNbgl06iNPeC0SDkx
--------------------------------------------------------------------------------
/titanic_video1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Para quem tiver curiosidade de saber como gerar uma submissão igual ao arquivo gender_submission.csv, este é o código."
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import pandas as pd"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 12,
22 | "metadata": {},
23 | "outputs": [],
24 | "source": [
25 | "data = pd.read_csv(\"test.csv\")"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 13,
31 | "metadata": {},
32 | "outputs": [
33 | {
34 | "data": {
35 | "text/html": [
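36 | "<div>\n",
37 | "<style scoped>\n",
38 | "    .dataframe tbody tr th:only-of-type {\n",
39 | "        vertical-align: middle;\n",
40 | "    }\n",
41 | "\n",
42 | "    .dataframe tbody tr th {\n",
43 | "        vertical-align: top;\n",
44 | "    }\n",
45 | "\n",
46 | "    .dataframe thead th {\n",
47 | "        text-align: right;\n",
48 | "    }\n",
49 | "</style>\n",
50 | "<table border=\"1\" class=\"dataframe\">\n",
51 | "  <thead>\n",
52 | "    <tr style=\"text-align: right;\">\n",
53 | "      <th></th>\n",
54 | "      <th>PassengerId</th>\n",
55 | "      <th>Pclass</th>\n",
56 | "      <th>Name</th>\n",
57 | "      <th>Sex</th>\n",
58 | "      <th>Age</th>\n",
59 | "      <th>SibSp</th>\n",
60 | "      <th>Parch</th>\n",
61 | "      <th>Ticket</th>\n",
62 | "      <th>Fare</th>\n",
63 | "      <th>Cabin</th>\n",
64 | "      <th>Embarked</th>\n",
65 | "    </tr>\n",
66 | "  </thead>\n",
67 | "  <tbody>\n",
68 | "    <tr>\n",
69 | "      <th>0</th>\n",
70 | "      <td>892</td>\n",
71 | "      <td>3</td>\n",
72 | "      <td>Kelly, Mr. James</td>\n",
73 | "      <td>male</td>\n",
74 | "      <td>34.5</td>\n",
75 | "      <td>0</td>\n",
76 | "      <td>0</td>\n",
77 | "      <td>330911</td>\n",
78 | "      <td>7.8292</td>\n",
79 | "      <td>NaN</td>\n",
80 | "      <td>Q</td>\n",
81 | "    </tr>\n",
82 | "    <tr>\n",
83 | "      <th>1</th>\n",
84 | "      <td>893</td>\n",
85 | "      <td>3</td>\n",
86 | "      <td>Wilkes, Mrs. James (Ellen Needs)</td>\n",
87 | "      <td>female</td>\n",
88 | "      <td>47.0</td>\n",
89 | "      <td>1</td>\n",
90 | "      <td>0</td>\n",
91 | "      <td>363272</td>\n",
92 | "      <td>7.0000</td>\n",
93 | "      <td>NaN</td>\n",
94 | "      <td>S</td>\n",
95 | "    </tr>\n",
96 | "    <tr>\n",
97 | "      <th>2</th>\n",
98 | "      <td>894</td>\n",
99 | "      <td>2</td>\n",
100 | "      <td>Myles, Mr. Thomas Francis</td>\n",
101 | "      <td>male</td>\n",
102 | "      <td>62.0</td>\n",
103 | "      <td>0</td>\n",
104 | "      <td>0</td>\n",
105 | "      <td>240276</td>\n",
106 | "      <td>9.6875</td>\n",
107 | "      <td>NaN</td>\n",
108 | "      <td>Q</td>\n",
109 | "    </tr>\n",
110 | "    <tr>\n",
111 | "      <th>3</th>\n",
112 | "      <td>895</td>\n",
113 | "      <td>3</td>\n",
114 | "      <td>Wirz, Mr. Albert</td>\n",
115 | "      <td>male</td>\n",
116 | "      <td>27.0</td>\n",
117 | "      <td>0</td>\n",
118 | "      <td>0</td>\n",
119 | "      <td>315154</td>\n",
120 | "      <td>8.6625</td>\n",
121 | "      <td>NaN</td>\n",
122 | "      <td>S</td>\n",
123 | "    </tr>\n",
124 | "    <tr>\n",
125 | "      <th>4</th>\n",
126 | "      <td>896</td>\n",
127 | "      <td>3</td>\n",
128 | "      <td>Hirvonen, Mrs. Alexander (Helga E Lindqvist)</td>\n",
129 | "      <td>female</td>\n",
130 | "      <td>22.0</td>\n",
131 | "      <td>1</td>\n",
132 | "      <td>1</td>\n",
133 | "      <td>3101298</td>\n",
134 | "      <td>12.2875</td>\n",
135 | "      <td>NaN</td>\n",
136 | "      <td>S</td>\n",
137 | "    </tr>\n",
138 | "  </tbody>\n",
139 | "</table>\n",
140 | "</div>"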
141 | ],
142 | "text/plain": [
143 | " PassengerId Pclass Name Sex \\\n",
144 | "0 892 3 Kelly, Mr. James male \n",
145 | "1 893 3 Wilkes, Mrs. James (Ellen Needs) female \n",
146 | "2 894 2 Myles, Mr. Thomas Francis male \n",
147 | "3 895 3 Wirz, Mr. Albert male \n",
148 | "4 896 3 Hirvonen, Mrs. Alexander (Helga E Lindqvist) female \n",
149 | "\n",
150 | " Age SibSp Parch Ticket Fare Cabin Embarked \n",
151 | "0 34.5 0 0 330911 7.8292 NaN Q \n",
152 | "1 47.0 1 0 363272 7.0000 NaN S \n",
153 | "2 62.0 0 0 240276 9.6875 NaN Q \n",
154 | "3 27.0 0 0 315154 8.6625 NaN S \n",
155 | "4 22.0 1 1 3101298 12.2875 NaN S "
156 | ]
157 | },
158 | "execution_count": 13,
159 | "metadata": {},
160 | "output_type": "execute_result"
161 | }
162 | ],
163 | "source": [
164 | "data.head()"
165 | ]
166 | },
167 | {
168 | "cell_type": "code",
169 | "execution_count": 14,
170 | "metadata": {},
171 | "outputs": [
172 | {
173 | "data": {
174 | "text/plain": [
175 | "0 0\n",
176 | "1 1\n",
177 | "2 0\n",
178 | "3 0\n",
179 | "4 1\n",
180 | "Name: Sex, dtype: int64"
181 | ]
182 | },
183 | "execution_count": 14,
184 | "metadata": {},
185 | "output_type": "execute_result"
186 | }
187 | ],
188 | "source": [
189 | "e_feminino = (data['Sex'] == 'female').astype(int)\n",
190 | "e_feminino.head()"
191 | ]
192 | },
193 | {
194 | "cell_type": "code",
195 | "execution_count": 15,
196 | "metadata": {},
197 | "outputs": [
198 | {
199 | "data": {
200 | "text/plain": [
201 | "PassengerId\n",
202 | "892 0\n",
203 | "893 1\n",
204 | "894 0\n",
205 | "895 0\n",
206 | "896 1\n",
207 | "Name: Sex, dtype: int64"
208 | ]
209 | },
210 | "execution_count": 15,
211 | "metadata": {},
212 | "output_type": "execute_result"
213 | }
214 | ],
215 | "source": [
216 | "e_feminino.index = data['PassengerId']\n",
217 | "e_feminino.head()"
218 | ]
219 | },
220 | {
221 | "cell_type": "code",
222 | "execution_count": 16,
223 | "metadata": {},
224 | "outputs": [
225 | {
226 | "data": {
227 | "text/plain": [
228 | "PassengerId\n",
229 | "892 0\n",
230 | "893 1\n",
231 | "894 0\n",
232 | "895 0\n",
233 | "896 1\n",
234 | "Name: Survived, dtype: int64"
235 | ]
236 | },
237 | "execution_count": 16,
238 | "metadata": {},
239 | "output_type": "execute_result"
240 | }
241 | ],
242 | "source": [
243 | "e_feminino.name = 'Survived'\n",
244 | "e_feminino.head()"
245 | ]
246 | },
247 | {
248 | "cell_type": "code",
249 | "execution_count": 17,
250 | "metadata": {},
251 | "outputs": [],
252 | "source": [
253 | "e_feminino.to_csv('gender_submission.csv', header=True)"
254 | ]
255 | },
256 | {
257 | "cell_type": "code",
258 | "execution_count": 18,
259 | "metadata": {},
260 | "outputs": [
261 | {
262 | "name": "stdout",
263 | "output_type": "stream",
264 | "text": [
265 | "PassengerId,Survived\n",
266 | "892,0\n",
267 | "893,1\n",
268 | "894,0\n",
269 | "895,0\n",
270 | "896,1\n",
271 | "897,0\n",
272 | "898,1\n",
273 | "899,0\n",
274 | "900,1\n"
275 | ]
276 | }
277 | ],
278 | "source": [
279 | "!head -n10 gender_submission.csv"
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "execution_count": null,
285 | "metadata": {},
286 | "outputs": [],
287 | "source": []
288 | }
289 | ],
290 | "metadata": {
291 | "kernelspec": {
292 | "display_name": "Python 3",
293 | "language": "python",
294 | "name": "python3"
295 | },
296 | "language_info": {
297 | "codemirror_mode": {
298 | "name": "ipython",
299 | "version": 3
300 | },
301 | "file_extension": ".py",
302 | "mimetype": "text/x-python",
303 | "name": "python",
304 | "nbconvert_exporter": "python",
305 | "pygments_lexer": "ipython3",
306 | "version": "3.7.3"
307 | }
308 | },
309 | "nbformat": 4,
310 | "nbformat_minor": 4
311 | }
312 |
--------------------------------------------------------------------------------
/titanic_video2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 72,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 73,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "train = pd.read_csv(\"train.csv\")\n",
20 | "test = pd.read_csv(\"test.csv\")"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 74,
26 | "metadata": {},
27 | "outputs": [
28 | {
29 | "data": {
30 | "text/html": [
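31 | "<div>\n",
32 | "<style scoped>\n",
33 | "    .dataframe tbody tr th:only-of-type {\n",
34 | "        vertical-align: middle;\n",
35 | "    }\n",
36 | "\n",
37 | "    .dataframe tbody tr th {\n",
38 | "        vertical-align: top;\n",
39 | "    }\n",
40 | "\n",
41 | "    .dataframe thead th {\n",
42 | "        text-align: right;\n",
43 | "    }\n",
44 | "</style>\n",
45 | "<table border=\"1\" class=\"dataframe\">\n",
46 | "  <thead>\n",
47 | "    <tr style=\"text-align: right;\">\n",
48 | "      <th></th>\n",
49 | "      <th>PassengerId</th>\n",
50 | "      <th>Survived</th>\n",
51 | "      <th>Pclass</th>\n",
52 | "      <th>Name</th>\n",
53 | "      <th>Sex</th>\n",
54 | "      <th>Age</th>\n",
55 | "      <th>SibSp</th>\n",
56 | "      <th>Parch</th>\n",
57 | "      <th>Ticket</th>\n",
58 | "      <th>Fare</th>\n",
59 | "      <th>Cabin</th>\n",
60 | "      <th>Embarked</th>\n",
61 | "    </tr>\n",
62 | "  </thead>\n",
63 | "  <tbody>\n",
64 | "    <tr>\n",
65 | "      <th>0</th>\n",
66 | "      <td>1</td>\n",
67 | "      <td>0</td>\n",
68 | "      <td>3</td>\n",
69 | "      <td>Braund, Mr. Owen Harris</td>\n",
70 | "      <td>male</td>\n",
71 | "      <td>22.0</td>\n",
72 | "      <td>1</td>\n",
73 | "      <td>0</td>\n",
74 | "      <td>A/5 21171</td>\n",
75 | "      <td>7.2500</td>\n",
76 | "      <td>NaN</td>\n",
77 | "      <td>S</td>\n",
78 | "    </tr>\n",
79 | "    <tr>\n",
80 | "      <th>1</th>\n",
81 | "      <td>2</td>\n",
82 | "      <td>1</td>\n",
83 | "      <td>1</td>\n",
84 | "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
85 | "      <td>female</td>\n",
86 | "      <td>38.0</td>\n",
87 | "      <td>1</td>\n",
88 | "      <td>0</td>\n",
89 | "      <td>PC 17599</td>\n",
90 | "      <td>71.2833</td>\n",
91 | "      <td>C85</td>\n",
92 | "      <td>C</td>\n",
93 | "    </tr>\n",
94 | "    <tr>\n",
95 | "      <th>2</th>\n",
96 | "      <td>3</td>\n",
97 | "      <td>1</td>\n",
98 | "      <td>3</td>\n",
99 | "      <td>Heikkinen, Miss. Laina</td>\n",
100 | "      <td>female</td>\n",
101 | "      <td>26.0</td>\n",
102 | "      <td>0</td>\n",
103 | "      <td>0</td>\n",
104 | "      <td>STON/O2. 3101282</td>\n",
105 | "      <td>7.9250</td>\n",
106 | "      <td>NaN</td>\n",
107 | "      <td>S</td>\n",
108 | "    </tr>\n",
109 | "    <tr>\n",
110 | "      <th>3</th>\n",
111 | "      <td>4</td>\n",
112 | "      <td>1</td>\n",
113 | "      <td>1</td>\n",
114 | "      <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
115 | "      <td>female</td>\n",
116 | "      <td>35.0</td>\n",
117 | "      <td>1</td>\n",
118 | "      <td>0</td>\n",
119 | "      <td>113803</td>\n",
120 | "      <td>53.1000</td>\n",
121 | "      <td>C123</td>\n",
122 | "      <td>S</td>\n",
123 | "    </tr>\n",
124 | "    <tr>\n",
125 | "      <th>4</th>\n",
126 | "      <td>5</td>\n",
127 | "      <td>0</td>\n",
128 | "      <td>3</td>\n",
129 | "      <td>Allen, Mr. William Henry</td>\n",
130 | "      <td>male</td>\n",
131 | "      <td>35.0</td>\n",
132 | "      <td>0</td>\n",
133 | "      <td>0</td>\n",
134 | "      <td>373450</td>\n",
135 | "      <td>8.0500</td>\n",
136 | "      <td>NaN</td>\n",
137 | "      <td>S</td>\n",
138 | "    </tr>\n",
139 | "  </tbody>\n",
140 | "</table>\n",
141 | "</div>"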
142 | ],
143 | "text/plain": [
144 | " PassengerId Survived Pclass \\\n",
145 | "0 1 0 3 \n",
146 | "1 2 1 1 \n",
147 | "2 3 1 3 \n",
148 | "3 4 1 1 \n",
149 | "4 5 0 3 \n",
150 | "\n",
151 | " Name Sex Age SibSp \\\n",
152 | "0 Braund, Mr. Owen Harris male 22.0 1 \n",
153 | "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
154 | "2 Heikkinen, Miss. Laina female 26.0 0 \n",
155 | "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
156 | "4 Allen, Mr. William Henry male 35.0 0 \n",
157 | "\n",
158 | " Parch Ticket Fare Cabin Embarked \n",
159 | "0 0 A/5 21171 7.2500 NaN S \n",
160 | "1 0 PC 17599 71.2833 C85 C \n",
161 | "2 0 STON/O2. 3101282 7.9250 NaN S \n",
162 | "3 0 113803 53.1000 C123 S \n",
163 | "4 0 373450 8.0500 NaN S "
164 | ]
165 | },
166 | "execution_count": 74,
167 | "metadata": {},
168 | "output_type": "execute_result"
169 | }
170 | ],
171 | "source": [
172 | "train.head()"
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": 75,
178 | "metadata": {},
179 | "outputs": [],
180 | "source": [
181 | "from sklearn.ensemble import RandomForestClassifier"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": 76,
187 | "metadata": {},
188 | "outputs": [],
189 | "source": [
190 | "modelo = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=0)\n"
191 | ]
192 | },
193 | {
194 | "cell_type": "code",
195 | "execution_count": 77,
196 | "metadata": {},
197 | "outputs": [
198 | {
199 | "data": {
200 | "text/plain": [
201 | "male 577\n",
202 | "female 314\n",
203 | "Name: Sex, dtype: int64"
204 | ]
205 | },
206 | "execution_count": 77,
207 | "metadata": {},
208 | "output_type": "execute_result"
209 | }
210 | ],
211 | "source": [
212 | "train['Sex'].value_counts()"
213 | ]
214 | },
215 | {
216 | "cell_type": "code",
217 | "execution_count": 78,
218 | "metadata": {},
219 | "outputs": [],
220 | "source": [
221 | "def transformar_sexo(valor):\n",
222 | " if valor == 'female':\n",
223 | " return 1\n",
224 | " else:\n",
225 | " return 0\n",
226 | " \n",
227 | "train['Sex_binario'] = train['Sex'].map(transformar_sexo)\n",
228 | " "
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": 79,
234 | "metadata": {},
235 | "outputs": [
236 | {
237 | "data": {
238 | "text/html": [
239 | "<div>\n",
240 | "<style scoped>\n",
241 | "    .dataframe tbody tr th:only-of-type {\n",
242 | "        vertical-align: middle;\n",
243 | "    }\n",
244 | "\n",
245 | "    .dataframe tbody tr th {\n",
246 | "        vertical-align: top;\n",
247 | "    }\n",
248 | "\n",
249 | "    .dataframe thead th {\n",
250 | "        text-align: right;\n",
251 | "    }\n",
252 | "</style>\n",
253 | "<table border=\"1\" class=\"dataframe\">\n",
254 | "  <thead>\n",
255 | "    <tr style=\"text-align: right;\">\n",
256 | "      <th></th>\n",
257 | "      <th>PassengerId</th>\n",
258 | "      <th>Survived</th>\n",
259 | "      <th>Pclass</th>\n",
260 | "      <th>Name</th>\n",
261 | "      <th>Sex</th>\n",
262 | "      <th>Age</th>\n",
263 | "      <th>SibSp</th>\n",
264 | "      <th>Parch</th>\n",
265 | "      <th>Ticket</th>\n",
266 | "      <th>Fare</th>\n",
267 | "      <th>Cabin</th>\n",
268 | "      <th>Embarked</th>\n",
269 | "      <th>Sex_binario</th>\n",
270 | "    </tr>\n",
271 | "  </thead>\n",
272 | "  <tbody>\n",
273 | "    <tr>\n",
274 | "      <th>0</th>\n",
275 | "      <td>1</td>\n",
276 | "      <td>0</td>\n",
277 | "      <td>3</td>\n",
278 | "      <td>Braund, Mr. Owen Harris</td>\n",
279 | "      <td>male</td>\n",
280 | "      <td>22.0</td>\n",
281 | "      <td>1</td>\n",
282 | "      <td>0</td>\n",
283 | "      <td>A/5 21171</td>\n",
284 | "      <td>7.2500</td>\n",
285 | "      <td>NaN</td>\n",
286 | "      <td>S</td>\n",
287 | "      <td>0</td>\n",
288 | "    </tr>\n",
289 | "    <tr>\n",
290 | "      <th>1</th>\n",
291 | "      <td>2</td>\n",
292 | "      <td>1</td>\n",
293 | "      <td>1</td>\n",
294 | "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
295 | "      <td>female</td>\n",
296 | "      <td>38.0</td>\n",
297 | "      <td>1</td>\n",
298 | "      <td>0</td>\n",
299 | "      <td>PC 17599</td>\n",
300 | "      <td>71.2833</td>\n",
301 | "      <td>C85</td>\n",
302 | "      <td>C</td>\n",
303 | "      <td>1</td>\n",
304 | "    </tr>\n",
305 | "    <tr>\n",
306 | "      <th>2</th>\n",
307 | "      <td>3</td>\n",
308 | "      <td>1</td>\n",
309 | "      <td>3</td>\n",
310 | "      <td>Heikkinen, Miss. Laina</td>\n",
311 | "      <td>female</td>\n",
312 | "      <td>26.0</td>\n",
313 | "      <td>0</td>\n",
314 | "      <td>0</td>\n",
315 | "      <td>STON/O2. 3101282</td>\n",
316 | "      <td>7.9250</td>\n",
317 | "      <td>NaN</td>\n",
318 | "      <td>S</td>\n",
319 | "      <td>1</td>\n",
320 | "    </tr>\n",
321 | "    <tr>\n",
322 | "      <th>3</th>\n",
323 | "      <td>4</td>\n",
324 | "      <td>1</td>\n",
325 | "      <td>1</td>\n",
326 | "      <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
327 | "      <td>female</td>\n",
328 | "      <td>35.0</td>\n",
329 | "      <td>1</td>\n",
330 | "      <td>0</td>\n",
331 | "      <td>113803</td>\n",
332 | "      <td>53.1000</td>\n",
333 | "      <td>C123</td>\n",
334 | "      <td>S</td>\n",
335 | "      <td>1</td>\n",
336 | "    </tr>\n",
337 | "    <tr>\n",
338 | "      <th>4</th>\n",
339 | "      <td>5</td>\n",
340 | "      <td>0</td>\n",
341 | "      <td>3</td>\n",
342 | "      <td>Allen, Mr. William Henry</td>\n",
343 | "      <td>male</td>\n",
344 | "      <td>35.0</td>\n",
345 | "      <td>0</td>\n",
346 | "      <td>0</td>\n",
347 | "      <td>373450</td>\n",
348 | "      <td>8.0500</td>\n",
349 | "      <td>NaN</td>\n",
350 | "      <td>S</td>\n",
351 | "      <td>0</td>\n",
352 | "    </tr>\n",
353 | "  </tbody>\n",
354 | "</table>\n",
355 | "</div>"
356 | ],
357 | "text/plain": [
358 | " PassengerId Survived Pclass \\\n",
359 | "0 1 0 3 \n",
360 | "1 2 1 1 \n",
361 | "2 3 1 3 \n",
362 | "3 4 1 1 \n",
363 | "4 5 0 3 \n",
364 | "\n",
365 | " Name Sex Age SibSp \\\n",
366 | "0 Braund, Mr. Owen Harris male 22.0 1 \n",
367 | "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
368 | "2 Heikkinen, Miss. Laina female 26.0 0 \n",
369 | "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
370 | "4 Allen, Mr. William Henry male 35.0 0 \n",
371 | "\n",
372 | " Parch Ticket Fare Cabin Embarked Sex_binario \n",
373 | "0 0 A/5 21171 7.2500 NaN S 0 \n",
374 | "1 0 PC 17599 71.2833 C85 C 1 \n",
375 | "2 0 STON/O2. 3101282 7.9250 NaN S 1 \n",
376 | "3 0 113803 53.1000 C123 S 1 \n",
377 | "4 0 373450 8.0500 NaN S 0 "
378 | ]
379 | },
380 | "execution_count": 79,
381 | "metadata": {},
382 | "output_type": "execute_result"
383 | }
384 | ],
385 | "source": [
386 | "train.head()"
387 | ]
388 | },
389 | {
390 | "cell_type": "code",
391 | "execution_count": 80,
392 | "metadata": {},
393 | "outputs": [],
394 | "source": [
395 | "variaveis = ['Sex_binario', 'Age']"
396 | ]
397 | },
398 | {
399 | "cell_type": "code",
400 | "execution_count": 81,
401 | "metadata": {},
402 | "outputs": [],
403 | "source": [
404 | "X = train[variaveis]\n",
405 | "y = train['Survived']"
406 | ]
407 | },
408 | {
409 | "cell_type": "code",
410 | "execution_count": 82,
411 | "metadata": {},
412 | "outputs": [
413 | {
414 | "data": {
415 | "text/html": [
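416 | "<div>\n",
417 | "<style scoped>\n",
418 | "    .dataframe tbody tr th:only-of-type {\n",
419 | "        vertical-align: middle;\n",
420 | "    }\n",
421 | "\n",
422 | "    .dataframe tbody tr th {\n",
423 | "        vertical-align: top;\n",
424 | "    }\n",
425 | "\n",
426 | "    .dataframe thead th {\n",
427 | "        text-align: right;\n",
428 | "    }\n",
429 | "</style>\n",
430 | "<table border=\"1\" class=\"dataframe\">\n",
431 | "  <thead>\n",
432 | "    <tr style=\"text-align: right;\">\n",
433 | "      <th></th>\n",
434 | "      <th>Sex_binario</th>\n",
435 | "      <th>Age</th>\n",
436 | "    </tr>\n",
437 | "  </thead>\n",
438 | "  <tbody>\n",
439 | "    <tr>\n",
440 | "      <th>0</th>\n",
441 | "      <td>0</td>\n",
442 | "      <td>22.0</td>\n",
443 | "    </tr>\n",
444 | "    <tr>\n",
445 | "      <th>1</th>\n",
446 | "      <td>1</td>\n",
447 | "      <td>38.0</td>\n",
448 | "    </tr>\n",
449 | "    <tr>\n",
450 | "      <th>2</th>\n",
451 | "      <td>1</td>\n",
452 | "      <td>26.0</td>\n",
453 | "    </tr>\n",
454 | "    <tr>\n",
455 | "      <th>3</th>\n",
456 | "      <td>1</td>\n",
457 | "      <td>35.0</td>\n",
458 | "    </tr>\n",
459 | "    <tr>\n",
460 | "      <th>4</th>\n",
461 | "      <td>0</td>\n",
462 | "      <td>35.0</td>\n",
463 | "    </tr>\n",
464 | "  </tbody>\n",
465 | "</table>\n",
466 | "</div>"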
467 | ],
468 | "text/plain": [
469 | " Sex_binario Age\n",
470 | "0 0 22.0\n",
471 | "1 1 38.0\n",
472 | "2 1 26.0\n",
473 | "3 1 35.0\n",
474 | "4 0 35.0"
475 | ]
476 | },
477 | "execution_count": 82,
478 | "metadata": {},
479 | "output_type": "execute_result"
480 | }
481 | ],
482 | "source": [
483 | "X.head()"
484 | ]
485 | },
486 | {
487 | "cell_type": "code",
488 | "execution_count": 83,
489 | "metadata": {},
490 | "outputs": [
491 | {
492 | "data": {
493 | "text/plain": [
494 | "0 0\n",
495 | "1 1\n",
496 | "2 1\n",
497 | "3 1\n",
498 | "4 0\n",
499 | "Name: Survived, dtype: int64"
500 | ]
501 | },
502 | "execution_count": 83,
503 | "metadata": {},
504 | "output_type": "execute_result"
505 | }
506 | ],
507 | "source": [
508 | "y.head()"
509 | ]
510 | },
511 | {
512 | "cell_type": "code",
513 | "execution_count": 84,
514 | "metadata": {},
515 | "outputs": [
516 | {
517 | "ename": "ValueError",
518 | "evalue": "Input contains NaN, infinity or a value too large for dtype('float32').",
519 | "output_type": "error",
520 | "traceback": [
521 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
522 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
523 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmodelo\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
524 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/ensemble/forest.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 248\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[0;31m# Validate or convert input data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 250\u001b[0;31m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"csc\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mDTYPE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 251\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'csc'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mensure_2d\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 252\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msample_weight\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
525 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m 571\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mforce_all_finite\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 572\u001b[0m _assert_all_finite(array,\n\u001b[0;32m--> 573\u001b[0;31m allow_nan=force_all_finite == 'allow-nan')\n\u001b[0m\u001b[1;32m 574\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 575\u001b[0m \u001b[0mshape_repr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_shape_repr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
526 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36m_assert_all_finite\u001b[0;34m(X, allow_nan)\u001b[0m\n\u001b[1;32m 54\u001b[0m not allow_nan and not np.isfinite(X).all()):\n\u001b[1;32m 55\u001b[0m \u001b[0mtype_err\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'infinity'\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mallow_nan\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m'NaN, infinity'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 56\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg_err\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtype_err\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 57\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
527 | "\u001b[0;31mValueError\u001b[0m: Input contains NaN, infinity or a value too large for dtype('float32')."
528 | ]
529 | }
530 | ],
531 | "source": [
532 | "modelo.fit(X, y)"
533 | ]
534 | },
535 | {
536 | "cell_type": "code",
537 | "execution_count": 85,
538 | "metadata": {},
539 | "outputs": [],
540 | "source": [
541 | "X = X.fillna(-1)"
542 | ]
543 | },
544 | {
545 | "cell_type": "code",
546 | "execution_count": 86,
547 | "metadata": {},
548 | "outputs": [
549 | {
550 | "data": {
551 | "text/plain": [
552 | "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
553 | " max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
554 | " min_impurity_decrease=0.0, min_impurity_split=None,\n",
555 | " min_samples_leaf=1, min_samples_split=2,\n",
556 | " min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1,\n",
557 | " oob_score=False, random_state=0, verbose=0, warm_start=False)"
558 | ]
559 | },
560 | "execution_count": 86,
561 | "metadata": {},
562 | "output_type": "execute_result"
563 | }
564 | ],
565 | "source": [
566 | "modelo.fit(X, y)"
567 | ]
568 | },
569 | {
570 | "cell_type": "code",
571 | "execution_count": 87,
572 | "metadata": {},
573 | "outputs": [
574 | {
575 | "ename": "KeyError",
576 | "evalue": "\"['Sex_binario'] not in index\"",
577 | "output_type": "error",
578 | "traceback": [
579 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
580 | "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
581 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mX_prev\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtest\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvariaveis\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
582 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2932\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2933\u001b[0m indexer = self.loc._convert_to_indexer(key, axis=1,\n\u001b[0;32m-> 2934\u001b[0;31m raise_missing=True)\n\u001b[0m\u001b[1;32m 2935\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2936\u001b[0m \u001b[0;31m# take() does not accept boolean indexers\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
583 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_convert_to_indexer\u001b[0;34m(self, obj, axis, is_setter, raise_missing)\u001b[0m\n\u001b[1;32m 1352\u001b[0m kwargs = {'raise_missing': True if is_setter else\n\u001b[1;32m 1353\u001b[0m raise_missing}\n\u001b[0;32m-> 1354\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_listlike_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1355\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1356\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
584 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_get_listlike_indexer\u001b[0;34m(self, key, axis, raise_missing)\u001b[0m\n\u001b[1;32m 1159\u001b[0m self._validate_read_indexer(keyarr, indexer,\n\u001b[1;32m 1160\u001b[0m \u001b[0mo\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_axis_number\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1161\u001b[0;31m raise_missing=raise_missing)\n\u001b[0m\u001b[1;32m 1162\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mkeyarr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1163\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
585 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_validate_read_indexer\u001b[0;34m(self, key, indexer, axis, raise_missing)\u001b[0m\n\u001b[1;32m 1250\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'loc'\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mraise_missing\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1251\u001b[0m \u001b[0mnot_found\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0max\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1252\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"{} not in index\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnot_found\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1253\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1254\u001b[0m \u001b[0;31m# we skip the warning on Categorical/Interval\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
586 | "\u001b[0;31mKeyError\u001b[0m: \"['Sex_binario'] not in index\""
587 | ]
588 | }
589 | ],
590 | "source": [
591 | "X_prev = test[variaveis]"
592 | ]
593 | },
594 | {
595 | "cell_type": "code",
596 | "execution_count": 88,
597 | "metadata": {},
598 | "outputs": [],
599 | "source": [
600 | "test['Sex_binario'] = test['Sex'].map(transformar_sexo)"
601 | ]
602 | },
603 | {
604 | "cell_type": "code",
605 | "execution_count": 89,
606 | "metadata": {},
607 | "outputs": [
608 | {
609 | "data": {
610 | "text/html": [
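611 | "<div>\n",
612 | "<style scoped>\n",
613 | "    .dataframe tbody tr th:only-of-type {\n",
614 | "        vertical-align: middle;\n",
615 | "    }\n",
616 | "\n",
617 | "    .dataframe tbody tr th {\n",
618 | "        vertical-align: top;\n",
619 | "    }\n",
620 | "\n",
621 | "    .dataframe thead th {\n",
622 | "        text-align: right;\n",
623 | "    }\n",
624 | "</style>\n",
625 | "<table border=\"1\" class=\"dataframe\">\n",
626 | "  <thead>\n",
627 | "    <tr style=\"text-align: right;\">\n",
628 | "      <th></th>\n",
629 | "      <th>Sex_binario</th>\n",
630 | "      <th>Age</th>\n",
631 | "    </tr>\n",
632 | "  </thead>\n",
633 | "  <tbody>\n",
634 | "    <tr>\n",
635 | "      <th>0</th>\n",
636 | "      <td>0</td>\n",
637 | "      <td>34.5</td>\n",
638 | "    </tr>\n",
639 | "    <tr>\n",
640 | "      <th>1</th>\n",
641 | "      <td>1</td>\n",
642 | "      <td>47.0</td>\n",
643 | "    </tr>\n",
644 | "    <tr>\n",
645 | "      <th>2</th>\n",
646 | "      <td>0</td>\n",
647 | "      <td>62.0</td>\n",
648 | "    </tr>\n",
649 | "    <tr>\n",
650 | "      <th>3</th>\n",
651 | "      <td>0</td>\n",
652 | "      <td>27.0</td>\n",
653 | "    </tr>\n",
654 | "    <tr>\n",
655 | "      <th>4</th>\n",
656 | "      <td>1</td>\n",
657 | "      <td>22.0</td>\n",
658 | "    </tr>\n",
659 | "  </tbody>\n",
660 | "</table>\n",
661 | "</div>"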
662 | ],
663 | "text/plain": [
664 | " Sex_binario Age\n",
665 | "0 0 34.5\n",
666 | "1 1 47.0\n",
667 | "2 0 62.0\n",
668 | "3 0 27.0\n",
669 | "4 1 22.0"
670 | ]
671 | },
672 | "execution_count": 89,
673 | "metadata": {},
674 | "output_type": "execute_result"
675 | }
676 | ],
677 | "source": [
678 | "X_prev = test[variaveis]\n",
679 | "X_prev = X_prev.fillna(-1)\n",
680 | "X_prev.head()"
681 | ]
682 | },
683 | {
684 | "cell_type": "code",
685 | "execution_count": 90,
686 | "metadata": {},
687 | "outputs": [
688 | {
689 | "data": {
690 | "text/plain": [
691 | "array([0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1,\n",
692 | " 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,\n",
693 | " 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1,\n",
694 | " 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1,\n",
695 | " 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0,\n",
696 | " 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,\n",
697 | " 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,\n",
698 | " 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1,\n",
699 | " 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1,\n",
700 | " 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0,\n",
701 | " 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0,\n",
702 | " 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,\n",
703 | " 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,\n",
704 | " 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1,\n",
705 | " 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,\n",
706 | " 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0,\n",
707 | " 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0,\n",
708 | " 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1,\n",
709 | " 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0])"
710 | ]
711 | },
712 | "execution_count": 90,
713 | "metadata": {},
714 | "output_type": "execute_result"
715 | }
716 | ],
717 | "source": [
718 | "p = modelo.predict(X_prev)\n",
719 | "p"
720 | ]
721 | },
722 | {
723 | "cell_type": "code",
724 | "execution_count": 91,
725 | "metadata": {},
726 | "outputs": [
727 | {
728 | "data": {
729 | "text/html": [
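730 | "<div>\n",
731 | "<style scoped>\n",
732 | "    .dataframe tbody tr th:only-of-type {\n",
733 | "        vertical-align: middle;\n",
734 | "    }\n",
735 | "\n",
736 | "    .dataframe tbody tr th {\n",
737 | "        vertical-align: top;\n",
738 | "    }\n",
739 | "\n",
740 | "    .dataframe thead th {\n",
741 | "        text-align: right;\n",
742 | "    }\n",
743 | "</style>\n",
744 | "<table border=\"1\" class=\"dataframe\">\n",
745 | "  <thead>\n",
746 | "    <tr style=\"text-align: right;\">\n",
747 | "      <th></th>\n",
748 | "      <th>PassengerId</th>\n",
749 | "      <th>Pclass</th>\n",
750 | "      <th>Name</th>\n",
751 | "      <th>Sex</th>\n",
752 | "      <th>Age</th>\n",
753 | "      <th>SibSp</th>\n",
754 | "      <th>Parch</th>\n",
755 | "      <th>Ticket</th>\n",
756 | "      <th>Fare</th>\n",
757 | "      <th>Cabin</th>\n",
758 | "      <th>Embarked</th>\n",
759 | "      <th>Sex_binario</th>\n",
760 | "    </tr>\n",
761 | "  </thead>\n",
762 | "  <tbody>\n",
763 | "    <tr>\n",
764 | "      <th>0</th>\n",
765 | "      <td>892</td>\n",
766 | "      <td>3</td>\n",
767 | "      <td>Kelly, Mr. James</td>\n",
768 | "      <td>male</td>\n",
769 | "      <td>34.5</td>\n",
770 | "      <td>0</td>\n",
771 | "      <td>0</td>\n",
772 | "      <td>330911</td>\n",
773 | "      <td>7.8292</td>\n",
774 | "      <td>NaN</td>\n",
775 | "      <td>Q</td>\n",
776 | "      <td>0</td>\n",
777 | "    </tr>\n",
778 | "    <tr>\n",
779 | "      <th>1</th>\n",
780 | "      <td>893</td>\n",
781 | "      <td>3</td>\n",
782 | "      <td>Wilkes, Mrs. James (Ellen Needs)</td>\n",
783 | "      <td>female</td>\n",
784 | "      <td>47.0</td>\n",
785 | "      <td>1</td>\n",
786 | "      <td>0</td>\n",
787 | "      <td>363272</td>\n",
788 | "      <td>7.0000</td>\n",
789 | "      <td>NaN</td>\n",
790 | "      <td>S</td>\n",
791 | "      <td>1</td>\n",
792 | "    </tr>\n",
793 | "    <tr>\n",
794 | "      <th>2</th>\n",
795 | "      <td>894</td>\n",
796 | "      <td>2</td>\n",
797 | "      <td>Myles, Mr. Thomas Francis</td>\n",
798 | "      <td>male</td>\n",
799 | "      <td>62.0</td>\n",
800 | "      <td>0</td>\n",
801 | "      <td>0</td>\n",
802 | "      <td>240276</td>\n",
803 | "      <td>9.6875</td>\n",
804 | "      <td>NaN</td>\n",
805 | "      <td>Q</td>\n",
806 | "      <td>0</td>\n",
807 | "    </tr>\n",
808 | "    <tr>\n",
809 | "      <th>3</th>\n",
810 | "      <td>895</td>\n",
811 | "      <td>3</td>\n",
812 | "      <td>Wirz, Mr. Albert</td>\n",
813 | "      <td>male</td>\n",
814 | "      <td>27.0</td>\n",
815 | "      <td>0</td>\n",
816 | "      <td>0</td>\n",
817 | "      <td>315154</td>\n",
818 | "      <td>8.6625</td>\n",
819 | "      <td>NaN</td>\n",
820 | "      <td>S</td>\n",
821 | "      <td>0</td>\n",
822 | "    </tr>\n",
823 | "    <tr>\n",
824 | "      <th>4</th>\n",
825 | "      <td>896</td>\n",
826 | "      <td>3</td>\n",
827 | "      <td>Hirvonen, Mrs. Alexander (Helga E Lindqvist)</td>\n",
828 | "      <td>female</td>\n",
829 | "      <td>22.0</td>\n",
830 | "      <td>1</td>\n",
831 | "      <td>1</td>\n",
832 | "      <td>3101298</td>\n",
833 | "      <td>12.2875</td>\n",
834 | "      <td>NaN</td>\n",
835 | "      <td>S</td>\n",
836 | "      <td>1</td>\n",
837 | "    </tr>\n",
838 | "  </tbody>\n",
839 | "</table>\n",
840 | "</div>"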
841 | ],
842 | "text/plain": [
843 | " PassengerId Pclass Name Sex \\\n",
844 | "0 892 3 Kelly, Mr. James male \n",
845 | "1 893 3 Wilkes, Mrs. James (Ellen Needs) female \n",
846 | "2 894 2 Myles, Mr. Thomas Francis male \n",
847 | "3 895 3 Wirz, Mr. Albert male \n",
848 | "4 896 3 Hirvonen, Mrs. Alexander (Helga E Lindqvist) female \n",
849 | "\n",
850 | " Age SibSp Parch Ticket Fare Cabin Embarked Sex_binario \n",
851 | "0 34.5 0 0 330911 7.8292 NaN Q 0 \n",
852 | "1 47.0 1 0 363272 7.0000 NaN S 1 \n",
853 | "2 62.0 0 0 240276 9.6875 NaN Q 0 \n",
854 | "3 27.0 0 0 315154 8.6625 NaN S 0 \n",
855 | "4 22.0 1 1 3101298 12.2875 NaN S 1 "
856 | ]
857 | },
858 | "execution_count": 91,
859 | "metadata": {},
860 | "output_type": "execute_result"
861 | }
862 | ],
863 | "source": [
864 | "test.head()"
865 | ]
866 | },
867 | {
868 | "cell_type": "code",
869 | "execution_count": 92,
870 | "metadata": {},
871 | "outputs": [
872 | {
873 | "data": {
874 | "text/plain": [
875 | "(418,)"
876 | ]
877 | },
878 | "execution_count": 92,
879 | "metadata": {},
880 | "output_type": "execute_result"
881 | }
882 | ],
883 | "source": [
884 | "sub = pd.Series(p, index=test['PassengerId'], name='Survived')\n",
885 | "sub.shape"
886 | ]
887 | },
888 | {
889 | "cell_type": "code",
890 | "execution_count": 93,
891 | "metadata": {},
892 | "outputs": [],
893 | "source": [
894 | "sub.to_csv(\"primeiro_modelo.csv\", header=True)"
895 | ]
896 | },
897 | {
898 | "cell_type": "code",
899 | "execution_count": 94,
900 | "metadata": {},
901 | "outputs": [
902 | {
903 | "name": "stdout",
904 | "output_type": "stream",
905 | "text": [
906 | "PassengerId,Survived\n",
907 | "892,0\n",
908 | "893,1\n",
909 | "894,0\n",
910 | "895,1\n",
911 | "896,1\n",
912 | "897,0\n",
913 | "898,1\n",
914 | "899,0\n",
915 | "900,1\n"
916 | ]
917 | }
918 | ],
919 | "source": [
920 | "!head -n10 primeiro_modelo.csv"
921 | ]
922 | },
923 | {
924 | "cell_type": "code",
925 | "execution_count": null,
926 | "metadata": {},
927 | "outputs": [],
928 | "source": []
929 | }
930 | ],
931 | "metadata": {
932 | "kernelspec": {
933 | "display_name": "Python 3",
934 | "language": "python",
935 | "name": "python3"
936 | },
937 | "language_info": {
938 | "codemirror_mode": {
939 | "name": "ipython",
940 | "version": 3
941 | },
942 | "file_extension": ".py",
943 | "mimetype": "text/x-python",
944 | "name": "python",
945 | "nbconvert_exporter": "python",
946 | "pygments_lexer": "ipython3",
947 | "version": "3.7.3"
948 | }
949 | },
950 | "nbformat": 4,
951 | "nbformat_minor": 4
952 | }
953 |
--------------------------------------------------------------------------------
/titanic_video3_4_validacao.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 26,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 27,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "def transformar_sexo(valor):\n",
20 | "    \"\"\"Encode a 'Sex' value as an integer flag: 1 for 'female', 0 otherwise.\"\"\"\n",
21 | "    # Anything that does not compare equal to 'female' falls through to 0.\n",
22 | "    eh_feminino = valor == 'female'\n",
23 | "    return 1 if eh_feminino else 0"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 34,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "train = pd.read_csv(\"train.csv\")\n",
33 | "test = pd.read_csv(\"test.csv\")\n",
34 | "\n",
35 | "# Add the numeric sex flag to both frames so they carry the same feature column.\n",
36 | "for tabela in (train, test):\n",
37 | "    tabela['Sex_binario'] = tabela['Sex'].map(transformar_sexo)\n",
38 | "\n",
39 | "# Features and target; missing feature values are replaced by the sentinel -1.\n",
40 | "variaveis = ['Sex_binario', 'Age']\n",
41 | "X, y = train[variaveis].fillna(-1), train['Survived']"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 35,
47 | "metadata": {},
48 | "outputs": [
49 | {
50 | "data": {
51 | "text/html": [
52 | "\n",
53 | "\n",
66 | "
\n",
67 | " \n",
68 | " \n",
69 | " | \n",
70 | " PassengerId | \n",
71 | " Survived | \n",
72 | " Pclass | \n",
73 | " Name | \n",
74 | " Sex | \n",
75 | " Age | \n",
76 | " SibSp | \n",
77 | " Parch | \n",
78 | " Ticket | \n",
79 | " Fare | \n",
80 | " Cabin | \n",
81 | " Embarked | \n",
82 | " Sex_binario | \n",
83 | "
\n",
84 | " \n",
85 | " \n",
86 | " \n",
87 | " 0 | \n",
88 | " 1 | \n",
89 | " 0 | \n",
90 | " 3 | \n",
91 | " Braund, Mr. Owen Harris | \n",
92 | " male | \n",
93 | " 22.0 | \n",
94 | " 1 | \n",
95 | " 0 | \n",
96 | " A/5 21171 | \n",
97 | " 7.2500 | \n",
98 | " NaN | \n",
99 | " S | \n",
100 | " 0 | \n",
101 | "
\n",
102 | " \n",
103 | " 1 | \n",
104 | " 2 | \n",
105 | " 1 | \n",
106 | " 1 | \n",
107 | " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
108 | " female | \n",
109 | " 38.0 | \n",
110 | " 1 | \n",
111 | " 0 | \n",
112 | " PC 17599 | \n",
113 | " 71.2833 | \n",
114 | " C85 | \n",
115 | " C | \n",
116 | " 1 | \n",
117 | "
\n",
118 | " \n",
119 | " 2 | \n",
120 | " 3 | \n",
121 | " 1 | \n",
122 | " 3 | \n",
123 | " Heikkinen, Miss. Laina | \n",
124 | " female | \n",
125 | " 26.0 | \n",
126 | " 0 | \n",
127 | " 0 | \n",
128 | " STON/O2. 3101282 | \n",
129 | " 7.9250 | \n",
130 | " NaN | \n",
131 | " S | \n",
132 | " 1 | \n",
133 | "
\n",
134 | " \n",
135 | " 3 | \n",
136 | " 4 | \n",
137 | " 1 | \n",
138 | " 1 | \n",
139 | " Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
140 | " female | \n",
141 | " 35.0 | \n",
142 | " 1 | \n",
143 | " 0 | \n",
144 | " 113803 | \n",
145 | " 53.1000 | \n",
146 | " C123 | \n",
147 | " S | \n",
148 | " 1 | \n",
149 | "
\n",
150 | " \n",
151 | " 4 | \n",
152 | " 5 | \n",
153 | " 0 | \n",
154 | " 3 | \n",
155 | " Allen, Mr. William Henry | \n",
156 | " male | \n",
157 | " 35.0 | \n",
158 | " 0 | \n",
159 | " 0 | \n",
160 | " 373450 | \n",
161 | " 8.0500 | \n",
162 | " NaN | \n",
163 | " S | \n",
164 | " 0 | \n",
165 | "
\n",
166 | " \n",
167 | "
\n",
168 | "
"
169 | ],
170 | "text/plain": [
171 | " PassengerId Survived Pclass \\\n",
172 | "0 1 0 3 \n",
173 | "1 2 1 1 \n",
174 | "2 3 1 3 \n",
175 | "3 4 1 1 \n",
176 | "4 5 0 3 \n",
177 | "\n",
178 | " Name Sex Age SibSp \\\n",
179 | "0 Braund, Mr. Owen Harris male 22.0 1 \n",
180 | "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
181 | "2 Heikkinen, Miss. Laina female 26.0 0 \n",
182 | "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
183 | "4 Allen, Mr. William Henry male 35.0 0 \n",
184 | "\n",
185 | " Parch Ticket Fare Cabin Embarked Sex_binario \n",
186 | "0 0 A/5 21171 7.2500 NaN S 0 \n",
187 | "1 0 PC 17599 71.2833 C85 C 1 \n",
188 | "2 0 STON/O2. 3101282 7.9250 NaN S 1 \n",
189 | "3 0 113803 53.1000 C123 S 1 \n",
190 | "4 0 373450 8.0500 NaN S 0 "
191 | ]
192 | },
193 | "execution_count": 35,
194 | "metadata": {},
195 | "output_type": "execute_result"
196 | }
197 | ],
198 | "source": [
199 | "train.head()"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": 30,
205 | "metadata": {},
206 | "outputs": [],
207 | "source": [
208 | "from sklearn.ensemble import RandomForestClassifier\n",
209 | "from sklearn.model_selection import train_test_split"
210 | ]
211 | },
212 | {
213 | "cell_type": "code",
214 | "execution_count": 31,
215 | "metadata": {},
216 | "outputs": [
217 | {
218 | "data": {
219 | "text/plain": [
220 | "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])"
221 | ]
222 | },
223 | "execution_count": 31,
224 | "metadata": {},
225 | "output_type": "execute_result"
226 | }
227 | ],
228 | "source": [
229 | "X_falso = np.arange(10)\n",
230 | "X_falso"
231 | ]
232 | },
233 | {
234 | "cell_type": "code",
235 | "execution_count": 32,
236 | "metadata": {},
237 | "outputs": [
238 | {
239 | "data": {
240 | "text/plain": [
241 | "[array([6, 7, 3, 0, 5]), array([2, 8, 4, 9, 1])]"
242 | ]
243 | },
244 | "execution_count": 32,
245 | "metadata": {},
246 | "output_type": "execute_result"
247 | }
248 | ],
249 | "source": [
250 | "# Seed the split itself instead of the global NumPy RNG, so the result does not depend on kernel state.\n",
251 | "train_test_split(X_falso, test_size=0.5, random_state=0)"
252 | ]
253 | },
254 | {
255 | "cell_type": "code",
256 | "execution_count": 48,
257 | "metadata": {},
258 | "outputs": [],
259 | "source": [
260 | "# random_state pins the shuffle for this call without mutating the global NumPy RNG.\n",
261 | "X_treino, X_valid, y_treino, y_valid = train_test_split(X, y, test_size=0.5, random_state=1)"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": 49,
267 | "metadata": {},
268 | "outputs": [
269 | {
270 | "data": {
271 | "text/html": [
272 | "\n",
273 | "\n",
286 | "
\n",
287 | " \n",
288 | " \n",
289 | " | \n",
290 | " Sex_binario | \n",
291 | " Age | \n",
292 | "
\n",
293 | " \n",
294 | " \n",
295 | " \n",
296 | " 394 | \n",
297 | " 1 | \n",
298 | " 24.00 | \n",
299 | "
\n",
300 | " \n",
301 | " 851 | \n",
302 | " 0 | \n",
303 | " 74.00 | \n",
304 | "
\n",
305 | " \n",
306 | " 373 | \n",
307 | " 0 | \n",
308 | " 22.00 | \n",
309 | "
\n",
310 | " \n",
311 | " 523 | \n",
312 | " 1 | \n",
313 | " 44.00 | \n",
314 | "
\n",
315 | " \n",
316 | " 78 | \n",
317 | " 0 | \n",
318 | " 0.83 | \n",
319 | "
\n",
320 | " \n",
321 | "
\n",
322 | "
"
323 | ],
324 | "text/plain": [
325 | " Sex_binario Age\n",
326 | "394 1 24.00\n",
327 | "851 0 74.00\n",
328 | "373 0 22.00\n",
329 | "523 1 44.00\n",
330 | "78 0 0.83"
331 | ]
332 | },
333 | "execution_count": 49,
334 | "metadata": {},
335 | "output_type": "execute_result"
336 | }
337 | ],
338 | "source": [
339 | "X_treino.head()"
340 | ]
341 | },
342 | {
343 | "cell_type": "code",
344 | "execution_count": 50,
345 | "metadata": {},
346 | "outputs": [
347 | {
348 | "data": {
349 | "text/plain": [
350 | "((445, 2), (446, 2), (445,), (446,))"
351 | ]
352 | },
353 | "execution_count": 50,
354 | "metadata": {},
355 | "output_type": "execute_result"
356 | }
357 | ],
358 | "source": [
359 | "X_treino.shape, X_valid.shape, y_treino.shape, y_valid.shape"
360 | ]
361 | },
362 | {
363 | "cell_type": "code",
364 | "execution_count": 51,
365 | "metadata": {},
366 | "outputs": [
367 | {
368 | "data": {
369 | "text/plain": [
370 | "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
371 | " max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
372 | " min_impurity_decrease=0.0, min_impurity_split=None,\n",
373 | " min_samples_leaf=1, min_samples_split=2,\n",
374 | " min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1,\n",
375 | " oob_score=False, random_state=0, verbose=0, warm_start=False)"
376 | ]
377 | },
378 | "execution_count": 51,
379 | "metadata": {},
380 | "output_type": "execute_result"
381 | }
382 | ],
383 | "source": [
384 | "modelo = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=0)\n",
385 | "modelo.fit(X_treino, y_treino)"
386 | ]
387 | },
388 | {
389 | "cell_type": "code",
390 | "execution_count": 52,
391 | "metadata": {},
392 | "outputs": [],
393 | "source": [
394 | "p = modelo.predict(X_valid)"
395 | ]
396 | },
397 | {
398 | "cell_type": "code",
399 | "execution_count": 53,
400 | "metadata": {},
401 | "outputs": [
402 | {
403 | "data": {
404 | "text/plain": [
405 | "0.7466367713004485"
406 | ]
407 | },
408 | "execution_count": 53,
409 | "metadata": {},
410 | "output_type": "execute_result"
411 | }
412 | ],
413 | "source": [
414 | "np.mean(y_valid == p)"
415 | ]
416 | },
417 | {
418 | "cell_type": "code",
419 | "execution_count": 54,
420 | "metadata": {},
421 | "outputs": [
422 | {
423 | "data": {
424 | "text/plain": [
425 | "0.7623318385650224"
426 | ]
427 | },
428 | "execution_count": 54,
429 | "metadata": {},
430 | "output_type": "execute_result"
431 | }
432 | ],
433 | "source": [
434 | "p = (X_valid['Sex_binario'] == 1).astype(np.int64)\n",
435 | "np.mean(y_valid == p)"
436 | ]
437 | },
438 | {
439 | "cell_type": "markdown",
440 | "metadata": {},
441 | "source": [
442 | "## Validação cruzada"
443 | ]
444 | },
445 | {
446 | "cell_type": "code",
447 | "execution_count": 55,
448 | "metadata": {},
449 | "outputs": [
450 | {
451 | "data": {
452 | "text/plain": [
453 | "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])"
454 | ]
455 | },
456 | "execution_count": 55,
457 | "metadata": {},
458 | "output_type": "execute_result"
459 | }
460 | ],
461 | "source": [
462 | "X_falso"
463 | ]
464 | },
465 | {
466 | "cell_type": "code",
467 | "execution_count": 56,
468 | "metadata": {},
469 | "outputs": [],
470 | "source": [
471 | "from sklearn.model_selection import KFold"
472 | ]
473 | },
474 | {
475 | "cell_type": "code",
476 | "execution_count": 57,
477 | "metadata": {},
478 | "outputs": [
479 | {
480 | "data": {
481 | "text/plain": [
482 | "\u001b[0;31mInit signature:\u001b[0m \u001b[0mKFold\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_splits\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'warn'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshuffle\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrandom_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
483 | "\u001b[0;31mDocstring:\u001b[0m \n",
484 | "K-Folds cross-validator\n",
485 | "\n",
486 | "Provides train/test indices to split data in train/test sets. Split\n",
487 | "dataset into k consecutive folds (without shuffling by default).\n",
488 | "\n",
489 | "Each fold is then used once as a validation while the k - 1 remaining\n",
490 | "folds form the training set.\n",
491 | "\n",
492 | "Read more in the :ref:`User Guide `.\n",
493 | "\n",
494 | "Parameters\n",
495 | "----------\n",
496 | "n_splits : int, default=3\n",
497 | " Number of folds. Must be at least 2.\n",
498 | "\n",
499 | " .. versionchanged:: 0.20\n",
500 | " ``n_splits`` default value will change from 3 to 5 in v0.22.\n",
501 | "\n",
502 | "shuffle : boolean, optional\n",
503 | " Whether to shuffle the data before splitting into batches.\n",
504 | "\n",
505 | "random_state : int, RandomState instance or None, optional, default=None\n",
506 | " If int, random_state is the seed used by the random number generator;\n",
507 | " If RandomState instance, random_state is the random number generator;\n",
508 | " If None, the random number generator is the RandomState instance used\n",
509 | " by `np.random`. Used when ``shuffle`` == True.\n",
510 | "\n",
511 | "Examples\n",
512 | "--------\n",
513 | ">>> from sklearn.model_selection import KFold\n",
514 | ">>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n",
515 | ">>> y = np.array([1, 2, 3, 4])\n",
516 | ">>> kf = KFold(n_splits=2)\n",
517 | ">>> kf.get_n_splits(X)\n",
518 | "2\n",
519 | ">>> print(kf) # doctest: +NORMALIZE_WHITESPACE\n",
520 | "KFold(n_splits=2, random_state=None, shuffle=False)\n",
521 | ">>> for train_index, test_index in kf.split(X):\n",
522 | "... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n",
523 | "... X_train, X_test = X[train_index], X[test_index]\n",
524 | "... y_train, y_test = y[train_index], y[test_index]\n",
525 | "TRAIN: [2 3] TEST: [0 1]\n",
526 | "TRAIN: [0 1] TEST: [2 3]\n",
527 | "\n",
528 | "Notes\n",
529 | "-----\n",
530 | "The first ``n_samples % n_splits`` folds have size\n",
531 | "``n_samples // n_splits + 1``, other folds have size\n",
532 | "``n_samples // n_splits``, where ``n_samples`` is the number of samples.\n",
533 | "\n",
534 | "Randomized CV splitters may return different results for each call of\n",
535 | "split. You can make the results identical by setting ``random_state``\n",
536 | "to an integer.\n",
537 | "\n",
538 | "See also\n",
539 | "--------\n",
540 | "StratifiedKFold\n",
541 | " Takes group information into account to avoid building folds with\n",
542 | " imbalanced class distributions (for binary or multiclass\n",
543 | " classification tasks).\n",
544 | "\n",
545 | "GroupKFold: K-fold iterator variant with non-overlapping groups.\n",
546 | "\n",
547 | "RepeatedKFold: Repeats K-Fold n times.\n",
548 | "\u001b[0;31mFile:\u001b[0m ~/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_split.py\n",
549 | "\u001b[0;31mType:\u001b[0m ABCMeta\n",
550 | "\u001b[0;31mSubclasses:\u001b[0m \n"
551 | ]
552 | },
553 | "metadata": {},
554 | "output_type": "display_data"
555 | }
556 | ],
557 | "source": [
558 | "?KFold"
559 | ]
560 | },
561 | {
562 | "cell_type": "code",
563 | "execution_count": 59,
564 | "metadata": {},
565 | "outputs": [
566 | {
567 | "name": "stdout",
568 | "output_type": "stream",
569 | "text": [
570 | "Treino: [0 1 3 5 6 7]\n",
571 | "Valid: [2 4 8 9]\n",
572 | "\n",
573 | "Treino: [0 2 3 4 5 8 9]\n",
574 | "Valid: [1 6 7]\n",
575 | "\n",
576 | "Treino: [1 2 4 6 7 8 9]\n",
577 | "Valid: [0 3 5]\n",
578 | "\n"
579 | ]
580 | }
581 | ],
582 | "source": [
583 | "kf = KFold(3, shuffle=True, random_state=0)\n",
584 | "for linhas_treino, linhas_valid in kf.split(X_falso):\n",
585 | " print(\"Treino:\", linhas_treino)\n",
586 | " print(\"Valid:\", linhas_valid)\n",
587 | " print()"
588 | ]
589 | },
590 | {
591 | "cell_type": "code",
592 | "execution_count": 72,
593 | "metadata": {},
594 | "outputs": [
595 | {
596 | "name": "stdout",
597 | "output_type": "stream",
598 | "text": [
599 | "Rep: 0\n",
600 | "Treino: 712\n",
601 | "Valid: 179\n",
602 | "Acc: 0.7988826815642458\n",
603 | "\n",
604 | "Treino: 713\n",
605 | "Valid: 178\n",
606 | "Acc: 0.7359550561797753\n",
607 | "\n",
608 | "Treino: 713\n",
609 | "Valid: 178\n",
610 | "Acc: 0.7808988764044944\n",
611 | "\n",
612 | "Treino: 713\n",
613 | "Valid: 178\n",
614 | "Acc: 0.797752808988764\n",
615 | "\n",
616 | "Treino: 713\n",
617 | "Valid: 178\n",
618 | "Acc: 0.7808988764044944\n",
619 | "\n",
620 | "Rep: 1\n",
621 | "Treino: 712\n",
622 | "Valid: 179\n",
623 | "Acc: 0.7374301675977654\n",
624 | "\n",
625 | "Treino: 713\n",
626 | "Valid: 178\n",
627 | "Acc: 0.7247191011235955\n",
628 | "\n",
629 | "Treino: 713\n",
630 | "Valid: 178\n",
631 | "Acc: 0.7808988764044944\n",
632 | "\n",
633 | "Treino: 713\n",
634 | "Valid: 178\n",
635 | "Acc: 0.7921348314606742\n",
636 | "\n",
637 | "Treino: 713\n",
638 | "Valid: 178\n",
639 | "Acc: 0.7921348314606742\n",
640 | "\n",
641 | "Rep: 2\n",
642 | "Treino: 712\n",
643 | "Valid: 179\n",
644 | "Acc: 0.7653631284916201\n",
645 | "\n",
646 | "Treino: 713\n",
647 | "Valid: 178\n",
648 | "Acc: 0.7865168539325843\n",
649 | "\n",
650 | "Treino: 713\n",
651 | "Valid: 178\n",
652 | "Acc: 0.7865168539325843\n",
653 | "\n",
654 | "Treino: 713\n",
655 | "Valid: 178\n",
656 | "Acc: 0.7808988764044944\n",
657 | "\n",
658 | "Treino: 713\n",
659 | "Valid: 178\n",
660 | "Acc: 0.7640449438202247\n",
661 | "\n",
662 | "Rep: 3\n",
663 | "Treino: 712\n",
664 | "Valid: 179\n",
665 | "Acc: 0.7653631284916201\n",
666 | "\n",
667 | "Treino: 713\n",
668 | "Valid: 178\n",
669 | "Acc: 0.7471910112359551\n",
670 | "\n",
671 | "Treino: 713\n",
672 | "Valid: 178\n",
673 | "Acc: 0.7808988764044944\n",
674 | "\n",
675 | "Treino: 713\n",
676 | "Valid: 178\n",
677 | "Acc: 0.7415730337078652\n",
678 | "\n",
679 | "Treino: 713\n",
680 | "Valid: 178\n",
681 | "Acc: 0.8202247191011236\n",
682 | "\n",
683 | "Rep: 4\n",
684 | "Treino: 712\n",
685 | "Valid: 179\n",
686 | "Acc: 0.7988826815642458\n",
687 | "\n",
688 | "Treino: 713\n",
689 | "Valid: 178\n",
690 | "Acc: 0.797752808988764\n",
691 | "\n",
692 | "Treino: 713\n",
693 | "Valid: 178\n",
694 | "Acc: 0.7752808988764045\n",
695 | "\n",
696 | "Treino: 713\n",
697 | "Valid: 178\n",
698 | "Acc: 0.7415730337078652\n",
699 | "\n",
700 | "Treino: 713\n",
701 | "Valid: 178\n",
702 | "Acc: 0.7471910112359551\n",
703 | "\n",
704 | "Rep: 5\n",
705 | "Treino: 712\n",
706 | "Valid: 179\n",
707 | "Acc: 0.7653631284916201\n",
708 | "\n",
709 | "Treino: 713\n",
710 | "Valid: 178\n",
711 | "Acc: 0.7415730337078652\n",
712 | "\n",
713 | "Treino: 713\n",
714 | "Valid: 178\n",
715 | "Acc: 0.8370786516853933\n",
716 | "\n",
717 | "Treino: 713\n",
718 | "Valid: 178\n",
719 | "Acc: 0.7471910112359551\n",
720 | "\n",
721 | "Treino: 713\n",
722 | "Valid: 178\n",
723 | "Acc: 0.702247191011236\n",
724 | "\n",
725 | "Rep: 6\n",
726 | "Treino: 712\n",
727 | "Valid: 179\n",
728 | "Acc: 0.7821229050279329\n",
729 | "\n",
730 | "Treino: 713\n",
731 | "Valid: 178\n",
732 | "Acc: 0.8146067415730337\n",
733 | "\n",
734 | "Treino: 713\n",
735 | "Valid: 178\n",
736 | "Acc: 0.7752808988764045\n",
737 | "\n",
738 | "Treino: 713\n",
739 | "Valid: 178\n",
740 | "Acc: 0.7134831460674157\n",
741 | "\n",
742 | "Treino: 713\n",
743 | "Valid: 178\n",
744 | "Acc: 0.7247191011235955\n",
745 | "\n",
746 | "Rep: 7\n",
747 | "Treino: 712\n",
748 | "Valid: 179\n",
749 | "Acc: 0.7150837988826816\n",
750 | "\n",
751 | "Treino: 713\n",
752 | "Valid: 178\n",
753 | "Acc: 0.7247191011235955\n",
754 | "\n",
755 | "Treino: 713\n",
756 | "Valid: 178\n",
757 | "Acc: 0.8370786516853933\n",
758 | "\n",
759 | "Treino: 713\n",
760 | "Valid: 178\n",
761 | "Acc: 0.8033707865168539\n",
762 | "\n",
763 | "Treino: 713\n",
764 | "Valid: 178\n",
765 | "Acc: 0.7808988764044944\n",
766 | "\n",
767 | "Rep: 8\n",
768 | "Treino: 712\n",
769 | "Valid: 179\n",
770 | "Acc: 0.7597765363128491\n",
771 | "\n",
772 | "Treino: 713\n",
773 | "Valid: 178\n",
774 | "Acc: 0.7921348314606742\n",
775 | "\n",
776 | "Treino: 713\n",
777 | "Valid: 178\n",
778 | "Acc: 0.7921348314606742\n",
779 | "\n",
780 | "Treino: 713\n",
781 | "Valid: 178\n",
782 | "Acc: 0.7696629213483146\n",
783 | "\n",
784 | "Treino: 713\n",
785 | "Valid: 178\n",
786 | "Acc: 0.7640449438202247\n",
787 | "\n",
788 | "Rep: 9\n",
789 | "Treino: 712\n",
790 | "Valid: 179\n",
791 | "Acc: 0.7039106145251397\n",
792 | "\n",
793 | "Treino: 713\n",
794 | "Valid: 178\n",
795 | "Acc: 0.7584269662921348\n",
796 | "\n",
797 | "Treino: 713\n",
798 | "Valid: 178\n",
799 | "Acc: 0.7415730337078652\n",
800 | "\n",
801 | "Treino: 713\n",
802 | "Valid: 178\n",
803 | "Acc: 0.8033707865168539\n",
804 | "\n",
805 | "Treino: 713\n",
806 | "Valid: 178\n",
807 | "Acc: 0.7921348314606742\n",
808 | "\n"
809 | ]
810 | }
811 | ],
812 | "source": [
813 | "# Repeat shuffled 5-fold cross-validation 10 times and collect every fold's accuracy.\n",
814 | "resultados = []\n",
815 | "for rep in range(10):\n",
816 | "    print(\"Rep:\", rep)\n",
817 | "    # The repetition number doubles as the shuffle seed, so each repetition gets its own shuffle.\n",
818 | "    kf = KFold(5, shuffle=True, random_state=rep)\n",
819 | "\n",
820 | "    for linhas_treino, linhas_valid in kf.split(X):\n",
821 | "        print(\"Treino:\", len(linhas_treino))\n",
822 | "        print(\"Valid:\", len(linhas_valid))\n",
823 | "\n",
824 | "        # kf.split yields positional row indices, hence .iloc.\n",
825 | "        X_treino, y_treino = X.iloc[linhas_treino], y.iloc[linhas_treino]\n",
826 | "        X_valid, y_valid = X.iloc[linhas_valid], y.iloc[linhas_valid]\n",
827 | "\n",
828 | "        modelo = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=0)\n",
829 | "        p = modelo.fit(X_treino, y_treino).predict(X_valid)\n",
830 | "\n",
831 | "        # Accuracy = fraction of validation rows predicted correctly.\n",
832 | "        acc = np.mean(y_valid == p)\n",
833 | "        resultados.append(acc)\n",
834 | "        print(\"Acc:\", acc, end=\"\\n\\n\")"
836 | ]
837 | },
838 | {
839 | "cell_type": "code",
840 | "execution_count": 73,
841 | "metadata": {},
842 | "outputs": [
843 | {
844 | "data": {
845 | "text/plain": [
846 | "50"
847 | ]
848 | },
849 | "execution_count": 73,
850 | "metadata": {},
851 | "output_type": "execute_result"
852 | }
853 | ],
854 | "source": [
855 | "len(resultados)"
856 | ]
857 | },
858 | {
859 | "cell_type": "code",
860 | "execution_count": 74,
861 | "metadata": {},
862 | "outputs": [
863 | {
864 | "data": {
865 | "text/plain": [
866 | "0.7692593057560732"
867 | ]
868 | },
869 | "execution_count": 74,
870 | "metadata": {},
871 | "output_type": "execute_result"
872 | }
873 | ],
874 | "source": [
875 | "np.mean(resultados)"
876 | ]
877 | },
878 | {
879 | "cell_type": "markdown",
880 | "metadata": {},
881 | "source": [
882 | "## Criar submission"
883 | ]
884 | },
885 | {
886 | "cell_type": "code",
887 | "execution_count": 92,
888 | "metadata": {},
889 | "outputs": [
890 | {
891 | "data": {
892 | "text/plain": [
893 | "(418,)"
894 | ]
895 | },
896 | "execution_count": 92,
897 | "metadata": {},
898 | "output_type": "execute_result"
899 | }
900 | ],
901 | "source": [
902 | "# `p` from the CV loop only covers the last validation fold, so it cannot be indexed by\n",
903 | "# the 418 test PassengerIds. Refit on all training rows and predict test.csv instead.\n",
904 | "modelo = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=0).fit(X, y)\n",
905 | "p = modelo.predict(test[variaveis].fillna(-1))\n",
906 | "sub = pd.Series(p, index=test['PassengerId'], name='Survived')\n",
907 | "sub.shape"
904 | ]
905 | },
906 | {
907 | "cell_type": "code",
908 | "execution_count": 93,
909 | "metadata": {},
910 | "outputs": [],
911 | "source": [
912 | "sub.to_csv(\"primeiro_modelo.csv\", header=True)"
913 | ]
914 | },
915 | {
916 | "cell_type": "code",
917 | "execution_count": 94,
918 | "metadata": {},
919 | "outputs": [
920 | {
921 | "name": "stdout",
922 | "output_type": "stream",
923 | "text": [
924 | "PassengerId,Survived\n",
925 | "892,0\n",
926 | "893,1\n",
927 | "894,0\n",
928 | "895,1\n",
929 | "896,1\n",
930 | "897,0\n",
931 | "898,1\n",
932 | "899,0\n",
933 | "900,1\n"
934 | ]
935 | }
936 | ],
937 | "source": [
938 | "!head -n10 primeiro_modelo.csv"
939 | ]
940 | },
941 | {
942 | "cell_type": "code",
943 | "execution_count": null,
944 | "metadata": {},
945 | "outputs": [],
946 | "source": []
947 | }
948 | ],
949 | "metadata": {
950 | "kernelspec": {
951 | "display_name": "Python 3",
952 | "language": "python",
953 | "name": "python3"
954 | },
955 | "language_info": {
956 | "codemirror_mode": {
957 | "name": "ipython",
958 | "version": 3
959 | },
960 | "file_extension": ".py",
961 | "mimetype": "text/x-python",
962 | "name": "python",
963 | "nbconvert_exporter": "python",
964 | "pygments_lexer": "ipython3",
965 | "version": "3.7.3"
966 | }
967 | },
968 | "nbformat": 4,
969 | "nbformat_minor": 4
970 | }
971 |
--------------------------------------------------------------------------------
/titanic_video5.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 3,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "def transformar_sexo(valor):\n",
20 | "    \"\"\"Return 1 when `valor` equals 'female' and 0 for every other value.\"\"\"\n",
21 | "    if valor == 'female':\n",
22 | "        return 1\n",
23 | "    return 0"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 28,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "# Load both Kaggle files and append the numeric sex flag as a new column.\n",
33 | "train = pd.read_csv(\"train.csv\")\n",
34 | "train = train.assign(Sex_binario=train['Sex'].map(transformar_sexo))\n",
35 | "test = pd.read_csv(\"test.csv\")\n",
36 | "test = test.assign(Sex_binario=test['Sex'].map(transformar_sexo))\n",
37 | "\n",
38 | "# Model inputs: the selected columns with missing values replaced by -1.\n",
39 | "variaveis = ['Sex_binario', 'Age']\n",
40 | "X = train.loc[:, variaveis].fillna(-1)\n",
41 | "y = train.loc[:, 'Survived']"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 5,
47 | "metadata": {},
48 | "outputs": [
49 | {
50 | "data": {
51 | "text/html": [
52 | "\n",
53 | "\n",
66 | "
\n",
67 | " \n",
68 | " \n",
69 | " | \n",
70 | " PassengerId | \n",
71 | " Survived | \n",
72 | " Pclass | \n",
73 | " Name | \n",
74 | " Sex | \n",
75 | " Age | \n",
76 | " SibSp | \n",
77 | " Parch | \n",
78 | " Ticket | \n",
79 | " Fare | \n",
80 | " Cabin | \n",
81 | " Embarked | \n",
82 | " Sex_binario | \n",
83 | "
\n",
84 | " \n",
85 | " \n",
86 | " \n",
87 | " 0 | \n",
88 | " 1 | \n",
89 | " 0 | \n",
90 | " 3 | \n",
91 | " Braund, Mr. Owen Harris | \n",
92 | " male | \n",
93 | " 22.0 | \n",
94 | " 1 | \n",
95 | " 0 | \n",
96 | " A/5 21171 | \n",
97 | " 7.2500 | \n",
98 | " NaN | \n",
99 | " S | \n",
100 | " 0 | \n",
101 | "
\n",
102 | " \n",
103 | " 1 | \n",
104 | " 2 | \n",
105 | " 1 | \n",
106 | " 1 | \n",
107 | " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
108 | " female | \n",
109 | " 38.0 | \n",
110 | " 1 | \n",
111 | " 0 | \n",
112 | " PC 17599 | \n",
113 | " 71.2833 | \n",
114 | " C85 | \n",
115 | " C | \n",
116 | " 1 | \n",
117 | "
\n",
118 | " \n",
119 | " 2 | \n",
120 | " 3 | \n",
121 | " 1 | \n",
122 | " 3 | \n",
123 | " Heikkinen, Miss. Laina | \n",
124 | " female | \n",
125 | " 26.0 | \n",
126 | " 0 | \n",
127 | " 0 | \n",
128 | " STON/O2. 3101282 | \n",
129 | " 7.9250 | \n",
130 | " NaN | \n",
131 | " S | \n",
132 | " 1 | \n",
133 | "
\n",
134 | " \n",
135 | " 3 | \n",
136 | " 4 | \n",
137 | " 1 | \n",
138 | " 1 | \n",
139 | " Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
140 | " female | \n",
141 | " 35.0 | \n",
142 | " 1 | \n",
143 | " 0 | \n",
144 | " 113803 | \n",
145 | " 53.1000 | \n",
146 | " C123 | \n",
147 | " S | \n",
148 | " 1 | \n",
149 | "
\n",
150 | " \n",
151 | " 4 | \n",
152 | " 5 | \n",
153 | " 0 | \n",
154 | " 3 | \n",
155 | " Allen, Mr. William Henry | \n",
156 | " male | \n",
157 | " 35.0 | \n",
158 | " 0 | \n",
159 | " 0 | \n",
160 | " 373450 | \n",
161 | " 8.0500 | \n",
162 | " NaN | \n",
163 | " S | \n",
164 | " 0 | \n",
165 | "
\n",
166 | " \n",
167 | "
\n",
168 | "
"
169 | ],
170 | "text/plain": [
171 | " PassengerId Survived Pclass \\\n",
172 | "0 1 0 3 \n",
173 | "1 2 1 1 \n",
174 | "2 3 1 3 \n",
175 | "3 4 1 1 \n",
176 | "4 5 0 3 \n",
177 | "\n",
178 | " Name Sex Age SibSp \\\n",
179 | "0 Braund, Mr. Owen Harris male 22.0 1 \n",
180 | "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
181 | "2 Heikkinen, Miss. Laina female 26.0 0 \n",
182 | "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
183 | "4 Allen, Mr. William Henry male 35.0 0 \n",
184 | "\n",
185 | " Parch Ticket Fare Cabin Embarked Sex_binario \n",
186 | "0 0 A/5 21171 7.2500 NaN S 0 \n",
187 | "1 0 PC 17599 71.2833 C85 C 1 \n",
188 | "2 0 STON/O2. 3101282 7.9250 NaN S 1 \n",
189 | "3 0 113803 53.1000 C123 S 1 \n",
190 | "4 0 373450 8.0500 NaN S 0 "
191 | ]
192 | },
193 | "execution_count": 5,
194 | "metadata": {},
195 | "output_type": "execute_result"
196 | }
197 | ],
198 | "source": [
199 | "train.head()"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": 6,
205 | "metadata": {},
206 | "outputs": [],
207 | "source": [
208 | "from sklearn.ensemble import RandomForestClassifier\n",
209 | "from sklearn.model_selection import RepeatedKFold"
210 | ]
211 | },
212 | {
213 | "cell_type": "code",
214 | "execution_count": null,
215 | "metadata": {},
216 | "outputs": [],
217 | "source": [
218 | "resultados = []\n",
219 | "kf = RepeatedKFold(n_splits=2, n_repeats=10, random_state=10)\n",
220 | "\n",
221 | "# 2 folds x 10 repeats: fit a fresh forest per split and record its validation accuracy.\n",
222 | "for linhas_treino, linhas_valid in kf.split(X):\n",
223 | "    print(\"Treino:\", linhas_treino.size)\n",
224 | "    print(\"Valid:\", linhas_valid.size)\n",
225 | "\n",
226 | "    X_treino = X.iloc[linhas_treino]\n",
227 | "    y_treino = y.iloc[linhas_treino]\n",
228 | "    X_valid = X.iloc[linhas_valid]\n",
229 | "    y_valid = y.iloc[linhas_valid]\n",
230 | "    modelo = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=0)\n",
231 | "    modelo.fit(X_treino, y_treino)\n",
232 | "    p = modelo.predict(X_valid)\n",
233 | "\n",
234 | "    # Share of validation rows where the prediction matches the label.\n",
235 | "    acc = np.mean(y_valid == p)\n",
236 | "    resultados.append(acc)\n",
237 | "    print(\"Acc:\", acc)\n",
238 | "    print()"
239 | ]
240 | },
241 | {
242 | "cell_type": "code",
243 | "execution_count": 12,
244 | "metadata": {},
245 | "outputs": [
246 | {
247 | "name": "stdout",
248 | "output_type": "stream",
249 | "text": [
250 | "Populating the interactive namespace from numpy and matplotlib\n"
251 | ]
252 | },
253 | {
254 | "name": "stderr",
255 | "output_type": "stream",
256 | "text": [
257 | "/Users/mario/anaconda3/lib/python3.7/site-packages/IPython/core/magics/pylab.py:160: UserWarning: pylab import has clobbered these variables: ['test']\n",
258 | "`%matplotlib` prevents importing * from pylab and numpy\n",
259 | " \"\\n`%matplotlib` prevents importing * from pylab and numpy\"\n"
260 | ]
261 | }
262 | ],
263 | "source": [
264 | "%matplotlib inline\n",
265 | "%pylab inline"
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": 14,
271 | "metadata": {},
272 | "outputs": [
273 | {
274 | "data": {
275 | "text/plain": [
276 | "0.759601451100922"
277 | ]
278 | },
279 | "execution_count": 14,
280 | "metadata": {},
281 | "output_type": "execute_result"
282 | }
283 | ],
284 | "source": [
285 | "np.mean(resultados)"
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": 13,
291 | "metadata": {},
292 | "outputs": [
293 | {
294 | "data": {
295 | "text/plain": [
296 | "(array([2., 1., 3., 6., 4., 2., 1., 0., 0., 1.]),\n",
297 | " array([0.73542601, 0.74165869, 0.74789137, 0.75412405, 0.76035673,\n",
298 | " 0.76658941, 0.77282209, 0.77905477, 0.78528745, 0.79152013,\n",
299 | " 0.79775281]),\n",
300 | " )"
301 | ]
302 | },
303 | "execution_count": 13,
304 | "metadata": {},
305 | "output_type": "execute_result"
306 | },
307 | {
308 | "data": {
309 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXEAAAD4CAYAAAAaT9YAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAM/UlEQVR4nO3cbYxc91WA8efUm6iJ6zalHiKIu91GVIFQqY21SlUsWZAWcOo2VSU+JFKRGoEWpBC5gCiGT0V8SQRCRWoVZCV9kZoXFSeRSixCitoABWrwJg6x61Sk7pa6abGjUNoEidTp4cNcR2tnd+fOztyZPcvzk1aemb0zc/4768fXd14iM5Ek1fSqaQ8gSVo/Iy5JhRlxSSrMiEtSYUZckgqb6eJGt2/fnnNzc13ctCRtSouLi89mZm/Y63US8bm5OY4cOdLFTUvSphQR31zP9TycIkmFGXFJKsyIS1JhRlySCjPiklSYEZekwlpFPCIui4iDEfFURJyIiHd2PZgkabC2rxP/c+DhzPyViLgYuLTDmSRJLQ2MeES8FtgNfAggM18EXux2LElSG232xK8EzgCfioi3AYvAvsx8YflGEbEALADMzs6Oe051YG7/oand99Jte6d239Jm0uaY+AywE7gjM68BXgD2X7hRZh7IzPnMnO/1hn77vyRpHdpE/BRwKjMPN+cP0o+6JGnKBkY8M78LfCsirmouehfw1U6nkiS10vbVKbcCdzevTDkJ3NzdSJKktlpFPDOPAvMdzyJJGpLv2JSkwoy4JBVmxCWpMCMuSYUZcUkqzIhLUmFGXJIKM+KSVJgRl6TCjLgkFWbEJakwIy5JhRlxSSrMiEtSYUZckgoz4pJUmBGXpMKMuCQVZsQlqTAjLkmFGXFJKsyIS1JhRlySCjPiklSYEZekwmbabBQRS8APgJeAs5k53+VQkqR2WkW88QuZ+Wxnk0iShubhFEkqrG3EE3gkIhYjYmGlDSJiISKORMSRM2fOjG9CSdKq2kZ8V2buBK4HbomI3RdukJkHMnM+M+d7vd5Yh5QkraxVxDPzmebP08CDwLVdDiVJamdgxCNia0RsO3ca+CXgWNeDSZIGa/PqlMuBByPi3Pb3ZObDnU4lSWplYMQz8yTwtgnMIkkaki8xlKTCjLgkFWbEJakwIy5JhRlxSSrMiEtSYUZckgoz4pJUmBGXpMKMuCQVZsQlqTAjLkmFGXFJKsyIS1JhRlySCjPiklSYEZekwoy4JBVmxCWpMCMuSYUZcUkqzIhLUmFGXJIKM+KSVJgRl6TCjLgkFdY64hGxJSIej4iHuhxIktTeMHvi+4ATXQ0iSRpeq4hHxA5gL3Bnt+NIkoYx03K7jwEfAbattkFELAALALOzs6NPpk1tbv+hqdzv0m17p3K/UlcG7olHxHuB05m5uNZ2mXkgM+czc77X641tQEnS6tocTtkF3BARS8B9wHUR8dlOp5IktTIw4pn5B5m5IzPngBuBL2bmBzufTJI0kK8Tl6TC2j6xCUBmPgo82skkkqShuScuSYUZcUkqzIhLUmFGXJIKM+KSVJgRl6TCjLgkFWbEJakwIy5JhRlxSSrMiEtSYUZckgoz4pJUmBGXpMKMuCQVZsQlqTAjLkmFGXFJKsyIS1JhRlySCjPiklSYEZekwoy4JBVmxCWpMCMuSYUNjHhEvDoi/iUinoiI4xHxR5MYTJI02EyLbf4XuC4zn4+Ii4AvR8RfZ+ZXOp5NkjTAwIhnZgLPN2cvar6yy6EkSe202RMnIrYAi8BPAZ/IzMMrbLMALADMzs6Oc8ZNb27/oWmPIKmoVk9sZuZLmfl2YAdwbUS8dYVtDmTmfGbO93q9cc8pSVrBUK9OyczvAY8CezqZRpI0lDavTulFxGXN6UuAdwNPdT2YJGmwNsfEfwL4THNc/FXA5zLzoW7HkiS10ebVKf8GXDOBWSRJQ/Idm5JU
mBGXpMKMuCQVZsQlqTAjLkmFGXFJKsyIS1JhRlySCjPiklSYEZekwoy4JBVmxCWpMCMuSYUZcUkqzIhLUmFGXJIKM+KSVJgRl6TCjLgkFWbEJakwIy5JhRlxSSrMiEtSYUZckgoz4pJUmBGXpMIGRjwi3hgRX4qIExFxPCL2TWIwSdJgMy22OQv8bmY+FhHbgMWI+EJmfrXj2SRJAwzcE8/M72TmY83pHwAngCu6HkySNFibPfGXRcQccA1weIXvLQALALOzs+seaG7/oXVfdxRLt+2dyv1qsvz90mbT+onNiHgNcD/w4cz8/oXfz8wDmTmfmfO9Xm+cM0qSVtEq4hFxEf2A352ZD3Q7kiSprTavTgngLuBEZv5Z9yNJktpqsye+C/hV4LqIONp8vafjuSRJLQx8YjMzvwzEBGaRJA3Jd2xKUmFGXJIKM+KSVJgRl6TCjLgkFWbEJakwIy5JhRlxSSrMiEtSYUZckgoz4pJUmBGXpMKMuCQVZsQlqTAjLkmFGXFJKsyIS1JhRlySCjPiklSYEZekwoy4JBVmxCWpMCMuSYUZcUkqzIhLUmEDIx4Rn4yI0xFxbBIDSZLaa7Mn/mlgT8dzSJLWYWDEM/PvgecmMIskaUgz47qhiFgAFgBmZ2fHdbMTM7f/0LRH0CY2zd+vpdv2Tu2+p2VaP+9p/KzH9sRmZh7IzPnMnO/1euO6WUnSGnx1iiQVZsQlqbA2LzG8F/hn4KqIOBURv9b9WJKkNgY+sZmZN01iEEnS8DycIkmFGXFJKsyIS1JhRlySCjPiklSYEZekwoy4JBVmxCWpMCMuSYUZcUkqzIhLUmFGXJIKM+KSVJgRl6TCjLgkFWbEJakwIy5JhRlxSSrMiEtSYUZckgoz4pJUmBGXpMKMuCQVZsQlqTAjLkmFGXFJKqxVxCNiT0R8LSKejoj9XQ8lSWpnYMQjYgvwCeB64Grgpoi4uuvBJEmDtdkTvxZ4OjNPZuaLwH3A+7sdS5LUxkyLba4AvrXs/CngHRduFBELwEJz9vmI+Nro443VduDZaQ8xItewMZRaQ9z+iotKzb+KDbmGFX7Wa7lwDW9az322iXiscFm+4oLMA8CB9QwxCRFxJDPnpz3HKFzDxlB9DdXnB9ewXJvDKaeANy47vwN4ZtQ7liSNrk3E/xV4S0S8OSIuBm4EPt/tWJKkNgYeTsnMsxHxW8DfAFuAT2bm8c4nG78Ne6hnCK5hY6i+hurzg2t4WWS+4vC2JKkI37EpSYUZcUkqbFNEfNDHAkTE70XE0ebrWES8FBE/tuz7WyLi8Yh4aLKTv3z/654/IpYi4snme0cmP/3LM46yhssi4mBEPBURJyLinZNfwfrXEBFXLbv8aER8PyI+XGkNzfd+OyKON5ffGxGvnvwKRl7Dvuay49N6DJo5Bq3hdRHxVxHxRDPrzW2v+wqZWfqL/pOtXweuBC4GngCuXmP79wFfvOCy3wHuAR6qNj+wBGyv/BgAnwF+vTl9MXBZtTVccDvfBd5UaQ3039T3DeCS5vzngA8VW8NbgWPApfRftPG3wFs24hqAPwRub073gOeabYdaf2Zuij3xYT8W4Cbg3nNnImIHsBe4s9MpVzfS/BvEutcQEa8FdgN3AWTmi5n5vY7nXcm4Hod3AV/PzG92MOMgo65hBrgkImboh3Aa7wcZZQ0/A3wlM/8nM88Cfwd8oNNpV9ZmDQlsi4gAXkM/4mdbXvc8myHiK30swBUrbRgRlwJ7gPuXXfwx4CPAj7oacIBR50/gkYhYbD76YBpGWcOVwBngU80hrTsjYmuXw65i1MfhnBuZ3j+y615DZn4b+FPgP4DvAP+dmY90Ou3KRnkcjgG7I+INzffew/lvVJyUNmv4OP1/dJ4BngT2ZeaPWl73PJsh4q0+FqDxPuAfM/M5gIh4L3A6Mxe7Gq6Fdc/f2JWZO+l/yuQtEbF73AO2MMoaZoCdwB2ZeQ3wAjCNjzse9XGgeTPcDcBfjnm2
tkb5u/B6+nt8bwZ+EtgaER/sZMq1rXsNmXkCuB34AvAw/UMRZ7sYcoA2a/hl4Cj9n/XbgY83/ysdZv3A5oj4MB8LcOFe0i7ghohYov/flusi4rNdDLmGUeYnM59p/jwNPEj/v2OTNsoaTgGnMvNwc/4g/ahP2kiPQ+N64LHM/M8xz9bWKGt4N/CNzDyTmT8EHgB+rpMp1zbq34e7MnNnZu6mf4ji3zuZcm1t1nAz8ED2PU3/+Yifbnnd8036oH8HTyLMACfp70GceyLgZ1fY7nX0H9Stq9zOzzOdJzbXPT+wFdi27PQ/AXsqraG5/B+Aq5rTHwX+pNoamu/dB9w86dnH9Lv0DuA4/WPhQf/J5lsrraG5/MebP2eBp4DXb8Q1AHcAH21OXw58m/6nGrZa//KvNp9iuKHlKh8LEBG/2Xz/L5pNPwA8kpkvTGnUFY04/+XAg/3nRpgB7snMhyc3fd8YHoNbgbubwxEn6e+lTNSoa2iOwf4i8BsTHPs8o6whMw9HxEHgMfqHIB5nCm9tH8Pv0v0R8Qbgh8Atmflfk5r9nJZr+GPg0xHxJP1/NH8/M58FWOm6a92fb7uXpMI2wzFxSfp/y4hLUmFGXJIKM+KSVJgRl6TCjLgkFWbEJamw/wPRiHJduTn4JQAAAABJRU5ErkJggg==\n",
310 | "text/plain": [
311 | ""
312 | ]
313 | },
314 | "metadata": {
315 | "needs_background": "light"
316 | },
317 | "output_type": "display_data"
318 | }
319 | ],
320 | "source": [
321 | "pylab.hist(resultados)"
322 | ]
323 | },
324 | {
325 | "cell_type": "markdown",
326 | "metadata": {},
327 | "source": [
328 | "# Novas variáveis"
329 | ]
330 | },
331 | {
332 | "cell_type": "markdown",
333 | "metadata": {},
334 | "source": [
335 | "modelo anterior = 0.759601451100922"
336 | ]
337 | },
338 | {
339 | "cell_type": "code",
340 | "execution_count": 15,
341 | "metadata": {},
342 | "outputs": [
343 | {
344 | "data": {
345 | "text/html": [
346 | "\n",
347 | "\n",
360 | "
\n",
361 | " \n",
362 | " \n",
363 | " | \n",
364 | " PassengerId | \n",
365 | " Survived | \n",
366 | " Pclass | \n",
367 | " Name | \n",
368 | " Sex | \n",
369 | " Age | \n",
370 | " SibSp | \n",
371 | " Parch | \n",
372 | " Ticket | \n",
373 | " Fare | \n",
374 | " Cabin | \n",
375 | " Embarked | \n",
376 | " Sex_binario | \n",
377 | "
\n",
378 | " \n",
379 | " \n",
380 | " \n",
381 | " 0 | \n",
382 | " 1 | \n",
383 | " 0 | \n",
384 | " 3 | \n",
385 | " Braund, Mr. Owen Harris | \n",
386 | " male | \n",
387 | " 22.0 | \n",
388 | " 1 | \n",
389 | " 0 | \n",
390 | " A/5 21171 | \n",
391 | " 7.2500 | \n",
392 | " NaN | \n",
393 | " S | \n",
394 | " 0 | \n",
395 | "
\n",
396 | " \n",
397 | " 1 | \n",
398 | " 2 | \n",
399 | " 1 | \n",
400 | " 1 | \n",
401 | " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
402 | " female | \n",
403 | " 38.0 | \n",
404 | " 1 | \n",
405 | " 0 | \n",
406 | " PC 17599 | \n",
407 | " 71.2833 | \n",
408 | " C85 | \n",
409 | " C | \n",
410 | " 1 | \n",
411 | "
\n",
412 | " \n",
413 | " 2 | \n",
414 | " 3 | \n",
415 | " 1 | \n",
416 | " 3 | \n",
417 | " Heikkinen, Miss. Laina | \n",
418 | " female | \n",
419 | " 26.0 | \n",
420 | " 0 | \n",
421 | " 0 | \n",
422 | " STON/O2. 3101282 | \n",
423 | " 7.9250 | \n",
424 | " NaN | \n",
425 | " S | \n",
426 | " 1 | \n",
427 | "
\n",
428 | " \n",
429 | " 3 | \n",
430 | " 4 | \n",
431 | " 1 | \n",
432 | " 1 | \n",
433 | " Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
434 | " female | \n",
435 | " 35.0 | \n",
436 | " 1 | \n",
437 | " 0 | \n",
438 | " 113803 | \n",
439 | " 53.1000 | \n",
440 | " C123 | \n",
441 | " S | \n",
442 | " 1 | \n",
443 | "
\n",
444 | " \n",
445 | " 4 | \n",
446 | " 5 | \n",
447 | " 0 | \n",
448 | " 3 | \n",
449 | " Allen, Mr. William Henry | \n",
450 | " male | \n",
451 | " 35.0 | \n",
452 | " 0 | \n",
453 | " 0 | \n",
454 | " 373450 | \n",
455 | " 8.0500 | \n",
456 | " NaN | \n",
457 | " S | \n",
458 | " 0 | \n",
459 | "
\n",
460 | " \n",
461 | "
\n",
462 | "
"
463 | ],
464 | "text/plain": [
465 | " PassengerId Survived Pclass \\\n",
466 | "0 1 0 3 \n",
467 | "1 2 1 1 \n",
468 | "2 3 1 3 \n",
469 | "3 4 1 1 \n",
470 | "4 5 0 3 \n",
471 | "\n",
472 | " Name Sex Age SibSp \\\n",
473 | "0 Braund, Mr. Owen Harris male 22.0 1 \n",
474 | "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
475 | "2 Heikkinen, Miss. Laina female 26.0 0 \n",
476 | "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
477 | "4 Allen, Mr. William Henry male 35.0 0 \n",
478 | "\n",
479 | " Parch Ticket Fare Cabin Embarked Sex_binario \n",
480 | "0 0 A/5 21171 7.2500 NaN S 0 \n",
481 | "1 0 PC 17599 71.2833 C85 C 1 \n",
482 | "2 0 STON/O2. 3101282 7.9250 NaN S 1 \n",
483 | "3 0 113803 53.1000 C123 S 1 \n",
484 | "4 0 373450 8.0500 NaN S 0 "
485 | ]
486 | },
487 | "execution_count": 15,
488 | "metadata": {},
489 | "output_type": "execute_result"
490 | }
491 | ],
492 | "source": [
493 | "train.head()"
494 | ]
495 | },
496 | {
497 | "cell_type": "code",
498 | "execution_count": 29,
499 | "metadata": {},
500 | "outputs": [],
501 | "source": [
502 | "variaveis = ['Sex_binario', 'Age', 'Pclass', 'SibSp', 'Parch', 'Fare']"
503 | ]
504 | },
505 | {
506 | "cell_type": "code",
507 | "execution_count": 30,
508 | "metadata": {},
509 | "outputs": [],
510 | "source": [
511 | "y = train['Survived']  # target column\n",
512 | "X = train[variaveis].fillna(-1)  # feature columns, missing values replaced by -1"
513 | ]
514 | },
515 | {
516 | "cell_type": "code",
517 | "execution_count": 19,
518 | "metadata": {},
519 | "outputs": [
520 | {
521 | "name": "stdout",
522 | "output_type": "stream",
523 | "text": [
524 | "Treino: 445\n",
525 | "Valid: 446\n",
526 | "Acc: 0.7869955156950673\n",
527 | "\n",
528 | "Treino: 446\n",
529 | "Valid: 445\n",
530 | "Acc: 0.7797752808988764\n",
531 | "\n",
532 | "Treino: 445\n",
533 | "Valid: 446\n",
534 | "Acc: 0.827354260089686\n",
535 | "\n",
536 | "Treino: 446\n",
537 | "Valid: 445\n",
538 | "Acc: 0.8179775280898877\n",
539 | "\n",
540 | "Treino: 445\n",
541 | "Valid: 446\n",
542 | "Acc: 0.7847533632286996\n",
543 | "\n",
544 | "Treino: 446\n",
545 | "Valid: 445\n",
546 | "Acc: 0.7842696629213484\n",
547 | "\n",
548 | "Treino: 445\n",
549 | "Valid: 446\n",
550 | "Acc: 0.8161434977578476\n",
551 | "\n",
552 | "Treino: 446\n",
553 | "Valid: 445\n",
554 | "Acc: 0.7842696629213484\n",
555 | "\n",
556 | "Treino: 445\n",
557 | "Valid: 446\n",
558 | "Acc: 0.8004484304932735\n",
559 | "\n",
560 | "Treino: 446\n",
561 | "Valid: 445\n",
562 | "Acc: 0.8\n",
563 | "\n",
564 | "Treino: 445\n",
565 | "Valid: 446\n",
566 | "Acc: 0.8183856502242153\n",
567 | "\n",
568 | "Treino: 446\n",
569 | "Valid: 445\n",
570 | "Acc: 0.802247191011236\n",
571 | "\n",
572 | "Treino: 445\n",
573 | "Valid: 446\n",
574 | "Acc: 0.8116591928251121\n",
575 | "\n",
576 | "Treino: 446\n",
577 | "Valid: 445\n",
578 | "Acc: 0.8067415730337079\n",
579 | "\n",
580 | "Treino: 445\n",
581 | "Valid: 446\n",
582 | "Acc: 0.820627802690583\n",
583 | "\n",
584 | "Treino: 446\n",
585 | "Valid: 445\n",
586 | "Acc: 0.7887640449438202\n",
587 | "\n",
588 | "Treino: 445\n",
589 | "Valid: 446\n",
590 | "Acc: 0.8385650224215246\n",
591 | "\n",
592 | "Treino: 446\n",
593 | "Valid: 445\n",
594 | "Acc: 0.8044943820224719\n",
595 | "\n",
596 | "Treino: 445\n",
597 | "Valid: 446\n",
598 | "Acc: 0.7982062780269058\n",
599 | "\n",
600 | "Treino: 446\n",
601 | "Valid: 445\n",
602 | "Acc: 0.8112359550561797\n",
603 | "\n"
604 | ]
605 | }
606 | ],
607 | "source": [
608 | "# Repeated 2-fold cross-validation (10 repeats); one accuracy per fold is stored in `resultados`.\n",
609 | "kf = RepeatedKFold(n_splits=2, n_repeats=10, random_state=10)\n",
610 | "resultados = []\n",
611 | "\n",
612 | "for linhas_treino, linhas_valid in kf.split(X):\n",
613 | "    print(\"Treino:\", len(linhas_treino))\n",
614 | "    print(\"Valid:\", len(linhas_valid))\n",
615 | "\n",
616 | "    # Positional row selection for this fold's training and validation parts.\n",
617 | "    X_treino, y_treino = X.iloc[linhas_treino], y.iloc[linhas_treino]\n",
618 | "    X_valid, y_valid = X.iloc[linhas_valid], y.iloc[linhas_valid]\n",
619 | "\n",
620 | "    # A new forest is fit on every fold, always with the same seed.\n",
621 | "    modelo = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=0)\n",
622 | "    modelo.fit(X_treino, y_treino)\n",
623 | "    p = modelo.predict(X_valid)\n",
624 | "\n",
625 | "    # Accuracy: fraction of validation rows whose prediction matches the label.\n",
626 | "    acc = (y_valid == p).mean()\n",
627 | "    resultados.append(acc)\n",
628 | "    print(\"Acc:\", acc, end=\"\\n\\n\")"
629 | ]
630 | },
631 | {
632 | "cell_type": "code",
633 | "execution_count": 20,
634 | "metadata": {},
635 | "outputs": [
636 | {
637 | "data": {
638 | "text/plain": [
639 | "(array([4., 2., 0., 4., 2., 2., 4., 0., 1., 1.]),\n",
640 | " array([0.77977528, 0.78565426, 0.79153323, 0.7974122 , 0.80329118,\n",
641 | " 0.80917015, 0.81504913, 0.8209281 , 0.82680707, 0.83268605,\n",
642 | " 0.83856502]),\n",
643 | " )"
644 | ]
645 | },
646 | "execution_count": 20,
647 | "metadata": {},
648 | "output_type": "execute_result"
649 | },
650 | {
651 | "data": {
652 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAARNElEQVR4nO3df6zddX3H8edrpQ4UJm69G6w/qEtINiHjR24KjMUQ5jJ+KVnCH5AokSxpILjAZmaYS3D8p8liDNTQNEqU6TAmEtJhmbIoE5YULbXUlupWlY1KEyrEYgNRy97743xnjre3Pd97z7k/+tnzkZz0++Nzvt/3+56bV7/3e77nfFNVSJLa8mtLXYAkafIMd0lqkOEuSQ0y3CWpQYa7JDXolKXa8apVq2r9+vVLtXtJOik988wzP66qqVHjlizc169fz44dO5Zq95J0UkryX33GeVpGkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNah3uCdZkeTbSR6dZV2S3Jtkf5LdSS6ebJmSpLmYy5H7HcC+46y7Gji3e2wE7h+zLknSGHqFe5I1wLXAp44z5HrgwRrYDpyZ5OwJ1ShJmqO+n1D9BPAh4IzjrF8NvDA0f6BbdnB4UJKNDI7sWbdu3ZwKHbb+ri/P+7njev6j1y7ZvpfKUv28/Vkvnv+PP+vWjTxyT3Id8FJVPXOiYbMsO+YWT1W1paqmq2p6amrkVyNIkuapz2mZy4H3JHke+AJwZZLPzRhzAFg7NL8GeHEiFUqS5mxkuFfV31bVmqpaD9wIfK2q3jtj2Fbg5u6qmUuBw1V1cOa2JEmLY97fCpnkVoCq2gxsA64B9gOvAbdMpDpJ0rzMKdyr6gngiW5689DyAm6fZGGSpPnzE6qS1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAb1uUH2qUm+meTZJHuT3DPLmCuSHE6yq3vcvTDlSpL66HMnpp8BV1bVkSQrgaeSPFZV22eMe7Kqrpt8iZKkuRoZ7t0t9I50syu7Ry1kUZKk8fQ6555kRZJdwEvA41X19CzDLutO3TyW5LyJVilJmpNe4V5Vb1TVhcAaYEOS82cM2QmcU1UXAPcBj8y2nSQbk+xIsuPQoUPj1C1JOoE5XS1TVT8BngCumrH81ao60k1vA1YmWTXL87dU1XRVTU9NTc2/aknSCfW5WmYqyZnd9GnAu4DvzhhzVpJ00xu67b48+XIlSX30uVrmbOCzSVYwCO0vVtWjSW4FqKrNwA3AbUmOAq8DN3ZvxEqSlkCfq2V2AxfNsnzz0PQmYNNkS5MkzZefUJWkBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QG9bmH6qlJvpnk2SR7k9wzy5gkuTfJ/iS7k1y8MOVKkvrocw/VnwFXVtWRJCuBp5I8VlXbh8ZcDZzbPS4B7u/+lSQtgZFH7jVwpJtd2T1m3vz6euDBbux24MwkZ0+2VElSX73OuSdZkWQX8BLweFU9PWPIauCFofkD3bKZ29mYZEeSHYcOHZpvzZKkEXqFe1W9UVUXAmuADUnOnzEksz1tlu1sqarpqpqempqae7WSpF7mdLVMVf0EeAK4asaqA8Daofk1wItjVSZJmrc+V8tMJTmzmz4NeBfw3RnDtgI3d1fNXAocrqqDE69WktRLn6tlzgY+m2QFg/8MvlhVjya5FaCqNgPbgGuA/cBrwC0LVK8kqYeR4V5Vu4GLZlm+eWi6gNsnW5okab78hKokNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkN
MtwlqUGGuyQ1qM89VNcm+XqSfUn2JrljljFXJDmcZFf3uHthypUk9dHnHqpHgQ9W1c4kZwDPJHm8qp6bMe7Jqrpu8iVKkuZq5JF7VR2sqp3d9E+BfcDqhS5MkjR/czrnnmQ9g5tlPz3L6suSPJvksSTnHef5G5PsSLLj0KFDcy5WktRP73BPcjrwJeDOqnp1xuqdwDlVdQFwH/DIbNuoqi1VNV1V01NTU/OtWZI0Qq9wT7KSQbB/vqoenrm+ql6tqiPd9DZgZZJVE61UktRbn6tlAnwa2FdVHz/OmLO6cSTZ0G335UkWKknqr8/VMpcD7wO+k2RXt+zDwDqAqtoM3ADcluQo8DpwY1XVAtQrSephZLhX1VNARozZBGyaVFGSpPH4CVVJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqUJ97qK5N8vUk+5LsTXLHLGOS5N4k+5PsTnLxwpQrSeqjzz1UjwIfrKqdSc4AnknyeFU9NzTmauDc7nEJcH/3ryRpCYw8cq+qg1W1s5v+KbAPWD1j2PXAgzWwHTgzydkTr1aS1EufI/dfSrIeuAh4esaq1cALQ/MHumUHZzx/I7ARYN26dXOrdJlYf9eXl2S/z3/02iXZ71Jaqp+11ILeb6gmOR34EnBnVb06c/UsT6ljFlRtqarpqpqempqaW6WSpN56hXuSlQyC/fNV9fAsQw4Aa4fm1wAvjl+eJGk++lwtE+DTwL6q+vhxhm0Fbu6umrkUOFxVB48zVpK0wPqcc78ceB/wnSS7umUfBtYBVNVmYBtwDbAfeA24ZfKlSpL6GhnuVfUUs59THx5TwO2TKkqSNB4/oSpJDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkN6nMP1QeSvJRkz3HWX5HkcJJd3ePuyZcpSZqLPvdQ/QywCXjwBGOerKrrJlKRJGlsI4/cq+obwCuLUIskaUImdc79siTPJnksyXnHG5RkY5IdSXYcOnRoQruWJM00iXDfCZxTVRcA9wGPHG9gVW2pqumqmp6amprAriVJsxk73Kvq1ao60k1vA1YmWTV2ZZKkeRs73JOclSTd9IZumy+Pu11J0vyNvFomyUPAFcCqJAeAjwArAapqM3ADcFuSo8DrwI1VVQtWsSRppJHhXlU3jVi/icGlkpKkZcJPqEpSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDRoZ7kgeSvJRkz3HWJ8m9SfYn2Z3k4smXKUmaiz5H7p8BrjrB+quBc7vHRuD+8cuSJI1jZLhX1TeAV04w5HrgwRrYDpyZ5OxJFShJmruRN8juYTXwwtD8gW7ZwZkDk2xkcHTPunXrJrBrSSe79Xd9ealLWHTPf/TaBd/HJN5QzSzLaraBVbWlqqaranpqamoCu5YkzWYS4X4AWDs0vwZ4cQLblSTN0yTCfStwc3fVzKXA4ao65pSMJGnxjDznnuQh4ApgVZIDwEeAlQBVtRnYBlwD7AdeA25ZqGIlSf2MDPequmnE+gJun1hFkqSx+QlVSWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJalCvcE9yVZLvJdmf5K5Z1l+R5HCSXd3j7smXKknqq889VFcAnwT+FDgAfCvJ1qp6bsbQJ6vqugWoUZI0R32O3DcA+6vqB1X1c+ALwPULW5YkaRx9wn018MLQ/IFu2UyXJXk2yWNJzpttQ0k2JtmRZMehQ4fmUa4kqY8+4Z5ZltWM+Z3A
OVV1AXAf8MhsG6qqLVU1XVXTU1NTc6tUktRbn3A/AKwdml8DvDg8oKperaoj3fQ2YGWSVROrUpI0J33C/VvAuUnenuRNwI3A1uEBSc5Kkm56Q7fdlyddrCSpn5FXy1TV0SQfAL4CrAAeqKq9SW7t1m8GbgBuS3IUeB24sapmnrqRJC2SkeEOvzzVsm3Gss1D05uATZMtTZI0X35CVZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhrUK9yTXJXke0n2J7lrlvVJcm+3fneSiydfqiSpr5HhnmQF8EngauAdwE1J3jFj2NXAud1jI3D/hOuUJM1BnyP3DcD+qvpBVf0c+AJw/Ywx1wMP1sB24MwkZ0+4VklST31ukL0aeGFo/gBwSY8xq4GDw4OSbGRwZA9wJMn35lTtsVYBPx5zG8vFCXvJxxaxkslo6bWBxvs5CX+/hp10r82In/eofs7ps48+4Z5ZltU8xlBVW4AtPfbZS5IdVTU9qe0tpZZ6AftZ7lrqp6VeYHL99DktcwBYOzS/BnhxHmMkSYukT7h/Czg3yduTvAm4Edg6Y8xW4ObuqplLgcNVdXDmhiRJi2PkaZmqOprkA8BXgBXAA1W1N8mt3frNwDbgGmA/8Bpwy8KV/CsmdopnGWipF7Cf5a6lflrqBSbUT6qOOTUuSTrJ+QlVSWqQ4S5JDVqW4d7j6w7+Jsmu7rEnyRtJfrNb91dJ9nbLH0py6uJ3cEy94/RzR7dsb5I7F7/6Y/Xo561J/jnJs13dt/R97mIbs5cHkryUZM/iVn188+0nydokX0+yr1t+x+JXf6wx+jk1yTeHlt+z+NUfa5zft279iiTfTvLoyJ1V1bJ6MHjT9vvA7wFvAp4F3nGC8e8GvtZNrwZ+CJzWzX8ReP9J3M/5wB7gzQze/P5X4Nzl3g/wYeBj3fQU8Eo3dk4/i+XcSzf/TuBiYM9SviYTem3OBi7ulp8B/MdSvjYT6CfA6d3ylcDTwKUnaz9D6/8a+Cfg0VH7W45H7n2+7mDYTcBDQ/OnAKclOYVBKC719fbj9PMHwPaqeq2qjgL/Bvz5glY7Wp9+CjgjSYDTGfyCHu353MU0Ti9U1Te6+eVi3v1U1cGq2glQVT8F9jE4WFpK4/RTVXWkG7Oyeyz11SNj/b4lWQNcC3yqz86WY7gf76sMjpHkzcBVwJcAqupHwD8A/83gqw8OV9VXF7Ta0ebdD4Oj9ncm+a1u3TX86ofFlkKffjYx+I/pReA7wB1V9T89n7uYxullOZpIP0nWAxcxONpdSmP1053C2AW8BDxeVSd1P8AngA8BvX7/lmO49/oqg867gX+vqlcAkryNwf+Ebwd+F3hLkvcuSJX9zbufqtoHfAx4HPgXBn/GHV2IIuegTz9/Buxi8BpcCGxK8hs9n7uYxullORq7nySnMzi4uLOqXl2oQnsaq5+qeqOqLmTwifkNSc5fyGJ7mHc/Sa4DXqqqZ/rubDmG+1y+yuBGfvWUzLuAH1bVoar6BfAw8EcLUmV/4/RDVX26qi6uqncy+BPtPxekyv769HML8HD3p/F+Bu+D/H7P5y6mcXpZjsbqJ8lKBsH++ap6eBHqHWUir09V/QR4gsFfxUtpnH4uB96T5HkGp3OuTPK5E+5tKd9gOM6bDqcAP2Bw9P1/bzqcN8u4tzIIu7cMLbsE2MvgXHuAzwJ/ebL20y3/7e7fdcB3gbct934YfJ//33fTvwP8iME33fX6WZwMvQytX8/yeUN1nNcmwIPAJ5a6jwn1MwWc2S0/DXgSuO5k7WfGmCvo8Ybqkr+Ax/khXMPg3frvA3/XLbsVuHVozPuBL8zy3Hu6ENwD/CPw6yd5P08Cz3W/CH+y1L306YfBn5RfZXDOcA/w3hM99yTu5SEG7+38gsFR2V+crP0Af8zgFMFuBqcFdgHXnMT9/CHw
7a6fPcDdS93LuL9vQ9u4gh7h7tcPSFKDluM5d0nSmAx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1KD/BdGTMajEA4nCAAAAAElFTkSuQmCC\n",
653 | "text/plain": [
654 | ""
655 | ]
656 | },
657 | "metadata": {
658 | "needs_background": "light"
659 | },
660 | "output_type": "display_data"
661 | }
662 | ],
663 | "source": [
664 | "pylab.hist(resultados)"
665 | ]
666 | },
667 | {
668 | "cell_type": "code",
669 | "execution_count": 21,
670 | "metadata": {},
671 | "outputs": [
672 | {
673 | "data": {
674 | "text/plain": [
675 | "0.8041457147175896"
676 | ]
677 | },
678 | "execution_count": 21,
679 | "metadata": {},
680 | "output_type": "execute_result"
681 | }
682 | ],
683 | "source": [
684 | "np.mean(resultados)"
685 | ]
686 | },
687 | {
688 | "cell_type": "markdown",
689 | "metadata": {},
690 | "source": [
691 | "# Retreinar o modelo"
692 | ]
693 | },
694 | {
695 | "cell_type": "code",
696 | "execution_count": 25,
697 | "metadata": {},
698 | "outputs": [
699 | {
700 | "data": {
701 | "text/html": [
702 | "\n",
703 | "\n",
716 | "
\n",
717 | " \n",
718 | " \n",
719 | " | \n",
720 | " Sex_binario | \n",
721 | " Age | \n",
722 | " Pclass | \n",
723 | " SibSp | \n",
724 | " Parch | \n",
725 | " Fare | \n",
726 | "
\n",
727 | " \n",
728 | " \n",
729 | " \n",
730 | " 0 | \n",
731 | " 0 | \n",
732 | " 22.0 | \n",
733 | " 3 | \n",
734 | " 1 | \n",
735 | " 0 | \n",
736 | " 7.2500 | \n",
737 | "
\n",
738 | " \n",
739 | " 1 | \n",
740 | " 1 | \n",
741 | " 38.0 | \n",
742 | " 1 | \n",
743 | " 1 | \n",
744 | " 0 | \n",
745 | " 71.2833 | \n",
746 | "
\n",
747 | " \n",
748 | " 2 | \n",
749 | " 1 | \n",
750 | " 26.0 | \n",
751 | " 3 | \n",
752 | " 0 | \n",
753 | " 0 | \n",
754 | " 7.9250 | \n",
755 | "
\n",
756 | " \n",
757 | " 3 | \n",
758 | " 1 | \n",
759 | " 35.0 | \n",
760 | " 1 | \n",
761 | " 1 | \n",
762 | " 0 | \n",
763 | " 53.1000 | \n",
764 | "
\n",
765 | " \n",
766 | " 4 | \n",
767 | " 0 | \n",
768 | " 35.0 | \n",
769 | " 3 | \n",
770 | " 0 | \n",
771 | " 0 | \n",
772 | " 8.0500 | \n",
773 | "
\n",
774 | " \n",
775 | "
\n",
776 | "
"
777 | ],
778 | "text/plain": [
779 | " Sex_binario Age Pclass SibSp Parch Fare\n",
780 | "0 0 22.0 3 1 0 7.2500\n",
781 | "1 1 38.0 1 1 0 71.2833\n",
782 | "2 1 26.0 3 0 0 7.9250\n",
783 | "3 1 35.0 1 1 0 53.1000\n",
784 | "4 0 35.0 3 0 0 8.0500"
785 | ]
786 | },
787 | "execution_count": 25,
788 | "metadata": {},
789 | "output_type": "execute_result"
790 | }
791 | ],
792 | "source": [
793 | "X.head()"
794 | ]
795 | },
796 | {
797 | "cell_type": "code",
798 | "execution_count": 26,
799 | "metadata": {},
800 | "outputs": [
801 | {
802 | "data": {
803 | "text/plain": [
804 | "0 0\n",
805 | "1 1\n",
806 | "2 1\n",
807 | "3 1\n",
808 | "4 0\n",
809 | "Name: Survived, dtype: int64"
810 | ]
811 | },
812 | "execution_count": 26,
813 | "metadata": {},
814 | "output_type": "execute_result"
815 | }
816 | ],
817 | "source": [
818 | "y.head()"
819 | ]
820 | },
821 | {
822 | "cell_type": "code",
823 | "execution_count": 31,
824 | "metadata": {},
825 | "outputs": [
826 | {
827 | "data": {
828 | "text/html": [
829 | "\n",
830 | "\n",
843 | "
\n",
844 | " \n",
845 | " \n",
846 | " | \n",
847 | " Sex_binario | \n",
848 | " Age | \n",
849 | " Pclass | \n",
850 | " SibSp | \n",
851 | " Parch | \n",
852 | " Fare | \n",
853 | "
\n",
854 | " \n",
855 | " \n",
856 | " \n",
857 | " 0 | \n",
858 | " 0 | \n",
859 | " 34.5 | \n",
860 | " 3 | \n",
861 | " 0 | \n",
862 | " 0 | \n",
863 | " 7.8292 | \n",
864 | "
\n",
865 | " \n",
866 | " 1 | \n",
867 | " 1 | \n",
868 | " 47.0 | \n",
869 | " 3 | \n",
870 | " 1 | \n",
871 | " 0 | \n",
872 | " 7.0000 | \n",
873 | "
\n",
874 | " \n",
875 | " 2 | \n",
876 | " 0 | \n",
877 | " 62.0 | \n",
878 | " 2 | \n",
879 | " 0 | \n",
880 | " 0 | \n",
881 | " 9.6875 | \n",
882 | "
\n",
883 | " \n",
884 | " 3 | \n",
885 | " 0 | \n",
886 | " 27.0 | \n",
887 | " 3 | \n",
888 | " 0 | \n",
889 | " 0 | \n",
890 | " 8.6625 | \n",
891 | "
\n",
892 | " \n",
893 | " 4 | \n",
894 | " 1 | \n",
895 | " 22.0 | \n",
896 | " 3 | \n",
897 | " 1 | \n",
898 | " 1 | \n",
899 | " 12.2875 | \n",
900 | "
\n",
901 | " \n",
902 | "
\n",
903 | "
"
904 | ],
905 | "text/plain": [
906 | " Sex_binario Age Pclass SibSp Parch Fare\n",
907 | "0 0 34.5 3 0 0 7.8292\n",
908 | "1 1 47.0 3 1 0 7.0000\n",
909 | "2 0 62.0 2 0 0 9.6875\n",
910 | "3 0 27.0 3 0 0 8.6625\n",
911 | "4 1 22.0 3 1 1 12.2875"
912 | ]
913 | },
914 | "execution_count": 31,
915 | "metadata": {},
916 | "output_type": "execute_result"
917 | }
918 | ],
919 | "source": [
920 | "test[variaveis].head()"
921 | ]
922 | },
923 | {
924 | "cell_type": "code",
925 | "execution_count": 32,
926 | "metadata": {},
927 | "outputs": [],
928 | "source": [
929 | "# Final model: same hyperparameters as in the validation loop, fit on all training rows.\n",
930 | "modelo = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=0).fit(X, y)\n",
931 | "\n",
932 | "p = modelo.predict(test[variaveis].fillna(-1))  # same -1 fill for missing values as used for X"
933 | ]
934 | },
935 | {
936 | "cell_type": "markdown",
937 | "metadata": {},
938 | "source": [
939 | "## Criar submission"
940 | ]
941 | },
942 | {
943 | "cell_type": "code",
944 | "execution_count": 47,
945 | "metadata": {},
946 | "outputs": [
947 | {
948 | "data": {
949 | "text/plain": [
950 | "(418,)"
951 | ]
952 | },
953 | "execution_count": 47,
954 | "metadata": {},
955 | "output_type": "execute_result"
956 | }
957 | ],
958 | "source": [
959 | "sub = pd.Series(data=p, name='Survived', index=test['PassengerId'])  # predictions keyed by PassengerId\n",
960 | "sub.shape"
961 | ]
962 | },
963 | {
964 | "cell_type": "code",
965 | "execution_count": 48,
966 | "metadata": {},
967 | "outputs": [],
968 | "source": [
969 | "sub.to_csv(\"modelo_video5.csv\", header=True)"
970 | ]
971 | },
972 | {
973 | "cell_type": "code",
974 | "execution_count": 49,
975 | "metadata": {},
976 | "outputs": [
977 | {
978 | "name": "stdout",
979 | "output_type": "stream",
980 | "text": [
981 | "PassengerId,Survived\n",
982 | "892,0\n",
983 | "893,0\n",
984 | "894,1\n",
985 | "895,1\n",
986 | "896,1\n",
987 | "897,0\n",
988 | "898,0\n",
989 | "899,0\n",
990 | "900,1\n"
991 | ]
992 | }
993 | ],
994 | "source": [
995 | "!head -n10 modelo_video5.csv"
996 | ]
997 | },
998 | {
999 | "cell_type": "code",
1000 | "execution_count": null,
1001 | "metadata": {},
1002 | "outputs": [],
1003 | "source": []
1004 | }
1005 | ],
1006 | "metadata": {
1007 | "kernelspec": {
1008 | "display_name": "Python 3",
1009 | "language": "python",
1010 | "name": "python3"
1011 | },
1012 | "language_info": {
1013 | "codemirror_mode": {
1014 | "name": "ipython",
1015 | "version": 3
1016 | },
1017 | "file_extension": ".py",
1018 | "mimetype": "text/x-python",
1019 | "name": "python",
1020 | "nbconvert_exporter": "python",
1021 | "pygments_lexer": "ipython3",
1022 | "version": "3.7.3"
1023 | }
1024 | },
1025 | "nbformat": 4,
1026 | "nbformat_minor": 4
1027 | }
1028 |
--------------------------------------------------------------------------------