├── 5_dias_de_kaggle
├── README
├── lightautoml-baseline-tps-june-2021.ipynb
├── lightgbm.ipynb
├── lightgbm2.ipynb
├── lightgbm_permutation.ipynb
└── submission_super_incroivel_do_tutorial.csv
├── README
├── agente-ia-ferramentas
├── README.md
└── agente_do_zero.py
├── atualize_seu_modelo.ipynb
├── como_tunar_hipers.ipynb
├── copa_america
├── 0.ipynb
├── 1.ipynb
└── README
├── deploy_sql
├── README
├── deploy_tip_model_.py
├── monitor.ipynb
└── validator.py
├── euro2021
├── 0_clean_data_merged_v1.ipynb
├── 1_baselines.ipynb
├── 2_model1.ipynb
├── 3_model2.ipynb
├── 4_model3.ipynb
├── 5_model4.ipynb
├── 6_model5.ipynb
├── 7_model6.ipynb
└── README
├── live11_sucesso_musical
├── README
└── nb1.ipynb
├── live12_timeseries_prophet
├── README
└── nb1.ipynb
├── live14_deploy
├── README
├── app.py
├── clickbait_titles.csv
├── mdl.pkl.z
├── nb1.ipynb
└── non_clickbait_titles.csv
├── live15_covid
├── Countries Longitude and Latitude.csv
├── README
├── countries and continents.csv
├── covid_19_data.csv
├── nb3_corona.ipynb
└── nb4_corona.ipynb
├── live16_clustering_texto
├── README
├── nCoV_tweets.csv
└── nb1.ipynb
├── live17_stacking
├── README
├── nb1.ipynb
└── train.csv
├── live18_rede_neural
├── README
├── nb1.ipynb
└── train.csv
├── live20_covid_einstein
├── 10_lgb4.ipynb
├── 11_lgb5.ipynb
├── 12_ag3.ipynb
├── 13_nn1.ipynb
├── 1_nb1.ipynb
├── 2_nb2.ipynb
├── 3_ag1.ipynb
├── 4_ag2.ipynb
├── 5_rf1.ipynb
├── 6_rf2.ipynb
├── 7_lgb1.ipynb
├── 8_lgb2.ipynb
├── 9_lgb3.ipynb
└── README
├── lives_ml_na_industria
├── README
├── continuous_factory_process.csv
├── nb1.ipynb
├── nb2.ipynb
├── nb3.ipynb
└── notes_on_dataset.txt
└── shap_outubro_rosa.ipynb
/5_dias_de_kaggle/README:
--------------------------------------------------------------------------------
1 | Material dos vídeos do desafio "5 dias de Kaggle"
2 | https://www.youtube.com/watch?v=B62dzeqFgPI&list=PLwnip85KhroUbNQcnhCF4cnRT7TTtGQ8F
3 |
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | Aqui você encontra os notebooks dos tutoriais de alguns vídeos do meu canal no YouTube
2 |
3 | http://youtube.com/mariofilhoml
4 |
--------------------------------------------------------------------------------
/agente-ia-ferramentas/README.md:
--------------------------------------------------------------------------------
1 | Script para rodar um agente de IA que usa ferramentas, criado no vídeo https://youtu.be/RT4WYDq2ZkU
--------------------------------------------------------------------------------
/agente-ia-ferramentas/agente_do_zero.py:
--------------------------------------------------------------------------------
1 | import litellm
2 | import json
3 | import os
4 |
class Tool:
    """Pair a callable with the metadata used to advertise it to the model.

    Attributes:
        name: Identifier the model uses to request the tool.
        description: Human-readable explanation of what the tool does.
        func: Callable executed when the tool is requested.
        parameters: JSON-schema-style dict describing the tool's arguments.
        schema: Function-tool dict built once from the fields above.
    """

    def __init__(self, name, description, func, parameters):
        self.name, self.description = name, description
        self.func, self.parameters = func, parameters
        # Built last because it reads the attributes assigned above.
        self.schema = self._generate_schema()

    def _generate_schema(self):
        """Return the ``{"type": "function", "function": {...}}`` dict."""
        function_spec = dict(
            name=self.name,
            description=self.description,
            parameters=self.parameters,
        )
        return dict(type="function", function=function_spec)
22 |
class Agent:
    """Interactive console agent that lets the model call local tools.

    The agent keeps the whole chat history in ``self.conversation`` and sends
    it to ``litellm.completion`` on every turn. When the model answers with
    tool calls, each requested tool is executed locally, its result is added
    to the history as a ``tool`` message, and the model is asked again for
    the final reply.

    Args:
        tools: Iterable of tool objects exposing ``name``, ``func`` and
            ``schema`` attributes. ``None`` means "no tools".
        model: Model identifier forwarded to ``litellm.completion``.
    """

    def __init__(self, tools=None, model="gpt-4.1"):
        self.conversation = []
        # Normalise None so iterating over the tools never raises TypeError.
        self.tools = tools if tools is not None else []
        self.model = model

    def _execute_tool(self, tool_call):
        """Run the tool named in ``tool_call`` and return its result as text.

        Never raises: an unknown tool, malformed JSON arguments, or an
        exception inside the tool are all turned into a message string so
        the model can see what went wrong and the chat loop keeps running.
        """
        tool_name = tool_call.function.name
        # First tool with a matching name wins.
        tool = next((t for t in self.tools if t.name == tool_name), None)
        if tool is None:
            return f"Ferramenta {tool_name} não encontrada"

        try:
            tool_args = json.loads(tool_call.function.arguments or "{}")
            return str(tool.func(**tool_args))
        except Exception as e:
            # Deliberately broad: this is the boundary around arbitrary tool
            # code, and every failure must be reported back to the model.
            print(e)
            return f"Erro ao executar {tool_name}: {str(e)}"

    def _handle_tool_calls(self, message):
        """Execute every tool call in ``message`` and print the model's follow-up.

        The assistant message carrying the tool calls is stored first, then
        one ``tool`` message per call, then the model's final answer.
        """
        self.conversation.append(message.model_dump())

        for tool_call in message.tool_calls:
            self.conversation.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": self._execute_tool(tool_call),
            })

        response = litellm.completion(
            model=self.model,
            messages=self.conversation,
        )

        response_message = response.choices[0].message.content
        self.conversation.append({
            "role": "assistant",
            "content": response_message,
        })
        print(f"Agente: {response_message}")

    def run(self):
        """Read user input in a loop until the user types ``sair``."""
        print("Agente iniciado")

        while True:
            user_input = input("Voce: ")

            if user_input.lower() == "sair":
                print("Tchau")
                break

            self.conversation.append({"role": "user", "content": user_input})

            request = {"model": self.model, "messages": self.conversation}
            tools_schemas = [tool.schema for tool in self.tools]
            if tools_schemas:
                # Only send ``tools`` when there is something to offer;
                # NOTE(review): providers may reject an empty list - confirm.
                request["tools"] = tools_schemas

            response = litellm.completion(**request)
            assistant_message = response.choices[0].message

            if assistant_message.tool_calls:
                self._handle_tool_calls(assistant_message)
            else:
                # Store and print the text of the reply, not the message
                # object, so the history stays a list of plain dicts.
                reply = assistant_message.content
                self.conversation.append({"role": "assistant", "content": reply})
                print(f"Agente: {reply}")
99 |
def list_files():
    """Return the entry names of the current working directory, one per line."""
    return "\n".join(os.listdir("."))
103 |
def read_file(file_path):
    """Return the full text of ``file_path``, decoded as UTF-8."""
    with open(file_path, mode="r", encoding="utf-8") as handle:
        contents = handle.read()
    return contents
107 |
def write_file(file_path, content):
    """Overwrite ``file_path`` with ``content`` (UTF-8) and return a confirmation.

    The file is opened in ``"w"`` mode, so any previous contents are
    truncated. The returned message echoes ``content`` back to the caller.
    """
    with open(file_path, mode="w", encoding="utf-8") as handle:
        handle.write(content)
    confirmation = f"Conteúdo {content} escrito para o arquivo"
    return confirmation
112 |
if __name__ == "__main__":
    # Script entry point: declare the three file tools and start the chat loop.

    # Takes no arguments, hence the empty "properties" / "required".
    list_files_tool = Tool(
        name="list_files",
        description="Lista os arquivos disponíveis no diretório atual",
        func=list_files,
        parameters={
            "type": "object",
            "properties": {},
            "required": [],
        },
    )

    read_file_tool = Tool(
        name="read_file",
        description="Lê o arquivo especificado por file_path",
        func=read_file,
        parameters={
            "type": "object",
            "properties": {
                "file_path": {
                    "type": "string",
                    "description": "Caminho para o arquivo a ser lido",
                },
            },
            "required": ["file_path"],
        },
    )

    write_file_tool = Tool(
        name="write_file",
        description="Trunca e escreve para o arquivo especificado por file_path",
        func=write_file,
        parameters={
            "type": "object",
            "properties": {
                "file_path": {
                    "type": "string",
                    # Was "a ser lido" (copied from read_file); the model reads
                    # this text, so it must describe the file being written.
                    "description": "Caminho para o arquivo a ser escrito",
                },
                "content": {
                    "type": "string",
                    "description": "Conteúdo para escrever dentro do arquivo",
                },
            },
            "required": ["file_path", "content"],
        },
    )

    agent = Agent(tools=[list_files_tool, read_file_tool, write_file_tool])
    agent.run()
--------------------------------------------------------------------------------
/copa_america/0.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 121,
6 | "id": "e7b18d84-49ad-40f1-a56e-f2c03cea8fc0",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd\n",
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 122,
17 | "id": "67296ad3-172f-4aba-ace0-08d0051382a2",
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "data = pd.read_csv(\"data.csv\", parse_dates=['Data'])"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 123,
27 | "id": "e60588c6-5d3f-4374-99f5-37517354f958",
28 | "metadata": {},
29 | "outputs": [
30 | {
31 | "data": {
32 | "text/html": [
33 | "
\n",
34 | "\n",
47 | "
\n",
48 | " \n",
49 | " \n",
50 | " | \n",
51 | " Data | \n",
52 | " Casa | \n",
53 | " Fora | \n",
54 | " Gols Casa | \n",
55 | " Gols Fora | \n",
56 | " Edição | \n",
57 | " Fase | \n",
58 | "
\n",
59 | " \n",
60 | " \n",
61 | " \n",
62 | " | 0 | \n",
63 | " 2001-07-11 | \n",
64 | " Equador | \n",
65 | " Chile | \n",
66 | " 1.0 | \n",
67 | " 4.0 | \n",
68 | " 2001 | \n",
69 | " Grupo A | \n",
70 | "
\n",
71 | " \n",
72 | " | 1 | \n",
73 | " 2001-07-11 | \n",
74 | " Colombia | \n",
75 | " Venezuela | \n",
76 | " 2.0 | \n",
77 | " 0.0 | \n",
78 | " 2001 | \n",
79 | " Grupo A | \n",
80 | "
\n",
81 | " \n",
82 | " | 2 | \n",
83 | " 2001-07-14 | \n",
84 | " Chile | \n",
85 | " Venezuela | \n",
86 | " 1.0 | \n",
87 | " 0.0 | \n",
88 | " 2001 | \n",
89 | " Grupo A | \n",
90 | "
\n",
91 | " \n",
92 | " | 3 | \n",
93 | " 2001-07-14 | \n",
94 | " Colombia | \n",
95 | " Equador | \n",
96 | " 1.0 | \n",
97 | " 0.0 | \n",
98 | " 2001 | \n",
99 | " Grupo A | \n",
100 | "
\n",
101 | " \n",
102 | " | 4 | \n",
103 | " 2001-07-17 | \n",
104 | " Equador | \n",
105 | " Venezuela | \n",
106 | " 4.0 | \n",
107 | " 0.0 | \n",
108 | " 2001 | \n",
109 | " Grupo A | \n",
110 | "
\n",
111 | " \n",
112 | " | ... | \n",
113 | " ... | \n",
114 | " ... | \n",
115 | " ... | \n",
116 | " ... | \n",
117 | " ... | \n",
118 | " ... | \n",
119 | " ... | \n",
120 | "
\n",
121 | " \n",
122 | " | 207 | \n",
123 | " 2021-06-27 | \n",
124 | " Venezuela | \n",
125 | " Peru | \n",
126 | " 0.0 | \n",
127 | " 1.0 | \n",
128 | " 2021 | \n",
129 | " Grupo B | \n",
130 | "
\n",
131 | " \n",
132 | " | 208 | \n",
133 | " 2021-07-02 | \n",
134 | " Peru | \n",
135 | " Paraguai | \n",
136 | " NaN | \n",
137 | " NaN | \n",
138 | " 2021 | \n",
139 | " Quartas | \n",
140 | "
\n",
141 | " \n",
142 | " | 209 | \n",
143 | " 2021-07-02 | \n",
144 | " Brasil | \n",
145 | " Chile | \n",
146 | " NaN | \n",
147 | " NaN | \n",
148 | " 2021 | \n",
149 | " Quartas | \n",
150 | "
\n",
151 | " \n",
152 | " | 210 | \n",
153 | " 2021-07-03 | \n",
154 | " Uruguai | \n",
155 | " Colombia | \n",
156 | " NaN | \n",
157 | " NaN | \n",
158 | " 2021 | \n",
159 | " Quartas | \n",
160 | "
\n",
161 | " \n",
162 | " | 211 | \n",
163 | " 2021-07-03 | \n",
164 | " Argentina | \n",
165 | " Equador | \n",
166 | " NaN | \n",
167 | " NaN | \n",
168 | " 2021 | \n",
169 | " Quartas | \n",
170 | "
\n",
171 | " \n",
172 | "
\n",
173 | "
212 rows × 7 columns
\n",
174 | "
"
175 | ],
176 | "text/plain": [
177 | " Data Casa Fora Gols Casa Gols Fora Edição Fase\n",
178 | "0 2001-07-11 Equador Chile 1.0 4.0 2001 Grupo A\n",
179 | "1 2001-07-11 Colombia Venezuela 2.0 0.0 2001 Grupo A\n",
180 | "2 2001-07-14 Chile Venezuela 1.0 0.0 2001 Grupo A\n",
181 | "3 2001-07-14 Colombia Equador 1.0 0.0 2001 Grupo A\n",
182 | "4 2001-07-17 Equador Venezuela 4.0 0.0 2001 Grupo A\n",
183 | ".. ... ... ... ... ... ... ...\n",
184 | "207 2021-06-27 Venezuela Peru 0.0 1.0 2021 Grupo B\n",
185 | "208 2021-07-02 Peru Paraguai NaN NaN 2021 Quartas\n",
186 | "209 2021-07-02 Brasil Chile NaN NaN 2021 Quartas\n",
187 | "210 2021-07-03 Uruguai Colombia NaN NaN 2021 Quartas\n",
188 | "211 2021-07-03 Argentina Equador NaN NaN 2021 Quartas\n",
189 | "\n",
190 | "[212 rows x 7 columns]"
191 | ]
192 | },
193 | "execution_count": 123,
194 | "metadata": {},
195 | "output_type": "execute_result"
196 | }
197 | ],
198 | "source": [
199 | "data"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": 124,
205 | "id": "afd2bb8b-ad59-4c02-a5ee-dfcd9221c6c4",
206 | "metadata": {},
207 | "outputs": [],
208 | "source": [
209 | "resultados = list()\n",
210 | "for gc, gf in data[['Gols Casa', 'Gols Fora']].values:\n",
211 | " if gc > gf:\n",
212 | " resultados.append(1)\n",
213 | " elif gf > gc:\n",
214 | " resultados.append(-1)\n",
215 | " else:\n",
216 | " resultados.append(0)\n",
217 | "data['resultados'] = resultados"
218 | ]
219 | },
220 | {
221 | "cell_type": "code",
222 | "execution_count": 125,
223 | "id": "9fa91a5b-112a-438e-9e89-fd292db0dbc6",
224 | "metadata": {},
225 | "outputs": [],
226 | "source": [
227 | "ix = [\"Edição\",\"time\"]\n",
228 | "grupos = data[data['Fase'].str.contains(\"Grupo\")]\n",
229 | "newnames_casa = {\"Casa\": \"time\", -1: \"derrota\", 0:\"empate\", 1:\"vitoria\"}\n",
230 | "newnames_fora = {\"Fora\": \"time\", -1: \"vitoria\", 0:\"empate\", 1:\"derrota\"}\n",
231 | "grupos_casa = grupos.groupby([\"Edição\", \"Casa\", 'resultados']).size().unstack().fillna(0).reset_index().rename(columns=newnames_casa).set_index(ix)\n",
232 | "grupos_fora = grupos.groupby([\"Edição\", \"Fora\", 'resultados']).size().unstack().fillna(0).reset_index().rename(columns=newnames_fora).set_index(ix)\n",
233 | "partidas = grupos_casa.add(grupos_fora, fill_value = 0).reset_index()"
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": 126,
239 | "id": "7854bfc0-573b-4713-8b50-85becb589f2a",
240 | "metadata": {},
241 | "outputs": [],
242 | "source": [
243 | "ix = [\"Edição\",\"time\"]\n",
244 | "newnames_gcasa = {\"Casa\": \"time\", 'Gols Casa': \"marcados\", 'Gols Fora': \"sofridos\"}\n",
245 | "gols_casa = grupos.groupby([\"Edição\", \"Casa\"])[['Gols Casa', 'Gols Fora']].sum().reset_index().rename(columns=newnames_gcasa).set_index(ix)\n",
246 | "newnames_gfora = {\"Fora\": \"time\", 'Gols Casa': \"sofridos\", 'Gols Fora': \"marcados\"}\n",
247 | "gols_fora = grupos.groupby([\"Edição\", \"Fora\"])[['Gols Casa', 'Gols Fora']].sum().reset_index().rename(columns=newnames_gfora).set_index(ix)\n",
248 | "gols = gols_casa.add(gols_fora, fill_value=0).reset_index()"
249 | ]
250 | },
251 | {
252 | "cell_type": "code",
253 | "execution_count": 127,
254 | "id": "185a3874-ae62-4326-b3a7-770c5b47e5bc",
255 | "metadata": {},
256 | "outputs": [],
257 | "source": [
258 | "non_grupos = grupos = data[~data['Fase'].str.contains(\"Grupo\")][['Edição','Casa', 'Fora', 'resultados']]"
259 | ]
260 | },
261 | {
262 | "cell_type": "code",
263 | "execution_count": 128,
264 | "id": "6151044b-e214-4756-a2be-c2c0ff360404",
265 | "metadata": {},
266 | "outputs": [],
267 | "source": [
268 | "df = pd.merge(non_grupos, partidas, left_on=[\"Edição\",\"Casa\"], right_on=[\"Edição\",\"time\"], how='left')\n",
269 | "df = pd.merge(df, partidas, left_on=[\"Edição\",\"Fora\"], right_on=[\"Edição\",\"time\"], how='left', suffixes=[\"_casa\", \"_fora\"])\n",
270 | "df = pd.merge(df, gols, left_on=[\"Edição\",\"Casa\"], right_on=[\"Edição\",\"time\"], how='left')\n",
271 | "df = pd.merge(df, gols, left_on=[\"Edição\",\"Fora\"], right_on=[\"Edição\",\"time\"], how='left', suffixes=[\"_casa\", \"_fora\"])"
272 | ]
273 | },
274 | {
275 | "cell_type": "code",
276 | "execution_count": 129,
277 | "id": "1a00d613-7564-41cc-aff1-581cb00af81b",
278 | "metadata": {},
279 | "outputs": [
280 | {
281 | "data": {
282 | "text/html": [
283 | "\n",
284 | "\n",
297 | "
\n",
298 | " \n",
299 | " \n",
300 | " | \n",
301 | " Edição | \n",
302 | " Casa | \n",
303 | " Fora | \n",
304 | " resultados | \n",
305 | " time_casa | \n",
306 | " derrota_casa | \n",
307 | " empate_casa | \n",
308 | " vitoria_casa | \n",
309 | " time_fora | \n",
310 | " derrota_fora | \n",
311 | " empate_fora | \n",
312 | " vitoria_fora | \n",
313 | " time_casa | \n",
314 | " marcados_casa | \n",
315 | " sofridos_casa | \n",
316 | " time_fora | \n",
317 | " marcados_fora | \n",
318 | " sofridos_fora | \n",
319 | "
\n",
320 | " \n",
321 | " \n",
322 | " \n",
323 | " | 0 | \n",
324 | " 2001 | \n",
325 | " Chile | \n",
326 | " Mexico | \n",
327 | " -1 | \n",
328 | " Chile | \n",
329 | " 1.0 | \n",
330 | " 0.0 | \n",
331 | " 2.0 | \n",
332 | " Mexico | \n",
333 | " 1.0 | \n",
334 | " 1.0 | \n",
335 | " 1.0 | \n",
336 | " Chile | \n",
337 | " 5.0 | \n",
338 | " 3.0 | \n",
339 | " Mexico | \n",
340 | " 1.0 | \n",
341 | " 1.0 | \n",
342 | "
\n",
343 | " \n",
344 | " | 1 | \n",
345 | " 2001 | \n",
346 | " Costa Rica | \n",
347 | " Uruguai | \n",
348 | " -1 | \n",
349 | " Costa Rica | \n",
350 | " 0.0 | \n",
351 | " 1.0 | \n",
352 | " 2.0 | \n",
353 | " Uruguai | \n",
354 | " 1.0 | \n",
355 | " 1.0 | \n",
356 | " 1.0 | \n",
357 | " Costa Rica | \n",
358 | " 6.0 | \n",
359 | " 1.0 | \n",
360 | " Uruguai | \n",
361 | " 2.0 | \n",
362 | " 2.0 | \n",
363 | "
\n",
364 | " \n",
365 | " | 2 | \n",
366 | " 2001 | \n",
367 | " Colombia | \n",
368 | " Peru | \n",
369 | " 1 | \n",
370 | " Colombia | \n",
371 | " 0.0 | \n",
372 | " 0.0 | \n",
373 | " 3.0 | \n",
374 | " Peru | \n",
375 | " 1.0 | \n",
376 | " 1.0 | \n",
377 | " 1.0 | \n",
378 | " Colombia | \n",
379 | " 5.0 | \n",
380 | " 0.0 | \n",
381 | " Peru | \n",
382 | " 4.0 | \n",
383 | " 5.0 | \n",
384 | "
\n",
385 | " \n",
386 | " | 3 | \n",
387 | " 2001 | \n",
388 | " Honduras | \n",
389 | " Brasil | \n",
390 | " 1 | \n",
391 | " Honduras | \n",
392 | " 1.0 | \n",
393 | " 0.0 | \n",
394 | " 2.0 | \n",
395 | " Brasil | \n",
396 | " 1.0 | \n",
397 | " 0.0 | \n",
398 | " 2.0 | \n",
399 | " Honduras | \n",
400 | " 3.0 | \n",
401 | " 1.0 | \n",
402 | " Brasil | \n",
403 | " 5.0 | \n",
404 | " 2.0 | \n",
405 | "
\n",
406 | " \n",
407 | " | 4 | \n",
408 | " 2001 | \n",
409 | " Mexico | \n",
410 | " Uruguai | \n",
411 | " 1 | \n",
412 | " Mexico | \n",
413 | " 1.0 | \n",
414 | " 1.0 | \n",
415 | " 1.0 | \n",
416 | " Uruguai | \n",
417 | " 1.0 | \n",
418 | " 1.0 | \n",
419 | " 1.0 | \n",
420 | " Mexico | \n",
421 | " 1.0 | \n",
422 | " 1.0 | \n",
423 | " Uruguai | \n",
424 | " 2.0 | \n",
425 | " 2.0 | \n",
426 | "
\n",
427 | " \n",
428 | "
\n",
429 | "
"
430 | ],
431 | "text/plain": [
432 | " Edição Casa Fora resultados time_casa derrota_casa \\\n",
433 | "0 2001 Chile Mexico -1 Chile 1.0 \n",
434 | "1 2001 Costa Rica Uruguai -1 Costa Rica 0.0 \n",
435 | "2 2001 Colombia Peru 1 Colombia 0.0 \n",
436 | "3 2001 Honduras Brasil 1 Honduras 1.0 \n",
437 | "4 2001 Mexico Uruguai 1 Mexico 1.0 \n",
438 | "\n",
439 | " empate_casa vitoria_casa time_fora derrota_fora empate_fora \\\n",
440 | "0 0.0 2.0 Mexico 1.0 1.0 \n",
441 | "1 1.0 2.0 Uruguai 1.0 1.0 \n",
442 | "2 0.0 3.0 Peru 1.0 1.0 \n",
443 | "3 0.0 2.0 Brasil 1.0 0.0 \n",
444 | "4 1.0 1.0 Uruguai 1.0 1.0 \n",
445 | "\n",
446 | " vitoria_fora time_casa marcados_casa sofridos_casa time_fora \\\n",
447 | "0 1.0 Chile 5.0 3.0 Mexico \n",
448 | "1 1.0 Costa Rica 6.0 1.0 Uruguai \n",
449 | "2 1.0 Colombia 5.0 0.0 Peru \n",
450 | "3 2.0 Honduras 3.0 1.0 Brasil \n",
451 | "4 1.0 Mexico 1.0 1.0 Uruguai \n",
452 | "\n",
453 | " marcados_fora sofridos_fora \n",
454 | "0 1.0 1.0 \n",
455 | "1 2.0 2.0 \n",
456 | "2 4.0 5.0 \n",
457 | "3 5.0 2.0 \n",
458 | "4 2.0 2.0 "
459 | ]
460 | },
461 | "execution_count": 129,
462 | "metadata": {},
463 | "output_type": "execute_result"
464 | }
465 | ],
466 | "source": [
467 | "df.head()"
468 | ]
469 | },
470 | {
471 | "cell_type": "code",
472 | "execution_count": 146,
473 | "id": "f540b9f1-4057-4721-81dc-649c80d47448",
474 | "metadata": {},
475 | "outputs": [],
476 | "source": [
477 | "\n",
478 | "from sklearn.linear_model import LogisticRegression\n",
479 | "from sklearn.ensemble import RandomForestClassifier\n",
480 | "from sklearn.metrics import log_loss, roc_auc_score, classification_report, f1_score"
481 | ]
482 | },
483 | {
484 | "cell_type": "code",
485 | "execution_count": 135,
486 | "id": "d9a516d4-c964-4ab2-aefe-86db91997fd2",
487 | "metadata": {},
488 | "outputs": [
489 | {
490 | "data": {
491 | "text/plain": [
492 | "0.45783730158730157"
493 | ]
494 | },
495 | "execution_count": 135,
496 | "metadata": {},
497 | "output_type": "execute_result"
498 | }
499 | ],
500 | "source": [
501 | "0.45783730158730157"
502 | ]
503 | },
504 | {
505 | "cell_type": "code",
506 | "execution_count": 142,
507 | "id": "6bc669d6-deb1-4183-86c9-2254b7cf0ca4",
508 | "metadata": {},
509 | "outputs": [
510 | {
511 | "name": "stdout",
512 | "output_type": "stream",
513 | "text": [
514 | "Ano: 2004 | LL: 2.9514717901829335 | AUC: 0.22916666666666666\n",
515 | "\n",
516 | "Ano: 2007 | LL: 1.1069725818244613 | AUC: 0.690873015873016\n",
517 | "\n",
518 | "Ano: 2011 | LL: 3.32783622818049 | AUC: 0.25555555555555554\n",
519 | "\n",
520 | "Ano: 2015 | LL: 1.3715442860035738 | AUC: 0.47142857142857136\n",
521 | "\n",
522 | "Ano: 2016 | LL: 1.2713562077495835 | AUC: 0.6111111111111112\n",
523 | "\n",
524 | "Ano: 2019 | LL: 1.4586912545073727 | AUC: 0.4888888888888889\n",
525 | "\n",
526 | "0.45783730158730157\n"
527 | ]
528 | },
529 | {
530 | "name": "stderr",
531 | "output_type": "stream",
532 | "text": [
533 | "/Users/mariofilho/miniconda3/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:763: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
534 | "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
535 | "\n",
536 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
537 | " https://scikit-learn.org/stable/modules/preprocessing.html\n",
538 | "Please also refer to the documentation for alternative solver options:\n",
539 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
540 | " n_iter_i = _check_optimize_result(\n",
541 | "/Users/mariofilho/miniconda3/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:763: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
542 | "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
543 | "\n",
544 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
545 | " https://scikit-learn.org/stable/modules/preprocessing.html\n",
546 | "Please also refer to the documentation for alternative solver options:\n",
547 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
548 | " n_iter_i = _check_optimize_result(\n"
549 | ]
550 | }
551 | ],
552 | "source": [
553 | "auc = list()\n",
554 | "for e in [2004, 2007, 2011, 2015, 2016, 2019]:\n",
555 | " dftr = df[df['Edição'] < e]\n",
556 | " dfval = df[df['Edição'] == e]\n",
557 | " f = ['derrota_casa','empate_casa', 'vitoria_casa', 'derrota_fora',\n",
558 | " 'empate_fora', 'vitoria_fora', 'marcados_casa',\n",
559 | " 'sofridos_casa', 'marcados_fora', 'sofridos_fora']\n",
560 | " \n",
561 | " Xtr = dftr[f]\n",
562 | " ytr = dftr['resultados']\n",
563 | " Xval = dfval[f]\n",
564 | " yval = dfval['resultados']\n",
565 | " \n",
566 | " #print(dftr.shape)\n",
567 | " #print(dfval.shape)\n",
568 | " mdl = LogisticRegression(C=1, class_weight='balanced')\n",
569 | " #mdl = RandomForestClassifier(n_jobs=6, min_samples_leaf=2, n_estimators=100, random_state=0)\n",
570 | " mdl.fit(Xtr, ytr)\n",
571 | " \n",
572 | " \n",
573 | " p = mdl.predict_proba(Xval)\n",
574 | " p_ = mdl.predict(Xval)\n",
575 | " auc_ = roc_auc_score(yval,p,multi_class='ovr')\n",
576 | " f1 = \n",
577 | " print(\"Ano: {} | LL: {} | AUC: {}\".format(e, log_loss(yval, p), auc_))\n",
578 | " auc.append(auc_)\n",
579 | " #print(classification_report(yval, p_))\n",
580 | " print()\n",
581 | "print(np.mean(auc))\n",
582 | " "
583 | ]
584 | },
585 | {
586 | "cell_type": "code",
587 | "execution_count": 143,
588 | "id": "8226b29f-ff12-443f-bbe9-1c8fea85fc36",
589 | "metadata": {},
590 | "outputs": [
591 | {
592 | "data": {
593 | "text/plain": [
594 | "array([ 1, 0, 1, 0, 1, 0, -1, 1])"
595 | ]
596 | },
597 | "execution_count": 143,
598 | "metadata": {},
599 | "output_type": "execute_result"
600 | }
601 | ],
602 | "source": [
603 | "p_"
604 | ]
605 | },
606 | {
607 | "cell_type": "code",
608 | "execution_count": 145,
609 | "id": "9e186daf-9613-4898-9b22-d84984203b8b",
610 | "metadata": {},
611 | "outputs": [
612 | {
613 | "data": {
614 | "text/html": [
615 | "\n",
616 | "\n",
629 | "
\n",
630 | " \n",
631 | " \n",
632 | " | \n",
633 | " Edição | \n",
634 | " Casa | \n",
635 | " Fora | \n",
636 | " resultados | \n",
637 | " time_casa | \n",
638 | " derrota_casa | \n",
639 | " empate_casa | \n",
640 | " vitoria_casa | \n",
641 | " time_fora | \n",
642 | " derrota_fora | \n",
643 | " empate_fora | \n",
644 | " vitoria_fora | \n",
645 | " time_casa | \n",
646 | " marcados_casa | \n",
647 | " sofridos_casa | \n",
648 | " time_fora | \n",
649 | " marcados_fora | \n",
650 | " sofridos_fora | \n",
651 | "
\n",
652 | " \n",
653 | " \n",
654 | " \n",
655 | " | 48 | \n",
656 | " 2019 | \n",
657 | " Brasil | \n",
658 | " Paraguai | \n",
659 | " 0 | \n",
660 | " Brasil | \n",
661 | " 0.0 | \n",
662 | " 1.0 | \n",
663 | " 2.0 | \n",
664 | " Paraguai | \n",
665 | " 1.0 | \n",
666 | " 2.0 | \n",
667 | " 0.0 | \n",
668 | " Brasil | \n",
669 | " 8.0 | \n",
670 | " 0.0 | \n",
671 | " Paraguai | \n",
672 | " 3.0 | \n",
673 | " 4.0 | \n",
674 | "
\n",
675 | " \n",
676 | " | 49 | \n",
677 | " 2019 | \n",
678 | " Venezuela | \n",
679 | " Argentina | \n",
680 | " -1 | \n",
681 | " Venezuela | \n",
682 | " 0.0 | \n",
683 | " 2.0 | \n",
684 | " 1.0 | \n",
685 | " Argentina | \n",
686 | " 1.0 | \n",
687 | " 1.0 | \n",
688 | " 1.0 | \n",
689 | " Venezuela | \n",
690 | " 3.0 | \n",
691 | " 1.0 | \n",
692 | " Argentina | \n",
693 | " 3.0 | \n",
694 | " 3.0 | \n",
695 | "
\n",
696 | " \n",
697 | " | 50 | \n",
698 | " 2019 | \n",
699 | " Colombia | \n",
700 | " Chile | \n",
701 | " 0 | \n",
702 | " Colombia | \n",
703 | " 0.0 | \n",
704 | " 0.0 | \n",
705 | " 3.0 | \n",
706 | " Chile | \n",
707 | " 1.0 | \n",
708 | " 0.0 | \n",
709 | " 2.0 | \n",
710 | " Colombia | \n",
711 | " 4.0 | \n",
712 | " 0.0 | \n",
713 | " Chile | \n",
714 | " 6.0 | \n",
715 | " 2.0 | \n",
716 | "
\n",
717 | " \n",
718 | " | 51 | \n",
719 | " 2019 | \n",
720 | " Uruguai | \n",
721 | " Peru | \n",
722 | " 0 | \n",
723 | " Uruguai | \n",
724 | " 0.0 | \n",
725 | " 1.0 | \n",
726 | " 2.0 | \n",
727 | " Peru | \n",
728 | " 1.0 | \n",
729 | " 1.0 | \n",
730 | " 1.0 | \n",
731 | " Uruguai | \n",
732 | " 7.0 | \n",
733 | " 2.0 | \n",
734 | " Peru | \n",
735 | " 3.0 | \n",
736 | " 6.0 | \n",
737 | "
\n",
738 | " \n",
739 | " | 52 | \n",
740 | " 2019 | \n",
741 | " Brasil | \n",
742 | " Argentina | \n",
743 | " 1 | \n",
744 | " Brasil | \n",
745 | " 0.0 | \n",
746 | " 1.0 | \n",
747 | " 2.0 | \n",
748 | " Argentina | \n",
749 | " 1.0 | \n",
750 | " 1.0 | \n",
751 | " 1.0 | \n",
752 | " Brasil | \n",
753 | " 8.0 | \n",
754 | " 0.0 | \n",
755 | " Argentina | \n",
756 | " 3.0 | \n",
757 | " 3.0 | \n",
758 | "
\n",
759 | " \n",
760 | " | 53 | \n",
761 | " 2019 | \n",
762 | " Chile | \n",
763 | " Peru | \n",
764 | " -1 | \n",
765 | " Chile | \n",
766 | " 1.0 | \n",
767 | " 0.0 | \n",
768 | " 2.0 | \n",
769 | " Peru | \n",
770 | " 1.0 | \n",
771 | " 1.0 | \n",
772 | " 1.0 | \n",
773 | " Chile | \n",
774 | " 6.0 | \n",
775 | " 2.0 | \n",
776 | " Peru | \n",
777 | " 3.0 | \n",
778 | " 6.0 | \n",
779 | "
\n",
780 | " \n",
781 | " | 54 | \n",
782 | " 2019 | \n",
783 | " Argentina | \n",
784 | " Chile | \n",
785 | " 1 | \n",
786 | " Argentina | \n",
787 | " 1.0 | \n",
788 | " 1.0 | \n",
789 | " 1.0 | \n",
790 | " Chile | \n",
791 | " 1.0 | \n",
792 | " 0.0 | \n",
793 | " 2.0 | \n",
794 | " Argentina | \n",
795 | " 3.0 | \n",
796 | " 3.0 | \n",
797 | " Chile | \n",
798 | " 6.0 | \n",
799 | " 2.0 | \n",
800 | "
\n",
801 | " \n",
802 | " | 55 | \n",
803 | " 2019 | \n",
804 | " Brasil | \n",
805 | " Peru | \n",
806 | " 1 | \n",
807 | " Brasil | \n",
808 | " 0.0 | \n",
809 | " 1.0 | \n",
810 | " 2.0 | \n",
811 | " Peru | \n",
812 | " 1.0 | \n",
813 | " 1.0 | \n",
814 | " 1.0 | \n",
815 | " Brasil | \n",
816 | " 8.0 | \n",
817 | " 0.0 | \n",
818 | " Peru | \n",
819 | " 3.0 | \n",
820 | " 6.0 | \n",
821 | "
\n",
822 | " \n",
823 | "
\n",
824 | "
"
825 | ],
826 | "text/plain": [
827 | " Edição Casa Fora resultados time_casa derrota_casa \\\n",
828 | "48 2019 Brasil Paraguai 0 Brasil 0.0 \n",
829 | "49 2019 Venezuela Argentina -1 Venezuela 0.0 \n",
830 | "50 2019 Colombia Chile 0 Colombia 0.0 \n",
831 | "51 2019 Uruguai Peru 0 Uruguai 0.0 \n",
832 | "52 2019 Brasil Argentina 1 Brasil 0.0 \n",
833 | "53 2019 Chile Peru -1 Chile 1.0 \n",
834 | "54 2019 Argentina Chile 1 Argentina 1.0 \n",
835 | "55 2019 Brasil Peru 1 Brasil 0.0 \n",
836 | "\n",
837 | " empate_casa vitoria_casa time_fora derrota_fora empate_fora \\\n",
838 | "48 1.0 2.0 Paraguai 1.0 2.0 \n",
839 | "49 2.0 1.0 Argentina 1.0 1.0 \n",
840 | "50 0.0 3.0 Chile 1.0 0.0 \n",
841 | "51 1.0 2.0 Peru 1.0 1.0 \n",
842 | "52 1.0 2.0 Argentina 1.0 1.0 \n",
843 | "53 0.0 2.0 Peru 1.0 1.0 \n",
844 | "54 1.0 1.0 Chile 1.0 0.0 \n",
845 | "55 1.0 2.0 Peru 1.0 1.0 \n",
846 | "\n",
847 | " vitoria_fora time_casa marcados_casa sofridos_casa time_fora \\\n",
848 | "48 0.0 Brasil 8.0 0.0 Paraguai \n",
849 | "49 1.0 Venezuela 3.0 1.0 Argentina \n",
850 | "50 2.0 Colombia 4.0 0.0 Chile \n",
851 | "51 1.0 Uruguai 7.0 2.0 Peru \n",
852 | "52 1.0 Brasil 8.0 0.0 Argentina \n",
853 | "53 1.0 Chile 6.0 2.0 Peru \n",
854 | "54 2.0 Argentina 3.0 3.0 Chile \n",
855 | "55 1.0 Brasil 8.0 0.0 Peru \n",
856 | "\n",
857 | " marcados_fora sofridos_fora \n",
858 | "48 3.0 4.0 \n",
859 | "49 3.0 3.0 \n",
860 | "50 6.0 2.0 \n",
861 | "51 3.0 6.0 \n",
862 | "52 3.0 3.0 \n",
863 | "53 3.0 6.0 \n",
864 | "54 6.0 2.0 \n",
865 | "55 3.0 6.0 "
866 | ]
867 | },
868 | "execution_count": 145,
869 | "metadata": {},
870 | "output_type": "execute_result"
871 | }
872 | ],
873 | "source": [
874 | "dfval"
875 | ]
876 | },
877 | {
878 | "cell_type": "code",
879 | "execution_count": null,
880 | "id": "90b55cff-aa80-4391-b47c-bb2c62e95346",
881 | "metadata": {},
882 | "outputs": [],
883 | "source": []
884 | }
885 | ],
886 | "metadata": {
887 | "kernelspec": {
888 | "display_name": "Python 3",
889 | "language": "python",
890 | "name": "python3"
891 | },
892 | "language_info": {
893 | "codemirror_mode": {
894 | "name": "ipython",
895 | "version": 3
896 | },
897 | "file_extension": ".py",
898 | "mimetype": "text/x-python",
899 | "name": "python",
900 | "nbconvert_exporter": "python",
901 | "pygments_lexer": "ipython3",
902 | "version": "3.8.5"
903 | }
904 | },
905 | "nbformat": 4,
906 | "nbformat_minor": 5
907 | }
908 |
--------------------------------------------------------------------------------
/copa_america/README:
--------------------------------------------------------------------------------
1 | Notebook para prever o campeão da Copa América
2 |
--------------------------------------------------------------------------------
/deploy_sql/README:
--------------------------------------------------------------------------------
1 | Scripts para o tutorial de deploy de um modelo de machine learning que precisa rodar todos os dias e colocar as previsões num banco de dados SQL
2 |
3 | Eu acabei deletando o notebook do modelo por acidente, por isso ele não está aqui 🤦♂️
4 |
--------------------------------------------------------------------------------
/deploy_sql/deploy_tip_model_.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from validator import schema
3 | import joblib
4 | import schedule
5 |
def load_data(yesterday, today):
    """Load the trips picked up in the half-open window ``[yesterday, today)``.

    Args:
        yesterday: Inclusive lower bound for ``tpep_pickup_datetime``.
        today: Exclusive upper bound for ``tpep_pickup_datetime``.

    Returns:
        DataFrame with every column of ``yellow_tripdata`` for the matching
        rows, read from the SQLite database ``data.db``.
    """
    # Bind the bounds as query parameters ("?" placeholders, the sqlite3
    # driver's style) instead of interpolating them into the SQL text.
    query = (
        "SELECT * FROM yellow_tripdata "
        "WHERE tpep_pickup_datetime >= ? AND tpep_pickup_datetime < ?"
    )
    return pd.read_sql(query, "sqlite:///data.db", params=(yesterday, today))
9 |
def create_features(data):
    """Add the ``fare_amount_per_person`` column to ``data`` and return it.

    The column is ``fare_amount / (passenger_count + 1)``. The frame passed
    in is modified in place; the same object is returned.
    """
    # NOTE(review): the +1 presumably guards against trips recorded with
    # zero passengers - confirm the intent with the model's training code.
    divisor = data["passenger_count"] + 1
    data["fare_amount_per_person"] = data["fare_amount"] / divisor
    return data
13 |
def make_predictions(today="2022-01-05", yesterday="2022-01-04"):
    """Score one day of trips and append the predictions to the database.

    Loads the rows picked up in ``[yesterday, today)``, adds the engineered
    features, validates the frame against ``schema``, runs the model stored
    in ``model.joblib`` and appends the result to the ``predictions`` table
    of ``data.db``.

    Args:
        today: Exclusive upper bound of the pickup window. The default is
            the fixed date the script originally hard-coded.
        yesterday: Inclusive lower bound of the pickup window. The default
            is the fixed date the script originally hard-coded.

    To score the real previous day, pass for example
    ``today=pd.to_datetime("today").strftime("%Y-%m-%d")`` and the same
    expression minus ``pd.Timedelta(days=1)`` for ``yesterday``.
    """
    data = load_data(yesterday, today)
    data = create_features(data)

    validated_data = schema.validate(data)

    model = joblib.load("model.joblib")
    predictions = model.predict(validated_data)

    # Index the predictions by the source rows' "index" column so each
    # prediction can be matched back to its trip.
    predictions_df = pd.DataFrame(
        predictions, columns=["prediction"], index=data["index"]
    )
    predictions_df.to_sql("predictions", "sqlite:///data.db", if_exists="append")
35 |
36 |
if __name__ == '__main__':
    # Script entry point: run make_predictions every day at midnight.
    import time  # local import: only this scheduler loop needs it

    schedule.every().day.at("00:00").do(make_predictions)
    print("Rodando")
    while True:
        schedule.run_pending()
        # Pause between polls; without it the loop spins a CPU core at 100%.
        time.sleep(1)
--------------------------------------------------------------------------------
/deploy_sql/monitor.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 19,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "date = \"2022-01-05\"\n",
19 | "data = pd.read_sql(f'SELECT * FROM yellow_tripdata WHERE tpep_pickup_datetime < \"{date}\"', 'sqlite:///data.db')\n",
20 | "preds = pd.read_sql(f'SELECT * FROM predictions', 'sqlite:///data.db')"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 21,
26 | "metadata": {},
27 | "outputs": [
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " index | \n",
50 | " VendorID | \n",
51 | " tpep_pickup_datetime | \n",
52 | " tpep_dropoff_datetime | \n",
53 | " passenger_count | \n",
54 | " trip_distance | \n",
55 | " RatecodeID | \n",
56 | " store_and_fwd_flag | \n",
57 | " PULocationID | \n",
58 | " DOLocationID | \n",
59 | " payment_type | \n",
60 | " fare_amount | \n",
61 | " extra | \n",
62 | " mta_tax | \n",
63 | " tip_amount | \n",
64 | " tolls_amount | \n",
65 | " improvement_surcharge | \n",
66 | " total_amount | \n",
67 | " congestion_surcharge | \n",
68 | " airport_fee | \n",
69 | "
\n",
70 | " \n",
71 | " \n",
72 | " \n",
73 | " | 268862 | \n",
74 | " 2398555 | \n",
75 | " 2 | \n",
76 | " 2022-01-04 23:47:27 | \n",
77 | " 2022-01-04 23:55:03 | \n",
78 | " NaN | \n",
79 | " 1.66 | \n",
80 | " NaN | \n",
81 | " None | \n",
82 | " 143 | \n",
83 | " 50 | \n",
84 | " 0 | \n",
85 | " 13.20 | \n",
86 | " 0.0 | \n",
87 | " 0.5 | \n",
88 | " 4.38 | \n",
89 | " 0.0 | \n",
90 | " 0.3 | \n",
91 | " 20.88 | \n",
92 | " NaN | \n",
93 | " NaN | \n",
94 | "
\n",
95 | " \n",
96 | " | 268863 | \n",
97 | " 2398556 | \n",
98 | " 2 | \n",
99 | " 2022-01-04 23:44:58 | \n",
100 | " 2022-01-04 23:56:10 | \n",
101 | " NaN | \n",
102 | " 3.23 | \n",
103 | " NaN | \n",
104 | " None | \n",
105 | " 236 | \n",
106 | " 164 | \n",
107 | " 0 | \n",
108 | " 14.59 | \n",
109 | " 0.0 | \n",
110 | " 0.5 | \n",
111 | " 2.86 | \n",
112 | " 0.0 | \n",
113 | " 0.3 | \n",
114 | " 20.75 | \n",
115 | " NaN | \n",
116 | " NaN | \n",
117 | "
\n",
118 | " \n",
119 | " | 268864 | \n",
120 | " 2398557 | \n",
121 | " 2 | \n",
122 | " 2022-01-04 23:07:07 | \n",
123 | " 2022-01-04 23:10:54 | \n",
124 | " NaN | \n",
125 | " 0.55 | \n",
126 | " NaN | \n",
127 | " None | \n",
128 | " 129 | \n",
129 | " 129 | \n",
130 | " 0 | \n",
131 | " 13.20 | \n",
132 | " 0.0 | \n",
133 | " 0.5 | \n",
134 | " 1.50 | \n",
135 | " 0.0 | \n",
136 | " 0.3 | \n",
137 | " 15.50 | \n",
138 | " NaN | \n",
139 | " NaN | \n",
140 | "
\n",
141 | " \n",
142 | " | 268865 | \n",
143 | " 2398558 | \n",
144 | " 2 | \n",
145 | " 2022-01-04 23:43:00 | \n",
146 | " 2022-01-04 23:56:00 | \n",
147 | " NaN | \n",
148 | " 3.05 | \n",
149 | " NaN | \n",
150 | " None | \n",
151 | " 137 | \n",
152 | " 143 | \n",
153 | " 0 | \n",
154 | " 13.61 | \n",
155 | " 0.0 | \n",
156 | " 0.5 | \n",
157 | " 2.69 | \n",
158 | " 0.0 | \n",
159 | " 0.3 | \n",
160 | " 19.60 | \n",
161 | " NaN | \n",
162 | " NaN | \n",
163 | "
\n",
164 | " \n",
165 | " | 268866 | \n",
166 | " 2398559 | \n",
167 | " 2 | \n",
168 | " 2022-01-04 23:34:00 | \n",
169 | " 2022-01-04 23:45:00 | \n",
170 | " NaN | \n",
171 | " 3.66 | \n",
172 | " NaN | \n",
173 | " None | \n",
174 | " 146 | \n",
175 | " 236 | \n",
176 | " 0 | \n",
177 | " 14.77 | \n",
178 | " 0.0 | \n",
179 | " 0.5 | \n",
180 | " 2.34 | \n",
181 | " 0.0 | \n",
182 | " 0.3 | \n",
183 | " 20.41 | \n",
184 | " NaN | \n",
185 | " NaN | \n",
186 | "
\n",
187 | " \n",
188 | "
\n",
189 | "
"
190 | ],
191 | "text/plain": [
192 | " index VendorID tpep_pickup_datetime tpep_dropoff_datetime \\\n",
193 | "268862 2398555 2 2022-01-04 23:47:27 2022-01-04 23:55:03 \n",
194 | "268863 2398556 2 2022-01-04 23:44:58 2022-01-04 23:56:10 \n",
195 | "268864 2398557 2 2022-01-04 23:07:07 2022-01-04 23:10:54 \n",
196 | "268865 2398558 2 2022-01-04 23:43:00 2022-01-04 23:56:00 \n",
197 | "268866 2398559 2 2022-01-04 23:34:00 2022-01-04 23:45:00 \n",
198 | "\n",
199 | " passenger_count trip_distance RatecodeID store_and_fwd_flag \\\n",
200 | "268862 NaN 1.66 NaN None \n",
201 | "268863 NaN 3.23 NaN None \n",
202 | "268864 NaN 0.55 NaN None \n",
203 | "268865 NaN 3.05 NaN None \n",
204 | "268866 NaN 3.66 NaN None \n",
205 | "\n",
206 | " PULocationID DOLocationID payment_type fare_amount extra mta_tax \\\n",
207 | "268862 143 50 0 13.20 0.0 0.5 \n",
208 | "268863 236 164 0 14.59 0.0 0.5 \n",
209 | "268864 129 129 0 13.20 0.0 0.5 \n",
210 | "268865 137 143 0 13.61 0.0 0.5 \n",
211 | "268866 146 236 0 14.77 0.0 0.5 \n",
212 | "\n",
213 | " tip_amount tolls_amount improvement_surcharge total_amount \\\n",
214 | "268862 4.38 0.0 0.3 20.88 \n",
215 | "268863 2.86 0.0 0.3 20.75 \n",
216 | "268864 1.50 0.0 0.3 15.50 \n",
217 | "268865 2.69 0.0 0.3 19.60 \n",
218 | "268866 2.34 0.0 0.3 20.41 \n",
219 | "\n",
220 | " congestion_surcharge airport_fee \n",
221 | "268862 NaN NaN \n",
222 | "268863 NaN NaN \n",
223 | "268864 NaN NaN \n",
224 | "268865 NaN NaN \n",
225 | "268866 NaN NaN "
226 | ]
227 | },
228 | "execution_count": 21,
229 | "metadata": {},
230 | "output_type": "execute_result"
231 | }
232 | ],
233 | "source": [
234 | "data.tail()"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "execution_count": 22,
240 | "metadata": {},
241 | "outputs": [
242 | {
243 | "data": {
244 | "text/html": [
245 | "\n",
246 | "\n",
259 | "
\n",
260 | " \n",
261 | " \n",
262 | " | \n",
263 | " index | \n",
264 | " prediction | \n",
265 | "
\n",
266 | " \n",
267 | " \n",
268 | " \n",
269 | " | 146962 | \n",
270 | " 2398555 | \n",
271 | " 2.694889 | \n",
272 | "
\n",
273 | " \n",
274 | " | 146963 | \n",
275 | " 2398556 | \n",
276 | " 3.326200 | \n",
277 | "
\n",
278 | " \n",
279 | " | 146964 | \n",
280 | " 2398557 | \n",
281 | " 4.061624 | \n",
282 | "
\n",
283 | " \n",
284 | " | 146965 | \n",
285 | " 2398558 | \n",
286 | " 3.535200 | \n",
287 | "
\n",
288 | " \n",
289 | " | 146966 | \n",
290 | " 2398559 | \n",
291 | " 2.957333 | \n",
292 | "
\n",
293 | " \n",
294 | "
\n",
295 | "
"
296 | ],
297 | "text/plain": [
298 | " index prediction\n",
299 | "146962 2398555 2.694889\n",
300 | "146963 2398556 3.326200\n",
301 | "146964 2398557 4.061624\n",
302 | "146965 2398558 3.535200\n",
303 | "146966 2398559 2.957333"
304 | ]
305 | },
306 | "execution_count": 22,
307 | "metadata": {},
308 | "output_type": "execute_result"
309 | }
310 | ],
311 | "source": [
312 | "preds.tail()"
313 | ]
314 | },
315 | {
316 | "cell_type": "code",
317 | "execution_count": 25,
318 | "metadata": {},
319 | "outputs": [],
320 | "source": [
321 | "r = preds.merge(data[[\"index\", \"tip_amount\"]], on=\"index\", how=\"left\")"
322 | ]
323 | },
324 | {
325 | "cell_type": "code",
326 | "execution_count": 26,
327 | "metadata": {},
328 | "outputs": [
329 | {
330 | "data": {
331 | "text/html": [
332 | "\n",
333 | "\n",
346 | "
\n",
347 | " \n",
348 | " \n",
349 | " | \n",
350 | " index | \n",
351 | " prediction | \n",
352 | " tip_amount | \n",
353 | "
\n",
354 | " \n",
355 | " \n",
356 | " \n",
357 | " | 0 | \n",
358 | " 97175 | \n",
359 | " 1.988800 | \n",
360 | " 4.21 | \n",
361 | "
\n",
362 | " \n",
363 | " | 1 | \n",
364 | " 101532 | \n",
365 | " 4.135173 | \n",
366 | " 7.41 | \n",
367 | "
\n",
368 | " \n",
369 | " | 2 | \n",
370 | " 114802 | \n",
371 | " 3.578470 | \n",
372 | " 5.55 | \n",
373 | "
\n",
374 | " \n",
375 | " | 3 | \n",
376 | " 117062 | \n",
377 | " 4.904552 | \n",
378 | " 8.62 | \n",
379 | "
\n",
380 | " \n",
381 | " | 4 | \n",
382 | " 117145 | \n",
383 | " 0.834859 | \n",
384 | " 0.00 | \n",
385 | "
\n",
386 | " \n",
387 | "
\n",
388 | "
"
389 | ],
390 | "text/plain": [
391 | " index prediction tip_amount\n",
392 | "0 97175 1.988800 4.21\n",
393 | "1 101532 4.135173 7.41\n",
394 | "2 114802 3.578470 5.55\n",
395 | "3 117062 4.904552 8.62\n",
396 | "4 117145 0.834859 0.00"
397 | ]
398 | },
399 | "execution_count": 26,
400 | "metadata": {},
401 | "output_type": "execute_result"
402 | }
403 | ],
404 | "source": [
405 | "r.head()"
406 | ]
407 | },
408 | {
409 | "cell_type": "code",
410 | "execution_count": 27,
411 | "metadata": {},
412 | "outputs": [
413 | {
414 | "data": {
415 | "text/plain": [
416 | "2.728058382025522"
417 | ]
418 | },
419 | "execution_count": 27,
420 | "metadata": {},
421 | "output_type": "execute_result"
422 | }
423 | ],
424 | "source": [
425 | "from sklearn.metrics import mean_squared_error\n",
426 | "mean_squared_error(r[\"tip_amount\"], r[\"prediction\"], squared=False) "
427 | ]
428 | },
429 | {
430 | "cell_type": "code",
431 | "execution_count": null,
432 | "metadata": {},
433 | "outputs": [],
434 | "source": []
435 | }
436 | ],
437 | "metadata": {
438 | "kernelspec": {
439 | "display_name": "Python 3.9.7 ('base': conda)",
440 | "language": "python",
441 | "name": "python3"
442 | },
443 | "language_info": {
444 | "codemirror_mode": {
445 | "name": "ipython",
446 | "version": 3
447 | },
448 | "file_extension": ".py",
449 | "mimetype": "text/x-python",
450 | "name": "python",
451 | "nbconvert_exporter": "python",
452 | "pygments_lexer": "ipython3",
453 | "version": "3.9.7"
454 | },
455 | "orig_nbformat": 4,
456 | "vscode": {
457 | "interpreter": {
458 | "hash": "7a2c4b191d1ae843dde5cb5f4d1f62fa892f6b79b0f9392a84691e890e33c5a4"
459 | }
460 | }
461 | },
462 | "nbformat": 4,
463 | "nbformat_minor": 2
464 | }
465 |
--------------------------------------------------------------------------------
/deploy_sql/validator.py:
--------------------------------------------------------------------------------
1 | from pandera import DataFrameSchema, Column, Check, Index, MultiIndex
2 | import pandera
3 |
# All feature columns share the same float64, non-coerced, required settings;
# only the checks and the nullability differ between them.
_FLOAT64 = pandera.engines.numpy_engine.Float64


def _float_column(checks=None, nullable=False):
    """Build a required float64 ``Column`` with the given checks/nullability."""
    return Column(
        dtype=_FLOAT64,
        checks=checks,
        nullable=nullable,
        unique=False,
        coerce=False,
        required=True,
        regex=False,
    )


# Input contract for the tip model. strict="filter" asks pandera to drop any
# column that is not listed here from the validated frame.
schema = DataFrameSchema(
    columns={
        # Passenger count may be missing, otherwise it must lie in [0, 10].
        "passenger_count": _float_column(
            checks=[
                Check(lambda counts: counts >= 0),
                Check(lambda counts: counts <= 10),
            ],
            nullable=True,
        ),
        # Distances can never be negative.
        "trip_distance": _float_column(
            checks=[Check(lambda distances: distances >= 0)],
        ),
        "fare_amount": _float_column(),
        # Nullable, like passenger_count, from which it is derived.
        "fare_amount_per_person": _float_column(nullable=True),
    },
    index=Index(
        dtype=pandera.engines.numpy_engine.Int64,
        checks=None,
        nullable=False,
        coerce=False,
        name=None,
    ),
    coerce=False,
    strict="filter",
    name=None,
)
55 |
--------------------------------------------------------------------------------
/euro2021/0_clean_data_merged_v1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 14,
6 | "id": "16f7a6a2-2588-45e5-a7c7-cd61c2699a42",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd\n",
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 15,
17 | "id": "afe8030f-53f3-4d90-8eea-eb3211bd8294",
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "data = pd.read_csv(\"data/Uefa Euro Cup All Matches.csv\")"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 16,
27 | "id": "9a825ea6-8f5c-4559-b49d-d767f843dcbb",
28 | "metadata": {},
29 | "outputs": [
30 | {
31 | "data": {
32 | "text/html": [
33 | "\n",
34 | "\n",
47 | "
\n",
48 | " \n",
49 | " \n",
50 | " | \n",
51 | " Date | \n",
52 | " Time | \n",
53 | " HomeTeamName | \n",
54 | " AwayTeamName | \n",
55 | " HomeTeamGoals | \n",
56 | " AwayTeamGoals | \n",
57 | " Stage | \n",
58 | " SpecialWinConditions | \n",
59 | " Stadium | \n",
60 | " City | \n",
61 | " Attendance | \n",
62 | " Year | \n",
63 | "
\n",
64 | " \n",
65 | " \n",
66 | " \n",
67 | " | 0 | \n",
68 | " 6 July 1960 (1960-07-06) | \n",
69 | " 20:00 | \n",
70 | " France | \n",
71 | " Yugoslavia | \n",
72 | " 4 | \n",
73 | " 5 | \n",
74 | " Semi-finals | \n",
75 | " NaN | \n",
76 | " Parc des Princes | \n",
77 | " Paris | \n",
78 | " 26370 | \n",
79 | " 1960 | \n",
80 | "
\n",
81 | " \n",
82 | "
\n",
83 | "
"
84 | ],
85 | "text/plain": [
86 | " Date Time HomeTeamName AwayTeamName HomeTeamGoals \\\n",
87 | "0 6 July 1960 (1960-07-06) 20:00 France Yugoslavia 4 \n",
88 | "\n",
89 | " AwayTeamGoals Stage SpecialWinConditions Stadium City \\\n",
90 | "0 5 Semi-finals NaN Parc des Princes Paris \n",
91 | "\n",
92 | " Attendance Year \n",
93 | "0 26370 1960 "
94 | ]
95 | },
96 | "execution_count": 16,
97 | "metadata": {},
98 | "output_type": "execute_result"
99 | }
100 | ],
101 | "source": [
102 | "data.head(1)"
103 | ]
104 | },
105 | {
106 | "cell_type": "markdown",
107 | "id": "2769e675-0e20-46f0-8af4-d8e4c5afa41a",
108 | "metadata": {},
109 | "source": [
110 | "Date (with Time), Home Team, Away Team, HGoals, AGoals"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 17,
116 | "id": "d55071fa-0460-431e-919f-30a1a109debd",
117 | "metadata": {},
118 | "outputs": [],
119 | "source": [
120 | "# https://regexr.com/\n",
121 | "# https://strftime.org/\n",
122 | "# https://www.kaggle.com/c/ncaam-march-mania-2021"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": 18,
128 | "id": "0c0d0044-346b-48de-9c50-b16d322ca84f",
129 | "metadata": {},
130 | "outputs": [
131 | {
132 | "data": {
133 | "text/plain": [
134 | "0 1960-07-06 20:00\n",
135 | "1 1960-07-06 21:30\n",
136 | "2 1960-07-09 21:30\n",
137 | "3 1960-07-10 21:30\n",
138 | "4 1964-06-17 20:00\n",
139 | " ... \n",
140 | "281 2016-07-02 21:00\n",
141 | "282 2016-07-03 21:00\n",
142 | "283 2016-07-06 21:00\n",
143 | "284 2016-07-07 21:00\n",
144 | "285 2016-07-10 21:00\n",
145 | "Name: 0, Length: 286, dtype: object"
146 | ]
147 | },
148 | "execution_count": 18,
149 | "metadata": {},
150 | "output_type": "execute_result"
151 | }
152 | ],
153 | "source": [
154 | "captured_date = (data['Date'].str.extract(r\"\\((.*)\\)\").squeeze() + \" \" \n",
155 | " + data['Time'].str.extract(r\"(\\d+\\:\\d+)\").squeeze())\n",
156 | "captured_date"
157 | ]
158 | },
159 | {
160 | "cell_type": "code",
161 | "execution_count": 19,
162 | "id": "f7ccdc43-a40b-4ef3-ac10-c5360693749d",
163 | "metadata": {},
164 | "outputs": [
165 | {
166 | "data": {
167 | "text/html": [
168 | "\n",
169 | "\n",
182 | "
\n",
183 | " \n",
184 | " \n",
185 | " | \n",
186 | " date | \n",
187 | "
\n",
188 | " \n",
189 | " \n",
190 | " \n",
191 | " | 0 | \n",
192 | " 1960-07-06 20:00:00 | \n",
193 | "
\n",
194 | " \n",
195 | " | 1 | \n",
196 | " 1960-07-06 21:30:00 | \n",
197 | "
\n",
198 | " \n",
199 | " | 2 | \n",
200 | " 1960-07-09 21:30:00 | \n",
201 | "
\n",
202 | " \n",
203 | " | 3 | \n",
204 | " 1960-07-10 21:30:00 | \n",
205 | "
\n",
206 | " \n",
207 | " | 4 | \n",
208 | " 1964-06-17 20:00:00 | \n",
209 | "
\n",
210 | " \n",
211 | "
\n",
212 | "
"
213 | ],
214 | "text/plain": [
215 | " date\n",
216 | "0 1960-07-06 20:00:00\n",
217 | "1 1960-07-06 21:30:00\n",
218 | "2 1960-07-09 21:30:00\n",
219 | "3 1960-07-10 21:30:00\n",
220 | "4 1964-06-17 20:00:00"
221 | ]
222 | },
223 | "execution_count": 19,
224 | "metadata": {},
225 | "output_type": "execute_result"
226 | }
227 | ],
228 | "source": [
229 | "captured_date = pd.to_datetime(captured_date, format=\"%Y-%m-%d %H:%M\")\n",
230 | "clean_data = pd.DataFrame({\"date\": captured_date})\n",
231 | "clean_data.head()"
232 | ]
233 | },
234 | {
235 | "cell_type": "code",
236 | "execution_count": 20,
237 | "id": "c73c52fd-6984-4a88-83a3-c78fa03b9a98",
238 | "metadata": {},
239 | "outputs": [],
240 | "source": [
241 | "match_data = data[['HomeTeamName', 'AwayTeamName', 'HomeTeamGoals', 'AwayTeamGoals']]\n",
242 | "clean_data_merged = pd.concat([clean_data, match_data], axis=1)"
243 | ]
244 | },
245 | {
246 | "cell_type": "code",
247 | "execution_count": 21,
248 | "id": "a2e8ce15-7807-406d-bdee-042900c6a534",
249 | "metadata": {},
250 | "outputs": [
251 | {
252 | "data": {
253 | "text/html": [
254 | "\n",
255 | "\n",
268 | "
\n",
269 | " \n",
270 | " \n",
271 | " | \n",
272 | " date | \n",
273 | " HomeTeamName | \n",
274 | " AwayTeamName | \n",
275 | " HomeTeamGoals | \n",
276 | " AwayTeamGoals | \n",
277 | "
\n",
278 | " \n",
279 | " \n",
280 | " \n",
281 | " | 0 | \n",
282 | " 1960-07-06 20:00:00 | \n",
283 | " France | \n",
284 | " Yugoslavia | \n",
285 | " 4 | \n",
286 | " 5 | \n",
287 | "
\n",
288 | " \n",
289 | " | 1 | \n",
290 | " 1960-07-06 21:30:00 | \n",
291 | " Czechoslovakia | \n",
292 | " Soviet Union | \n",
293 | " 0 | \n",
294 | " 3 | \n",
295 | "
\n",
296 | " \n",
297 | " | 2 | \n",
298 | " 1960-07-09 21:30:00 | \n",
299 | " Czechoslovakia | \n",
300 | " France | \n",
301 | " 2 | \n",
302 | " 0 | \n",
303 | "
\n",
304 | " \n",
305 | " | 3 | \n",
306 | " 1960-07-10 21:30:00 | \n",
307 | " Soviet Union | \n",
308 | " Yugoslavia | \n",
309 | " 2 | \n",
310 | " 1 | \n",
311 | "
\n",
312 | " \n",
313 | " | 4 | \n",
314 | " 1964-06-17 20:00:00 | \n",
315 | " Spain | \n",
316 | " Hungary | \n",
317 | " 2 | \n",
318 | " 1 | \n",
319 | "
\n",
320 | " \n",
321 | " | ... | \n",
322 | " ... | \n",
323 | " ... | \n",
324 | " ... | \n",
325 | " ... | \n",
326 | " ... | \n",
327 | "
\n",
328 | " \n",
329 | " | 281 | \n",
330 | " 2016-07-02 21:00:00 | \n",
331 | " Germany | \n",
332 | " Italy | \n",
333 | " 1 | \n",
334 | " 1 | \n",
335 | "
\n",
336 | " \n",
337 | " | 282 | \n",
338 | " 2016-07-03 21:00:00 | \n",
339 | " France | \n",
340 | " Iceland | \n",
341 | " 5 | \n",
342 | " 2 | \n",
343 | "
\n",
344 | " \n",
345 | " | 283 | \n",
346 | " 2016-07-06 21:00:00 | \n",
347 | " Portugal | \n",
348 | " Wales | \n",
349 | " 2 | \n",
350 | " 0 | \n",
351 | "
\n",
352 | " \n",
353 | " | 284 | \n",
354 | " 2016-07-07 21:00:00 | \n",
355 | " Germany | \n",
356 | " France | \n",
357 | " 0 | \n",
358 | " 2 | \n",
359 | "
\n",
360 | " \n",
361 | " | 285 | \n",
362 | " 2016-07-10 21:00:00 | \n",
363 | " Portugal | \n",
364 | " France | \n",
365 | " 1 | \n",
366 | " 0 | \n",
367 | "
\n",
368 | " \n",
369 | "
\n",
370 | "
286 rows × 5 columns
\n",
371 | "
"
372 | ],
373 | "text/plain": [
374 | " date HomeTeamName AwayTeamName HomeTeamGoals \\\n",
375 | "0 1960-07-06 20:00:00 France Yugoslavia 4 \n",
376 | "1 1960-07-06 21:30:00 Czechoslovakia Soviet Union 0 \n",
377 | "2 1960-07-09 21:30:00 Czechoslovakia France 2 \n",
378 | "3 1960-07-10 21:30:00 Soviet Union Yugoslavia 2 \n",
379 | "4 1964-06-17 20:00:00 Spain Hungary 2 \n",
380 | ".. ... ... ... ... \n",
381 | "281 2016-07-02 21:00:00 Germany Italy 1 \n",
382 | "282 2016-07-03 21:00:00 France Iceland 5 \n",
383 | "283 2016-07-06 21:00:00 Portugal Wales 2 \n",
384 | "284 2016-07-07 21:00:00 Germany France 0 \n",
385 | "285 2016-07-10 21:00:00 Portugal France 1 \n",
386 | "\n",
387 | " AwayTeamGoals \n",
388 | "0 5 \n",
389 | "1 3 \n",
390 | "2 0 \n",
391 | "3 1 \n",
392 | "4 1 \n",
393 | ".. ... \n",
394 | "281 1 \n",
395 | "282 2 \n",
396 | "283 0 \n",
397 | "284 2 \n",
398 | "285 0 \n",
399 | "\n",
400 | "[286 rows x 5 columns]"
401 | ]
402 | },
403 | "execution_count": 21,
404 | "metadata": {},
405 | "output_type": "execute_result"
406 | }
407 | ],
408 | "source": [
409 | "clean_data_merged"
410 | ]
411 | },
412 | {
413 | "cell_type": "code",
414 | "execution_count": 22,
415 | "id": "5ab062a3-705e-4e4f-a582-9e6307e49985",
416 | "metadata": {},
417 | "outputs": [],
418 | "source": [
419 | "def decide_y(home_goals, away_goals):\n",
420 | " if home_goals > away_goals:\n",
421 | " return 1\n",
422 | " elif home_goals < away_goals:\n",
423 | " return -1\n",
424 | " else:\n",
425 | " return 0\n",
426 | " \n",
427 | "labels = np.zeros(clean_data_merged.shape[0])\n",
428 | "for i, (home_goals, away_goals) in enumerate(clean_data_merged[['HomeTeamGoals', 'AwayTeamGoals']].values): \n",
429 | " if home_goals > away_goals:\n",
430 | " labels[i] = 1\n",
431 | " elif home_goals < away_goals:\n",
432 | " labels[i] = -1\n",
433 | " else:\n",
434 | " labels[i] = 0\n",
435 | "clean_data_merged['y_classif'] = labels"
436 | ]
437 | },
438 | {
439 | "cell_type": "code",
440 | "execution_count": 23,
441 | "id": "9d4eaf30-8a8f-43d7-8dc8-0fe0a031647a",
442 | "metadata": {},
443 | "outputs": [],
444 | "source": [
445 | "clean_data_merged['y_reg'] = clean_data_merged['HomeTeamGoals'] - clean_data_merged['AwayTeamGoals']"
446 | ]
447 | },
448 | {
449 | "cell_type": "code",
450 | "execution_count": 24,
451 | "id": "dea8fd0b-9247-412a-bb62-0b884058d266",
452 | "metadata": {},
453 | "outputs": [
454 | {
455 | "data": {
456 | "text/html": [
457 | "\n",
458 | "\n",
471 | "
\n",
472 | " \n",
473 | " \n",
474 | " | \n",
475 | " date | \n",
476 | " HomeTeamName | \n",
477 | " AwayTeamName | \n",
478 | " HomeTeamGoals | \n",
479 | " AwayTeamGoals | \n",
480 | " y_classif | \n",
481 | " y_reg | \n",
482 | "
\n",
483 | " \n",
484 | " \n",
485 | " \n",
486 | " | 0 | \n",
487 | " 1960-07-06 20:00:00 | \n",
488 | " France | \n",
489 | " Yugoslavia | \n",
490 | " 4 | \n",
491 | " 5 | \n",
492 | " -1.0 | \n",
493 | " -1 | \n",
494 | "
\n",
495 | " \n",
496 | " | 1 | \n",
497 | " 1960-07-06 21:30:00 | \n",
498 | " Czechoslovakia | \n",
499 | " Soviet Union | \n",
500 | " 0 | \n",
501 | " 3 | \n",
502 | " -1.0 | \n",
503 | " -3 | \n",
504 | "
\n",
505 | " \n",
506 | " | 2 | \n",
507 | " 1960-07-09 21:30:00 | \n",
508 | " Czechoslovakia | \n",
509 | " France | \n",
510 | " 2 | \n",
511 | " 0 | \n",
512 | " 1.0 | \n",
513 | " 2 | \n",
514 | "
\n",
515 | " \n",
516 | " | 3 | \n",
517 | " 1960-07-10 21:30:00 | \n",
518 | " Soviet Union | \n",
519 | " Yugoslavia | \n",
520 | " 2 | \n",
521 | " 1 | \n",
522 | " 1.0 | \n",
523 | " 1 | \n",
524 | "
\n",
525 | " \n",
526 | " | 4 | \n",
527 | " 1964-06-17 20:00:00 | \n",
528 | " Spain | \n",
529 | " Hungary | \n",
530 | " 2 | \n",
531 | " 1 | \n",
532 | " 1.0 | \n",
533 | " 1 | \n",
534 | "
\n",
535 | " \n",
536 | " | ... | \n",
537 | " ... | \n",
538 | " ... | \n",
539 | " ... | \n",
540 | " ... | \n",
541 | " ... | \n",
542 | " ... | \n",
543 | " ... | \n",
544 | "
\n",
545 | " \n",
546 | " | 281 | \n",
547 | " 2016-07-02 21:00:00 | \n",
548 | " Germany | \n",
549 | " Italy | \n",
550 | " 1 | \n",
551 | " 1 | \n",
552 | " 0.0 | \n",
553 | " 0 | \n",
554 | "
\n",
555 | " \n",
556 | " | 282 | \n",
557 | " 2016-07-03 21:00:00 | \n",
558 | " France | \n",
559 | " Iceland | \n",
560 | " 5 | \n",
561 | " 2 | \n",
562 | " 1.0 | \n",
563 | " 3 | \n",
564 | "
\n",
565 | " \n",
566 | " | 283 | \n",
567 | " 2016-07-06 21:00:00 | \n",
568 | " Portugal | \n",
569 | " Wales | \n",
570 | " 2 | \n",
571 | " 0 | \n",
572 | " 1.0 | \n",
573 | " 2 | \n",
574 | "
\n",
575 | " \n",
576 | " | 284 | \n",
577 | " 2016-07-07 21:00:00 | \n",
578 | " Germany | \n",
579 | " France | \n",
580 | " 0 | \n",
581 | " 2 | \n",
582 | " -1.0 | \n",
583 | " -2 | \n",
584 | "
\n",
585 | " \n",
586 | " | 285 | \n",
587 | " 2016-07-10 21:00:00 | \n",
588 | " Portugal | \n",
589 | " France | \n",
590 | " 1 | \n",
591 | " 0 | \n",
592 | " 1.0 | \n",
593 | " 1 | \n",
594 | "
\n",
595 | " \n",
596 | "
\n",
597 | "
286 rows × 7 columns
\n",
598 | "
"
599 | ],
600 | "text/plain": [
601 | " date HomeTeamName AwayTeamName HomeTeamGoals \\\n",
602 | "0 1960-07-06 20:00:00 France Yugoslavia 4 \n",
603 | "1 1960-07-06 21:30:00 Czechoslovakia Soviet Union 0 \n",
604 | "2 1960-07-09 21:30:00 Czechoslovakia France 2 \n",
605 | "3 1960-07-10 21:30:00 Soviet Union Yugoslavia 2 \n",
606 | "4 1964-06-17 20:00:00 Spain Hungary 2 \n",
607 | ".. ... ... ... ... \n",
608 | "281 2016-07-02 21:00:00 Germany Italy 1 \n",
609 | "282 2016-07-03 21:00:00 France Iceland 5 \n",
610 | "283 2016-07-06 21:00:00 Portugal Wales 2 \n",
611 | "284 2016-07-07 21:00:00 Germany France 0 \n",
612 | "285 2016-07-10 21:00:00 Portugal France 1 \n",
613 | "\n",
614 | " AwayTeamGoals y_classif y_reg \n",
615 | "0 5 -1.0 -1 \n",
616 | "1 3 -1.0 -3 \n",
617 | "2 0 1.0 2 \n",
618 | "3 1 1.0 1 \n",
619 | "4 1 1.0 1 \n",
620 | ".. ... ... ... \n",
621 | "281 1 0.0 0 \n",
622 | "282 2 1.0 3 \n",
623 | "283 0 1.0 2 \n",
624 | "284 2 -1.0 -2 \n",
625 | "285 0 1.0 1 \n",
626 | "\n",
627 | "[286 rows x 7 columns]"
628 | ]
629 | },
630 | "execution_count": 24,
631 | "metadata": {},
632 | "output_type": "execute_result"
633 | }
634 | ],
635 | "source": [
636 | "clean_data_merged"
637 | ]
638 | },
639 | {
640 | "cell_type": "markdown",
641 | "id": "e641c09b-002e-4483-be91-1722f0856aaf",
642 | "metadata": {},
643 | "source": [
644 | "p > 1 - time da casa ganhou \n",
645 | "-1 < p < 1 - empate \n",
646 | "p < -1 - time de fora ganhou"
647 | ]
648 | },
649 | {
650 | "cell_type": "code",
651 | "execution_count": 26,
652 | "id": "75e2c27c-447c-4cad-a5d6-0b64f60aa10d",
653 | "metadata": {},
654 | "outputs": [],
655 | "source": [
656 | "clean_data_merged.to_parquet(\"./data/clean_data_merged_v1.parquet\")"
657 | ]
658 | },
659 | {
660 | "cell_type": "code",
661 | "execution_count": null,
662 | "id": "1dbc1bda-8f00-4486-b4f4-2e56302925d1",
663 | "metadata": {},
664 | "outputs": [],
665 | "source": []
666 | }
667 | ],
668 | "metadata": {
669 | "kernelspec": {
670 | "display_name": "Python 3",
671 | "language": "python",
672 | "name": "python3"
673 | },
674 | "language_info": {
675 | "codemirror_mode": {
676 | "name": "ipython",
677 | "version": 3
678 | },
679 | "file_extension": ".py",
680 | "mimetype": "text/x-python",
681 | "name": "python",
682 | "nbconvert_exporter": "python",
683 | "pygments_lexer": "ipython3",
684 | "version": "3.8.5"
685 | }
686 | },
687 | "nbformat": 4,
688 | "nbformat_minor": 5
689 | }
690 |
--------------------------------------------------------------------------------
/euro2021/1_baselines.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "16f7a6a2-2588-45e5-a7c7-cd61c2699a42",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd\n",
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "id": "afe8030f-53f3-4d90-8eea-eb3211bd8294",
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "data = pd.read_parquet(\"./data/clean_data_merged_v1.parquet\")\n",
22 | "data['HomeTeamName'] = data['HomeTeamName'].str.strip()\n",
23 | "data['AwayTeamName'] = data['AwayTeamName'].str.strip()"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 3,
29 | "id": "9a825ea6-8f5c-4559-b49d-d767f843dcbb",
30 | "metadata": {},
31 | "outputs": [
32 | {
33 | "data": {
34 | "text/html": [
35 | "\n",
36 | "\n",
49 | "
\n",
50 | " \n",
51 | " \n",
52 | " | \n",
53 | " date | \n",
54 | " HomeTeamName | \n",
55 | " AwayTeamName | \n",
56 | " HomeTeamGoals | \n",
57 | " AwayTeamGoals | \n",
58 | " y_classif | \n",
59 | " y_reg | \n",
60 | "
\n",
61 | " \n",
62 | " \n",
63 | " \n",
64 | " | 0 | \n",
65 | " 1960-07-06 20:00:00 | \n",
66 | " France | \n",
67 | " Yugoslavia | \n",
68 | " 4 | \n",
69 | " 5 | \n",
70 | " -1.0 | \n",
71 | " -1 | \n",
72 | "
\n",
73 | " \n",
74 | " | 1 | \n",
75 | " 1960-07-06 21:30:00 | \n",
76 | " Czechoslovakia | \n",
77 | " Soviet Union | \n",
78 | " 0 | \n",
79 | " 3 | \n",
80 | " -1.0 | \n",
81 | " -3 | \n",
82 | "
\n",
83 | " \n",
84 | " | 2 | \n",
85 | " 1960-07-09 21:30:00 | \n",
86 | " Czechoslovakia | \n",
87 | " France | \n",
88 | " 2 | \n",
89 | " 0 | \n",
90 | " 1.0 | \n",
91 | " 2 | \n",
92 | "
\n",
93 | " \n",
94 | " | 3 | \n",
95 | " 1960-07-10 21:30:00 | \n",
96 | " Soviet Union | \n",
97 | " Yugoslavia | \n",
98 | " 2 | \n",
99 | " 1 | \n",
100 | " 1.0 | \n",
101 | " 1 | \n",
102 | "
\n",
103 | " \n",
104 | " | 4 | \n",
105 | " 1964-06-17 20:00:00 | \n",
106 | " Spain | \n",
107 | " Hungary | \n",
108 | " 2 | \n",
109 | " 1 | \n",
110 | " 1.0 | \n",
111 | " 1 | \n",
112 | "
\n",
113 | " \n",
114 | "
\n",
115 | "
"
116 | ],
117 | "text/plain": [
118 | " date HomeTeamName AwayTeamName HomeTeamGoals \\\n",
119 | "0 1960-07-06 20:00:00 France Yugoslavia 4 \n",
120 | "1 1960-07-06 21:30:00 Czechoslovakia Soviet Union 0 \n",
121 | "2 1960-07-09 21:30:00 Czechoslovakia France 2 \n",
122 | "3 1960-07-10 21:30:00 Soviet Union Yugoslavia 2 \n",
123 | "4 1964-06-17 20:00:00 Spain Hungary 2 \n",
124 | "\n",
125 | " AwayTeamGoals y_classif y_reg \n",
126 | "0 5 -1.0 -1 \n",
127 | "1 3 -1.0 -3 \n",
128 | "2 0 1.0 2 \n",
129 | "3 1 1.0 1 \n",
130 | "4 1 1.0 1 "
131 | ]
132 | },
133 | "execution_count": 3,
134 | "metadata": {},
135 | "output_type": "execute_result"
136 | }
137 | ],
138 | "source": [
139 | "data.head(5)"
140 | ]
141 | },
142 | {
143 | "cell_type": "code",
144 | "execution_count": 4,
145 | "id": "2bf5eb6b-4696-4a1f-95af-824004e1a7d2",
146 | "metadata": {},
147 | "outputs": [],
148 | "source": [
149 | "def baseline_wins(df):\n",
150 | " home = df.groupby(\"HomeTeamName\")['y_classif'].mean()\n",
151 | " away = df.groupby(\"AwayTeamName\")['y_classif'].mean() * -1\n",
152 | " \n",
153 | " return 0.5*home + 0.5*away"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 5,
159 | "id": "926b213b-3715-4992-b114-bd714de33594",
160 | "metadata": {},
161 | "outputs": [],
162 | "source": [
163 | "from sklearn.metrics import classification_report, roc_auc_score"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": 9,
169 | "id": "75e2c27c-447c-4cad-a5d6-0b64f60aa10d",
170 | "metadata": {},
171 | "outputs": [
172 | {
173 | "name": "stdout",
174 | "output_type": "stream",
175 | "text": [
176 | "2004 Baseline Historical wins 0.25806451612903225\n",
177 | "2004 Baseline Home Win 0.3548387096774194\n",
178 | " precision recall f1-score support\n",
179 | "\n",
180 | " -1.0 0.19 0.30 0.23 10\n",
181 | " 0.0 0.00 0.00 0.00 10\n",
182 | " 1.0 0.33 0.45 0.38 11\n",
183 | "\n",
184 | " accuracy 0.26 31\n",
185 | " macro avg 0.17 0.25 0.21 31\n",
186 | "weighted avg 0.18 0.26 0.21 31\n",
187 | "\n"
188 | ]
189 | },
190 | {
191 | "name": "stderr",
192 | "output_type": "stream",
193 | "text": [
194 | "/Users/mariofilho/miniconda3/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1245: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
195 | " _warn_prf(average, modifier, msg_start, len(result))\n",
196 | "/Users/mariofilho/miniconda3/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1245: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
197 | " _warn_prf(average, modifier, msg_start, len(result))\n",
198 | "/Users/mariofilho/miniconda3/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1245: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
199 | " _warn_prf(average, modifier, msg_start, len(result))\n"
200 | ]
201 | },
202 | {
203 | "ename": "AxisError",
204 | "evalue": "axis 1 is out of bounds for array of dimension 1",
205 | "output_type": "error",
206 | "traceback": [
207 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
208 | "\u001b[0;31mAxisError\u001b[0m Traceback (most recent call last)",
209 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;31m#print(classification_report(val['y_classif'], np.ones(val.shape[0])))\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0myear\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"AUC Baseline Historical wins\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mroc_auc_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mval\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'y_classif'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'p'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmulti_class\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'ovr'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 16\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
210 | "\u001b[0;32m~/miniconda3/lib/python3.8/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36minner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;31m# extra_args > 0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
211 | "\u001b[0;32m~/miniconda3/lib/python3.8/site-packages/sklearn/metrics/_ranking.py\u001b[0m in \u001b[0;36mroc_auc_score\u001b[0;34m(y_true, y_score, average, sample_weight, max_fpr, multi_class, labels)\u001b[0m\n\u001b[1;32m 535\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmulti_class\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'raise'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 536\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"multi_class must be in ('ovo', 'ovr')\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 537\u001b[0;31m return _multiclass_roc_auc_score(y_true, y_score, labels,\n\u001b[0m\u001b[1;32m 538\u001b[0m multi_class, average, sample_weight)\n\u001b[1;32m 539\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0my_type\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"binary\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
212 | "\u001b[0;32m~/miniconda3/lib/python3.8/site-packages/sklearn/metrics/_ranking.py\u001b[0m in \u001b[0;36m_multiclass_roc_auc_score\u001b[0;34m(y_true, y_score, labels, multi_class, average, sample_weight)\u001b[0m\n\u001b[1;32m 593\u001b[0m \"\"\"\n\u001b[1;32m 594\u001b[0m \u001b[0;31m# validation of the input y_score\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 595\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mallclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_score\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 596\u001b[0m raise ValueError(\n\u001b[1;32m 597\u001b[0m \u001b[0;34m\"Target scores need to be probabilities for multiclass \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
213 | "\u001b[0;32m~/miniconda3/lib/python3.8/site-packages/numpy/core/_methods.py\u001b[0m in \u001b[0;36m_sum\u001b[0;34m(a, axis, dtype, out, keepdims, initial, where)\u001b[0m\n\u001b[1;32m 45\u001b[0m def _sum(a, axis=None, dtype=None, out=None, keepdims=False,\n\u001b[1;32m 46\u001b[0m initial=_NoValue, where=True):\n\u001b[0;32m---> 47\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mumr_sum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkeepdims\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minitial\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwhere\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 48\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 49\u001b[0m def _prod(a, axis=None, dtype=None, out=None, keepdims=False,\n",
214 | "\u001b[0;31mAxisError\u001b[0m: axis 1 is out of bounds for array of dimension 1"
215 | ]
216 | }
217 | ],
218 | "source": [
219 | "for year in [2004, 2008, 2012, 2016]:\n",
220 | " tr = data[data['date'].dt.year < year].copy()\n",
221 | " val = data[data['date'].dt.year == year].copy()\n",
222 | " #print(year, tr.shape, val.shape)\n",
223 | " \n",
224 | " baseline = baseline_wins(tr)\n",
225 | " val['p'] = (val['HomeTeamName'].map(baseline) > val['AwayTeamName'].map(baseline)).astype(int)\n",
226 | " val['p'] = val['p'].map(lambda x: -1 if x == 0 else 1 )\n",
227 | " \n",
228 | " print(year,\"Baseline Historical wins\", (val['y_classif'] == val['p']).mean())\n",
229 | " print(year, \"Baseline Home Win\", (val['y_classif'] == np.ones(val.shape[0])).mean())\n",
230 | " print(classification_report(val['y_classif'], val['p']))\n",
231 | " #print(classification_report(val['y_classif'], np.ones(val.shape[0])))\n",
232 | " \n",
233 | " print(year,\"AUC Baseline Historical wins\", roc_auc_score(val['y_classif'], val['p'], multi_class='ovr'))\n",
234 | " print()\n",
235 | " \n",
236 | " \n",
237 | " "
238 | ]
239 | },
240 | {
241 | "cell_type": "code",
242 | "execution_count": null,
243 | "id": "53acd66c-03cf-45d3-90ed-2020a4fbab01",
244 | "metadata": {},
245 | "outputs": [],
246 | "source": []
247 | }
248 | ],
249 | "metadata": {
250 | "kernelspec": {
251 | "display_name": "Python 3",
252 | "language": "python",
253 | "name": "python3"
254 | },
255 | "language_info": {
256 | "codemirror_mode": {
257 | "name": "ipython",
258 | "version": 3
259 | },
260 | "file_extension": ".py",
261 | "mimetype": "text/x-python",
262 | "name": "python",
263 | "nbconvert_exporter": "python",
264 | "pygments_lexer": "ipython3",
265 | "version": "3.8.5"
266 | }
267 | },
268 | "nbformat": 4,
269 | "nbformat_minor": 5
270 | }
271 |
--------------------------------------------------------------------------------
/euro2021/README:
--------------------------------------------------------------------------------
1 | Material para os vídeos da Playlist: https://www.youtube.com/watch?v=5cI9YLfl5pA&list=PLwnip85KhroXqla3GfqVxDHzFBF3xLcAT
2 |
3 | Machine Learning na UEFA Euro 2021
4 |
--------------------------------------------------------------------------------
/live11_sucesso_musical/README:
--------------------------------------------------------------------------------
1 | Revelando os Segredos do Sucesso de uma Música com Machine Learning - Live de Data Science #11
2 | https://youtu.be/6UBReBQZGmo
3 |
--------------------------------------------------------------------------------
/live12_timeseries_prophet/README:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/live14_deploy/README:
--------------------------------------------------------------------------------
1 | Arquivos da live 14 - Como Colocar um Modelo de Machine Learning em Produção
2 | https://youtu.be/1hdZ0AVbQcw
3 |
4 |
5 | Dataset: https://www.kaggle.com/rogeriochaves/clickbait-buzzfeed-brasil
6 |
--------------------------------------------------------------------------------
/live14_deploy/app.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, request
2 | import joblib as jb
3 | import json
4 |
5 | app = Flask(__name__)
6 |
7 | mdl = jb.load("mdl.pkl.z")
8 |
9 | @app.route("/") # decorator
10 | def main():
11 |
12 | print(request.args)
13 |
14 | title = request.args.get("titulo", default='')
15 | res = {"titulo": title, "p": mdl.predict_proba([title])[0][1]}
16 | return json.dumps(res)
17 |
18 | if __name__ == "__main__":
19 | app.run()
20 |
21 |
22 | #https://gunicorn.org/#quickstart
--------------------------------------------------------------------------------
/live14_deploy/mdl.pkl.z:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ledmaster/notebooks_tutoriais/616c87c44f79bda5e1b0a2d33a2d5f7c31b86a08/live14_deploy/mdl.pkl.z
--------------------------------------------------------------------------------
/live15_covid/Countries Longitude and Latitude.csv:
--------------------------------------------------------------------------------
1 | ,longitude,latitude,name
2 | 0,33.791638,-84.389488,
3 | 1,33.791638,-84.389488,
4 | 2,33.93911,67.709953,Afghanistan
5 | 3,41.153332,20.168331,Albania
6 | 4,28.033886,1.659626,Algeria
7 | 5,-14.270972,-170.132217,American Samoa
8 | 6,42.506285,1.521801,Andorra
9 | 7,-11.202692,17.873887,Angola
10 | 8,18.220554,-63.06861499999999,Anguilla
11 | 9,-82.862752,135.0,Antarctica
12 | 10,17.060816,-61.796428,Antigua & Barbuda
13 | 11,-38.416097,-63.61667199999999,Argentina
14 | 12,40.069099,45.038189,Armenia
15 | 13,12.52111,-69.968338,Aruba
16 | 14,-25.274398,133.775136,Australia
17 | 15,47.516231,14.550072,Austria
18 | 16,40.143105,47.576927,Azerbaijan
19 | 17,25.03428,-77.39627999999999,Bahamas
20 | 18,26.0667,50.5577,Bahrain
21 | 19,23.684994,90.356331,Bangladesh
22 | 20,13.193887,-59.543198,Barbados
23 | 21,53.709807,27.953389,Belarus
24 | 22,50.503887,4.469936,Belgium
25 | 23,17.189877,-88.49765,Belize
26 | 24,9.30769,2.315834,Benin
27 | 25,32.3078,-64.7505,Bermuda
28 | 26,27.514162,90.433601,Bhutan
29 | 27,-16.290154,-63.58865299999999,Bolivia
30 | 28,43.915886,17.679076,Bosnia
31 | 29,-22.328474,24.684866,Botswana
32 | 30,-54.4207915,3.3464497,Bouvet Island
33 | 31,-14.235004,-51.92528,Brazil
34 | 32,-6.343194,71.876519,British Indian Ocean Territory
35 | 33,18.420695,-64.639968,British Virgin Islands
36 | 34,4.535277,114.727669,Brunei
37 | 35,42.733883,25.48583,Bulgaria
38 | 36,12.238333,-1.561593,Burkina Faso
39 | 37,-3.373056,29.918886,Burundi
40 | 38,12.565679,104.990963,Cambodia
41 | 39,7.369721999999999,12.354722,Cameroon
42 | 40,56.130366,-106.346771,Canada
43 | 41,16.5388,-23.0418,Cape Verde
44 | 42,12.1783611,-68.2385339,Caribbean Netherlands
45 | 43,19.3133,-81.2546,Cayman Islands
46 | 44,6.611110999999999,20.939444,Central African Republic
47 | 45,15.454166,18.732207,Chad
48 | 46,-35.675147,-71.542969,Chile
49 | 47,35.86166,104.195397,China
50 | 48,-10.447525,105.690449,Christmas Island
51 | 49,-12.164165,96.87095599999999,Cocos (Keeling) Islands
52 | 50,4.570868,-74.297333,Colombia
53 | 51,-11.6455,43.3333,Comoros
54 | 52,-0.228021,15.827659,Congo - Brazzaville
55 | 53,-4.038333,21.758664,Congo - Kinshasa
56 | 54,-21.236736,-159.777671,Cook Islands
57 | 55,9.748916999999999,-83.753428,Costa Rica
58 | 56,45.1,15.2000001,Croatia
59 | 57,21.521757,-77.781167,Cuba
60 | 58,12.16957,-68.99002,Curaçao
61 | 59,35.126413,33.429859,Cyprus
62 | 60,49.81749199999999,15.472962,Czech Republic
63 | 61,7.539988999999999,-5.547079999999999,Côte d’Ivoire
64 | 62,56.26392,9.501785,Denmark
65 | 63,11.825138,42.590275,Djibouti
66 | 64,15.414999,-61.37097600000001,Dominica
67 | 65,18.735693,-70.162651,Dominican Republic
68 | 66,-1.831239,-78.18340599999999,Ecuador
69 | 67,26.820553,30.802498,Egypt
70 | 68,13.794185,-88.89653,El Salvador
71 | 69,1.650801,10.267895,Equatorial Guinea
72 | 70,15.179384,39.782334,Eritrea
73 | 71,58.595272,25.0136071,Estonia
74 | 72,9.145000000000001,40.489673,Ethiopia
75 | 73,-51.796253,-59.523613,Falkland Islands
76 | 74,61.89263500000001,-6.9118061,Faroe Islands
77 | 75,-17.713371,178.065032,Fiji
78 | 76,61.92410999999999,25.7481511,Finland
79 | 77,46.227638,2.213749,France
80 | 78,3.933889,-53.125782,French Guiana
81 | 79,-17.679742,-149.406843,French Polynesia
82 | 80,-49.280366,69.3485571,French Southern Territories
83 | 81,-0.803689,11.609444,Gabon
84 | 82,13.443182,-15.310139,Gambia
85 | 83,32.1656221,-82.9000751,Georgia
86 | 84,51.165691,10.451526,Germany
87 | 85,7.946527,-1.023194,Ghana
88 | 86,36.140751,-5.353585,Gibraltar
89 | 87,39.074208,21.824312,Greece
90 | 88,71.706936,-42.604303,Greenland
91 | 89,12.1165,-61.67899999999999,Grenada
92 | 90,16.265,-61.55099999999999,Guadeloupe
93 | 91,13.444304,144.793731,Guam
94 | 92,15.783471,-90.23075899999999,Guatemala
95 | 93,49.465691,-2.585278,Guernsey
96 | 94,9.945587,-9.696645,Guinea
97 | 95,11.803749,-15.180413,Guinea-Bissau
98 | 96,4.860416,-58.93018,Guyana
99 | 97,18.971187,-72.285215,Haiti
100 | 98,-53.08181,73.50415799999999,Heard & McDonald Islands
101 | 99,15.199999,-86.241905,Honduras
102 | 100,22.396428,114.109497,Hong Kong
103 | 101,47.162494,19.5033041,Hungary
104 | 102,64.963051,-19.020835,Iceland
105 | 103,20.593684,78.96288,India
106 | 104,-0.789275,113.921327,Indonesia
107 | 105,32.427908,53.688046,Iran
108 | 106,33.223191,43.679291,Iraq
109 | 107,53.1423672,-7.692053599999999,Ireland
110 | 108,54.236107,-4.548056,Isle of Man
111 | 109,31.046051,34.851612,Israel
112 | 110,41.87194,12.56738,Italy
113 | 111,18.109581,-77.297508,Jamaica
114 | 112,36.204824,138.252924,Japan
115 | 113,49.214439,-2.13125,Jersey
116 | 114,30.585164,36.238414,Jordan
117 | 115,48.019573,66.923684,Kazakhstan
118 | 116,-0.023559,37.906193,Kenya
119 | 117,-3.370417,-168.734039,Kiribati
120 | 118,29.31166,47.481766,Kuwait
121 | 119,41.20438,74.766098,Kyrgyzstan
122 | 120,19.85627,102.495496,Laos
123 | 121,56.879635,24.603189,Latvia
124 | 122,33.854721,35.862285,Lebanon
125 | 123,-29.609988,28.233608,Lesotho
126 | 124,6.428055,-9.429499000000002,Liberia
127 | 125,26.3351,17.228331,Libya
128 | 126,47.166,9.555373,Liechtenstein
129 | 127,55.169438,23.881275,Lithuania
130 | 128,49.815273,6.129582999999999,Luxembourg
131 | 129,22.198745,113.543873,Macau
132 | 130,41.608635,21.745275,Macedonia
133 | 131,-18.766947,46.869107,Madagascar
134 | 132,-13.254308,34.301525,Malawi
135 | 133,4.210484,101.975766,Malaysia
136 | 134,3.202778,73.22068,Maldives
137 | 135,17.570692,-3.996166,Mali
138 | 136,35.937496,14.375416,Malta
139 | 137,7.131474,171.184478,Marshall Islands
140 | 138,14.641528,-61.024174,Martinique
141 | 139,21.00789,-10.940835,Mauritania
142 | 140,-20.348404,57.55215200000001,Mauritius
143 | 141,-12.8275,45.166244,Mayotte
144 | 142,23.634501,-102.552784,Mexico
145 | 143,7.425554,150.550812,Micronesia
146 | 144,47.411631,28.369885,Moldova
147 | 145,43.73841760000001,7.424615799999999,Monaco
148 | 146,46.862496,103.846656,Mongolia
149 | 147,42.708678,19.37439,Montenegro
150 | 148,16.742498,-62.187366,Montserrat
151 | 149,31.791702,-7.092619999999999,Morocco
152 | 150,-18.665695,35.529562,Mozambique
153 | 151,21.916221,95.955974,Myanmar
154 | 152,-22.95764,18.49041,Namibia
155 | 153,-0.522778,166.931503,Nauru
156 | 154,28.394857,84.12400799999999,Nepal
157 | 155,52.132633,5.291265999999999,Netherlands
158 | 156,-20.904305,165.618042,New Caledonia
159 | 157,-40.900557,174.885971,New Zealand
160 | 158,12.865416,-85.207229,Nicaragua
161 | 159,17.607789,8.081666,Niger
162 | 160,9.081999,8.675277,Nigeria
163 | 161,-19.054445,-169.867233,Niue
164 | 162,-29.040835,167.954712,Norfolk Island
165 | 163,40.339852,127.510093,North Korea
166 | 164,15.0979,145.6739,Northern Mariana Islands
167 | 165,60.47202399999999,8.468945999999999,Norway
168 | 166,21.4735329,55.975413,Oman
169 | 167,30.375321,69.34511599999999,Pakistan
170 | 168,7.514979999999999,134.58252,Palau
171 | 169,31.952162,35.233154,Palestine
172 | 170,8.537981,-80.782127,Panama
173 | 171,-6.314992999999999,143.95555,Papua New Guinea
174 | 172,-23.442503,-58.443832,Paraguay
175 | 173,-9.189967,-75.015152,Peru
176 | 174,12.879721,121.774017,Philippines
177 | 175,-24.3767537,-128.3242376,Pitcairn Islands
178 | 176,51.919438,19.145136,Poland
179 | 177,39.39987199999999,-8.224454,Portugal
180 | 178,18.220833,-66.590149,Puerto Rico
181 | 179,25.354826,51.183884,Qatar
182 | 180,45.943161,24.96676,Romania
183 | 181,61.52401,105.318756,Russia
184 | 182,-1.940278,29.873888,Rwanda
185 | 183,-21.115141,55.536384,Réunion
186 | 184,-13.759029,-172.104629,Samoa
187 | 185,43.94236,12.457777,San Marino
188 | 186,23.885942,45.079162,Saudi Arabia
189 | 187,14.497401,-14.452362,Senegal
190 | 188,44.016521,21.005859,Serbia
191 | 189,-4.679574,55.491977,Seychelles
192 | 190,8.460555,-11.779889,Sierra Leone
193 | 191,1.352083,103.819836,Singapore
194 | 192,18.04248,-63.05483,Sint Maarten
195 | 193,48.669026,19.699024,Slovakia
196 | 194,46.151241,14.995463,Slovenia
197 | 195,-9.64571,160.156194,Solomon Islands
198 | 196,5.152149,46.199616,Somalia
199 | 197,-30.559482,22.937506,South Africa
200 | 198,-54.429579,-36.587909,South Georgia & South Sandwich Islands
201 | 199,35.907757,127.766922,South Korea
202 | 200,6.876991899999999,31.3069788,South Sudan
203 | 201,40.46366700000001,-3.74922,Spain
204 | 202,7.873053999999999,80.77179699999999,Sri Lanka
205 | 203,17.9,-62.833333,St. Barthélemy
206 | 204,-15.9650104,-5.7089241,St. Helena
207 | 205,17.357822,-62.782998,St. Kitts & Nevis
208 | 206,13.909444,-60.978893,St. Lucia
209 | 207,18.0708298,-63.0500809,St. Martin
210 | 208,46.8852,-56.3159,St. Pierre & Miquelon
211 | 209,12.984305,-61.287228,St. Vincent & Grenadines
212 | 210,12.862807,30.217636,Sudan
213 | 211,3.919305,-56.027783,Suriname
214 | 212,77.55360399999999,23.6702719,Svalbard & Jan Mayen
215 | 213,-26.522503,31.465866,Swaziland
216 | 214,60.12816100000001,18.643501,Sweden
217 | 215,46.818188,8.227511999999999,Switzerland
218 | 216,34.80207499999999,38.996815,Syria
219 | 217,0.18636,6.613080999999999,São Tomé & Príncipe
220 | 218,23.69781,120.960515,Taiwan
221 | 219,38.861034,71.276093,Tajikistan
222 | 220,-6.369028,34.888822,Tanzania
223 | 221,15.870032,100.992541,Thailand
224 | 222,-8.874217,125.727539,Timor-Leste
225 | 223,8.619543,0.824782,Togo
226 | 224,-9.200199999999999,-171.8484,Tokelau
227 | 225,-21.178986,-175.198242,Tonga
228 | 226,10.691803,-61.222503,Trinidad & Tobago
229 | 227,33.886917,9.537499,Tunisia
230 | 228,38.963745,35.243322,Turkey
231 | 229,38.969719,59.556278,Turkmenistan
232 | 230,21.694025,-71.797928,Turks & Caicos Islands
233 | 231,-7.109534999999999,177.64933,Tuvalu
234 | 232,19.2823192,166.647047,U.S. Outlying Islands
235 | 233,18.335765,-64.896335,U.S. Virgin Islands
236 | 234,55.378051,-3.435973,UK
237 | 235,40.7605367,-73.9788903,US
238 | 236,1.373333,32.290275,Uganda
239 | 237,48.379433,31.1655799,Ukraine
240 | 238,23.424076,53.847818,United Arab Emirates
241 | 239,-32.522779,-55.765835,Uruguay
242 | 240,41.377491,64.585262,Uzbekistan
243 | 241,-15.376706,166.959158,Vanuatu
244 | 242,41.902916,12.453389,Vatican City
245 | 243,6.42375,-66.58973,Venezuela
246 | 244,14.058324,108.277199,Vietnam
247 | 245,-14.2938,-178.1165,Wallis & Futuna
248 | 246,24.215527,-12.885834,Western Sahara
249 | 247,15.552727,48.516388,Yemen
250 | 248,-13.133897,27.849332,Zambia
251 | 249,-19.015438,29.154857,Zimbabwe
252 | 250,60.1785247,19.9156105,Åland Islands
253 |
--------------------------------------------------------------------------------
/live15_covid/README:
--------------------------------------------------------------------------------
1 | Arquivos da live "Coronavírus: Prevendo Próximos Países a Descobrirem Casos Confirmados - Live de Data Science #15"
2 | https://youtu.be/zg_Y8qNCKto
3 |
--------------------------------------------------------------------------------
/live15_covid/countries and continents.csv:
--------------------------------------------------------------------------------
1 | name,official_name_en,official_name_fr,ISO3166-1-Alpha-2,ISO3166-1-Alpha-3,M49,ITU,MARC,WMO,DS,Dial,FIFA,FIPS,GAUL,IOC,ISO4217-currency_alphabetic_code,ISO4217-currency_country_name,ISO4217-currency_minor_unit,ISO4217-currency_name,ISO4217-currency_numeric_code,is_independent,Capital,Continent,TLD,Languages,Geoname ID,EDGAR
2 | ,Channel Islands,Îles Anglo-Normandes,,,830,,,,,,,,,,,,,,,,,,,,,
3 | ,Sark,Sercq,,,680,,,,,,,,,,,,,,,,,,,,,
4 | Afghanistan,Afghanistan,Afghanistan,AF,AFG,4,AFG,af,AF,AFG,93,AFG,AF,1,AFG,AFN,AFGHANISTAN,2,Afghani,971,Yes,Kabul,AS,.af,"fa-AF,ps,uz-AF,tk",1149361,B2
5 | Albania,Albania,Albanie,AL,ALB,8,ALB,aa,AB,AL,355,ALB,AL,3,ALB,ALL,ALBANIA,2,Lek,8,Yes,Tirana,EU,.al,"sq,el",783754,B3
6 | Algeria,Algeria,Algérie,DZ,DZA,12,ALG,ae,AL,DZ,213,ALG,AG,4,ALG,DZD,ALGERIA,2,Algerian Dinar,12,Yes,Algiers,AF,.dz,ar-DZ,2589581,B4
7 | American Samoa,American Samoa,Samoa américaines,AS,ASM,16,SMA,as,,USA,1-684,ASA,AQ,5,ASA,USD,AMERICAN SAMOA,2,US Dollar,840,Territory of US,Pago Pago,OC,.as,"en-AS,sm,to",5880801,B5
8 | Andorra,Andorra,Andorre,AD,AND,20,AND,an,,AND,376,AND,AN,7,AND,EUR,ANDORRA,2,Euro,978,Yes,Andorra la Vella,EU,.ad,ca,3041565,B6
9 | Angola,Angola,Angola,AO,AGO,24,AGL,ao,AN,AO,244,ANG,AO,8,ANG,AOA,ANGOLA,2,Kwanza,973,Yes,Luanda,AF,.ao,pt-AO,3351879,B7
10 | Anguilla,Anguilla,Anguilla,AI,AIA,660,AIA,am,,,1-264,AIA,AV,9,AIA,XCD,ANGUILLA,2,East Caribbean Dollar,951,Territory of GB,The Valley,NA,.ai,en-AI,3573511,1A
11 | Antarctica,,,AQ,ATA,10,,ay,AA,,672,ROS,AY,10,,,,,,,International,,AN,.aq,,6697173,
12 | Antigua & Barbuda,Antigua and Barbuda,Antigua-et-Barbuda,AG,ATG,28,ATG,aq,AT,,1-268,ATG,AC,11,ANT,XCD,ANTIGUA AND BARBUDA,2,East Caribbean Dollar,951,Yes,St. John's,NA,.ag,en-AG,3576396,B9
13 | Argentina,Argentina,Argentine,AR,ARG,32,ARG,ag,AG,RA,54,ARG,AR,12,ARG,ARS,ARGENTINA,2,Argentine Peso,32,Yes,Buenos Aires,SA,.ar,"es-AR,en,it,de,fr,gn",3865483,C1
14 | Armenia,Armenia,Arménie,AM,ARM,51,ARM,ai,AY,AM,374,ARM,AM,13,ARM,AMD,ARMENIA,2,Armenian Dram,51,Yes,Yerevan,AS,.am,hy,174982,1B
15 | Aruba,Aruba,Aruba,AW,ABW,533,ABW,aw,NU,AW,297,ARU,AA,14,ARU,AWG,ARUBA,2,Aruban Florin,533,Part of NL,Oranjestad,NA,.aw,"nl-AW,es,en",3577279,1C
16 | Australia,Australia,Australie,AU,AUS,36,AUS,at,AU,AUS,61,AUS,AS,17,AUS,AUD,AUSTRALIA,2,Australian Dollar,36,Yes,Canberra,OC,.au,en-AU,2077456,C3
17 | Austria,Austria,Autriche,AT,AUT,40,AUT,au,OS,A,43,AUT,AU,18,AUT,EUR,AUSTRIA,2,Euro,978,Yes,Vienna,EU,.at,"de-AT,hr,hu,sl",2782113,C4
18 | Azerbaijan,Azerbaijan,Azerbaïdjan,AZ,AZE,31,AZE,aj,AJ,AZ,994,AZE,AJ,19,AZE,AZN,AZERBAIJAN,2,Azerbaijanian Manat,944,Yes,Baku,AS,.az,"az,ru,hy",587116,1D
19 | Bahamas,Bahamas,Bahamas,BS,BHS,44,BAH,bf,BA,BS,1-242,BAH,BF,20,BAH,BSD,BAHAMAS,2,Bahamian Dollar,44,Yes,Nassau,NA,.bs,en-BS,3572887,C5
20 | Bahrain,Bahrain,Bahreïn,BH,BHR,48,BHR,ba,BN,BRN,973,BHR,BA,21,BRN,BHD,BAHRAIN,3,Bahraini Dinar,48,Yes,Manama,AS,.bh,"ar-BH,en,fa,ur",290291,C6
21 | Bangladesh,Bangladesh,Bangladesh,BD,BGD,50,BGD,bg,BW,BD,880,BAN,BG,23,BAN,BDT,BANGLADESH,2,Taka,50,Yes,Dhaka,AS,.bd,"bn-BD,en",1210997,C7
22 | Barbados,Barbados,Barbade,BB,BRB,52,BRB,bb,BR,BDS,1-246,BRB,BB,24,BAR,BBD,BARBADOS,2,Barbados Dollar,52,Yes,Bridgetown,NA,.bb,en-BB,3374084,C8
23 | Belarus,Belarus,Bélarus,BY,BLR,112,BLR,bw,BY,BY,375,BLR,BO,26,BLR,BYR,BELARUS,0,Belarussian Ruble,974,Yes,Minsk,EU,.by,"be,ru",630336,1F
24 | Belgium,Belgium,Belgique,BE,BEL,56,BEL,be,BX,B,32,BEL,BE,27,BEL,EUR,BELGIUM,2,Euro,978,Yes,Brussels,EU,.be,"nl-BE,fr-BE,de-BE",2802361,C9
25 | Belize,Belize,Belize,BZ,BLZ,84,BLZ,bh,BH,BH,501,BLZ,BH,28,BIZ,BZD,BELIZE,2,Belize Dollar,84,Yes,Belmopan,NA,.bz,"en-BZ,es",3582678,D1
26 | Benin,Benin,Bénin,BJ,BEN,204,BEN,dm,BJ,DY,229,BEN,BN,29,BEN,XOF,BENIN,0,CFA Franc BCEAO,952,Yes,Porto-Novo,AF,.bj,fr-BJ,2395170,G6
27 | Bermuda,Bermuda,Bermudes,BM,BMU,60,BER,bm,BE,BM,1-441,BER,BD,30,BER,BMD,BERMUDA,2,Bermudian Dollar,60,Territory of GB,Hamilton,NA,.bm,"en-BM,pt",3573345,D0
28 | Bhutan,Bhutan,Bhoutan,BT,BTN,64,BTN,bt,,BT,975,BHU,BT,31,BHU,INR,BHUTAN,2,Indian Rupee,356,Yes,Thimphu,AS,.bt,dz,1252634,D2
29 | Bolivia,Bolivia (Plurinational State of),Bolivie (État plurinational de),BO,BOL,68,BOL,bo,BO,BOL,591,BOL,BL,33,BOL,BOB,"BOLIVIA, PLURINATIONAL STATE OF",2,Boliviano,68,Yes,Sucre,SA,.bo,"es-BO,qu,ay",3923057,
30 | Bosnia,Bosnia and Herzegovina,Bosnie-Herzégovine,BA,BIH,70,BIH,bn,BG,BIH,387,BIH,BK,34,BIH,BAM,BOSNIA AND HERZEGOVINA,2,Convertible Mark,977,Yes,Sarajevo,EU,.ba,"bs,hr-BA,sr-BA",3277605,1E
31 | Botswana,Botswana,Botswana,BW,BWA,72,BOT,bs,BC,BW,267,BOT,BC,35,BOT,BWP,BOTSWANA,2,Pula,72,Yes,Gaborone,AF,.bw,"en-BW,tn-BW",933860,B1
32 | Bouvet Island,,,BV,BVT,74,,bv,BV,BV,47,,BV,36,,,,,,,Territory of NO,,AN,.bv,,3371123,
33 | Brazil,Brazil,Brésil,BR,BRA,76,B,bl,BZ,BR,55,BRA,BR,37,BRA,BRL,BRAZIL,2,Brazilian Real,986,Yes,Brasilia,SA,.br,"pt-BR,es,en,fr",3469034,D5
34 | British Indian Ocean Territory,,,IO,IOT,86,BIO,bi,,,246,,IO,38,,,,,,,Territory of GB,Diego Garcia,AS,.io,en-IO,1282588,
35 | British Virgin Islands,British Virgin Islands,Îles Vierges britanniques,VG,VGB,92,VRG,vb,VI,BVI,1-284,VGB,VI,39,IVB,USD,VIRGIN ISLANDS (BRITISH),2,US Dollar,840,Territory of GB,Road Town,NA,.vg,en-VG,3577718,
36 | Brunei,Brunei Darussalam,Brunéi Darussalam,BN,BRN,96,BRU,bx,BD,BRU,673,BRU,BX,40,BRU,BND,BRUNEI DARUSSALAM,2,Brunei Dollar,96,Yes,Bandar Seri Begawan,AS,.bn,"ms-BN,en-BN",1820814,D9
37 | Bulgaria,Bulgaria,Bulgarie,BG,BGR,100,BUL,bu,BU,BG,359,BUL,BU,41,BUL,BGN,BULGARIA,2,Bulgarian Lev,975,Yes,Sofia,EU,.bg,"bg,tr-BG,rom",732800,E0
38 | Burkina Faso,Burkina Faso,Burkina Faso,BF,BFA,854,BFA,uv,HV,BF,226,BFA,UV,42,BUR,XOF,BURKINA FASO,0,CFA Franc BCEAO,952,Yes,Ouagadougou,AF,.bf,fr-BF,2361809,X2
39 | Burundi,Burundi,Burundi,BI,BDI,108,BDI,bd,BI,RU,257,BDI,BY,43,BDI,BIF,BURUNDI,0,Burundi Franc,108,Yes,Bujumbura,AF,.bi,"fr-BI,rn",433561,E2
40 | Cambodia,Cambodia,Cambodge,KH,KHM,116,CBG,cb,KP,K,855,CAM,CB,44,CAM,KHR,CAMBODIA,2,Riel,116,Yes,Phnom Penh,AS,.kh,"km,fr,en",1831722,E3
41 | Cameroon,Cameroon,Cameroun,CM,CMR,120,CME,cm,CM,CAM,237,CMR,CM,45,CMR,XAF,CAMEROON,0,CFA Franc BEAC,950,Yes,Yaounde,AF,.cm,"en-CM,fr-CM",2233387,E4
42 | Canada,Canada,Canada,CA,CAN,124,CAN,xxc,CN,CDN,1,CAN,CA,46,CAN,CAD,CANADA,2,Canadian Dollar,124,Yes,Ottawa,NA,.ca,"en-CA,fr-CA,iu",6251999,
43 | Cape Verde,Cabo Verde,Cabo Verde,CV,CPV,132,CPV,cv,CV,CV,238,CPV,CV,47,CPV,CVE,CABO VERDE,2,Cabo Verde Escudo,132,Yes,Praia,AF,.cv,pt-CV,3374766,
44 | Caribbean Netherlands,"Bonaire, Sint Eustatius and Saba","Bonaire, Saint-Eustache et Saba",BQ,BES,535,ATN,ca,NU,NA,599,ANT,NL,176,AHO,USD,"BONAIRE, SINT EUSTATIUS AND SABA",2,US Dollar,840,Part of NL,,NA,.bq,"nl,pap,en",7626844,
45 | Cayman Islands,Cayman Islands,Îles Caïmanes,KY,CYM,136,CYM,cj,GC,KY,1-345,CAY,CJ,48,CAY,KYD,CAYMAN ISLANDS,2,Cayman Islands Dollar,136,Territory of GB,George Town,NA,.ky,en-KY,3580718,E9
46 | Central African Republic,Central African Republic,République centrafricaine,CF,CAF,140,CAF,cx,CE,RCA,236,CTA,CT,49,CAF,XAF,CENTRAL AFRICAN REPUBLIC,0,CFA Franc BEAC,950,Yes,Bangui,AF,.cf,"fr-CF,sg,ln,kg",239880,F0
47 | Chad,Chad,Tchad,TD,TCD,148,TCD,cd,CD,TCH,235,CHA,CD,50,CHA,XAF,CHAD,0,CFA Franc BEAC,950,Yes,N'Djamena,AF,.td,"fr-TD,ar-TD,sre",2434508,F2
48 | Chile,Chile,Chili,CL,CHL,152,CHL,cl,CH,RCH,56,CHI,CI,51,CHI,CLP,CHILE,0,Chilean Peso,152,Yes,Santiago,SA,.cl,es-CL,3895114,F3
49 | China,China,Chine,CN,CHN,156,CHN,cc,CI,CN,86,CHN,CH,53,CHN,CNY,CHINA,2,Yuan Renminbi,156,Yes,Beijing,AS,.cn,"zh-CN,yue,wuu,dta,ug,za",1814991,F4
50 | Christmas Island,,,CX,CXR,162,CHR,xa,KI,AUS,61,CXR,KT,54,,,,,,,Territory of AU,Flying Fish Cove,AS,.cx,"en,zh,ms-CC",2078138,
51 | Cocos (Keeling) Islands,,,CC,CCK,166,ICO,xb,KK,AUS,61,CCK,CK,56,,,,,,,Territory of AU,West Island,AS,.cc,"ms-CC,en",1547376,
52 | Colombia,Colombia,Colombie,CO,COL,170,CLM,ck,CO,CO,57,COL,CO,57,COL,COP,COLOMBIA,2,Colombian Peso,170,Yes,Bogota,SA,.co,es-CO,3686110,F8
53 | Comoros,Comoros,Comores,KM,COM,174,COM,cq,IC,KM,269,COM,CN,58,COM,KMF,COMOROS,0,Comoro Franc,174,Yes,Moroni,AF,.km,"ar,fr-KM",921929,F9
54 | Congo - Brazzaville,Congo,Congo,CG,COG,178,COG,cf,CG,RCB,242,CGO,CF,59,CGO,XAF,CONGO,0,CFA Franc BEAC,950,Yes,Brazzaville,AF,.cg,"fr-CG,kg,ln-CG",2260494,G0
55 | Congo - Kinshasa,Democratic Republic of the Congo,République démocratique du Congo,CD,COD,180,COD,cg,ZR,ZRE,243,COD,CG,68,COD,,,,,,Yes,Kinshasa,AF,.cd,"fr-CD,ln,kg",203312,
56 | Cook Islands,Cook Islands,Îles Cook,CK,COK,184,CKH,cw,KU,NZ,682,COK,CW,60,COK,NZD,COOK ISLANDS,2,New Zealand Dollar,554,Associated with NZ,Avarua,OC,.ck,"en-CK,mi",1899402,G1
57 | Costa Rica,Costa Rica,Costa Rica,CR,CRI,188,CTR,cr,CS,CR,506,CRC,CS,61,CRC,CRC,COSTA RICA,2,Costa Rican Colon,188,Yes,San Jose,NA,.cr,"es-CR,en",3624060,G2
58 | Croatia,Croatia,Croatie,HR,HRV,191,HRV,ci,RH,HR,385,CRO,HR,62,CRO,HRK,CROATIA,2,Croatian Kuna,191,Yes,Zagreb,EU,.hr,"hr-HR,sr",3202326,1M
59 | Cuba,Cuba,Cuba,CU,CUB,192,CUB,cu,CU,C,53,CUB,CU,63,CUB,CUP,CUBA,2,Cuban Peso,192,Yes,Havana,NA,.cu,es-CU,3562981,G3
60 | Curaçao,Curaçao,Curaçao,CW,CUW,531,,co,,,599,,UC,,,ANG,CURAÇAO,2,Netherlands Antillean Guilder,532,Part of NL,Willemstad,NA,.cw,"nl,pap",7626836,
61 | Cyprus,Cyprus,Chypre,CY,CYP,196,CYP,cy,CY,CY,357,CYP,CY,64,CYP,EUR,CYPRUS,2,Euro,978,Yes,Nicosia,EU,.cy,"el-CY,tr-CY,en",146669,G4
62 | Czech Republic,Czechia,Tchéquie,CZ,CZE,203,CZE,xr,CZ,CZ,420,CZE,EZ,65,CZE,,,,,,Yes,Prague,EU,.cz,"cs,sk",3077311,
63 | Côte d’Ivoire,Côte d'Ivoire,Côte d'Ivoire,CI,CIV,384,CTI,iv,IV,CI,225,CIV,IV,66,CIV,XOF,CÔTE D'IVOIRE,0,CFA Franc BCEAO,952,Yes,Yamoussoukro,AF,.ci,fr-CI,2287781,
64 | Denmark,Denmark,Danemark,DK,DNK,208,DNK,dk,DN,DK,45,DEN,DA,69,DEN,DKK,DENMARK,2,Danish Krone,208,Yes,Copenhagen,EU,.dk,"da-DK,en,fo,de-DK",2623032,G7
65 | Djibouti,Djibouti,Djibouti,DJ,DJI,262,DJI,ft,DJ,F,253,DJI,DJ,70,DJI,DJF,DJIBOUTI,0,Djibouti Franc,262,Yes,Djibouti,AF,.dj,"fr-DJ,ar,so-DJ,aa",223816,1G
66 | Dominica,Dominica,Dominique,DM,DMA,212,DMA,dq,DO,WD,1-767,DMA,DO,71,DMA,XCD,DOMINICA,2,East Caribbean Dollar,951,Yes,Roseau,NA,.dm,en-DM,3575830,G9
67 | Dominican Republic,Dominican Republic,République dominicaine,DO,DOM,214,DOM,dr,DR,DOM,"1-809,1-829,1-849",DOM,DR,72,DOM,DOP,DOMINICAN REPUBLIC,2,Dominican Peso,214,Yes,Santo Domingo,NA,.do,es-DO,3508796,G8
68 | Ecuador,Ecuador,Équateur,EC,ECU,218,EQA,ec,EQ,EC,593,ECU,EC,73,ECU,USD,ECUADOR,2,US Dollar,840,Yes,Quito,SA,.ec,es-EC,3658394,H1
69 | Egypt,Egypt,Égypte,EG,EGY,818,EGY,ua,EG,ET,20,EGY,EG,40765,EGY,EGP,EGYPT,2,Egyptian Pound,818,Yes,Cairo,AF,.eg,"ar-EG,en,fr",357994,H2
70 | El Salvador,El Salvador,El Salvador,SV,SLV,222,SLV,es,ES,ES,503,SLV,ES,75,ESA,USD,EL SALVADOR,2,US Dollar,840,Yes,San Salvador,NA,.sv,es-SV,3585968,H3
71 | Equatorial Guinea,Equatorial Guinea,Guinée équatoriale,GQ,GNQ,226,GNE,eg,GQ,EQ,240,EQG,EK,76,GEQ,XAF,EQUATORIAL GUINEA,0,CFA Franc BEAC,950,Yes,Malabo,AF,.gq,"es-GQ,fr",2309096,H4
72 | Eritrea,Eritrea,Érythrée,ER,ERI,232,ERI,ea,,ER,291,ERI,ER,77,ERI,ERN,ERITREA,2,Nakfa,232,Yes,Asmara,AF,.er,"aa-ER,ar,tig,kun,ti-ER",338010,1J
73 | Estonia,Estonia,Estonie,EE,EST,233,EST,er,EO,EST,372,EST,EN,78,EST,EUR,ESTONIA,2,Euro,978,Yes,Tallinn,EU,.ee,"et,ru",453733,1H
74 | Ethiopia,Ethiopia,Éthiopie,ET,ETH,231,ETH,et,ET,ETH,251,ETH,ET,79,ETH,ETB,ETHIOPIA,2,Ethiopian Birr,230,Yes,Addis Ababa,AF,.et,"am,en-ET,om-ET,ti-ET,so-ET,sid",337996,H5
75 | Falkland Islands,Falkland Islands (Malvinas),Îles Falkland (Malvinas),FK,FLK,238,FLK,fk,FK,,500,FLK,FK,81,FLK,FKP,FALKLAND ISLANDS (MALVINAS),2,Falkland Islands Pound,238,Territory of GB,Stanley,SA,.fk,en-FK,3474414,H7
76 | Faroe Islands,Faeroe Islands,Îles Féroé,FO,FRO,234,FRO,fa,FA,FO,298,FRO,FO,82,FAR,,,,,,Part of DK,Torshavn,EU,.fo,"fo,da-FO",2622320,
77 | Fiji,Fiji,Fidji,FJ,FJI,242,FJI,fj,FJ,FJI,679,FIJ,FJ,83,FIJ,FJD,FIJI,2,Fiji Dollar,242,Yes,Suva,OC,.fj,"en-FJ,fj",2205218,H8
78 | Finland,Finland,Finlande,FI,FIN,246,FIN,fi,FI,FIN,358,FIN,FI,84,FIN,EUR,FINLAND,2,Euro,978,Yes,Helsinki,EU,.fi,"fi-FI,sv-FI,smn",660013,H9
79 | France,France,France,FR,FRA,250,F,fr,FR,F,33,FRA,FR,85,FRA,EUR,FRANCE,2,Euro,978,Yes,Paris,EU,.fr,"fr-FR,frp,br,co,ca,eu,oc",3017382,I0
80 | French Guiana,French Guiana,Guyane française,GF,GUF,254,GUF,fg,FG,F,594,GUF,FG,86,FGU,EUR,FRENCH GUIANA,2,Euro,978,Part of FR,Cayenne,SA,.gf,fr-GF,3381670,I3
81 | French Polynesia,French Polynesia,Polynésie française,PF,PYF,258,OCE,fp,PF,F,689,TAH,FP,87,FPO,XPF,FRENCH POLYNESIA,0,CFP Franc,953,Territory of FR,Papeete,OC,.pf,"fr-PF,ty",4030656,I4
82 | French Southern Territories,,,TF,ATF,260,,fs,,F,262,,FS,88,,,,,,,Territory of FR,Port-aux-Francais,AN,.tf,fr,1546748,
83 | Gabon,Gabon,Gabon,GA,GAB,266,GAB,go,GO,G,241,GAB,GB,89,GAB,XAF,GABON,0,CFA Franc BEAC,950,Yes,Libreville,AF,.ga,fr-GA,2400553,I5
84 | Gambia,Gambia,Gambie,GM,GMB,270,GMB,gm,GB,WAG,220,GAM,GA,90,GAM,GMD,GAMBIA,2,Dalasi,270,Yes,Banjul,AF,.gm,"en-GM,mnk,wof,wo,ff",2413451,I6
85 | Georgia,Georgia,Géorgie,GE,GEO,268,GEO,gs,GG,GE,995,GEO,GG,92,GEO,GEL,GEORGIA,2,Lari,981,Yes,Tbilisi,AS,.ge,"ka,ru,hy,az",614540,2Q
86 | Germany,Germany,Allemagne,DE,DEU,276,D,gw,DL,D,49,GER,GM,93,GER,EUR,GERMANY,2,Euro,978,Yes,Berlin,EU,.de,de,2921044,2M
87 | Ghana,Ghana,Ghana,GH,GHA,288,GHA,gh,GH,GH,233,GHA,GH,94,GHA,GHS,GHANA,2,Ghana Cedi,936,Yes,Accra,AF,.gh,"en-GH,ak,ee,tw",2300660,J0
88 | Gibraltar,Gibraltar,Gibraltar,GI,GIB,292,GIB,gi,GI,GBZ,350,GBZ,GI,95,GIB,GIP,GIBRALTAR,2,Gibraltar Pound,292,Territory of GB,Gibraltar,EU,.gi,"en-GI,es,it,pt",2411586,J1
89 | Greece,Greece,Grèce,GR,GRC,300,GRC,gr,GR,GR,30,GRE,GR,97,GRE,EUR,GREECE,2,Euro,978,Yes,Athens,EU,.gr,"el-GR,en,fr",390903,J3
90 | Greenland,Greenland,Groenland,GL,GRL,304,GRL,gl,GL,DK,299,GRL,GL,98,GRL,DKK,GREENLAND,2,Danish Krone,208,Part of DK,Nuuk,NA,.gl,"kl,da-GL,en",3425505,J4
91 | Grenada,Grenada,Grenade,GD,GRD,308,GRD,gd,GD,WG,1-473,GRN,GJ,99,GRN,XCD,GRENADA,2,East Caribbean Dollar,951,Yes,St. George's,NA,.gd,en-GD,3580239,J5
92 | Guadeloupe,Guadeloupe,Guadeloupe,GP,GLP,312,GDL,gp,MF,F,590,GLP,GP,100,GUD,EUR,GUADELOUPE,2,Euro,978,Part of FR,Basse-Terre,NA,.gp,fr-GP,3579143,J6
93 | Guam,Guam,Guam,GU,GUM,316,GUM,gu,GM,USA,1-671,GUM,GQ,101,GUM,USD,GUAM,2,US Dollar,840,Territory of US,Hagatna,OC,.gu,"en-GU,ch-GU",4043988,GU
94 | Guatemala,Guatemala,Guatemala,GT,GTM,320,GTM,gt,GU,GCA,502,GUA,GT,103,GUA,GTQ,GUATEMALA,2,Quetzal,320,Yes,Guatemala City,NA,.gt,es-GT,3595528,J8
95 | Guernsey,Guernsey,Guernesey,GG,GGY,831,,uik,,GBG,44,GBG,GK,104,,GBP,GUERNSEY,2,Pound Sterling,826,Crown dependency of GB,St Peter Port,EU,.gg,"en,fr",3042362,Y7
96 | Guinea,Guinea,Guinée,GN,GIN,324,GUI,gv,GN,RG,224,GUI,GV,106,GUI,GNF,GUINEA,0,Guinea Franc,324,Yes,Conakry,AF,.gn,fr-GN,2420477,J9
97 | Guinea-Bissau,Guinea-Bissau,Guinée-Bissau,GW,GNB,624,GNB,pg,GW,GW,245,GNB,PU,105,GBS,XOF,GUINEA-BISSAU,0,CFA Franc BCEAO,952,Yes,Bissau,AF,.gw,"pt-GW,pov",2372248,S0
98 | Guyana,Guyana,Guyana,GY,GUY,328,GUY,gy,GY,GUY,592,GUY,GY,107,GUY,GYD,GUYANA,2,Guyana Dollar,328,Yes,Georgetown,SA,.gy,en-GY,3378535,K0
99 | Haiti,Haiti,Haïti,HT,HTI,332,HTI,ht,HA,RH,509,HAI,HA,108,HAI,USD,HAITI,2,US Dollar,840,Yes,Port-au-Prince,NA,.ht,"ht,fr-HT",3723988,K1
100 | Heard & McDonald Islands,,,HM,HMD,334,,hm,,AUS,672,,HM,109,,,,,,,Territory of AU,,AN,.hm,,1547314,
101 | Honduras,Honduras,Honduras,HN,HND,340,HND,ho,HO,,504,HON,HO,111,HON,HNL,HONDURAS,2,Lempira,340,Yes,Tegucigalpa,NA,.hn,es-HN,3608932,K2
102 | Hong Kong,"China, Hong Kong Special Administrative Region","Chine, région administrative spéciale de Hong Kong",HK,HKG,344,HKG,,HK,HK,852,HKG,HK,33364,HKG,,,,,,Part of CN,Hong Kong,AS,.hk,"zh-HK,yue,zh,en",1819730,
103 | Hungary,Hungary,Hongrie,HU,HUN,348,HNG,hu,HU,H,36,HUN,HU,113,HUN,HUF,HUNGARY,2,Forint,348,Yes,Budapest,EU,.hu,hu-HU,719819,K5
104 | Iceland,Iceland,Islande,IS,ISL,352,ISL,ic,IL,IS,354,ISL,IC,114,ISL,ISK,ICELAND,0,Iceland Krona,352,Yes,Reykjavik,EU,.is,"is,en,de,da,sv,no",2629691,K6
105 | India,India,Inde,IN,IND,356,IND,ii,IN,IND,91,IND,IN,115,IND,INR,INDIA,2,Indian Rupee,356,Yes,New Delhi,AS,.in,"en-IN,hi,bn,te,mr,ta,ur,gu,kn,ml,or,pa,as,bh,sat,ks,ne,sd,kok,doi,mni,sit,sa,fr,lus,inc",1269750,K7
106 | Indonesia,Indonesia,Indonésie,ID,IDN,360,INS,io,ID,RI,62,IDN,ID,116,INA,IDR,INDONESIA,2,Rupiah,360,Yes,Jakarta,AS,.id,"id,en,nl,jv",1643084,K8
107 | Iran,Iran (Islamic Republic of),Iran (République islamique d'),IR,IRN,364,IRN,ir,IR,IR,98,IRN,IR,117,IRI,IRR,"IRAN, ISLAMIC REPUBLIC OF",2,Iranian Rial,364,Yes,Tehran,AS,.ir,"fa-IR,ku",130758,K9
108 | Iraq,Iraq,Iraq,IQ,IRQ,368,IRQ,iq,IQ,IRQ,964,IRQ,IZ,118,IRQ,IQD,IRAQ,3,Iraqi Dinar,368,Yes,Baghdad,AS,.iq,"ar-IQ,ku,hy",99237,L0
109 | Ireland,Ireland,Irlande,IE,IRL,372,IRL,ie,IE,IRL,353,IRL,EI,119,IRL,EUR,IRELAND,2,Euro,978,Yes,Dublin,EU,.ie,"en-IE,ga-IE",2963597,L2
110 | Isle of Man,Isle of Man,Île de Man,IM,IMN,833,,uik,,GBM,44,GBM,IM,120,,GBP,ISLE OF MAN,2,Pound Sterling,826,Crown dependency of GB,Douglas,EU,.im,"en,gv",3042225,Y8
111 | Israel,Israel,Israël,IL,ISR,376,ISR,is,IS,IL,972,ISR,IS,121,ISR,ILS,ISRAEL,2,New Israeli Sheqel,376,Yes,Jerusalem,AS,.il,"he,ar-IL,en-IL,",294640,L3
112 | Italy,Italy,Italie,IT,ITA,380,I,it,IY,I,39,ITA,IT,122,ITA,EUR,ITALY,2,Euro,978,Yes,Rome,EU,.it,"it-IT,de-IT,fr-IT,sc,ca,co,sl",3175395,L6
113 | Jamaica,Jamaica,Jamaïque,JM,JAM,388,JMC,jm,JM,JA,1-876,JAM,JM,123,JAM,JMD,JAMAICA,2,Jamaican Dollar,388,Yes,Kingston,NA,.jm,en-JM,3489940,L8
114 | Japan,Japan,Japon,JP,JPN,392,J,ja,JP,J,81,JPN,JA,126,JPN,JPY,JAPAN,0,Yen,392,Yes,Tokyo,AS,.jp,ja,1861060,M0
115 | Jersey,Jersey,Jersey,JE,JEY,832,,uik,,GBJ,44,GBJ,JE,128,,GBP,JERSEY,2,Pound Sterling,826,Crown dependency of GB,Saint Helier,EU,.je,"en,pt",3042142,Y9
116 | Jordan,Jordan,Jordanie,JO,JOR,400,JOR,jo,JD,HKJ,962,JOR,JO,130,JOR,JOD,JORDAN,3,Jordanian Dinar,400,Yes,Amman,AS,.jo,"ar-JO,en",248816,M2
117 | Kazakhstan,Kazakhstan,Kazakhstan,KZ,KAZ,398,KAZ,kz,KZ,KZ,7,KAZ,KZ,132,KAZ,KZT,KAZAKHSTAN,2,Tenge,398,Yes,Astana,AS,.kz,"kk,ru",1522867,
118 | Kenya,Kenya,Kenya,KE,KEN,404,KEN,ke,KN,EAK,254,KEN,KE,133,KEN,KES,KENYA,2,Kenyan Shilling,404,Yes,Nairobi,AF,.ke,"en-KE,sw-KE",192950,M3
119 | Kiribati,Kiribati,Kiribati,KI,KIR,296,KIR,gb,KB,,686,KIR,KR,135,KIR,AUD,KIRIBATI,2,Australian Dollar,36,Yes,Tarawa,OC,.ki,"en-KI,gil",4030945,J2
120 | Kuwait,Kuwait,Koweït,KW,KWT,414,KWT,ku,KW,KWT,965,KUW,KU,137,KUW,KWD,KUWAIT,3,Kuwaiti Dinar,414,Yes,Kuwait City,AS,.kw,"ar-KW,en",285570,M6
121 | Kyrgyzstan,Kyrgyzstan,Kirghizistan,KG,KGZ,417,KGZ,kg,KG,KS,996,KGZ,KG,138,KGZ,KGS,KYRGYZSTAN,2,Som,417,Yes,Bishkek,AS,.kg,"ky,uz,ru",1527747,1N
122 | Laos,Lao People's Democratic Republic,République démocratique populaire lao,LA,LAO,418,LAO,ls,LA,LAO,856,LAO,LA,139,LAO,LAK,LAO PEOPLE’S DEMOCRATIC REPUBLIC,2,Kip,418,Yes,Vientiane,AS,.la,"lo,fr,en",1655842,
123 | Latvia,Latvia,Lettonie,LV,LVA,428,LVA,lv,LV,LV,371,LVA,LG,140,LAT,EUR,LATVIA,2,Euro,978,Yes,Riga,EU,.lv,"lv,ru,lt",458258,1R
124 | Lebanon,Lebanon,Liban,LB,LBN,422,LBN,le,LB,RL,961,LIB,LE,141,LIB,LBP,LEBANON,2,Lebanese Pound,422,Yes,Beirut,AS,.lb,"ar-LB,fr-LB,en,hy",272103,M8
125 | Lesotho,Lesotho,Lesotho,LS,LSO,426,LSO,lo,LS,LS,266,LES,LT,142,LES,ZAR,LESOTHO,2,Rand,710,Yes,Maseru,AF,.ls,"en-LS,st,zu,xh",932692,M9
126 | Liberia,Liberia,Libéria,LR,LBR,430,LBR,lb,LI,LB,231,LBR,LI,144,LBR,LRD,LIBERIA,2,Liberian Dollar,430,Yes,Monrovia,AF,.lr,en-LR,2275384,N0
127 | Libya,Libya,Libye,LY,LBY,434,LBY,ly,LY,LAR,218,LBY,LY,145,LBA,LYD,LIBYA,3,Libyan Dinar,434,Yes,Tripoli,AF,.ly,"ar-LY,it,en",2215636,
128 | Liechtenstein,Liechtenstein,Liechtenstein,LI,LIE,438,LIE,lh,,FL,423,LIE,LS,146,LIE,CHF,LIECHTENSTEIN,2,Swiss Franc,756,Yes,Vaduz,EU,.li,de-LI,3042058,N2
129 | Lithuania,Lithuania,Lituanie,LT,LTU,440,LTU,li,LT,LT,370,LTU,LH,147,LTU,EUR,LITHUANIA,2,Euro,978,Yes,Vilnius,EU,.lt,"lt,ru,pl",597427,1Q
130 | Luxembourg,Luxembourg,Luxembourg,LU,LUX,442,LUX,lu,BX,L,352,LUX,LU,148,LUX,EUR,LUXEMBOURG,2,Euro,978,Yes,Luxembourg,EU,.lu,"lb,de-LU,fr-LU",2960313,N4
131 | Macau,"China, Macao Special Administrative Region","Chine, région administrative spéciale de Macao",MO,MAC,446,MAC,,MU,MO,853,MAC,MC,149,MAC,MOP,MACAO,2,Pataca,446,Part of CN,Macao,AS,.mo,"zh,zh-MO,pt",1821275,
132 | Macedonia,The former Yugoslav Republic of Macedonia,Ex-République yougoslave de Macédoine,MK,MKD,807,MKD,xn,MJ,MK,389,MKD,MK,241,MKD,MKD,"MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF",2,Denar,807,Yes,Skopje,EU,.mk,"mk,sq,tr,rmm,sr",718075,1U
133 | Madagascar,Madagascar,Madagascar,MG,MDG,450,MDG,mg,MG,RM,261,MAD,MA,150,MAD,MGA,MADAGASCAR,2,Malagasy Ariary,969,Yes,Antananarivo,AF,.mg,"fr-MG,mg",1062947,N6
134 | Malawi,Malawi,Malawi,MW,MWI,454,MWI,mw,MW,MW,265,MWI,MI,152,MAW,MWK,MALAWI,2,Kwacha,454,Yes,Lilongwe,AF,.mw,"ny,yao,tum,swk",927384,N7
135 | Malaysia,Malaysia,Malaisie,MY,MYS,458,MLA,my,MS,MAL,60,MAS,MY,153,MAS,MYR,MALAYSIA,2,Malaysian Ringgit,458,Yes,Kuala Lumpur,AS,.my,"ms-MY,en,zh,ta,te,ml,pa,th",1733045,N8
136 | Maldives,Maldives,Maldives,MV,MDV,462,MLD,xc,MV,MV,960,MDV,MV,154,MDV,MVR,MALDIVES,2,Rufiyaa,462,Yes,Male,AS,.mv,"dv,en",1282028,N9
137 | Mali,Mali,Mali,ML,MLI,466,MLI,ml,MI,RMM,223,MLI,ML,155,MLI,XOF,MALI,0,CFA Franc BCEAO,952,Yes,Bamako,AF,.ml,"fr-ML,bm",2453866,O0
138 | Malta,Malta,Malte,MT,MLT,470,MLT,mm,ML,M,356,MLT,MT,156,MLT,EUR,MALTA,2,Euro,978,Yes,Valletta,EU,.mt,"mt,en-MT",2562770,O1
139 | Marshall Islands,Marshall Islands,Îles Marshall,MH,MHL,584,MHL,xe,MH,,692,MHL,RM,157,MSH,USD,MARSHALL ISLANDS,2,US Dollar,840,Yes,Majuro,OC,.mh,"mh,en-MH",2080185,1T
140 | Martinique,Martinique,Martinique,MQ,MTQ,474,MRT,mq,MR,F,596,MTQ,MB,158,MRT,EUR,MARTINIQUE,2,Euro,978,Part of FR,Fort-de-France,NA,.mq,fr-MQ,3570311,O2
141 | Mauritania,Mauritania,Mauritanie,MR,MRT,478,MTN,mu,MT,RIM,222,MTN,MR,159,MTN,MRO,MAURITANIA,2,Ouguiya,478,Yes,Nouakchott,AF,.mr,"ar-MR,fuc,snk,fr,mey,wo",2378080,O3
142 | Mauritius,Mauritius,Maurice,MU,MUS,480,MAU,mf,MA,MS,230,MRI,MP,160,MRI,MUR,MAURITIUS,2,Mauritius Rupee,480,Yes,Port Louis,AF,.mu,"en-MU,bho,fr",934292,O4
143 | Mayotte,Mayotte,Mayotte,YT,MYT,175,MYT,ot,,,262,MYT,MF,161,MAY,EUR,MAYOTTE,2,Euro,978,Part of FR,Mamoudzou,AF,.yt,fr-YT,1024031,2P
144 | Mexico,Mexico,Mexique,MX,MEX,484,MEX,mx,MX,MEX,52,MEX,MX,162,MEX,MXN,MEXICO,2,Mexican Peso,484,Yes,Mexico City,NA,.mx,es-MX,3996063,O5
145 | Micronesia,Micronesia (Federated States of),Micronésie (États fédérés de),FM,FSM,583,FSM,fm,,,691,FSM,FM,163,FSM,USD,"MICRONESIA, FEDERATED STATES OF",2,US Dollar,840,Yes,Palikir,OC,.fm,"en-FM,chk,pon,yap,kos,uli,woe,nkr,kpg",2081918,1K
146 | Moldova,Republic of Moldova,République de Moldova,MD,MDA,498,MDA,mv,RM,MD,373,MDA,MD,165,MDA,MDL,"MOLDOVA, REPUBLIC OF",2,Moldovan Leu,498,Yes,Chisinau,EU,.md,"ro,ru,gag,tr",617790,1S
147 | Monaco,Monaco,Monaco,MC,MCO,492,MCO,mc,,MC,377,MON,MN,166,MON,EUR,MONACO,2,Euro,978,Yes,Monaco,EU,.mc,"fr-MC,en,it",2993457,O9
148 | Mongolia,Mongolia,Mongolie,MN,MNG,496,MNG,mp,MO,MGL,976,MNG,MG,167,MGL,MNT,MONGOLIA,2,Tugrik,496,Yes,Ulan Bator,AS,.mn,"mn,ru",2029969,P0
149 | Montenegro,Montenegro,Monténégro,ME,MNE,499,MNE,mo,,MNE,382,MNE,MJ,2647,MGO,EUR,MONTENEGRO,2,Euro,978,Yes,Podgorica,EU,.me,"sr,hu,bs,sq,hr,rom",3194884,Z5
150 | Montserrat,Montserrat,Montserrat,MS,MSR,500,MSR,mj,,,1-664,MSR,MH,168,MNT,XCD,MONTSERRAT,2,East Caribbean Dollar,951,Territory of GB,Plymouth,NA,.ms,en-MS,3578097,P1
151 | Morocco,Morocco,Maroc,MA,MAR,504,MRC,mr,MC,MA,212,MAR,MO,169,MAR,MAD,MOROCCO,2,Moroccan Dirham,504,Yes,Rabat,AF,.ma,"ar-MA,ber,fr",2542007,P2
152 | Mozambique,Mozambique,Mozambique,MZ,MOZ,508,MOZ,mz,MZ,MOC,258,MOZ,MZ,170,MOZ,MZN,MOZAMBIQUE,2,Mozambique Metical,943,Yes,Maputo,AF,.mz,"pt-MZ,vmw",1036973,P3
153 | Myanmar,Myanmar,Myanmar,MM,MMR,104,MYA,br,BM,BUR,95,MYA,BM,171,MYA,MMK,MYANMAR,2,Kyat,104,Yes,Nay Pyi Taw,AS,.mm,my,1327865,E1
154 | Namibia,Namibia,Namibie,NA,NAM,516,NMB,sx,NM,NAM,264,NAM,WA,172,NAM,ZAR,NAMIBIA,2,Rand,710,Yes,Windhoek,AF,.na,"en-NA,af,de,hz,naq",3355338,T6
155 | Nauru,Nauru,Nauru,NR,NRU,520,NRU,nu,NW,NAU,674,NRU,NR,173,NRU,AUD,NAURU,2,Australian Dollar,36,Yes,Yaren,OC,.nr,"na,en-NR",2110425,P5
156 | Nepal,Nepal,Népal,NP,NPL,524,NPL,np,NP,NEP,977,NEP,NP,175,NEP,NPR,NEPAL,2,Nepalese Rupee,524,Yes,Kathmandu,AS,.np,"ne,en",1282988,P6
157 | Netherlands,Netherlands,Pays-Bas,NL,NLD,528,HOL,ne,NL,NL,31,NED,NL,177,NED,EUR,NETHERLANDS,2,Euro,978,Yes,Amsterdam,EU,.nl,"nl-NL,fy-NL",2750405,P7
158 | New Caledonia,New Caledonia,Nouvelle-Calédonie,NC,NCL,540,NCL,nl,NC,F,687,NCL,NC,178,NCD,XPF,NEW CALEDONIA,0,CFP Franc,953,Territory of FR,Noumea,OC,.nc,fr-NC,2139685,1W
159 | New Zealand,New Zealand,Nouvelle-Zélande,NZ,NZL,554,NZL,nz,NZ,NZ,64,NZL,NZ,179,NZL,NZD,NEW ZEALAND,2,New Zealand Dollar,554,Yes,Wellington,OC,.nz,"en-NZ,mi",2186224,Q2
160 | Nicaragua,Nicaragua,Nicaragua,NI,NIC,558,NCG,nq,NK,NIC,505,NCA,NU,180,NCA,NIO,NICARAGUA,2,Cordoba Oro,558,Yes,Managua,NA,.ni,"es-NI,en",3617476,Q3
161 | Niger,Niger,Niger,NE,NER,562,NGR,ng,NR,RN,227,NIG,NG,181,NIG,XOF,NIGER,0,CFA Franc BCEAO,952,Yes,Niamey,AF,.ne,"fr-NE,ha,kr,dje",2440476,Q4
162 | Nigeria,Nigeria,Nigéria,NG,NGA,566,NIG,nr,NI,WAN,234,NGA,NI,182,NGR,NGN,NIGERIA,2,Naira,566,Yes,Abuja,AF,.ng,"en-NG,ha,yo,ig,ff",2328926,Q5
163 | Niue,Niue,Nioué,NU,NIU,570,NIU,xh,,NZ,683,NIU,NE,183,NIU,NZD,NIUE,2,New Zealand Dollar,554,Associated with NZ,Alofi,OC,.nu,"niu,en-NU",4036232,Q6
164 | Norfolk Island,Norfolk Island,Île Norfolk,NF,NFK,574,NFK,nx,NF,AUS,672,NFK,NF,184,NFI,AUD,NORFOLK ISLAND,2,Australian Dollar,36,Territory of AU,Kingston,OC,.nf,en-NF,2155115,Q7
165 | North Korea,Democratic People's Republic of Korea,République populaire démocratique de Corée,KP,PRK,408,KRE,kn,KR,,850,PRK,KN,67,PRK,KPW,"KOREA, DEMOCRATIC PEOPLE’S REPUBLIC OF",2,North Korean Won,408,Yes,Pyongyang,AS,.kp,ko-KP,1873107,
166 | Northern Mariana Islands,Northern Mariana Islands,Îles Mariannes septentrionales,MP,MNP,580,MRA,nw,MY,USA,1-670,NMI,CQ,185,NMA,USD,NORTHERN MARIANA ISLANDS,2,US Dollar,840,Commonwealth of US,Saipan,OC,.mp,"fil,tl,zh,ch-MP,en-MP",4041468,1V
167 | Norway,Norway,Norvège,NO,NOR,578,NOR,no,NO,N,47,NOR,NO,186,NOR,NOK,NORWAY,2,Norwegian Krone,578,Yes,Oslo,EU,.no,"no,nb,nn,se,fi",3144096,Q8
168 | Oman,Oman,Oman,OM,OMN,512,OMA,mk,OM,,968,OMA,MU,187,OMA,OMR,OMAN,3,Rial Omani,512,Yes,Muscat,AS,.om,"ar-OM,en,bal,ur",286963,P4
169 | Pakistan,Pakistan,Pakistan,PK,PAK,586,PAK,pk,PK,PK,92,PAK,PK,188,PAK,PKR,PAKISTAN,2,Pakistan Rupee,586,Yes,Islamabad,AS,.pk,"ur-PK,en-PK,pa,sd,ps,brh",1168579,R0
170 | Palau,Palau,Palaos,PW,PLW,585,PLW,pw,,,680,PLW,PS,189,PLW,USD,PALAU,2,US Dollar,840,Yes,Melekeok,OC,.pw,"pau,sov,en-PW,tox,ja,fil,zh",1559582,1Y
171 | Palestine,State of Palestine,État de Palestine,PS,PSE,275,,"gz,wj",,,970,PLE,"GZ,WE","91,267",PLE,,"PALESTINE, STATE OF",,No universal currency,,In contention,East Jerusalem,AS,.ps,ar-PS,6254930,
172 | Panama,Panama,Panama,PA,PAN,591,PNR,pn,PM,PA,507,PAN,PM,191,PAN,USD,PANAMA,2,US Dollar,840,Yes,Panama City,NA,.pa,"es-PA,en",3703430,R1
173 | Papua New Guinea,Papua New Guinea,Papouasie-Nouvelle-Guinée,PG,PNG,598,PNG,pp,NG,PNG,675,PNG,PP,192,PNG,PGK,PAPUA NEW GUINEA,2,Kina,598,Yes,Port Moresby,OC,.pg,"en-PG,ho,meu,tpi",2088628,R2
174 | Paraguay,Paraguay,Paraguay,PY,PRY,600,PRG,py,PY,PY,595,PAR,PA,194,PAR,PYG,PARAGUAY,0,Guarani,600,Yes,Asuncion,SA,.py,"es-PY,gn",3437598,R4
175 | Peru,Peru,Pérou,PE,PER,604,PRU,pe,PR,PE,51,PER,PE,195,PER,PEN,PERU,2,Nuevo Sol,604,Yes,Lima,SA,.pe,"es-PE,qu,ay",3932488,R5
176 | Philippines,Philippines,Philippines,PH,PHL,608,PHL,ph,PH,RP,63,PHI,RP,196,PHI,PHP,PHILIPPINES,2,Philippine Peso,608,Yes,Manila,AS,.ph,"tl,en-PH,fil",1694008,R6
177 | Pitcairn Islands,Pitcairn,Pitcairn,PN,PCN,612,PTC,pc,PT,,870,PCN,PC,197,,NZD,PITCAIRN,2,New Zealand Dollar,554,Territory of GB,Adamstown,OC,.pn,en-PN,4030699,R8
178 | Poland,Poland,Pologne,PL,POL,616,POL,pl,PL,PL,48,POL,PL,198,POL,PLN,POLAND,2,Zloty,985,Yes,Warsaw,EU,.pl,pl,798544,R9
179 | Portugal,Portugal,Portugal,PT,PRT,620,POR,po,PO,P,351,POR,PO,199,POR,EUR,PORTUGAL,2,Euro,978,Yes,Lisbon,EU,.pt,"pt-PT,mwl",2264397,S1
180 | Puerto Rico,Puerto Rico,Porto Rico,PR,PRI,630,PTR,pr,PU,USA,1,PUR,RQ,200,PUR,USD,PUERTO RICO,2,US Dollar,840,Commonwealth of US,San Juan,NA,.pr,"en-PR,es-PR",4566966,PR
181 | Qatar,Qatar,Qatar,QA,QAT,634,QAT,qa,QT,Q,974,QAT,QA,201,QAT,QAR,QATAR,2,Qatari Rial,634,Yes,Doha,AS,.qa,"ar-QA,es",289688,S3
182 | Romania,Romania,Roumanie,RO,ROU,642,ROU,rm,RO,RO,40,ROU,RO,203,ROU,RON,ROMANIA,2,New Romanian Leu,946,Yes,Bucharest,EU,.ro,"ro,hu,rom",798549,S5
183 | Russia,Russian Federation,Fédération de Russie,RU,RUS,643,RUS,ru,RS,RUS,7,RUS,RS,204,RUS,RUB,RUSSIAN FEDERATION,2,Russian Ruble,643,Yes,Moscow,EU,.ru,"ru,tt,xal,cau,ady,kv,ce,tyv,cv,udm,tut,mns,bua,myv,mdf,chm,ba,inh,tut,kbd,krc,ava,sah,nog",2017370,1Z
184 | Rwanda,Rwanda,Rwanda,RW,RWA,646,RRW,rw,RW,RWA,250,RWA,RW,205,RWA,RWF,RWANDA,0,Rwanda Franc,646,Yes,Kigali,AF,.rw,"rw,en-RW,fr-RW,sw",49518,S6
185 | Réunion,Réunion,Réunion,RE,REU,638,REU,re,RE,F,262,REU,RE,206,REU,EUR,RÉUNION,2,Euro,978,Part of FR,Saint-Denis,AF,.re,fr-RE,935317,
186 | Samoa,Samoa,Samoa,WS,WSM,882,SMO,ws,ZM,WS,685,SAM,WS,212,SAM,WST,SAMOA,2,Tala,882,Yes,Apia,OC,.ws,"sm,en-WS",4034894,Y0
187 | San Marino,San Marino,Saint-Marin,SM,SMR,674,SMR,sm,,RSM,378,SMR,SM,213,SMR,EUR,SAN MARINO,2,Euro,978,Yes,San Marino,EU,.sm,it-SM,3168068,S8
188 | Saudi Arabia,Saudi Arabia,Arabie saoudite,SA,SAU,682,ARS,su,SD,SA,966,KSA,SA,215,KSA,SAR,SAUDI ARABIA,2,Saudi Riyal,682,Yes,Riyadh,AS,.sa,ar-SA,102358,T0
189 | Senegal,Senegal,Sénégal,SN,SEN,686,SEN,sg,SG,SN,221,SEN,SG,217,SEN,XOF,SENEGAL,0,CFA Franc BCEAO,952,Yes,Dakar,AF,.sn,"fr-SN,wo,fuc,mnk",2245662,T1
190 | Serbia,Serbia,Serbie,RS,SRB,688,SRB,rb,YG,SRB,381 p,SRB,"RI,KV",2648,SRB,RSD,SERBIA,2,Serbian Dinar,941,Yes,Belgrade,EU,.rs,"sr,hu,bs,rom",6290252,Z2
191 | Seychelles,Seychelles,Seychelles,SC,SYC,690,SEY,se,SC,SY,248,SEY,SE,220,SEY,SCR,SEYCHELLES,2,Seychelles Rupee,690,Yes,Victoria,AF,.sc,"en-SC,fr-SC",241170,T2
192 | Sierra Leone,Sierra Leone,Sierra Leone,SL,SLE,694,SRL,sl,SL,WAL,232,SLE,SL,221,SLE,SLL,SIERRA LEONE,2,Leone,694,Yes,Freetown,AF,.sl,"en-SL,men,tem",2403846,T8
193 | Singapore,Singapore,Singapour,SG,SGP,702,SNG,si,SR,SGP,65,SIN,SN,222,SIN,SGD,SINGAPORE,2,Singapore Dollar,702,Yes,Singapore,AS,.sg,"cmn,en-SG,ms-SG,ta-SG,zh-SG",1880251,U0
194 | Sint Maarten,Sint Maarten (Dutch part),Saint-Martin (partie néerlandaise),SX,SXM,534,,sn,,,1-721,,NN,,,ANG,SINT MAARTEN (DUTCH PART),2,Netherlands Antillean Guilder,532,Part of NL,Philipsburg,NA,.sx,"nl,en",7609695,
195 | Slovakia,Slovakia,Slovaquie,SK,SVK,703,SVK,xo,SQ,SK,421,SVK,LO,223,SVK,EUR,SLOVAKIA,2,Euro,978,Yes,Bratislava,EU,.sk,"sk,hu",3057568,2B
196 | Slovenia,Slovenia,Slovénie,SI,SVN,705,SVN,xv,LJ,SLO,386,SVN,SI,224,SLO,EUR,SLOVENIA,2,Euro,978,Yes,Ljubljana,EU,.si,"sl,sh",3190538,2A
197 | Solomon Islands,Solomon Islands,Îles Salomon,SB,SLB,90,SLM,bp,SO,SB,677,SOL,BP,225,SOL,SBD,SOLOMON ISLANDS,2,Solomon Islands Dollar,90,Yes,Honiara,OC,.sb,"en-SB,tpi",2103350,D7
198 | Somalia,Somalia,Somalie,SO,SOM,706,SOM,so,SI,SO,252,SOM,SO,226,SOM,SOS,SOMALIA,2,Somali Shilling,706,Yes,Mogadishu,AF,.so,"so-SO,ar-SO,it,en-SO",51537,U1
199 | South Africa,South Africa,Afrique du Sud,ZA,ZAF,710,AFS,sa,ZA,ZA,27,RSA,SF,227,RSA,ZAR,SOUTH AFRICA,2,Rand,710,Yes,Pretoria,AF,.za,"zu,xh,af,nso,en-ZA,tn,st,ts,ss,ve,nr",953987,T3
200 | South Georgia & South Sandwich Islands,,,GS,SGS,239,,xs,,,500,,SX,228,,,,,,,Territory of GB,Grytviken,AN,.gs,en,3474415,
201 | South Korea,Republic of Korea,République de Corée,KR,KOR,410,KOR,ko,KO,ROK,82,KOR,KS,202,KOR,KRW,"KOREA, REPUBLIC OF",0,Won,410,Yes,Seoul,AS,.kr,"ko-KR,en",1835841,M5
202 | South Sudan,South Sudan,Soudan du Sud,SS,SSD,728,SSD,sd,,,211,,OD,,,SSP,SOUTH SUDAN,2,South Sudanese Pound,728,Yes,Juba,AF,,en,7909807,
203 | Spain,Spain,Espagne,ES,ESP,724,E,sp,SP,E,34,ESP,SP,229,ESP,EUR,SPAIN,2,Euro,978,Yes,Madrid,EU,.es,"es-ES,ca,gl,eu,oc",2510769,U3
204 | Sri Lanka,Sri Lanka,Sri Lanka,LK,LKA,144,CLN,ce,SB,CL,94,SRI,CE,231,SRI,LKR,SRI LANKA,2,Sri Lanka Rupee,144,Yes,Colombo,AS,.lk,"si,ta,en",1227603,F1
205 | St. Barthélemy,Saint Barthélemy,Saint-Barthélemy,BL,BLM,652,,sc,,,590,,TB,,,EUR,SAINT BARTHÉLEMY,2,Euro,978,Part of FR,Gustavia,NA,.gp,fr,3578476,
206 | St. Helena,Saint Helena,Sainte-Hélène,SH,SHN,654,SHN,xj,HE,SH,290 n,SHN,SH,207,HEL,SHP,"SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA",2,Saint Helena Pound,654,Territory of GB,Jamestown,AF,.sh,en-SH,3370751,
207 | St. Kitts & Nevis,Saint Kitts and Nevis,Saint-Kitts-et-Nevis,KN,KNA,659,KNA,xd,AT,KN,1-869,SKN,SC,208,SKN,XCD,SAINT KITTS AND NEVIS,2,East Caribbean Dollar,951,Yes,Basseterre,NA,.kn,en-KN,3575174,U7
208 | St. Lucia,Saint Lucia,Sainte-Lucie,LC,LCA,662,LCA,xk,LC,WL,1-758,LCA,ST,209,LCA,XCD,SAINT LUCIA,2,East Caribbean Dollar,951,Yes,Castries,NA,.lc,en-LC,3576468,U9
209 | St. Martin,Saint Martin (French part),Saint-Martin (partie française),MF,MAF,663,,st,,,590,,RN,,,EUR,SAINT MARTIN (FRENCH PART),2,Euro,978,Part of FR,Marigot,NA,.gp,fr,3578421,
210 | St. Pierre & Miquelon,Saint Pierre and Miquelon,Saint-Pierre-et-Miquelon,PM,SPM,666,SPM,xl,FP,F,508,SPM,SB,210,SPM,EUR,SAINT PIERRE AND MIQUELON,2,Euro,978,Part of FR,Saint-Pierre,NA,.pm,fr-PM,3424932,V0
211 | St. Vincent & Grenadines,Saint Vincent and the Grenadines,Saint-Vincent-et-les Grenadines,VC,VCT,670,VCT,xm,VG,WV,1-784,VIN,VC,211,VIN,XCD,SAINT VINCENT AND THE GRENADINES,2,East Caribbean Dollar,951,Yes,Kingstown,NA,.vc,"en-VC,fr",3577815,V1
212 | Sudan,Sudan,Soudan,SD,SDN,729,SDN,sj,SU,SUD,249,SUD,SU,40764,SUD,SDG,SUDAN,2,Sudanese Pound,938,Yes,Khartoum,AF,.sd,"ar-SD,en,fia",366755,V2
213 | Suriname,Suriname,Suriname,SR,SUR,740,SUR,sr,SM,SME,597,SUR,NS,233,SUR,SRD,SURINAME,2,Surinam Dollar,968,Yes,Paramaribo,SA,.sr,"nl-SR,en,srn,hns,jv",3382998,V3
214 | Svalbard & Jan Mayen,Svalbard and Jan Mayen Islands,Îles Svalbard-et-Jan Mayen,SJ,SJM,744,NOR,,SZ,,47,,"SV,JN",234,,NOK,SVALBARD AND JAN MAYEN,2,Norwegian Krone,578,Territory of NO,Longyearbyen,EU,.sj,"no,ru",607072,L9
215 | Swaziland,Swaziland,Swaziland,SZ,SWZ,748,SWZ,sq,SV,SD,268,SWZ,WZ,235,SWZ,SZL,SWAZILAND,2,Lilangeni,748,Yes,Mbabane,AF,.sz,"en-SZ,ss-SZ",934841,V6
216 | Sweden,Sweden,Suède,SE,SWE,752,S,sw,SN,S,46,SWE,SW,236,SWE,SEK,SWEDEN,2,Swedish Krona,752,Yes,Stockholm,EU,.se,"sv-SE,se,sma,fi-SE",2661886,V7
217 | Switzerland,Switzerland,Suisse,CH,CHE,756,SUI,sz,SW,CH,41,SUI,SZ,237,SUI,CHF,SWITZERLAND,2,Swiss Franc,756,Yes,Bern,EU,.ch,"de-CH,fr-CH,it-CH,rm",2658434,V8
218 | Syria,Syrian Arab Republic,République arabe syrienne,SY,SYR,760,SYR,sy,SY,SYR,963,SYR,SY,238,SYR,SYP,SYRIAN ARAB REPUBLIC,2,Syrian Pound,760,Yes,Damascus,AS,.sy,"ar-SY,ku,hy,arc,fr,en",163843,V9
219 | São Tomé & Príncipe,Sao Tome and Principe,Sao Tomé-et-Principe,ST,STP,678,STP,sf,TP,ST,239,STP,TP,214,STP,STD,SAO TOME AND PRINCIPE,2,Dobra,678,Yes,Sao Tome,AF,.st,pt-ST,2410758,S9
220 | Taiwan,,,TW,TWN,158,,ch,,RC,886,TPE,TW,925,TPE,,,,,,Yes,Taipei,AS,.tw,"zh-TW,zh,nan,hak",1668284,
221 | Tajikistan,Tajikistan,Tadjikistan,TJ,TJK,762,TJK,ta,TA,TJ,992,TJK,TI,239,TJK,TJS,TAJIKISTAN,2,Somoni,972,Yes,Dushanbe,AS,.tj,"tg,ru",1220409,2D
222 | Tanzania,United Republic of Tanzania,République-Unie de Tanzanie,TZ,TZA,834,TZA,tz,TN,EAT,255,TAN,TZ,257,TAN,TZS,"TANZANIA, UNITED REPUBLIC OF",2,Tanzanian Shilling,834,Yes,Dodoma,AF,.tz,"sw-TZ,en,ar",149590,W0
223 | Thailand,Thailand,Thaïlande,TH,THA,764,THA,th,TH,T,66,THA,TH,240,THA,THB,THAILAND,2,Baht,764,Yes,Bangkok,AS,.th,"th,en",1605651,W1
224 | Timor-Leste,Timor-Leste,Timor-Leste,TL,TLS,626,TLS,em,TM,RI,670,TLS,TT,242,TLS,USD,TIMOR-LESTE,2,US Dollar,840,Yes,Dili,OC,.tl,"tet,pt-TL,id,en",1966436,Z3
225 | Togo,Togo,Togo,TG,TGO,768,TGO,tg,TG,TG,228,TOG,TO,243,TOG,XOF,TOGO,0,CFA Franc BCEAO,952,Yes,Lome,AF,.tg,"fr-TG,ee,hna,kbp,dag,ha",2363686,W2
226 | Tokelau,Tokelau,Tokelau,TK,TKL,772,TKL,tl,TK,NZ,690,TKL,TL,244,,NZD,TOKELAU,2,New Zealand Dollar,554,Territory of NZ,,OC,.tk,"tkl,en-TK",4031074,W3
227 | Tonga,Tonga,Tonga,TO,TON,776,TON,to,TO,TO,676,TGA,TN,245,TGA,TOP,TONGA,2,Pa’anga,776,Yes,Nuku'alofa,OC,.to,"to,en-TO",4032283,W4
228 | Trinidad & Tobago,Trinidad and Tobago,Trinité-et-Tobago,TT,TTO,780,TRD,tr,TD,TT,1-868,TRI,TD,246,TTO,TTD,TRINIDAD AND TOBAGO,2,Trinidad and Tobago Dollar,780,Yes,Port of Spain,NA,.tt,"en-TT,hns,fr,es,zh",3573591,W5
229 | Tunisia,Tunisia,Tunisie,TN,TUN,788,TUN,ti,TS,TN,216,TUN,TS,248,TUN,TND,TUNISIA,3,Tunisian Dinar,788,Yes,Tunis,AF,.tn,"ar-TN,fr",2464461,W6
230 | Turkey,Turkey,Turquie,TR,TUR,792,TUR,tu,TU,TR,90,TUR,TU,249,TUR,TRY,TURKEY,2,Turkish Lira,949,Yes,Ankara,AS,.tr,"tr-TR,ku,diq,az,av",298795,W8
231 | Turkmenistan,Turkmenistan,Turkménistan,TM,TKM,795,TKM,tk,TR,TM,993,TKM,TX,250,TKM,TMT,TURKMENISTAN,2,Turkmenistan New Manat,934,Yes,Ashgabat,AS,.tm,"tk,ru,uz",1218197,2E
232 | Turks & Caicos Islands,Turks and Caicos Islands,Îles Turques-et-Caïques,TC,TCA,796,TCA,tc,TI,,1-649,TCA,TK,251,TKS,USD,TURKS AND CAICOS ISLANDS,2,US Dollar,840,Territory of GB,Cockburn Town,NA,.tc,en-TC,3576916,W7
233 | Tuvalu,Tuvalu,Tuvalu,TV,TUV,798,TUV,tv,TV,TV,688,TUV,TV,252,TUV,AUD,TUVALU,2,Australian Dollar,36,Yes,Funafuti,OC,.tv,"tvl,en,sm,gil",2110297,2G
234 | U.S. Outlying Islands,,,UM,UMI,581,,"ji,xf,wk,uc,up",,USA,,,"FQ,HQ,DQ,JQ,KQ,MQ,BQ,LQ,WQ",,,,,,,,Territories of US,,OC,.um,en-UM,5854968,
235 | U.S. Virgin Islands,United States Virgin Islands,Îles Vierges américaines,VI,VIR,850,VIR,vi,VI,USA,1-340,VIR,VQ,258,ISV,USD,VIRGIN ISLANDS (U.S.),2,US Dollar,840,Territory of US,Charlotte Amalie,NA,.vi,en-VI,4796775,
236 | UK,United Kingdom of Great Britain and Northern Ireland,Royaume-Uni de Grande-Bretagne et d'Irlande du Nord,GB,GBR,826,G,xxk,UK,GB,44,"ENG,NIR,SCO,WAL",UK,256,GBR,GBP,UNITED KINGDOM,2,Pound Sterling,826,Yes,London,EU,.uk,"en-GB,cy-GB,gd",2635167,X0
237 | US,United States of America,États-Unis d'Amérique,US,USA,840,USA,xxu,US,USA,1,USA,US,259,USA,USD,UNITED STATES,2,US Dollar,840,Yes,Washington,NA,.us,"en-US,es-US,haw,fr",6252001,
238 | Uganda,Uganda,Ouganda,UG,UGA,800,UGA,ug,UG,EAU,256,UGA,UG,253,UGA,UGX,UGANDA,0,Uganda Shilling,800,Yes,Kampala,AF,.ug,"en-UG,lg,sw,ar",226074,W9
239 | Ukraine,Ukraine,Ukraine,UA,UKR,804,UKR,un,UR,UA,380,UKR,UP,254,UKR,UAH,UKRAINE,2,Hryvnia,980,Yes,Kiev,EU,.ua,"uk,ru-UA,rom,pl,hu",690791,2H
240 | United Arab Emirates,United Arab Emirates,Émirats arabes unis,AE,ARE,784,UAE,ts,ER,,971,UAE,AE,255,UAE,AED,UNITED ARAB EMIRATES,2,UAE Dirham,784,Yes,Abu Dhabi,AS,.ae,"ar-AE,fa,en,hi,ur",290557,C0
241 | Uruguay,Uruguay,Uruguay,UY,URY,858,URG,uy,UY,ROU,598,URU,UY,260,URU,UYU,URUGUAY,2,Peso Uruguayo,858,Yes,Montevideo,SA,.uy,es-UY,3439705,X3
242 | Uzbekistan,Uzbekistan,Ouzbékistan,UZ,UZB,860,UZB,uz,UZ,UZ,998,UZB,UZ,261,UZB,UZS,UZBEKISTAN,2,Uzbekistan Sum,860,Yes,Tashkent,AS,.uz,"uz,ru,tg",1512440,2K
243 | Vanuatu,Vanuatu,Vanuatu,VU,VUT,548,VUT,nn,NV,VU,678,VAN,NH,262,VAN,VUV,VANUATU,0,Vatu,548,Yes,Port Vila,OC,.vu,"bi,en-VU,fr-VU",2134431,2L
244 | Vatican City,Holy See,Saint-Siège,VA,VAT,336,CVA,vc,,V,39-06,VAT,VT,110,,EUR,HOLY SEE (VATICAN CITY STATE),2,Euro,978,Yes,Vatican City,EU,.va,"la,it,fr",3164670,X4
245 | Venezuela,Venezuela (Bolivarian Republic of),Venezuela (République bolivarienne du),VE,VEN,862,VEN,ve,VN,YV,58,VEN,VE,263,VEN,VEF,"VENEZUELA, BOLIVARIAN REPUBLIC OF",2,Bolivar,937,Yes,Caracas,SA,.ve,es-VE,3625428,
246 | Vietnam,Viet Nam,Viet Nam,VN,VNM,704,VTN,vm,VS,VN,84,VIE,VM,264,VIE,VND,VIET NAM,0,Dong,704,Yes,Hanoi,AS,.vn,"vi,en,fr,zh,km",1562822,Q1
247 | Wallis & Futuna,Wallis and Futuna Islands,Îles Wallis-et-Futuna,WF,WLF,876,WAL,wf,FW,F,681,WLF,WF,266,WAF,XPF,WALLIS AND FUTUNA,0,CFP Franc,953,Territory of FR,Mata Utu,OC,.wf,"wls,fud,fr-WF",4034749,X8
248 | Western Sahara,Western Sahara,Sahara occidental,EH,ESH,732,AOE,ss,,,212,SAH,WI,268,,MAD,WESTERN SAHARA,2,Moroccan Dirham,504,In contention,El-Aaiun,AF,.eh,"ar,mey",2461445,U5
249 | Yemen,Yemen,Yémen,YE,YEM,887,YEM,ye,YE,YAR,967,YEM,YM,269,YEM,YER,YEMEN,2,Yemeni Rial,886,Yes,Sanaa,AS,.ye,ar-YE,69543,T7
250 | Zambia,Zambia,Zambie,ZM,ZMB,894,ZMB,za,ZB,Z,260,ZAM,ZA,270,ZAM,ZMW,ZAMBIA,2,Zambian Kwacha,967,Yes,Lusaka,AF,.zm,"en-ZM,bem,loz,lun,lue,ny,toi",895949,Y4
251 | Zimbabwe,Zimbabwe,Zimbabwe,ZW,ZWE,716,ZWE,rh,ZW,ZW,263,ZIM,ZI,271,ZIM,ZWL,ZIMBABWE,2,Zimbabwe Dollar,932,Yes,Harare,AF,.zw,"en-ZW,sn,nr,nd",878675,Y5
252 | Åland Islands,Åland Islands,Îles d'Åland,AX,ALA,248,,,,FIN,358,ALD,,1242,,EUR,ÅLAND ISLANDS,2,Euro,978,Part of FI,Mariehamn,EU,.ax,sv-AX,661882,
253 |
--------------------------------------------------------------------------------
/live16_clustering_texto/README:
--------------------------------------------------------------------------------
1 | Material para a Live 16
2 | Como Descobrir Padrões em Textos Usando Clustering
3 | https://youtu.be/liQJHtxf-nE
4 |
--------------------------------------------------------------------------------
/live16_clustering_texto/nb1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 33,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np\n",
11 | "%matplotlib inline\n",
12 | "\n",
13 | "from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n",
14 | "from sklearn.metrics import silhouette_score\n",
15 | "from sklearn.cluster import KMeans, DBSCAN, SpectralClustering"
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {},
21 | "source": [
22 | "# Quer aprender como eu faço um projeto de Data Science da ideia ao produto final? Acesse CursoDeDataScience.com"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "https://www.kaggle.com/lukeimurfather/tweets"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 2,
35 | "metadata": {},
36 | "outputs": [],
37 | "source": [
38 | "tweets = pd.read_csv(\"nCoV_tweets.csv\", index_col=0, parse_dates=['dt'])"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 129,
44 | "metadata": {},
45 | "outputs": [],
46 | "source": [
47 | "docs = [\"curso de data, dAta science\", \n",
48 | " \"tutorial de data analysis\", \n",
49 | " \"não sei mais o que escrever analysis\"]"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 133,
55 | "metadata": {},
56 | "outputs": [
57 | {
58 | "data": {
59 | "text/html": [
60 | "\n",
61 | "\n",
74 | "
\n",
75 | " \n",
76 | " \n",
77 | " | \n",
78 | " analysis | \n",
79 | " data | \n",
80 | "
\n",
81 | " \n",
82 | " \n",
83 | " \n",
84 | " | curso de data, dAta science | \n",
85 | " 0 | \n",
86 | " 2 | \n",
87 | "
\n",
88 | " \n",
89 | " | tutorial de data analysis | \n",
90 | " 1 | \n",
91 | " 1 | \n",
92 | "
\n",
93 | " \n",
94 | " | não sei mais o que escrever analysis | \n",
95 | " 1 | \n",
96 | " 0 | \n",
97 | "
\n",
98 | " \n",
99 | "
\n",
100 | "
"
101 | ],
102 | "text/plain": [
103 | " analysis data\n",
104 | "curso de data, dAta science 0 2\n",
105 | "tutorial de data analysis 1 1\n",
106 | "não sei mais o que escrever analysis 1 0"
107 | ]
108 | },
109 | "execution_count": 133,
110 | "metadata": {},
111 | "output_type": "execute_result"
112 | }
113 | ],
114 | "source": [
115 | "bag_of_words_transformer = CountVectorizer(binary=False, analyzer='word', stop_words=['de', 'que'], ngram_range=(1,1), min_df=2)\n",
116 | "mx = bag_of_words_transformer.fit_transform(docs).todense()\n",
117 | "terms = bag_of_words_transformer.get_feature_names()\n",
118 | "pd.DataFrame(mx, columns=terms, index=docs)\n",
119 | "\n",
120 | "\n",
121 | "#lowercase\n",
122 | "#analyzer='char'\n",
123 | "#strip_accents='unicode'\n",
124 | "#binary=True\n",
125 | "\n",
126 | "#stop_words=['de']\n",
127 | "#ngram_range=(1,1)\n",
128 | "#min_df\n",
129 | "\n",
130 | "#unigrama, bigrama, trigrama, "
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": null,
136 | "metadata": {},
137 | "outputs": [],
138 | "source": [
139 | "tf-idf ~= frequência da palavra no documento * inverso da frequência da palavra em todos os documentos"
140 | ]
141 | },
142 | {
143 | "cell_type": "code",
144 | "execution_count": 137,
145 | "metadata": {},
146 | "outputs": [
147 | {
148 | "data": {
149 | "text/html": [
150 | "\n",
151 | "\n",
164 | "
\n",
165 | " \n",
166 | " \n",
167 | " | \n",
168 | " analysis | \n",
169 | " curso | \n",
170 | " data | \n",
171 | " de | \n",
172 | " escrever | \n",
173 | " mais | \n",
174 | " não | \n",
175 | " que | \n",
176 | " science | \n",
177 | " sei | \n",
178 | " tutorial | \n",
179 | "
\n",
180 | " \n",
181 | " \n",
182 | " \n",
183 | " | curso de data, dAta science | \n",
184 | " 0.000000 | \n",
185 | " 0.452123 | \n",
186 | " 0.687703 | \n",
187 | " 0.343851 | \n",
188 | " 0.000000 | \n",
189 | " 0.000000 | \n",
190 | " 0.000000 | \n",
191 | " 0.000000 | \n",
192 | " 0.452123 | \n",
193 | " 0.000000 | \n",
194 | " 0.000000 | \n",
195 | "
\n",
196 | " \n",
197 | " | tutorial de data analysis | \n",
198 | " 0.459854 | \n",
199 | " 0.000000 | \n",
200 | " 0.459854 | \n",
201 | " 0.459854 | \n",
202 | " 0.000000 | \n",
203 | " 0.000000 | \n",
204 | " 0.000000 | \n",
205 | " 0.000000 | \n",
206 | " 0.000000 | \n",
207 | " 0.000000 | \n",
208 | " 0.604652 | \n",
209 | "
\n",
210 | " \n",
211 | " | não sei mais o que escrever analysis | \n",
212 | " 0.322002 | \n",
213 | " 0.000000 | \n",
214 | " 0.000000 | \n",
215 | " 0.000000 | \n",
216 | " 0.423394 | \n",
217 | " 0.423394 | \n",
218 | " 0.423394 | \n",
219 | " 0.423394 | \n",
220 | " 0.000000 | \n",
221 | " 0.423394 | \n",
222 | " 0.000000 | \n",
223 | "
\n",
224 | " \n",
225 | "
\n",
226 | "
"
227 | ],
228 | "text/plain": [
229 | " analysis curso data de \\\n",
230 | "curso de data, dAta science 0.000000 0.452123 0.687703 0.343851 \n",
231 | "tutorial de data analysis 0.459854 0.000000 0.459854 0.459854 \n",
232 | "não sei mais o que escrever analysis 0.322002 0.000000 0.000000 0.000000 \n",
233 | "\n",
234 | " escrever mais não que \\\n",
235 | "curso de data, dAta science 0.000000 0.000000 0.000000 0.000000 \n",
236 | "tutorial de data analysis 0.000000 0.000000 0.000000 0.000000 \n",
237 | "não sei mais o que escrever analysis 0.423394 0.423394 0.423394 0.423394 \n",
238 | "\n",
239 | " science sei tutorial \n",
240 | "curso de data, dAta science 0.452123 0.000000 0.000000 \n",
241 | "tutorial de data analysis 0.000000 0.000000 0.604652 \n",
242 | "não sei mais o que escrever analysis 0.000000 0.423394 0.000000 "
243 | ]
244 | },
245 | "execution_count": 137,
246 | "metadata": {},
247 | "output_type": "execute_result"
248 | }
249 | ],
250 | "source": [
251 | "bag_of_words_transformer = TfidfVectorizer()\n",
252 | "mx = bag_of_words_transformer.fit_transform(docs).todense()\n",
253 | "terms = bag_of_words_transformer.get_feature_names()\n",
254 | "pd.DataFrame(mx, columns=terms, index=docs)\n",
255 | "\n",
256 | "\n",
257 | "# norm: each output row will have unit norm, either:\n",
258 | "# * ‘l2’: sum of squares of vector elements is 1. The cosine similarity between two vectors is their dot product when l2 norm has been applied.\n",
259 | "# * ‘l1’: sum of absolute values of vector elements is 1. See preprocessing.normalize.\n",
260 | "\n",
261 | "#use_idf = False \n",
262 | "#use_idf = False e norm = l1, frequência simples"
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": 6,
268 | "metadata": {},
269 | "outputs": [],
270 | "source": [
271 | "from nltk.stem import SnowballStemmer"
272 | ]
273 | },
274 | {
275 | "cell_type": "code",
276 | "execution_count": 139,
277 | "metadata": {},
278 | "outputs": [
279 | {
280 | "data": {
281 | "text/plain": [
282 | "'não sei mais o que escrever analys'"
283 | ]
284 | },
285 | "execution_count": 139,
286 | "metadata": {},
287 | "output_type": "execute_result"
288 | }
289 | ],
290 | "source": [
291 | "stemmer.stem(docs[2])"
292 | ]
293 | },
294 | {
295 | "cell_type": "code",
296 | "execution_count": 7,
297 | "metadata": {},
298 | "outputs": [
299 | {
300 | "data": {
301 | "text/plain": [
302 | "('analis', 'analis')"
303 | ]
304 | },
305 | "execution_count": 7,
306 | "metadata": {},
307 | "output_type": "execute_result"
308 | }
309 | ],
310 | "source": [
311 | "stemmer = SnowballStemmer(language='portuguese')\n",
312 | "stemmer.stem(\"analisado\"), stemmer.stem(\"analise\")"
313 | ]
314 | },
315 | {
316 | "cell_type": "code",
317 | "execution_count": 153,
318 | "metadata": {},
319 | "outputs": [
320 | {
321 | "data": {
322 | "text/plain": [
323 | "(6706, 618)"
324 | ]
325 | },
326 | "execution_count": 153,
327 | "metadata": {},
328 | "output_type": "execute_result"
329 | }
330 | ],
331 | "source": [
332 | "bag_of_words_transformer = CountVectorizer(min_df=4, stop_words='english', ngram_range=(3,3))\n",
333 | "#ngram range 2,2, 3,3\n",
334 | "# stem\n",
335 | "mx = bag_of_words_transformer.fit_transform(tweets['txt'])#.todense()\n",
336 | "mx.shape"
337 | ]
338 | },
339 | {
340 | "cell_type": "code",
341 | "execution_count": 154,
342 | "metadata": {},
343 | "outputs": [],
344 | "source": [
345 | "from sklearn.pipeline import make_pipeline\n",
346 | "from sklearn.preprocessing import MaxAbsScaler"
347 | ]
348 | },
349 | {
350 | "cell_type": "code",
351 | "execution_count": 155,
352 | "metadata": {},
353 | "outputs": [
354 | {
355 | "name": "stdout",
356 | "output_type": "stream",
357 | "text": [
358 | "K = 2 - Silhouette: 0.7849594690794792\n",
359 | "K = 3 - Silhouette: 0.7872649732846221\n",
360 | "K = 4 - Silhouette: 0.7899806458433944\n",
361 | "K = 5 - Silhouette: 0.7916851880937629\n",
362 | "K = 6 - Silhouette: 0.7917029047299742\n",
363 | "K = 7 - Silhouette: 0.7967897372340083\n",
364 | "K = 8 - Silhouette: 0.7965854032241488\n",
365 | "K = 9 - Silhouette: 0.7980160269702951\n"
366 | ]
367 | }
368 | ],
369 | "source": [
370 | "for k in range(2,10):\n",
371 | " cluster = make_pipeline(MaxAbsScaler(), KMeans(n_clusters=k, random_state=0))\n",
372 | " cluster.fit(mx)\n",
373 | " p = cluster.predict(mx)\n",
374 | " \n",
375 | " sil = silhouette_score(mx, p)\n",
376 | " print(\"K = {} - Silhouette: {}\".format(k, sil))"
377 | ]
378 | },
379 | {
380 | "cell_type": "code",
381 | "execution_count": 156,
382 | "metadata": {},
383 | "outputs": [],
384 | "source": [
385 | "terms = bag_of_words_transformer.get_feature_names()"
386 | ]
387 | },
388 | {
389 | "cell_type": "code",
390 | "execution_count": 157,
391 | "metadata": {},
392 | "outputs": [
393 | {
394 | "name": "stdout",
395 | "output_type": "stream",
396 | "text": [
397 | "\n",
398 | "Cluster 0 - Size 6453\n",
399 | "coronavirus update wuhan 0.002015\n",
400 | "coronavirus death toll 0.002170\n",
401 | "30 hours birth 0.002325\n",
402 | "accidentally leaked real 0.002479\n",
403 | "guan zhuang bing 0.002634\n",
404 | "tencent accidentally leaked 0.002634\n",
405 | "zhuang bing du 0.002634\n",
406 | "just 30 hours 0.002789\n",
407 | "novel coronavirus 2019 0.002944\n",
408 | "cruise ship japan 0.003099\n",
409 | "2019 novel coronavirus 0.003254\n",
410 | "news china coronavirus 0.003409\n",
411 | "world health organization 0.003719\n",
412 | "coronavirus coronaoutbreak coronanews 0.004184\n",
413 | "coronaoutbreak coronanews ncov2019 0.004184\n",
414 | "coronavirus asiannetwalking https 0.004339\n",
415 | "health coronavirus asiannetwalking 0.004339\n",
416 | "amid coronavirus outbreak 0.005114\n",
417 | "coronavirus 2019 ncov 0.005114\n",
418 | "coronavirus outbreak https 0.007438\n",
419 | "dtype: float64\n",
420 | "\n",
421 | "Cluster 1 - Size 14\n",
422 | "coronavirus vaccine https 0.000000\n",
423 | "coronavirus wuhancoronavirus wuhanvirus 0.000000\n",
424 | "coronavirus wuhan 2019ncov 0.000000\n",
425 | "coronavirus wuhan https 0.000000\n",
426 | "gt https sdfcrodiom 0.071429\n",
427 | "efzbdv4cot details gt 0.071429\n",
428 | "https efzbdv4cot details 0.071429\n",
429 | "https n8owlcko0x https 0.142857\n",
430 | "gt https n8owlcko0x 0.142857\n",
431 | "gt https 9orx4j6buu 0.214286\n",
432 | "https 9orx4j6buu https 0.214286\n",
433 | "answered survive details 0.428571\n",
434 | "outbreak answered survive 0.428571\n",
435 | "survive details gt 0.428571\n",
436 | "outbreak answered details 0.500000\n",
437 | "answered details gt 0.500000\n",
438 | "biggest questions outbreak 1.000000\n",
439 | "questions outbreak answered 1.000000\n",
440 | "details gt https 1.000000\n",
441 | "coronavirus biggest questions 1.000000\n",
442 | "dtype: float64\n",
443 | "\n",
444 | "Cluster 2 - Size 9\n",
445 | "coronavirus vaccine breakthrough 0.000000\n",
446 | "coronaviruswuhan coronavirusoutbreak https 0.000000\n",
447 | "coronavirus vaccine https 0.000000\n",
448 | "coronavirus wuhan 2019ncov 0.000000\n",
449 | "zone china sure 0.000000\n",
450 | "coronavirus wuhan https 0.000000\n",
451 | "coronavirus wuhancoronavirus wuhanvirus 0.000000\n",
452 | "coronavirusoutbreak china coronavirus 0.000000\n",
453 | "coronavirusoutbreak coronavirus https 0.000000\n",
454 | "details click https 0.444444\n",
455 | "anti pollution clean 1.000000\n",
456 | "pollution clean air 1.000000\n",
457 | "oxybreath pro highly 1.000000\n",
458 | "breathing mask details 1.000000\n",
459 | "clean air breathing 1.000000\n",
460 | "pro highly effective 1.000000\n",
461 | "highly effective anti 1.000000\n",
462 | "air breathing mask 1.000000\n",
463 | "effective anti pollution 1.000000\n",
464 | "mask details click 1.000000\n",
465 | "dtype: float64\n",
466 | "\n",
467 | "Cluster 3 - Size 14\n",
468 | "coronavirus survive details 0.0\n",
469 | "coronavirus wuhan https 0.0\n",
470 | "coronavirus wuhan 2019ncov 0.0\n",
471 | "coronavirus vaccine https 0.0\n",
472 | "coronavirus vaccine breakthrough 0.0\n",
473 | "coronavirus updates live 0.0\n",
474 | "coronavirusoutbreak safety tips 0.0\n",
475 | "coronavirus transmitted people 0.0\n",
476 | "coronavirus travel ban 0.0\n",
477 | "coronavirus update china 0.0\n",
478 | "year old woman 1.0\n",
479 | "woman 15th person 1.0\n",
480 | "coronavirus fifth queensland 1.0\n",
481 | "37 year old 1.0\n",
482 | "diagnosed coronavirus fifth 1.0\n",
483 | "person australia diagnosed 1.0\n",
484 | "15th person australia 1.0\n",
485 | "australia diagnosed coronavirus 1.0\n",
486 | "fifth queensland https 1.0\n",
487 | "old woman 15th 1.0\n",
488 | "dtype: float64\n",
489 | "\n",
490 | "Cluster 4 - Size 18\n",
491 | "coronavirus survive details 0.000000\n",
492 | "coronavirus transmitted people 0.000000\n",
493 | "coronavirus travel ban 0.000000\n",
494 | "coronavirusoutbreak coronavirus https 0.000000\n",
495 | "coronavirus update wuhan 0.000000\n",
496 | "coronavirus update china 0.000000\n",
497 | "coronavirus vaccine breakthrough 0.000000\n",
498 | "coronavirus vaccine https 0.000000\n",
499 | "coronavirus wuhan 2019ncov 0.000000\n",
500 | "coronavirus wuhan https 0.000000\n",
501 | "coronavirus wuhancoronavirus wuhanvirus 0.000000\n",
502 | "coronavirus updates live 0.000000\n",
503 | "zone china sure 0.000000\n",
504 | "whatsapp 0555171905 https 0.777778\n",
505 | "days vals surprise 0.777778\n",
506 | "0205414305or whatsapp 0555171905 0.944444\n",
507 | "reach 0205414305or whatsapp 1.000000\n",
508 | "special reach 0205414305or 1.000000\n",
509 | "surprise special reach 1.000000\n",
510 | "vals surprise special 1.000000\n",
511 | "dtype: float64\n",
512 | "\n",
513 | "Cluster 5 - Size 12\n",
514 | "cruelty stop eating 0.0\n",
515 | "coronavirus vaccine https 0.0\n",
516 | "coronavirus wuhan 2019ncov 0.0\n",
517 | "coronavirus wuhan https 0.0\n",
518 | "coronavirus wuhancoronavirus wuhanvirus 0.0\n",
519 | "coronavirusoutbreak china coronavirus 0.0\n",
520 | "coronavirusoutbreak coronavirus https 0.0\n",
521 | "coronavirusoutbreak safety tips 0.0\n",
522 | "coronaviruswuhan coronavirusoutbreak https 0.0\n",
523 | "coronavirus epidemic https 0.0\n",
524 | "help publishing link 1.0\n",
525 | "publishing link help 1.0\n",
526 | "syrie est dur 1.0\n",
527 | "est dur https 1.0\n",
528 | "awareness suffering syria 1.0\n",
529 | "link help raise 1.0\n",
530 | "suffering syria syrie 1.0\n",
531 | "syria syrie est 1.0\n",
532 | "raise awareness suffering 1.0\n",
533 | "help raise awareness 1.0\n",
534 | "dtype: float64\n",
535 | "\n",
536 | "Cluster 6 - Size 11\n",
537 | "coronavirus updates live 0.000000\n",
538 | "coronavirus vaccine https 0.000000\n",
539 | "coronavirus wuhan 2019ncov 0.000000\n",
540 | "coronavirus vaccine breakthrough 0.000000\n",
541 | "coronavirus wuhancoronavirus wuhanvirus 0.000000\n",
542 | "coronavirusoutbreak china coronavirus 0.000000\n",
543 | "coronavirusoutbreak coronavirus https 0.000000\n",
544 | "coronavirusoutbreak safety tips 0.000000\n",
545 | "coronavirus wuhan https 0.000000\n",
546 | "bing du https 0.181818\n",
547 | "guan zhuang bing 0.181818\n",
548 | "zhuang bing du 0.181818\n",
549 | "coronavirus statistics https 0.454545\n",
550 | "leak true terrifying 0.909091\n",
551 | "terrifying coronavirus statistics 0.909091\n",
552 | "true terrifying coronavirus 0.909091\n",
553 | "china tencent accidentally 1.000000\n",
554 | "tencent accidentally leak 1.000000\n",
555 | "did china tencent 1.000000\n",
556 | "accidentally leak true 1.000000\n",
557 | "dtype: float64\n",
558 | "\n",
559 | "Cluster 7 - Size 168\n",
560 | "kits virus update 0.071429\n",
561 | "test lab opens 0.071429\n",
562 | "opens cdc ships 0.071429\n",
563 | "lab opens cdc 0.071429\n",
564 | "wuhan test lab 0.071429\n",
565 | "https 9orx4j6buu https 0.077381\n",
566 | "virus coronavirus https 0.083333\n",
567 | "efzbdv4cot details gt 0.083333\n",
568 | "https efzbdv4cot details 0.083333\n",
569 | "n8owlcko0x virus https 0.101190\n",
570 | "gt https qvezftkwkq 0.113095\n",
571 | "coronavirus sars flu 0.119048\n",
572 | "9orx4j6buu virus https 0.142857\n",
573 | "https n8owlcko0x virus 0.154762\n",
574 | "virus coronavirus sars 0.160714\n",
575 | "gt https n8owlcko0x 0.196429\n",
576 | "https 9orx4j6buu virus 0.196429\n",
577 | "survive details gt 0.244048\n",
578 | "gt https 9orx4j6buu 0.273810\n",
579 | "details gt https 1.000000\n",
580 | "dtype: float64\n",
581 | "\n",
582 | "Cluster 8 - Size 7\n",
583 | "coronavirus update china 0.000000\n",
584 | "coronavirus update wuhan 0.000000\n",
585 | "coronavirus vaccine breakthrough 0.000000\n",
586 | "coronaviruswuhan coronavirusoutbreak https 0.000000\n",
587 | "coronavirus wuhan 2019ncov 0.000000\n",
588 | "zone china sure 0.000000\n",
589 | "coronavirus wuhan https 0.000000\n",
590 | "coronavirus wuhancoronavirus wuhanvirus 0.000000\n",
591 | "coronavirusoutbreak china coronavirus 0.000000\n",
592 | "coronavirusoutbreak coronavirus https 0.000000\n",
593 | "coronavirusoutbreak safety tips 0.000000\n",
594 | "rate recovery rate 0.714286\n",
595 | "fully automated live 0.857143\n",
596 | "mortality rate recovery 0.857143\n",
597 | "automated live coronavirus 0.857143\n",
598 | "live statistics mortality 1.000000\n",
599 | "coronavirus updates live 1.000000\n",
600 | "updates live statistics 1.000000\n",
601 | "statistics mortality rate 1.000000\n",
602 | "live coronavirus updates 1.000000\n",
603 | "dtype: float64\n"
604 | ]
605 | }
606 | ],
607 | "source": [
608 | "k = 9\n",
609 | "cluster = make_pipeline(MaxAbsScaler(), KMeans(n_clusters=k, random_state=0))\n",
610 | "cluster.fit(mx)\n",
611 | "p = cluster.predict(mx)\n",
612 | "\n",
613 | "for c in np.unique(p):\n",
614 | " print(\"\\nCluster {} - Size {}\".format(c, (p == c).sum()))\n",
615 | " rank = pd.Series(np.array(mx[p==c].mean(axis=0)).squeeze(), index=terms).sort_values().tail(20)\n",
616 | " print(rank)"
617 | ]
618 | },
619 | {
620 | "cell_type": "code",
621 | "execution_count": 147,
622 | "metadata": {},
623 | "outputs": [
624 | {
625 | "name": "stdout",
626 | "output_type": "stream",
627 | "text": [
628 | "\n",
629 | "Cluster 0\n",
630 | "test 0.058252\n",
631 | "lab 0.058252\n",
632 | "china 0.058252\n",
633 | "experts 0.063107\n",
634 | "sars 0.065534\n",
635 | "symptoms 0.067961\n",
636 | "questions 0.067961\n",
637 | "answered 0.067961\n",
638 | "biggest 0.067961\n",
639 | "efzbdv4cot 0.087379\n",
640 | "qvezftkwkq 0.092233\n",
641 | "outbreak 0.094660\n",
642 | "survive 0.114078\n",
643 | "n8owlcko0x 0.169903\n",
644 | "virus 0.174757\n",
645 | "9orx4j6buu 0.237864\n",
646 | "coronavirus 0.260922\n",
647 | "gt 0.294498\n",
648 | "https 0.588997\n",
649 | "details 1.000000\n",
650 | "dtype: float64\n",
651 | "\n",
652 | "Cluster 1\n",
653 | "amp 0.015000\n",
654 | "case 0.015846\n",
655 | "thanks 0.016000\n",
656 | "good 0.016308\n",
657 | "just 0.017462\n",
658 | "cases 0.017590\n",
659 | "virus 0.017692\n",
660 | "2020 0.018000\n",
661 | "chinese 0.018154\n",
662 | "coronavirusoutbreak 0.018308\n",
663 | "health 0.019077\n",
664 | "quarantine 0.020000\n",
665 | "spread 0.020615\n",
666 | "people 0.021538\n",
667 | "wuhan 0.022359\n",
668 | "latest 0.024462\n",
669 | "outbreak 0.024769\n",
670 | "china 0.058000\n",
671 | "coronavirus 0.152808\n",
672 | "https 0.272308\n",
673 | "dtype: float64\n"
674 | ]
675 | }
676 | ],
677 | "source": [
678 | "centroids = cluster.named_steps['kmeans'].cluster_centers_\n",
679 | "for c in range(centroids.shape[0]):\n",
680 | " print(\"\\nCluster {}\".format(c))\n",
681 | " rank = pd.Series(centroids[c, :], index=terms).sort_values().tail(20)\n",
682 | " print(rank)"
683 | ]
684 | },
685 | {
686 | "cell_type": "code",
687 | "execution_count": 158,
688 | "metadata": {},
689 | "outputs": [
690 | {
691 | "data": {
692 | "text/plain": [
693 | "array([[0.0243835 , 2.2801271 , 3.1933573 , ..., 2.60482705, 1.23336436,\n",
694 | " 2.7774603 ],\n",
695 | " [0.0243835 , 2.2801271 , 3.1933573 , ..., 2.60482705, 1.23336436,\n",
696 | " 2.7774603 ],\n",
697 | " [0.0243835 , 2.2801271 , 3.1933573 , ..., 2.60482705, 1.23336436,\n",
698 | " 2.7774603 ],\n",
699 | " ...,\n",
700 | " [1.73034273, 2.86338604, 3.63284061, ..., 3.12811828, 2.12630845,\n",
701 | " 3.27326835],\n",
702 | " [0.0243835 , 2.2801271 , 3.1933573 , ..., 2.60482705, 1.23336436,\n",
703 | " 2.7774603 ],\n",
704 | " [0.0243835 , 2.2801271 , 3.1933573 , ..., 2.60482705, 1.23336436,\n",
705 | " 2.7774603 ]])"
706 | ]
707 | },
708 | "execution_count": 158,
709 | "metadata": {},
710 | "output_type": "execute_result"
711 | }
712 | ],
713 | "source": [
714 | "cluster.transform(mx)"
715 | ]
716 | },
717 | {
718 | "cell_type": "code",
719 | "execution_count": 159,
720 | "metadata": {},
721 | "outputs": [],
722 | "source": [
723 | "tweets['cluster'] = p"
724 | ]
725 | },
726 | {
727 | "cell_type": "code",
728 | "execution_count": 160,
729 | "metadata": {},
730 | "outputs": [
731 | {
732 | "name": "stdout",
733 | "output_type": "stream",
734 | "text": [
735 | "Cluster 0 = what the actual -\n",
736 | "\n",
737 | "Cluster 1 = What is coronavirus? The biggest questions about the outbreak, answered. SURVIVE SEE DETAILS AT ==>... https://t.co/21AGGnBj58\n",
738 | "\n",
739 | "Cluster 2 = @IsChinar OxyBreath Pro\n",
740 | "Highly Effective Anti-Pollution Clean Air Breathing Mask.\n",
741 | "Full details please click on a li... https://t.co/uWq9DZ9XI9\n",
742 | "\n",
743 | "Cluster 3 = A 37-year-old woman has become the 15th person in Australia diagnosed with coronavirus - the fifth in Queensland.... https://t.co/UtfPvHwjvR\n",
744 | "\n",
745 | "Cluster 4 = Vals Is Here Surprise That Special Someone Now\n",
746 | "\n",
747 | "You can reach Us On 0205414305or WhatsApp 0555171905 \n",
748 | "For The Bes... https://t.co/E9CSZvxFCT\n",
749 | "\n",
750 | "Cluster 5 = \n",
751 | "\n",
752 | "Can you help us by publishing this link to help raise Awareness of the suffering in #Syria\n",
753 | "\n",
754 | "#Syrie\n",
755 | "C'est dur a r... https://t.co/FnS9hjdXFo\n",
756 | "\n",
757 | "Cluster 6 = Did China's Tencent Accidentally Leak The True Terrifying #Coronavirus Statistics https://t.co/gyd8C00Pxg\n",
758 | "\n",
759 | "Cluster 7 = Coronavirus Latest Updates: Everything You Need to Know SEE DETAILS AT ==> https://t.co/9orX4j6BuU #virus... https://t.co/EZkVAQuI3G\n",
760 | "\n",
761 | "Cluster 8 = @ABSCBNNews @raphbosano Fully Automated Live #CoronaVirus Updates. \n",
762 | "* Live Statistics: Mortality rate, recovery rat... https://t.co/RXbHWuWM58\n",
763 | "\n"
764 | ]
765 | }
766 | ],
767 | "source": [
768 | "for c in np.unique(p):\n",
769 | " print('Cluster {} = {}'.format(c, tweets[tweets['cluster'] == c]['txt'].iloc[0]))\n",
770 | " print()\n",
771 | " "
772 | ]
773 | },
774 | {
775 | "cell_type": "code",
776 | "execution_count": null,
777 | "metadata": {},
778 | "outputs": [],
779 | "source": []
780 | }
781 | ],
782 | "metadata": {
783 | "kernelspec": {
784 | "display_name": "Python 3",
785 | "language": "python",
786 | "name": "python3"
787 | },
788 | "language_info": {
789 | "codemirror_mode": {
790 | "name": "ipython",
791 | "version": 3
792 | },
793 | "file_extension": ".py",
794 | "mimetype": "text/x-python",
795 | "name": "python",
796 | "nbconvert_exporter": "python",
797 | "pygments_lexer": "ipython3",
798 | "version": "3.7.3"
799 | }
800 | },
801 | "nbformat": 4,
802 | "nbformat_minor": 4
803 | }
804 |
--------------------------------------------------------------------------------
/live17_stacking/README:
--------------------------------------------------------------------------------
1 | Arquivos para a live 17 - Extraia o poder máximo dos seus modelos usando Stacking Ensembles
2 | https://youtu.be/TSoQGRhhHBE
3 |
--------------------------------------------------------------------------------
/live17_stacking/nb1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {},
16 | "source": [
17 | "# Procurando um curso prático de Data Science que cabe no seu bolso?\n",
18 | "# CursoDeDataScience.com\n",
19 | "# Começa 20:05 - não pule!"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 87,
25 | "metadata": {},
26 | "outputs": [],
27 | "source": [
28 | "train = pd.read_csv(\"train.csv\")\n",
29 | "test = pd.read_csv(\"test.csv\")\n",
30 | "train['Sex_binario'] = train['Sex'].map({\"male\": 0, \"female\": 1})\n",
31 | "\n",
32 | "test['Sex_binario'] = test['Sex'].map({\"male\": 0, \"female\": 1})"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 88,
38 | "metadata": {},
39 | "outputs": [
40 | {
41 | "data": {
42 | "text/html": [
43 | "\n",
44 | "\n",
57 | "
\n",
58 | " \n",
59 | " \n",
60 | " | \n",
61 | " PassengerId | \n",
62 | " Survived | \n",
63 | " Pclass | \n",
64 | " Name | \n",
65 | " Sex | \n",
66 | " Age | \n",
67 | " SibSp | \n",
68 | " Parch | \n",
69 | " Ticket | \n",
70 | " Fare | \n",
71 | " Cabin | \n",
72 | " Embarked | \n",
73 | " Sex_binario | \n",
74 | "
\n",
75 | " \n",
76 | " \n",
77 | " \n",
78 | " | 0 | \n",
79 | " 1 | \n",
80 | " 0 | \n",
81 | " 3 | \n",
82 | " Braund, Mr. Owen Harris | \n",
83 | " male | \n",
84 | " 22.0 | \n",
85 | " 1 | \n",
86 | " 0 | \n",
87 | " A/5 21171 | \n",
88 | " 7.2500 | \n",
89 | " NaN | \n",
90 | " S | \n",
91 | " 0 | \n",
92 | "
\n",
93 | " \n",
94 | " | 1 | \n",
95 | " 2 | \n",
96 | " 1 | \n",
97 | " 1 | \n",
98 | " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
99 | " female | \n",
100 | " 38.0 | \n",
101 | " 1 | \n",
102 | " 0 | \n",
103 | " PC 17599 | \n",
104 | " 71.2833 | \n",
105 | " C85 | \n",
106 | " C | \n",
107 | " 1 | \n",
108 | "
\n",
109 | " \n",
110 | " | 2 | \n",
111 | " 3 | \n",
112 | " 1 | \n",
113 | " 3 | \n",
114 | " Heikkinen, Miss. Laina | \n",
115 | " female | \n",
116 | " 26.0 | \n",
117 | " 0 | \n",
118 | " 0 | \n",
119 | " STON/O2. 3101282 | \n",
120 | " 7.9250 | \n",
121 | " NaN | \n",
122 | " S | \n",
123 | " 1 | \n",
124 | "
\n",
125 | " \n",
126 | " | 3 | \n",
127 | " 4 | \n",
128 | " 1 | \n",
129 | " 1 | \n",
130 | " Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
131 | " female | \n",
132 | " 35.0 | \n",
133 | " 1 | \n",
134 | " 0 | \n",
135 | " 113803 | \n",
136 | " 53.1000 | \n",
137 | " C123 | \n",
138 | " S | \n",
139 | " 1 | \n",
140 | "
\n",
141 | " \n",
142 | " | 4 | \n",
143 | " 5 | \n",
144 | " 0 | \n",
145 | " 3 | \n",
146 | " Allen, Mr. William Henry | \n",
147 | " male | \n",
148 | " 35.0 | \n",
149 | " 0 | \n",
150 | " 0 | \n",
151 | " 373450 | \n",
152 | " 8.0500 | \n",
153 | " NaN | \n",
154 | " S | \n",
155 | " 0 | \n",
156 | "
\n",
157 | " \n",
158 | "
\n",
159 | "
"
160 | ],
161 | "text/plain": [
162 | " PassengerId Survived Pclass \\\n",
163 | "0 1 0 3 \n",
164 | "1 2 1 1 \n",
165 | "2 3 1 3 \n",
166 | "3 4 1 1 \n",
167 | "4 5 0 3 \n",
168 | "\n",
169 | " Name Sex Age SibSp \\\n",
170 | "0 Braund, Mr. Owen Harris male 22.0 1 \n",
171 | "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
172 | "2 Heikkinen, Miss. Laina female 26.0 0 \n",
173 | "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
174 | "4 Allen, Mr. William Henry male 35.0 0 \n",
175 | "\n",
176 | " Parch Ticket Fare Cabin Embarked Sex_binario \n",
177 | "0 0 A/5 21171 7.2500 NaN S 0 \n",
178 | "1 0 PC 17599 71.2833 C85 C 1 \n",
179 | "2 0 STON/O2. 3101282 7.9250 NaN S 1 \n",
180 | "3 0 113803 53.1000 C123 S 1 \n",
181 | "4 0 373450 8.0500 NaN S 0 "
182 | ]
183 | },
184 | "execution_count": 88,
185 | "metadata": {},
186 | "output_type": "execute_result"
187 | }
188 | ],
189 | "source": [
190 | "train.head()"
191 | ]
192 | },
193 | {
194 | "cell_type": "code",
195 | "execution_count": 82,
196 | "metadata": {},
197 | "outputs": [],
198 | "source": [
199 | "X = train.select_dtypes(include=np.number).drop([\"PassengerId\", 'Survived'], axis=1).fillna(0)\n",
200 | "y = train['Survived']"
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "execution_count": 78,
206 | "metadata": {},
207 | "outputs": [],
208 | "source": [
209 | "from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier\n",
210 | "from sklearn.linear_model import LogisticRegression\n",
211 | "from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
212 | "from sklearn.model_selection import KFold\n",
213 | "from sklearn.metrics import log_loss, accuracy_score\n",
214 | "from sklearn.pipeline import make_pipeline"
215 | ]
216 | },
217 | {
218 | "cell_type": "code",
219 | "execution_count": 89,
220 | "metadata": {},
221 | "outputs": [
222 | {
223 | "name": "stdout",
224 | "output_type": "stream",
225 | "text": [
226 | "RF Accuracy: 0.7869955156950673 - Log Loss: 0.8891877224352214\n",
227 | "ET Accuracy: 0.7802690582959642 - Log Loss: 1.7277767849421113\n",
228 | "LR StdScaler Accuracy: 0.7713004484304933 - Log Loss: 0.4548870233895985\n",
229 | "LR MinMax Accuracy: 0.773542600896861 - Log Loss: 0.45730271249069515\n",
230 | "\n",
231 | "RF Accuracy: 0.8292134831460675 - Log Loss: 0.6313360340802087\n",
232 | "ET Accuracy: 0.8089887640449438 - Log Loss: 2.533916920090966\n",
233 | "LR StdScaler Accuracy: 0.7955056179775281 - Log Loss: 0.4579224442106097\n",
234 | "LR MinMax Accuracy: 0.802247191011236 - Log Loss: 0.4610789905245168\n",
235 | "\n"
236 | ]
237 | }
238 | ],
239 | "source": [
240 | "kf = KFold(n_splits=2, random_state=0, shuffle=True)\n",
241 | "\n",
242 | "\n",
243 | "second_level = np.zeros((X.shape[0], 4))\n",
244 | "\n",
245 | "for tr, ts in kf.split(X,y):\n",
246 | " Xtr, Xval = X.iloc[tr], X.iloc[ts]\n",
247 | " ytr, yval = y.iloc[tr], y.iloc[ts]\n",
248 | " \n",
249 | " rf = RandomForestClassifier(n_estimators=100, n_jobs=6, random_state=10)\n",
250 | " rf.fit(Xtr, ytr)\n",
251 | " prf = rf.predict_proba(Xval)[:,1]\n",
252 | " prf_ = (prf > 0.5).astype(int)\n",
253 | " \n",
254 | " print(\"RF Accuracy: {} - Log Loss: {}\".format(accuracy_score(yval, prf_), log_loss(yval, prf)))\n",
255 | " \n",
256 | " et = ExtraTreesClassifier(n_estimators=100, n_jobs=6, random_state=10)\n",
257 | " et.fit(Xtr, ytr)\n",
258 | " pet = et.predict_proba(Xval)[:,1]\n",
259 | " pet_ = (pet > 0.5).astype(int)\n",
260 | " \n",
261 | " print(\"ET Accuracy: {} - Log Loss: {}\".format(accuracy_score(yval, pet_), log_loss(yval, pet)))\n",
262 | " \n",
263 | " lr1 = make_pipeline(StandardScaler(), LogisticRegression())\n",
264 | " lr1.fit(Xtr, ytr)\n",
265 | " plr1 = lr1.predict_proba(Xval)[:,1]\n",
266 | " plr1_ = (plr1 > 0.5).astype(int)\n",
267 | " \n",
268 | " print(\"LR StdScaler Accuracy: {} - Log Loss: {}\".format(accuracy_score(yval, plr1_), log_loss(yval, plr1)))\n",
269 | " \n",
270 | " lr2 = make_pipeline(MinMaxScaler(), LogisticRegression())\n",
271 | " lr2.fit(Xtr, ytr)\n",
272 | " plr2 = lr2.predict_proba(Xval)[:,1]\n",
273 | " plr2_ = (plr2 > 0.5).astype(int)\n",
274 | " \n",
275 | " print(\"LR MinMax Accuracy: {} - Log Loss: {}\".format(accuracy_score(yval, plr2_), log_loss(yval, plr2)))\n",
276 | " \n",
277 | " second_level[ts, 0] = prf\n",
278 | " second_level[ts, 1] = pet\n",
279 | " second_level[ts, 2] = plr1\n",
280 | " second_level[ts, 3] = plr2\n",
281 | " \n",
282 | " print()\n",
283 | " \n",
284 | "# fatores de diversidade"
285 | ]
286 | },
287 | {
288 | "cell_type": "code",
289 | "execution_count": null,
290 | "metadata": {},
291 | "outputs": [],
292 | "source": [
293 | "# second_level.mean(axis=1)"
294 | ]
295 | },
296 | {
297 | "cell_type": "code",
298 | "execution_count": null,
299 | "metadata": {},
300 | "outputs": [],
301 | "source": [
302 | "folds = [1, 2, 3]\n",
303 | "\n",
304 | "primeiro nível\n",
305 | "\n",
306 | "ciclo 1 = [1,2] [3]\n",
307 | "ciclo 2 = [1,3] [2]\n",
308 | "ciclo 3 = [2,3] [1]\n",
309 | "\n",
310 | "segundo nível\n",
311 | "\n",
312 | "ciclo 1 = [1,2] [3]\n",
313 | "ciclo 2 = [1,3] [2]\n",
314 | "ciclo 3 = [2,3] [1]\n",
315 | "\n",
316 | "\n",
317 | "\n",
318 | "\n"
319 | ]
320 | },
321 | {
322 | "cell_type": "code",
323 | "execution_count": null,
324 | "metadata": {},
325 | "outputs": [],
326 | "source": [
327 | "# modelos - gbm + rede neural, knn \n",
328 | "# features\n",
329 | "# exemplos \n",
330 | "# hiperparâmetros "
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": 97,
336 | "metadata": {},
337 | "outputs": [
338 | {
339 | "data": {
340 | "text/plain": [
341 | "array([[0.15 , 0.19 , 0.0947167 , 0.11582552],\n",
342 | " [0.95 , 0.99 , 0.90010202, 0.88974478],\n",
343 | " [0.54 , 0.76 , 0.65737866, 0.63540321],\n",
344 | " ...,\n",
345 | " [0.74 , 0.76 , 0.61083421, 0.59159169],\n",
346 | " [0.84 , 0.92 , 0.45575654, 0.45086285],\n",
347 | " [0.35583333, 0.4 , 0.10479333, 0.11872796]])"
348 | ]
349 | },
350 | "execution_count": 97,
351 | "metadata": {},
352 | "output_type": "execute_result"
353 | }
354 | ],
355 | "source": [
356 | "second_level"
357 | ]
358 | },
359 | {
360 | "cell_type": "code",
361 | "execution_count": 95,
362 | "metadata": {},
363 | "outputs": [
364 | {
365 | "name": "stdout",
366 | "output_type": "stream",
367 | "text": [
368 | "Stack Accuracy: 0.8004484304932735 Log loss: 0.4379152913562436\n",
369 | "\n",
370 | "Stack Accuracy: 0.8157303370786517 Log loss: 0.42892608158477763\n",
371 | "\n"
372 | ]
373 | }
374 | ],
375 | "source": [
376 | "for tr, ts in kf.split(X,y):\n",
377 | " \n",
378 | " Xtr, Xval = second_level[tr], second_level[ts]\n",
379 | " ytr, yval = y.iloc[tr], y.iloc[ts]\n",
380 | " \n",
381 | " lr_stack = LogisticRegression(C=1.)\n",
382 | " lr_stack.fit(Xtr, ytr)\n",
383 | " plr_stack = lr_stack.predict_proba(Xval)[:,1]\n",
384 | " plr_stack_ = (plr_stack > 0.5).astype(int)\n",
385 | " \n",
386 | " print(\"Stack Accuracy: {} Log loss: {}\".format(accuracy_score(yval, plr_stack_), log_loss(yval, plr_stack)))\n",
387 | " print()"
388 | ]
389 | },
390 | {
391 | "cell_type": "code",
392 | "execution_count": 75,
393 | "metadata": {},
394 | "outputs": [
395 | {
396 | "data": {
397 | "text/html": [
398 | "\n",
399 | "\n",
412 | "
\n",
413 | " \n",
414 | " \n",
415 | " | \n",
416 | " 0 | \n",
417 | " 1 | \n",
418 | " 2 | \n",
419 | " 3 | \n",
420 | "
\n",
421 | " \n",
422 | " \n",
423 | " \n",
424 | " | 0 | \n",
425 | " 1.000000 | \n",
426 | " 0.935380 | \n",
427 | " 0.595684 | \n",
428 | " 0.578839 | \n",
429 | "
\n",
430 | " \n",
431 | " | 1 | \n",
432 | " 0.935380 | \n",
433 | " 1.000000 | \n",
434 | " 0.497313 | \n",
435 | " 0.479914 | \n",
436 | "
\n",
437 | " \n",
438 | " | 2 | \n",
439 | " 0.595684 | \n",
440 | " 0.497313 | \n",
441 | " 1.000000 | \n",
442 | " 0.987037 | \n",
443 | "
\n",
444 | " \n",
445 | " | 3 | \n",
446 | " 0.578839 | \n",
447 | " 0.479914 | \n",
448 | " 0.987037 | \n",
449 | " 1.000000 | \n",
450 | "
\n",
451 | " \n",
452 | "
\n",
453 | "
"
454 | ],
455 | "text/plain": [
456 | " 0 1 2 3\n",
457 | "0 1.000000 0.935380 0.595684 0.578839\n",
458 | "1 0.935380 1.000000 0.497313 0.479914\n",
459 | "2 0.595684 0.497313 1.000000 0.987037\n",
460 | "3 0.578839 0.479914 0.987037 1.000000"
461 | ]
462 | },
463 | "execution_count": 75,
464 | "metadata": {},
465 | "output_type": "execute_result"
466 | }
467 | ],
468 | "source": [
469 | "pd.DataFrame(np.corrcoef(second_level.T))"
470 | ]
471 | },
472 | {
473 | "cell_type": "code",
474 | "execution_count": null,
475 | "metadata": {},
476 | "outputs": [],
477 | "source": []
478 | }
479 | ],
480 | "metadata": {
481 | "kernelspec": {
482 | "display_name": "Python 3",
483 | "language": "python",
484 | "name": "python3"
485 | },
486 | "language_info": {
487 | "codemirror_mode": {
488 | "name": "ipython",
489 | "version": 3
490 | },
491 | "file_extension": ".py",
492 | "mimetype": "text/x-python",
493 | "name": "python",
494 | "nbconvert_exporter": "python",
495 | "pygments_lexer": "ipython3",
496 | "version": "3.7.3"
497 | }
498 | },
499 | "nbformat": 4,
500 | "nbformat_minor": 4
501 | }
502 |
--------------------------------------------------------------------------------
/live18_rede_neural/README:
--------------------------------------------------------------------------------
1 | Materiais da Live 18: Como Criar Sua Primeira Rede Neural com Tensorflow
2 | https://youtu.be/s0s6Q1GLJGo
3 |
--------------------------------------------------------------------------------
/live20_covid_einstein/README:
--------------------------------------------------------------------------------
1 | Material da live sobre dados disponibilizados pelo hospital Albert Einstein sobre pacientes com suspeita de COVID-19
2 | https://www.kaggle.com/einsteindata4u/covid19
3 | https://youtu.be/Wg1RcYNf4Lg
4 |
--------------------------------------------------------------------------------
/lives_ml_na_industria/README:
--------------------------------------------------------------------------------
1 | Arquivos para as Lives 009 e 010
2 |
--------------------------------------------------------------------------------
/lives_ml_na_industria/notes_on_dataset.txt:
--------------------------------------------------------------------------------
1 | Description of physical setup:
2 | The data comes from a continuous flow process.
3 | Sample rate is 1 Hz.
4 | In the first stage, Machines 1, 2, and 3 operate in parallel, and feed their outputs into a step that combines the flows.
5 | Output from the combiner is measured in 15 locations. These measurements are the primary measurements to predict.
6 | Next, the output flows into a second stage, where Machines 4 and 5 process in series.
7 | Measurements are made again in the same 15 locations. These are the secondary measurements to predict.
8 |
9 | Measurements are noisy.
10 | Each measurement also has a target or Setpoint (setpoints are included in the first row of data).
11 | The goal is to predict the measurements (or the error versus setpoints) for as many of the 15 measurements as possible.
12 | Some measurements will be more predictable than others!
13 | Prediction of measurements after the first stage is the primary interest.
14 | Prediction of measurements after the second stage is nice-to-have, but the data is much noisier.
15 |
16 | Note on variable naming conventions
17 | ~.C.Setpoint Setpoint for Controlled variable
18 | ~.C.Actual Actual value of Controlled variable
19 | ~.U.Actual Actual value of Uncontrolled variable
20 | Others Environmental or raw material variables, States / events, etc.
21 |
22 | Start col End col Description
23 | 0 0 Time stamp
24 | 1 2 Factory ambient conditions
25 | 3 6 First stage, Machine 1, raw material properties (material going in to Machine 1)
26 | 7 14 First stage, Machine 1 process variables
27 | 15 18 First stage, Machine 2, raw material properties (material going in to Machine 2)
28 | 19 26 First stage, Machine 2 process variables
29 | 27 30 First stage, Machine 3, raw material properties (material going in to Machine 3)
30 | 31 38 First stage, Machine 3 process variables
31 | 39 41 Combiner stage process parameters. Here we combine the outputs from Machines 1, 2, and 3.
32 | 42 71 PRIMARY OUTPUT TO CONTROL: Measurements of 15 features (in mm), along with setpoint or target for each
33 | 72 78 Second stage, Machine 4 process variables
34 | 79 85 Second stage, Machine 5 process variables
35 | 86 115 SECONDARY OUTPUT TO CONTROL: Measurements of 15 features (in mm), along with setpoint or target for each
36 |
37 |
38 |
39 |
40 |
--------------------------------------------------------------------------------