├── 5_dias_de_kaggle ├── README ├── lightautoml-baseline-tps-june-2021.ipynb ├── lightgbm.ipynb ├── lightgbm2.ipynb ├── lightgbm_permutation.ipynb └── submission_super_incroivel_do_tutorial.csv ├── README ├── agente-ia-ferramentas ├── README.md └── agente_do_zero.py ├── atualize_seu_modelo.ipynb ├── como_tunar_hipers.ipynb ├── copa_america ├── 0.ipynb ├── 1.ipynb └── README ├── deploy_sql ├── README ├── deploy_tip_model_.py ├── monitor.ipynb └── validator.py ├── euro2021 ├── 0_clean_data_merged_v1.ipynb ├── 1_baselines.ipynb ├── 2_model1.ipynb ├── 3_model2.ipynb ├── 4_model3.ipynb ├── 5_model4.ipynb ├── 6_model5.ipynb ├── 7_model6.ipynb └── README ├── live11_sucesso_musical ├── README └── nb1.ipynb ├── live12_timeseries_prophet ├── README └── nb1.ipynb ├── live14_deploy ├── README ├── app.py ├── clickbait_titles.csv ├── mdl.pkl.z ├── nb1.ipynb └── non_clickbait_titles.csv ├── live15_covid ├── Countries Longitude and Latitude.csv ├── README ├── countries and continents.csv ├── covid_19_data.csv ├── nb3_corona.ipynb └── nb4_corona.ipynb ├── live16_clustering_texto ├── README ├── nCoV_tweets.csv └── nb1.ipynb ├── live17_stacking ├── README ├── nb1.ipynb └── train.csv ├── live18_rede_neural ├── README ├── nb1.ipynb └── train.csv ├── live20_covid_einstein ├── 10_lgb4.ipynb ├── 11_lgb5.ipynb ├── 12_ag3.ipynb ├── 13_nn1.ipynb ├── 1_nb1.ipynb ├── 2_nb2.ipynb ├── 3_ag1.ipynb ├── 4_ag2.ipynb ├── 5_rf1.ipynb ├── 6_rf2.ipynb ├── 7_lgb1.ipynb ├── 8_lgb2.ipynb ├── 9_lgb3.ipynb └── README ├── lives_ml_na_industria ├── README ├── continuous_factory_process.csv ├── nb1.ipynb ├── nb2.ipynb ├── nb3.ipynb └── notes_on_dataset.txt └── shap_outubro_rosa.ipynb /5_dias_de_kaggle/README: -------------------------------------------------------------------------------- 1 | Material dos vídeos do desafio "5 dias de Kaggle" 2 | https://www.youtube.com/watch?v=B62dzeqFgPI&list=PLwnip85KhroUbNQcnhCF4cnRT7TTtGQ8F 3 | -------------------------------------------------------------------------------- 
"""Minimal tool-using AI agent built from scratch on top of litellm.

Companion script for the video https://youtu.be/RT4WYDq2ZkU.
"""

import json
import os


class Tool:
    """A Python callable exposed to the model via a function-calling schema.

    Args:
        name: Identifier the model uses to request the tool.
        description: Natural-language description sent to the model.
        func: Callable executed with the model-supplied keyword arguments.
        parameters: JSON-Schema object describing ``func``'s arguments.
    """

    def __init__(self, name, description, func, parameters):
        self.name = name
        self.description = description
        self.func = func
        self.parameters = parameters
        self.schema = self._generate_schema()

    def _generate_schema(self):
        """Return the function-calling schema entry for this tool."""
        return {
            "type": "function",
            "function": {
                "name": self.name,
                "description": self.description,
                "parameters": self.parameters,
            },
        }


class Agent:
    """Interactive chat loop that lets the model call the registered tools.

    Args:
        tools: Iterable of ``Tool`` objects; ``None`` means "no tools".
        model: Model name forwarded to ``litellm.completion``.
        max_tool_rounds: Upper bound on consecutive tool-calling rounds for
            a single user turn, so a model that keeps requesting tools
            cannot loop forever.
    """

    def __init__(self, tools=None, model="gpt-4.1", max_tool_rounds=10):
        self.conversation = []
        # Normalise None to an empty list so iteration never fails.
        self.tools = list(tools) if tools else []
        self.model = model
        self.max_tool_rounds = max_tool_rounds

    def _complete(self, use_tools=True):
        """Request the next assistant message for the current conversation."""
        # Imported lazily so the tool/agent definitions can be imported
        # without the LLM client installed.
        import litellm

        kwargs = {"model": self.model, "messages": self.conversation}
        schemas = [tool.schema for tool in self.tools] if use_tools else []
        if schemas:
            # Only send `tools` when there is at least one schema.
            kwargs["tools"] = schemas
        response = litellm.completion(**kwargs)
        return response.choices[0].message

    def _record_reply(self, content):
        """Store the assistant's text reply in the history and print it."""
        self.conversation.append({"role": "assistant", "content": content})
        print(f"Agente: {content}")

    def _run_tool(self, tool_name, raw_arguments):
        """Execute one requested tool and return its result as a string.

        Failures (unknown tool, malformed JSON arguments, exceptions raised
        by the tool itself) are turned into a message for the model instead
        of crashing the chat loop.
        """
        tool = next((t for t in self.tools if t.name == tool_name), None)
        if tool is None:
            return f"Ferramenta {tool_name} não encontrada"

        try:
            # An empty argument string is treated as "no arguments".
            tool_args = json.loads(raw_arguments) if raw_arguments else {}
            return str(tool.func(**tool_args))
        except Exception as e:  # boundary: report any tool failure to the model
            print(e)
            return f"Erro ao executar {tool_name}: {str(e)}"

    def _handle_tool_calls(self, message):
        """Run the tools requested in ``message`` and print the final answer.

        The model is queried again after every round of tool results and may
        chain further tool calls, up to ``max_tool_rounds`` rounds.
        """
        for _ in range(self.max_tool_rounds):
            self.conversation.append(message.model_dump())

            for tool_call in message.tool_calls:
                result = self._run_tool(
                    tool_call.function.name, tool_call.function.arguments
                )
                self.conversation.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "content": result,
                })

            message = self._complete()
            if not message.tool_calls:
                break
        else:
            # Round limit reached: ask once more without tools so the turn
            # ends with a plain-text answer.
            message = self._complete(use_tools=False)

        self._record_reply(message.content)

    def run(self):
        """Read user input in a loop until the user types ``sair``."""
        print("Agente iniciado")

        while True:
            user_input = input("Voce: ")

            if user_input.lower() == "sair":
                print("Tchau")
                break

            self.conversation.append({"role": "user", "content": user_input})
            assistant_message = self._complete()

            if assistant_message.tool_calls:
                self._handle_tool_calls(assistant_message)
            else:
                # Store the text content, not the whole message object.
                self._record_reply(assistant_message.content)


def list_files():
    """Return the names in the current directory, one per line."""
    return "\n".join(os.listdir("."))


def read_file(file_path):
    """Return the UTF-8 text content of ``file_path``."""
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()


def write_file(file_path, content):
    """Truncate ``file_path``, write ``content`` and return a confirmation."""
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(content)
    return f"Conteúdo {content} escrito para o arquivo"


if __name__ == "__main__":
    list_files_tool = Tool(
        name="list_files",
        description="Lista os arquivos disponíveis no diretório atual",
        func=list_files,
        parameters={
            "type": "object",
            "properties": {},
            "required": [],
        },
    )
    read_file_tool = Tool(
        name="read_file",
        description="Lê o arquivo especificado por file_path",
        func=read_file,
        parameters={
            "type": "object",
            "properties": {
                "file_path": {
                    "type": "string",
                    "description": "Caminho para o arquivo a ser lido",
                },
            },
            "required": ["file_path"],
        },
    )
    write_file_tool = Tool(
        name="write_file",
        description="Trunca e escreve para o arquivo especificado por file_path",
        func=write_file,
        parameters={
            "type": "object",
            "properties": {
                "file_path": {
                    "type": "string",
                    # Fixed copy-paste error: this path is written, not read.
                    "description": "Caminho para o arquivo a ser escrito",
                },
                "content": {
                    "type": "string",
                    "description": "Conteúdo para escrever dentro do arquivo",
                },
            },
            "required": ["file_path", "content"],
        },
    )

    agent = Agent(tools=[list_files_tool, read_file_tool, write_file_tool])
    agent.run()
\n", 34 | "\n", 47 | "\n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | "
DataCasaForaGols CasaGols ForaEdiçãoFase
02001-07-11EquadorChile1.04.02001Grupo A
12001-07-11ColombiaVenezuela2.00.02001Grupo A
22001-07-14ChileVenezuela1.00.02001Grupo A
32001-07-14ColombiaEquador1.00.02001Grupo A
42001-07-17EquadorVenezuela4.00.02001Grupo A
........................
2072021-06-27VenezuelaPeru0.01.02021Grupo B
2082021-07-02PeruParaguaiNaNNaN2021Quartas
2092021-07-02BrasilChileNaNNaN2021Quartas
2102021-07-03UruguaiColombiaNaNNaN2021Quartas
2112021-07-03ArgentinaEquadorNaNNaN2021Quartas
\n", 173 | "

212 rows × 7 columns

\n", 174 | "
" 175 | ], 176 | "text/plain": [ 177 | " Data Casa Fora Gols Casa Gols Fora Edição Fase\n", 178 | "0 2001-07-11 Equador Chile 1.0 4.0 2001 Grupo A\n", 179 | "1 2001-07-11 Colombia Venezuela 2.0 0.0 2001 Grupo A\n", 180 | "2 2001-07-14 Chile Venezuela 1.0 0.0 2001 Grupo A\n", 181 | "3 2001-07-14 Colombia Equador 1.0 0.0 2001 Grupo A\n", 182 | "4 2001-07-17 Equador Venezuela 4.0 0.0 2001 Grupo A\n", 183 | ".. ... ... ... ... ... ... ...\n", 184 | "207 2021-06-27 Venezuela Peru 0.0 1.0 2021 Grupo B\n", 185 | "208 2021-07-02 Peru Paraguai NaN NaN 2021 Quartas\n", 186 | "209 2021-07-02 Brasil Chile NaN NaN 2021 Quartas\n", 187 | "210 2021-07-03 Uruguai Colombia NaN NaN 2021 Quartas\n", 188 | "211 2021-07-03 Argentina Equador NaN NaN 2021 Quartas\n", 189 | "\n", 190 | "[212 rows x 7 columns]" 191 | ] 192 | }, 193 | "execution_count": 123, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "data" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 124, 205 | "id": "afd2bb8b-ad59-4c02-a5ee-dfcd9221c6c4", 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [ 209 | "resultados = list()\n", 210 | "for gc, gf in data[['Gols Casa', 'Gols Fora']].values:\n", 211 | " if gc > gf:\n", 212 | " resultados.append(1)\n", 213 | " elif gf > gc:\n", 214 | " resultados.append(-1)\n", 215 | " else:\n", 216 | " resultados.append(0)\n", 217 | "data['resultados'] = resultados" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 125, 223 | "id": "9fa91a5b-112a-438e-9e89-fd292db0dbc6", 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [ 227 | "ix = [\"Edição\",\"time\"]\n", 228 | "grupos = data[data['Fase'].str.contains(\"Grupo\")]\n", 229 | "newnames_casa = {\"Casa\": \"time\", -1: \"derrota\", 0:\"empate\", 1:\"vitoria\"}\n", 230 | "newnames_fora = {\"Fora\": \"time\", -1: \"vitoria\", 0:\"empate\", 1:\"derrota\"}\n", 231 | "grupos_casa = grupos.groupby([\"Edição\", \"Casa\", 
'resultados']).size().unstack().fillna(0).reset_index().rename(columns=newnames_casa).set_index(ix)\n", 232 | "grupos_fora = grupos.groupby([\"Edição\", \"Fora\", 'resultados']).size().unstack().fillna(0).reset_index().rename(columns=newnames_fora).set_index(ix)\n", 233 | "partidas = grupos_casa.add(grupos_fora, fill_value = 0).reset_index()" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 126, 239 | "id": "7854bfc0-573b-4713-8b50-85becb589f2a", 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [ 243 | "ix = [\"Edição\",\"time\"]\n", 244 | "newnames_gcasa = {\"Casa\": \"time\", 'Gols Casa': \"marcados\", 'Gols Fora': \"sofridos\"}\n", 245 | "gols_casa = grupos.groupby([\"Edição\", \"Casa\"])[['Gols Casa', 'Gols Fora']].sum().reset_index().rename(columns=newnames_gcasa).set_index(ix)\n", 246 | "newnames_gfora = {\"Fora\": \"time\", 'Gols Casa': \"sofridos\", 'Gols Fora': \"marcados\"}\n", 247 | "gols_fora = grupos.groupby([\"Edição\", \"Fora\"])[['Gols Casa', 'Gols Fora']].sum().reset_index().rename(columns=newnames_gfora).set_index(ix)\n", 248 | "gols = gols_casa.add(gols_fora, fill_value=0).reset_index()" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 127, 254 | "id": "185a3874-ae62-4326-b3a7-770c5b47e5bc", 255 | "metadata": {}, 256 | "outputs": [], 257 | "source": [ 258 | "non_grupos = grupos = data[~data['Fase'].str.contains(\"Grupo\")][['Edição','Casa', 'Fora', 'resultados']]" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 128, 264 | "id": "6151044b-e214-4756-a2be-c2c0ff360404", 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [ 268 | "df = pd.merge(non_grupos, partidas, left_on=[\"Edição\",\"Casa\"], right_on=[\"Edição\",\"time\"], how='left')\n", 269 | "df = pd.merge(df, partidas, left_on=[\"Edição\",\"Fora\"], right_on=[\"Edição\",\"time\"], how='left', suffixes=[\"_casa\", \"_fora\"])\n", 270 | "df = pd.merge(df, gols, left_on=[\"Edição\",\"Casa\"], 
right_on=[\"Edição\",\"time\"], how='left')\n", 271 | "df = pd.merge(df, gols, left_on=[\"Edição\",\"Fora\"], right_on=[\"Edição\",\"time\"], how='left', suffixes=[\"_casa\", \"_fora\"])" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 129, 277 | "id": "1a00d613-7564-41cc-aff1-581cb00af81b", 278 | "metadata": {}, 279 | "outputs": [ 280 | { 281 | "data": { 282 | "text/html": [ 283 | "
\n", 284 | "\n", 297 | "\n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | "
EdiçãoCasaForaresultadostime_casaderrota_casaempate_casavitoria_casatime_foraderrota_foraempate_foravitoria_foratime_casamarcados_casasofridos_casatime_foramarcados_forasofridos_fora
02001ChileMexico-1Chile1.00.02.0Mexico1.01.01.0Chile5.03.0Mexico1.01.0
12001Costa RicaUruguai-1Costa Rica0.01.02.0Uruguai1.01.01.0Costa Rica6.01.0Uruguai2.02.0
22001ColombiaPeru1Colombia0.00.03.0Peru1.01.01.0Colombia5.00.0Peru4.05.0
32001HondurasBrasil1Honduras1.00.02.0Brasil1.00.02.0Honduras3.01.0Brasil5.02.0
42001MexicoUruguai1Mexico1.01.01.0Uruguai1.01.01.0Mexico1.01.0Uruguai2.02.0
\n", 429 | "
" 430 | ], 431 | "text/plain": [ 432 | " Edição Casa Fora resultados time_casa derrota_casa \\\n", 433 | "0 2001 Chile Mexico -1 Chile 1.0 \n", 434 | "1 2001 Costa Rica Uruguai -1 Costa Rica 0.0 \n", 435 | "2 2001 Colombia Peru 1 Colombia 0.0 \n", 436 | "3 2001 Honduras Brasil 1 Honduras 1.0 \n", 437 | "4 2001 Mexico Uruguai 1 Mexico 1.0 \n", 438 | "\n", 439 | " empate_casa vitoria_casa time_fora derrota_fora empate_fora \\\n", 440 | "0 0.0 2.0 Mexico 1.0 1.0 \n", 441 | "1 1.0 2.0 Uruguai 1.0 1.0 \n", 442 | "2 0.0 3.0 Peru 1.0 1.0 \n", 443 | "3 0.0 2.0 Brasil 1.0 0.0 \n", 444 | "4 1.0 1.0 Uruguai 1.0 1.0 \n", 445 | "\n", 446 | " vitoria_fora time_casa marcados_casa sofridos_casa time_fora \\\n", 447 | "0 1.0 Chile 5.0 3.0 Mexico \n", 448 | "1 1.0 Costa Rica 6.0 1.0 Uruguai \n", 449 | "2 1.0 Colombia 5.0 0.0 Peru \n", 450 | "3 2.0 Honduras 3.0 1.0 Brasil \n", 451 | "4 1.0 Mexico 1.0 1.0 Uruguai \n", 452 | "\n", 453 | " marcados_fora sofridos_fora \n", 454 | "0 1.0 1.0 \n", 455 | "1 2.0 2.0 \n", 456 | "2 4.0 5.0 \n", 457 | "3 5.0 2.0 \n", 458 | "4 2.0 2.0 " 459 | ] 460 | }, 461 | "execution_count": 129, 462 | "metadata": {}, 463 | "output_type": "execute_result" 464 | } 465 | ], 466 | "source": [ 467 | "df.head()" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": 146, 473 | "id": "f540b9f1-4057-4721-81dc-649c80d47448", 474 | "metadata": {}, 475 | "outputs": [], 476 | "source": [ 477 | "\n", 478 | "from sklearn.linear_model import LogisticRegression\n", 479 | "from sklearn.ensemble import RandomForestClassifier\n", 480 | "from sklearn.metrics import log_loss, roc_auc_score, classification_report, f1_score" 481 | ] 482 | }, 483 | { 484 | "cell_type": "code", 485 | "execution_count": 135, 486 | "id": "d9a516d4-c964-4ab2-aefe-86db91997fd2", 487 | "metadata": {}, 488 | "outputs": [ 489 | { 490 | "data": { 491 | "text/plain": [ 492 | "0.45783730158730157" 493 | ] 494 | }, 495 | "execution_count": 135, 496 | "metadata": {}, 497 | "output_type": 
"execute_result" 498 | } 499 | ], 500 | "source": [ 501 | "0.45783730158730157" 502 | ] 503 | }, 504 | { 505 | "cell_type": "code", 506 | "execution_count": 142, 507 | "id": "6bc669d6-deb1-4183-86c9-2254b7cf0ca4", 508 | "metadata": {}, 509 | "outputs": [ 510 | { 511 | "name": "stdout", 512 | "output_type": "stream", 513 | "text": [ 514 | "Ano: 2004 | LL: 2.9514717901829335 | AUC: 0.22916666666666666\n", 515 | "\n", 516 | "Ano: 2007 | LL: 1.1069725818244613 | AUC: 0.690873015873016\n", 517 | "\n", 518 | "Ano: 2011 | LL: 3.32783622818049 | AUC: 0.25555555555555554\n", 519 | "\n", 520 | "Ano: 2015 | LL: 1.3715442860035738 | AUC: 0.47142857142857136\n", 521 | "\n", 522 | "Ano: 2016 | LL: 1.2713562077495835 | AUC: 0.6111111111111112\n", 523 | "\n", 524 | "Ano: 2019 | LL: 1.4586912545073727 | AUC: 0.4888888888888889\n", 525 | "\n", 526 | "0.45783730158730157\n" 527 | ] 528 | }, 529 | { 530 | "name": "stderr", 531 | "output_type": "stream", 532 | "text": [ 533 | "/Users/mariofilho/miniconda3/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:763: ConvergenceWarning: lbfgs failed to converge (status=1):\n", 534 | "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", 535 | "\n", 536 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n", 537 | " https://scikit-learn.org/stable/modules/preprocessing.html\n", 538 | "Please also refer to the documentation for alternative solver options:\n", 539 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", 540 | " n_iter_i = _check_optimize_result(\n", 541 | "/Users/mariofilho/miniconda3/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:763: ConvergenceWarning: lbfgs failed to converge (status=1):\n", 542 | "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", 543 | "\n", 544 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n", 545 | " https://scikit-learn.org/stable/modules/preprocessing.html\n", 546 | "Please also refer to the documentation for alternative solver options:\n", 547 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", 548 | " n_iter_i = _check_optimize_result(\n" 549 | ] 550 | } 551 | ], 552 | "source": [ 553 | "auc = list()\n", 554 | "for e in [2004, 2007, 2011, 2015, 2016, 2019]:\n", 555 | " dftr = df[df['Edição'] < e]\n", 556 | " dfval = df[df['Edição'] == e]\n", 557 | " f = ['derrota_casa','empate_casa', 'vitoria_casa', 'derrota_fora',\n", 558 | " 'empate_fora', 'vitoria_fora', 'marcados_casa',\n", 559 | " 'sofridos_casa', 'marcados_fora', 'sofridos_fora']\n", 560 | " \n", 561 | " Xtr = dftr[f]\n", 562 | " ytr = dftr['resultados']\n", 563 | " Xval = dfval[f]\n", 564 | " yval = dfval['resultados']\n", 565 | " \n", 566 | " #print(dftr.shape)\n", 567 | " #print(dfval.shape)\n", 568 | " mdl = LogisticRegression(C=1, class_weight='balanced')\n", 569 | " #mdl = RandomForestClassifier(n_jobs=6, min_samples_leaf=2, n_estimators=100, random_state=0)\n", 570 | " mdl.fit(Xtr, ytr)\n", 571 | " \n", 572 | " \n", 573 | " p = mdl.predict_proba(Xval)\n", 574 | " p_ = mdl.predict(Xval)\n", 575 | " auc_ = roc_auc_score(yval,p,multi_class='ovr')\n", 576 | " f1 = \n", 577 | " print(\"Ano: {} | LL: {} | AUC: {}\".format(e, log_loss(yval, p), auc_))\n", 578 | " auc.append(auc_)\n", 579 | " #print(classification_report(yval, p_))\n", 580 | " print()\n", 581 | "print(np.mean(auc))\n", 582 | " " 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": 143, 588 | "id": "8226b29f-ff12-443f-bbe9-1c8fea85fc36", 589 | "metadata": {}, 590 | "outputs": [ 591 | { 592 | "data": { 593 | "text/plain": [ 594 | "array([ 1, 0, 1, 0, 1, 0, -1, 1])" 595 | ] 596 | }, 597 | "execution_count": 143, 598 | "metadata": {}, 599 | 
"output_type": "execute_result" 600 | } 601 | ], 602 | "source": [ 603 | "p_" 604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": 145, 609 | "id": "9e186daf-9613-4898-9b22-d84984203b8b", 610 | "metadata": {}, 611 | "outputs": [ 612 | { 613 | "data": { 614 | "text/html": [ 615 | "
\n", 616 | "\n", 629 | "\n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " 
\n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | "
EdiçãoCasaForaresultadostime_casaderrota_casaempate_casavitoria_casatime_foraderrota_foraempate_foravitoria_foratime_casamarcados_casasofridos_casatime_foramarcados_forasofridos_fora
482019BrasilParaguai0Brasil0.01.02.0Paraguai1.02.00.0Brasil8.00.0Paraguai3.04.0
492019VenezuelaArgentina-1Venezuela0.02.01.0Argentina1.01.01.0Venezuela3.01.0Argentina3.03.0
502019ColombiaChile0Colombia0.00.03.0Chile1.00.02.0Colombia4.00.0Chile6.02.0
512019UruguaiPeru0Uruguai0.01.02.0Peru1.01.01.0Uruguai7.02.0Peru3.06.0
522019BrasilArgentina1Brasil0.01.02.0Argentina1.01.01.0Brasil8.00.0Argentina3.03.0
532019ChilePeru-1Chile1.00.02.0Peru1.01.01.0Chile6.02.0Peru3.06.0
542019ArgentinaChile1Argentina1.01.01.0Chile1.00.02.0Argentina3.03.0Chile6.02.0
552019BrasilPeru1Brasil0.01.02.0Peru1.01.01.0Brasil8.00.0Peru3.06.0
\n", 824 | "
" 825 | ], 826 | "text/plain": [ 827 | " Edição Casa Fora resultados time_casa derrota_casa \\\n", 828 | "48 2019 Brasil Paraguai 0 Brasil 0.0 \n", 829 | "49 2019 Venezuela Argentina -1 Venezuela 0.0 \n", 830 | "50 2019 Colombia Chile 0 Colombia 0.0 \n", 831 | "51 2019 Uruguai Peru 0 Uruguai 0.0 \n", 832 | "52 2019 Brasil Argentina 1 Brasil 0.0 \n", 833 | "53 2019 Chile Peru -1 Chile 1.0 \n", 834 | "54 2019 Argentina Chile 1 Argentina 1.0 \n", 835 | "55 2019 Brasil Peru 1 Brasil 0.0 \n", 836 | "\n", 837 | " empate_casa vitoria_casa time_fora derrota_fora empate_fora \\\n", 838 | "48 1.0 2.0 Paraguai 1.0 2.0 \n", 839 | "49 2.0 1.0 Argentina 1.0 1.0 \n", 840 | "50 0.0 3.0 Chile 1.0 0.0 \n", 841 | "51 1.0 2.0 Peru 1.0 1.0 \n", 842 | "52 1.0 2.0 Argentina 1.0 1.0 \n", 843 | "53 0.0 2.0 Peru 1.0 1.0 \n", 844 | "54 1.0 1.0 Chile 1.0 0.0 \n", 845 | "55 1.0 2.0 Peru 1.0 1.0 \n", 846 | "\n", 847 | " vitoria_fora time_casa marcados_casa sofridos_casa time_fora \\\n", 848 | "48 0.0 Brasil 8.0 0.0 Paraguai \n", 849 | "49 1.0 Venezuela 3.0 1.0 Argentina \n", 850 | "50 2.0 Colombia 4.0 0.0 Chile \n", 851 | "51 1.0 Uruguai 7.0 2.0 Peru \n", 852 | "52 1.0 Brasil 8.0 0.0 Argentina \n", 853 | "53 1.0 Chile 6.0 2.0 Peru \n", 854 | "54 2.0 Argentina 3.0 3.0 Chile \n", 855 | "55 1.0 Brasil 8.0 0.0 Peru \n", 856 | "\n", 857 | " marcados_fora sofridos_fora \n", 858 | "48 3.0 4.0 \n", 859 | "49 3.0 3.0 \n", 860 | "50 6.0 2.0 \n", 861 | "51 3.0 6.0 \n", 862 | "52 3.0 3.0 \n", 863 | "53 3.0 6.0 \n", 864 | "54 6.0 2.0 \n", 865 | "55 3.0 6.0 " 866 | ] 867 | }, 868 | "execution_count": 145, 869 | "metadata": {}, 870 | "output_type": "execute_result" 871 | } 872 | ], 873 | "source": [ 874 | "dfval" 875 | ] 876 | }, 877 | { 878 | "cell_type": "code", 879 | "execution_count": null, 880 | "id": "90b55cff-aa80-4391-b47c-bb2c62e95346", 881 | "metadata": {}, 882 | "outputs": [], 883 | "source": [] 884 | } 885 | ], 886 | "metadata": { 887 | "kernelspec": { 888 | "display_name": "Python 3", 889 | 
"""Daily batch-scoring job for the tip model.

Tutorial script for deploying a machine-learning model that runs every day
and writes its predictions to a SQL database.
"""

import time

import joblib
import pandas as pd
import schedule

from validator import schema

# Single source of truth for the database used for both reads and writes.
DB_URI = "sqlite:///data.db"


def load_data(yesterday, today):
    """Load the trips picked up in the half-open window [yesterday, today).

    Args:
        yesterday: Inclusive lower bound for ``tpep_pickup_datetime``.
        today: Exclusive upper bound for ``tpep_pickup_datetime``.

    Returns:
        DataFrame with the matching rows of ``yellow_tripdata``.
    """
    # Bound parameters instead of string interpolation: avoids SQL injection
    # and the double-quoted "string" literals SQLite only accepts as a quirk.
    query = (
        "SELECT * FROM yellow_tripdata "
        "WHERE tpep_pickup_datetime >= ? AND tpep_pickup_datetime < ?"
    )
    return pd.read_sql(query, DB_URI, params=(yesterday, today))


def create_features(data):
    """Add the ``fare_amount_per_person`` column and return ``data``.

    The column is added in place on the DataFrame that was passed in.
    """
    # +1 in the denominator keeps the division defined when passenger_count is 0.
    data["fare_amount_per_person"] = data["fare_amount"] / (data["passenger_count"] + 1)
    return data


def make_predictions(today="2022-01-05", yesterday="2022-01-04"):
    """Score one day of trips and append the result to ``predictions``.

    Args:
        today: Exclusive end of the scoring window (``YYYY-MM-DD``).
        yesterday: Inclusive start of the scoring window (``YYYY-MM-DD``).

    The defaults are the fixed dates the original script used. To score the
    actual previous day, pass
    ``pd.to_datetime("today").strftime("%Y-%m-%d")`` as ``today`` and the
    same value minus ``pd.Timedelta(days=1)`` as ``yesterday``.
    """
    data = load_data(yesterday, today)
    data = create_features(data)

    validated_data = schema.validate(data)

    model = joblib.load("model.joblib")
    predictions = model.predict(validated_data)

    # Index by the table's own "index" column — assumes yellow_tripdata
    # exposes one; confirm against the table schema.
    predictions_df = pd.DataFrame(predictions, columns=["prediction"], index=data["index"])
    predictions_df.to_sql("predictions", DB_URI, if_exists="append")


if __name__ == '__main__':
    schedule.every().day.at("00:00").do(make_predictions)
    print("Rodando")
    while True:
        schedule.run_pending()
        # Sleep between polls; without it the loop busy-waits at 100% CPU.
        time.sleep(1)
\n", 32 | "\n", 45 | "\n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | "
indexVendorIDtpep_pickup_datetimetpep_dropoff_datetimepassenger_counttrip_distanceRatecodeIDstore_and_fwd_flagPULocationIDDOLocationIDpayment_typefare_amountextramta_taxtip_amounttolls_amountimprovement_surchargetotal_amountcongestion_surchargeairport_fee
268862239855522022-01-04 23:47:272022-01-04 23:55:03NaN1.66NaNNone14350013.200.00.54.380.00.320.88NaNNaN
268863239855622022-01-04 23:44:582022-01-04 23:56:10NaN3.23NaNNone236164014.590.00.52.860.00.320.75NaNNaN
268864239855722022-01-04 23:07:072022-01-04 23:10:54NaN0.55NaNNone129129013.200.00.51.500.00.315.50NaNNaN
268865239855822022-01-04 23:43:002022-01-04 23:56:00NaN3.05NaNNone137143013.610.00.52.690.00.319.60NaNNaN
268866239855922022-01-04 23:34:002022-01-04 23:45:00NaN3.66NaNNone146236014.770.00.52.340.00.320.41NaNNaN
\n", 189 | "
" 190 | ], 191 | "text/plain": [ 192 | " index VendorID tpep_pickup_datetime tpep_dropoff_datetime \\\n", 193 | "268862 2398555 2 2022-01-04 23:47:27 2022-01-04 23:55:03 \n", 194 | "268863 2398556 2 2022-01-04 23:44:58 2022-01-04 23:56:10 \n", 195 | "268864 2398557 2 2022-01-04 23:07:07 2022-01-04 23:10:54 \n", 196 | "268865 2398558 2 2022-01-04 23:43:00 2022-01-04 23:56:00 \n", 197 | "268866 2398559 2 2022-01-04 23:34:00 2022-01-04 23:45:00 \n", 198 | "\n", 199 | " passenger_count trip_distance RatecodeID store_and_fwd_flag \\\n", 200 | "268862 NaN 1.66 NaN None \n", 201 | "268863 NaN 3.23 NaN None \n", 202 | "268864 NaN 0.55 NaN None \n", 203 | "268865 NaN 3.05 NaN None \n", 204 | "268866 NaN 3.66 NaN None \n", 205 | "\n", 206 | " PULocationID DOLocationID payment_type fare_amount extra mta_tax \\\n", 207 | "268862 143 50 0 13.20 0.0 0.5 \n", 208 | "268863 236 164 0 14.59 0.0 0.5 \n", 209 | "268864 129 129 0 13.20 0.0 0.5 \n", 210 | "268865 137 143 0 13.61 0.0 0.5 \n", 211 | "268866 146 236 0 14.77 0.0 0.5 \n", 212 | "\n", 213 | " tip_amount tolls_amount improvement_surcharge total_amount \\\n", 214 | "268862 4.38 0.0 0.3 20.88 \n", 215 | "268863 2.86 0.0 0.3 20.75 \n", 216 | "268864 1.50 0.0 0.3 15.50 \n", 217 | "268865 2.69 0.0 0.3 19.60 \n", 218 | "268866 2.34 0.0 0.3 20.41 \n", 219 | "\n", 220 | " congestion_surcharge airport_fee \n", 221 | "268862 NaN NaN \n", 222 | "268863 NaN NaN \n", 223 | "268864 NaN NaN \n", 224 | "268865 NaN NaN \n", 225 | "268866 NaN NaN " 226 | ] 227 | }, 228 | "execution_count": 21, 229 | "metadata": {}, 230 | "output_type": "execute_result" 231 | } 232 | ], 233 | "source": [ 234 | "data.tail()" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 22, 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "data": { 244 | "text/html": [ 245 | "
\n", 246 | "\n", 259 | "\n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | "
indexprediction
14696223985552.694889
14696323985563.326200
14696423985574.061624
14696523985583.535200
14696623985592.957333
\n", 295 | "
" 296 | ], 297 | "text/plain": [ 298 | " index prediction\n", 299 | "146962 2398555 2.694889\n", 300 | "146963 2398556 3.326200\n", 301 | "146964 2398557 4.061624\n", 302 | "146965 2398558 3.535200\n", 303 | "146966 2398559 2.957333" 304 | ] 305 | }, 306 | "execution_count": 22, 307 | "metadata": {}, 308 | "output_type": "execute_result" 309 | } 310 | ], 311 | "source": [ 312 | "preds.tail()" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 25, 318 | "metadata": {}, 319 | "outputs": [], 320 | "source": [ 321 | "r = preds.merge(data[[\"index\", \"tip_amount\"]], on=\"index\", how=\"left\")" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": 26, 327 | "metadata": {}, 328 | "outputs": [ 329 | { 330 | "data": { 331 | "text/html": [ 332 | "
\n", 333 | "\n", 346 | "\n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | "
indexpredictiontip_amount
0971751.9888004.21
11015324.1351737.41
21148023.5784705.55
31170624.9045528.62
41171450.8348590.00
\n", 388 | "
" 389 | ], 390 | "text/plain": [ 391 | " index prediction tip_amount\n", 392 | "0 97175 1.988800 4.21\n", 393 | "1 101532 4.135173 7.41\n", 394 | "2 114802 3.578470 5.55\n", 395 | "3 117062 4.904552 8.62\n", 396 | "4 117145 0.834859 0.00" 397 | ] 398 | }, 399 | "execution_count": 26, 400 | "metadata": {}, 401 | "output_type": "execute_result" 402 | } 403 | ], 404 | "source": [ 405 | "r.head()" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": 27, 411 | "metadata": {}, 412 | "outputs": [ 413 | { 414 | "data": { 415 | "text/plain": [ 416 | "2.728058382025522" 417 | ] 418 | }, 419 | "execution_count": 27, 420 | "metadata": {}, 421 | "output_type": "execute_result" 422 | } 423 | ], 424 | "source": [ 425 | "from sklearn.metrics import mean_squared_error\n", 426 | "mean_squared_error(r[\"tip_amount\"], r[\"prediction\"], squared=False) " 427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": null, 432 | "metadata": {}, 433 | "outputs": [], 434 | "source": [] 435 | } 436 | ], 437 | "metadata": { 438 | "kernelspec": { 439 | "display_name": "Python 3.9.7 ('base': conda)", 440 | "language": "python", 441 | "name": "python3" 442 | }, 443 | "language_info": { 444 | "codemirror_mode": { 445 | "name": "ipython", 446 | "version": 3 447 | }, 448 | "file_extension": ".py", 449 | "mimetype": "text/x-python", 450 | "name": "python", 451 | "nbconvert_exporter": "python", 452 | "pygments_lexer": "ipython3", 453 | "version": "3.9.7" 454 | }, 455 | "orig_nbformat": 4, 456 | "vscode": { 457 | "interpreter": { 458 | "hash": "7a2c4b191d1ae843dde5cb5f4d1f62fa892f6b79b0f9392a84691e890e33c5a4" 459 | } 460 | } 461 | }, 462 | "nbformat": 4, 463 | "nbformat_minor": 2 464 | } 465 | -------------------------------------------------------------------------------- /deploy_sql/validator.py: -------------------------------------------------------------------------------- 1 | from pandera import DataFrameSchema, Column, Check, Index, MultiIndex 2 | import 
# Every validated column shares the same numpy Float64 dtype.
_FLOAT64 = pandera.engines.numpy_engine.Float64


def _float_column(*, checks=None, nullable=False):
    """Build a required, non-unique, non-coerced Float64 column spec."""
    return Column(
        dtype=_FLOAT64,
        checks=checks,
        nullable=nullable,
        unique=False,
        coerce=False,
        required=True,
        regex=False,
    )


# Schema applied to the feature frame before prediction.
# strict="filter" drops any column that is not declared below.
schema = DataFrameSchema(
    columns={
        # Nullable; non-null values must lie in [0, 10].
        "passenger_count": _float_column(
            checks=[Check(lambda s: s >= 0), Check(lambda s: s <= 10)],
            nullable=True,
        ),
        # Must be present and non-negative.
        "trip_distance": _float_column(checks=[Check(lambda s: s >= 0)]),
        "fare_amount": _float_column(),
        "fare_amount_per_person": _float_column(nullable=True),
    },
    index=Index(
        dtype=pandera.engines.numpy_engine.Int64,
        checks=None,
        nullable=False,
        coerce=False,
        name=None,
    ),
    coerce=False,
    strict="filter",
    name=None,
)
"metadata": {}, 29 | "outputs": [ 30 | { 31 | "data": { 32 | "text/html": [ 33 | "
\n", 34 | "\n", 47 | "\n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | "
DateTimeHomeTeamNameAwayTeamNameHomeTeamGoalsAwayTeamGoalsStageSpecialWinConditionsStadiumCityAttendanceYear
06 July 1960 (1960-07-06)20:00FranceYugoslavia45Semi-finalsNaNParc des PrincesParis263701960
\n", 83 | "
" 84 | ], 85 | "text/plain": [ 86 | " Date Time HomeTeamName AwayTeamName HomeTeamGoals \\\n", 87 | "0 6 July 1960 (1960-07-06) 20:00 France   Yugoslavia 4 \n", 88 | "\n", 89 | " AwayTeamGoals Stage SpecialWinConditions Stadium City \\\n", 90 | "0 5 Semi-finals NaN Parc des Princes Paris \n", 91 | "\n", 92 | " Attendance Year \n", 93 | "0 26370 1960 " 94 | ] 95 | }, 96 | "execution_count": 16, 97 | "metadata": {}, 98 | "output_type": "execute_result" 99 | } 100 | ], 101 | "source": [ 102 | "data.head(1)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "id": "2769e675-0e20-46f0-8af4-d8e4c5afa41a", 108 | "metadata": {}, 109 | "source": [ 110 | "Date (with Time), Home Team, Away Team, HGoals, AGoals" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 17, 116 | "id": "d55071fa-0460-431e-919f-30a1a109debd", 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "# https://regexr.com/\n", 121 | "# https://strftime.org/\n", 122 | "# https://www.kaggle.com/c/ncaam-march-mania-2021" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 18, 128 | "id": "0c0d0044-346b-48de-9c50-b16d322ca84f", 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/plain": [ 134 | "0 1960-07-06 20:00\n", 135 | "1 1960-07-06 21:30\n", 136 | "2 1960-07-09 21:30\n", 137 | "3 1960-07-10 21:30\n", 138 | "4 1964-06-17 20:00\n", 139 | " ... 
\n", 140 | "281 2016-07-02 21:00\n", 141 | "282 2016-07-03 21:00\n", 142 | "283 2016-07-06 21:00\n", 143 | "284 2016-07-07 21:00\n", 144 | "285 2016-07-10 21:00\n", 145 | "Name: 0, Length: 286, dtype: object" 146 | ] 147 | }, 148 | "execution_count": 18, 149 | "metadata": {}, 150 | "output_type": "execute_result" 151 | } 152 | ], 153 | "source": [ 154 | "captured_date = (data['Date'].str.extract(r\"\\((.*)\\)\").squeeze() + \" \" \n", 155 | " + data['Time'].str.extract(r\"(\\d+\\:\\d+)\").squeeze())\n", 156 | "captured_date" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 19, 162 | "id": "f7ccdc43-a40b-4ef3-ac10-c5360693749d", 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "data": { 167 | "text/html": [ 168 | "
\n", 169 | "\n", 182 | "\n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | "
date
01960-07-06 20:00:00
11960-07-06 21:30:00
21960-07-09 21:30:00
31960-07-10 21:30:00
41964-06-17 20:00:00
\n", 212 | "
" 213 | ], 214 | "text/plain": [ 215 | " date\n", 216 | "0 1960-07-06 20:00:00\n", 217 | "1 1960-07-06 21:30:00\n", 218 | "2 1960-07-09 21:30:00\n", 219 | "3 1960-07-10 21:30:00\n", 220 | "4 1964-06-17 20:00:00" 221 | ] 222 | }, 223 | "execution_count": 19, 224 | "metadata": {}, 225 | "output_type": "execute_result" 226 | } 227 | ], 228 | "source": [ 229 | "captured_date = pd.to_datetime(captured_date, format=\"%Y-%m-%d %H:%M\")\n", 230 | "clean_data = pd.DataFrame({\"date\": captured_date})\n", 231 | "clean_data.head()" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 20, 237 | "id": "c73c52fd-6984-4a88-83a3-c78fa03b9a98", 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "match_data = data[['HomeTeamName', 'AwayTeamName', 'HomeTeamGoals', 'AwayTeamGoals']]\n", 242 | "clean_data_merged = pd.concat([clean_data, match_data], axis=1)" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 21, 248 | "id": "a2e8ce15-7807-406d-bdee-042900c6a534", 249 | "metadata": {}, 250 | "outputs": [ 251 | { 252 | "data": { 253 | "text/html": [ 254 | "
\n", 255 | "\n", 268 | "\n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | "
dateHomeTeamNameAwayTeamNameHomeTeamGoalsAwayTeamGoals
01960-07-06 20:00:00FranceYugoslavia45
11960-07-06 21:30:00CzechoslovakiaSoviet Union03
21960-07-09 21:30:00CzechoslovakiaFrance20
31960-07-10 21:30:00Soviet UnionYugoslavia21
41964-06-17 20:00:00SpainHungary21
..................
2812016-07-02 21:00:00GermanyItaly11
2822016-07-03 21:00:00FranceIceland52
2832016-07-06 21:00:00PortugalWales20
2842016-07-07 21:00:00GermanyFrance02
2852016-07-10 21:00:00PortugalFrance10
\n", 370 | "

286 rows × 5 columns

\n", 371 | "
" 372 | ], 373 | "text/plain": [ 374 | " date HomeTeamName AwayTeamName HomeTeamGoals \\\n", 375 | "0 1960-07-06 20:00:00 France   Yugoslavia 4 \n", 376 | "1 1960-07-06 21:30:00 Czechoslovakia   Soviet Union 0 \n", 377 | "2 1960-07-09 21:30:00 Czechoslovakia   France 2 \n", 378 | "3 1960-07-10 21:30:00 Soviet Union   Yugoslavia 2 \n", 379 | "4 1964-06-17 20:00:00 Spain   Hungary 2 \n", 380 | ".. ... ... ... ... \n", 381 | "281 2016-07-02 21:00:00 Germany   Italy 1 \n", 382 | "282 2016-07-03 21:00:00 France   Iceland 5 \n", 383 | "283 2016-07-06 21:00:00 Portugal   Wales 2 \n", 384 | "284 2016-07-07 21:00:00 Germany   France 0 \n", 385 | "285 2016-07-10 21:00:00 Portugal   France 1 \n", 386 | "\n", 387 | " AwayTeamGoals \n", 388 | "0 5 \n", 389 | "1 3 \n", 390 | "2 0 \n", 391 | "3 1 \n", 392 | "4 1 \n", 393 | ".. ... \n", 394 | "281 1 \n", 395 | "282 2 \n", 396 | "283 0 \n", 397 | "284 2 \n", 398 | "285 0 \n", 399 | "\n", 400 | "[286 rows x 5 columns]" 401 | ] 402 | }, 403 | "execution_count": 21, 404 | "metadata": {}, 405 | "output_type": "execute_result" 406 | } 407 | ], 408 | "source": [ 409 | "clean_data_merged" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": 22, 415 | "id": "5ab062a3-705e-4e4f-a582-9e6307e49985", 416 | "metadata": {}, 417 | "outputs": [], 418 | "source": [ 419 | "def decide_y(home_goals, away_goals):\n", 420 | " if home_goals > away_goals:\n", 421 | " return 1\n", 422 | " elif home_goals < away_goals:\n", 423 | " return -1\n", 424 | " else:\n", 425 | " return 0\n", 426 | " \n", 427 | "labels = np.zeros(clean_data_merged.shape[0])\n", 428 | "for i, (home_goals, away_goals) in enumerate(clean_data_merged[['HomeTeamGoals', 'AwayTeamGoals']].values): \n", 429 | " if home_goals > away_goals:\n", 430 | " labels[i] = 1\n", 431 | " elif home_goals < away_goals:\n", 432 | " labels[i] = -1\n", 433 | " else:\n", 434 | " labels[i] = 0\n", 435 | "clean_data_merged['y_classif'] = labels" 436 | ] 437 | }, 438 | { 439 | 
"cell_type": "code", 440 | "execution_count": 23, 441 | "id": "9d4eaf30-8a8f-43d7-8dc8-0fe0a031647a", 442 | "metadata": {}, 443 | "outputs": [], 444 | "source": [ 445 | "clean_data_merged['y_reg'] = clean_data_merged['HomeTeamGoals'] - clean_data_merged['AwayTeamGoals']" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": 24, 451 | "id": "dea8fd0b-9247-412a-bb62-0b884058d266", 452 | "metadata": {}, 453 | "outputs": [ 454 | { 455 | "data": { 456 | "text/html": [ 457 | "
\n", 458 | "\n", 471 | "\n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | "
dateHomeTeamNameAwayTeamNameHomeTeamGoalsAwayTeamGoalsy_classify_reg
01960-07-06 20:00:00FranceYugoslavia45-1.0-1
11960-07-06 21:30:00CzechoslovakiaSoviet Union03-1.0-3
21960-07-09 21:30:00CzechoslovakiaFrance201.02
31960-07-10 21:30:00Soviet UnionYugoslavia211.01
41964-06-17 20:00:00SpainHungary211.01
........................
2812016-07-02 21:00:00GermanyItaly110.00
2822016-07-03 21:00:00FranceIceland521.03
2832016-07-06 21:00:00PortugalWales201.02
2842016-07-07 21:00:00GermanyFrance02-1.0-2
2852016-07-10 21:00:00PortugalFrance101.01
\n", 597 | "

286 rows × 7 columns

\n", 598 | "
" 599 | ], 600 | "text/plain": [ 601 | " date HomeTeamName AwayTeamName HomeTeamGoals \\\n", 602 | "0 1960-07-06 20:00:00 France   Yugoslavia 4 \n", 603 | "1 1960-07-06 21:30:00 Czechoslovakia   Soviet Union 0 \n", 604 | "2 1960-07-09 21:30:00 Czechoslovakia   France 2 \n", 605 | "3 1960-07-10 21:30:00 Soviet Union   Yugoslavia 2 \n", 606 | "4 1964-06-17 20:00:00 Spain   Hungary 2 \n", 607 | ".. ... ... ... ... \n", 608 | "281 2016-07-02 21:00:00 Germany   Italy 1 \n", 609 | "282 2016-07-03 21:00:00 France   Iceland 5 \n", 610 | "283 2016-07-06 21:00:00 Portugal   Wales 2 \n", 611 | "284 2016-07-07 21:00:00 Germany   France 0 \n", 612 | "285 2016-07-10 21:00:00 Portugal   France 1 \n", 613 | "\n", 614 | " AwayTeamGoals y_classif y_reg \n", 615 | "0 5 -1.0 -1 \n", 616 | "1 3 -1.0 -3 \n", 617 | "2 0 1.0 2 \n", 618 | "3 1 1.0 1 \n", 619 | "4 1 1.0 1 \n", 620 | ".. ... ... ... \n", 621 | "281 1 0.0 0 \n", 622 | "282 2 1.0 3 \n", 623 | "283 0 1.0 2 \n", 624 | "284 2 -1.0 -2 \n", 625 | "285 0 1.0 1 \n", 626 | "\n", 627 | "[286 rows x 7 columns]" 628 | ] 629 | }, 630 | "execution_count": 24, 631 | "metadata": {}, 632 | "output_type": "execute_result" 633 | } 634 | ], 635 | "source": [ 636 | "clean_data_merged" 637 | ] 638 | }, 639 | { 640 | "cell_type": "markdown", 641 | "id": "e641c09b-002e-4483-be91-1722f0856aaf", 642 | "metadata": {}, 643 | "source": [ 644 | "p > 1 - time da casa ganhou \n", 645 | "-1 < p < 1 - empate \n", 646 | "p < -1 - time de fora ganhou" 647 | ] 648 | }, 649 | { 650 | "cell_type": "code", 651 | "execution_count": 26, 652 | "id": "75e2c27c-447c-4cad-a5d6-0b64f60aa10d", 653 | "metadata": {}, 654 | "outputs": [], 655 | "source": [ 656 | "clean_data_merged.to_parquet(\"./data/clean_data_merged_v1.parquet\")" 657 | ] 658 | }, 659 | { 660 | "cell_type": "code", 661 | "execution_count": null, 662 | "id": "1dbc1bda-8f00-4486-b4f4-2e56302925d1", 663 | "metadata": {}, 664 | "outputs": [], 665 | "source": [] 666 | } 667 | ], 668 | "metadata": { 669 | 
"kernelspec": { 670 | "display_name": "Python 3", 671 | "language": "python", 672 | "name": "python3" 673 | }, 674 | "language_info": { 675 | "codemirror_mode": { 676 | "name": "ipython", 677 | "version": 3 678 | }, 679 | "file_extension": ".py", 680 | "mimetype": "text/x-python", 681 | "name": "python", 682 | "nbconvert_exporter": "python", 683 | "pygments_lexer": "ipython3", 684 | "version": "3.8.5" 685 | } 686 | }, 687 | "nbformat": 4, 688 | "nbformat_minor": 5 689 | } 690 | -------------------------------------------------------------------------------- /euro2021/1_baselines.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "16f7a6a2-2588-45e5-a7c7-cd61c2699a42", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd\n", 11 | "import numpy as np" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "id": "afe8030f-53f3-4d90-8eea-eb3211bd8294", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "data = pd.read_parquet(\"./data/clean_data_merged_v1.parquet\")\n", 22 | "data['HomeTeamName'] = data['HomeTeamName'].str.strip()\n", 23 | "data['AwayTeamName'] = data['AwayTeamName'].str.strip()" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 3, 29 | "id": "9a825ea6-8f5c-4559-b49d-d767f843dcbb", 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "data": { 34 | "text/html": [ 35 | "
\n", 36 | "\n", 49 | "\n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | "
dateHomeTeamNameAwayTeamNameHomeTeamGoalsAwayTeamGoalsy_classify_reg
01960-07-06 20:00:00FranceYugoslavia45-1.0-1
11960-07-06 21:30:00CzechoslovakiaSoviet Union03-1.0-3
21960-07-09 21:30:00CzechoslovakiaFrance201.02
31960-07-10 21:30:00Soviet UnionYugoslavia211.01
41964-06-17 20:00:00SpainHungary211.01
\n", 115 | "
" 116 | ], 117 | "text/plain": [ 118 | " date HomeTeamName AwayTeamName HomeTeamGoals \\\n", 119 | "0 1960-07-06 20:00:00 France Yugoslavia 4 \n", 120 | "1 1960-07-06 21:30:00 Czechoslovakia Soviet Union 0 \n", 121 | "2 1960-07-09 21:30:00 Czechoslovakia France 2 \n", 122 | "3 1960-07-10 21:30:00 Soviet Union Yugoslavia 2 \n", 123 | "4 1964-06-17 20:00:00 Spain Hungary 2 \n", 124 | "\n", 125 | " AwayTeamGoals y_classif y_reg \n", 126 | "0 5 -1.0 -1 \n", 127 | "1 3 -1.0 -3 \n", 128 | "2 0 1.0 2 \n", 129 | "3 1 1.0 1 \n", 130 | "4 1 1.0 1 " 131 | ] 132 | }, 133 | "execution_count": 3, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "data.head(5)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 4, 145 | "id": "2bf5eb6b-4696-4a1f-95af-824004e1a7d2", 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "def baseline_wins(df):\n", 150 | " home = df.groupby(\"HomeTeamName\")['y_classif'].mean()\n", 151 | " away = df.groupby(\"AwayTeamName\")['y_classif'].mean() * -1\n", 152 | " \n", 153 | " return 0.5*home + 0.5*away" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 5, 159 | "id": "926b213b-3715-4992-b114-bd714de33594", 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "from sklearn.metrics import classification_report, roc_auc_score" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 9, 169 | "id": "75e2c27c-447c-4cad-a5d6-0b64f60aa10d", 170 | "metadata": {}, 171 | "outputs": [ 172 | { 173 | "name": "stdout", 174 | "output_type": "stream", 175 | "text": [ 176 | "2004 Baseline Historical wins 0.25806451612903225\n", 177 | "2004 Baseline Home Win 0.3548387096774194\n", 178 | " precision recall f1-score support\n", 179 | "\n", 180 | " -1.0 0.19 0.30 0.23 10\n", 181 | " 0.0 0.00 0.00 0.00 10\n", 182 | " 1.0 0.33 0.45 0.38 11\n", 183 | "\n", 184 | " accuracy 0.26 31\n", 185 | " macro avg 0.17 0.25 0.21 31\n", 
186 | "weighted avg 0.18 0.26 0.21 31\n", 187 | "\n" 188 | ] 189 | }, 190 | { 191 | "name": "stderr", 192 | "output_type": "stream", 193 | "text": [ 194 | "/Users/mariofilho/miniconda3/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1245: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", 195 | " _warn_prf(average, modifier, msg_start, len(result))\n", 196 | "/Users/mariofilho/miniconda3/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1245: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", 197 | " _warn_prf(average, modifier, msg_start, len(result))\n", 198 | "/Users/mariofilho/miniconda3/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1245: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. 
Use `zero_division` parameter to control this behavior.\n", 199 | " _warn_prf(average, modifier, msg_start, len(result))\n" 200 | ] 201 | }, 202 | { 203 | "ename": "AxisError", 204 | "evalue": "axis 1 is out of bounds for array of dimension 1", 205 | "output_type": "error", 206 | "traceback": [ 207 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 208 | "\u001b[0;31mAxisError\u001b[0m Traceback (most recent call last)", 209 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;31m#print(classification_report(val['y_classif'], np.ones(val.shape[0])))\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0myear\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"AUC Baseline Historical wins\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mroc_auc_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mval\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'y_classif'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'p'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmulti_class\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'ovr'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 16\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 210 | "\u001b[0;32m~/miniconda3/lib/python3.8/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36minner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m 
\u001b[0;34m-\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;31m# extra_args > 0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 211 | "\u001b[0;32m~/miniconda3/lib/python3.8/site-packages/sklearn/metrics/_ranking.py\u001b[0m in \u001b[0;36mroc_auc_score\u001b[0;34m(y_true, y_score, average, sample_weight, max_fpr, multi_class, labels)\u001b[0m\n\u001b[1;32m 535\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmulti_class\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'raise'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 536\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"multi_class must be in ('ovo', 'ovr')\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 537\u001b[0;31m return _multiclass_roc_auc_score(y_true, y_score, labels,\n\u001b[0m\u001b[1;32m 538\u001b[0m multi_class, average, sample_weight)\n\u001b[1;32m 539\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0my_type\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"binary\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 212 | "\u001b[0;32m~/miniconda3/lib/python3.8/site-packages/sklearn/metrics/_ranking.py\u001b[0m 
in \u001b[0;36m_multiclass_roc_auc_score\u001b[0;34m(y_true, y_score, labels, multi_class, average, sample_weight)\u001b[0m\n\u001b[1;32m 593\u001b[0m \"\"\"\n\u001b[1;32m 594\u001b[0m \u001b[0;31m# validation of the input y_score\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 595\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mallclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_score\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 596\u001b[0m raise ValueError(\n\u001b[1;32m 597\u001b[0m \u001b[0;34m\"Target scores need to be probabilities for multiclass \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 213 | "\u001b[0;32m~/miniconda3/lib/python3.8/site-packages/numpy/core/_methods.py\u001b[0m in \u001b[0;36m_sum\u001b[0;34m(a, axis, dtype, out, keepdims, initial, where)\u001b[0m\n\u001b[1;32m 45\u001b[0m def _sum(a, axis=None, dtype=None, out=None, keepdims=False,\n\u001b[1;32m 46\u001b[0m initial=_NoValue, where=True):\n\u001b[0;32m---> 47\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mumr_sum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkeepdims\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minitial\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwhere\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 48\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 49\u001b[0m def _prod(a, axis=None, dtype=None, out=None, keepdims=False,\n", 214 | 
"\u001b[0;31mAxisError\u001b[0m: axis 1 is out of bounds for array of dimension 1" 215 | ] 216 | } 217 | ], 218 | "source": [ 219 | "for year in [2004, 2008, 2012, 2016]:\n", 220 | " tr = data[data['date'].dt.year < year].copy()\n", 221 | " val = data[data['date'].dt.year == year].copy()\n", 222 | " #print(year, tr.shape, val.shape)\n", 223 | " \n", 224 | " baseline = baseline_wins(tr)\n", 225 | " val['p'] = (val['HomeTeamName'].map(baseline) > val['AwayTeamName'].map(baseline)).astype(int)\n", 226 | " val['p'] = val['p'].map(lambda x: -1 if x == 0 else 1 )\n", 227 | " \n", 228 | " print(year,\"Baseline Historical wins\", (val['y_classif'] == val['p']).mean())\n", 229 | " print(year, \"Baseline Home Win\", (val['y_classif'] == np.ones(val.shape[0])).mean())\n", 230 | " print(classification_report(val['y_classif'], val['p']))\n", 231 | " #print(classification_report(val['y_classif'], np.ones(val.shape[0])))\n", 232 | " \n", 233 | " print(year,\"AUC Baseline Historical wins\", roc_auc_score(val['y_classif'], val['p'], multi_class='ovr'))\n", 234 | " print()\n", 235 | " \n", 236 | " \n", 237 | " " 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "id": "53acd66c-03cf-45d3-90ed-2020a4fbab01", 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [] 247 | } 248 | ], 249 | "metadata": { 250 | "kernelspec": { 251 | "display_name": "Python 3", 252 | "language": "python", 253 | "name": "python3" 254 | }, 255 | "language_info": { 256 | "codemirror_mode": { 257 | "name": "ipython", 258 | "version": 3 259 | }, 260 | "file_extension": ".py", 261 | "mimetype": "text/x-python", 262 | "name": "python", 263 | "nbconvert_exporter": "python", 264 | "pygments_lexer": "ipython3", 265 | "version": "3.8.5" 266 | } 267 | }, 268 | "nbformat": 4, 269 | "nbformat_minor": 5 270 | } 271 | -------------------------------------------------------------------------------- /euro2021/README: 
-------------------------------------------------------------------------------- 1 | Material para os vídeos da Playlist: https://www.youtube.com/watch?v=5cI9YLfl5pA&list=PLwnip85KhroXqla3GfqVxDHzFBF3xLcAT 2 | 3 | Machine Learning na UEFA Euro 2021 4 | -------------------------------------------------------------------------------- /live11_sucesso_musical/README: -------------------------------------------------------------------------------- 1 | Revelando os Segredos do Sucesso de uma Música com Machine Learning - Live de Data Science #11 2 | https://youtu.be/6UBReBQZGmo 3 | -------------------------------------------------------------------------------- /live12_timeseries_prophet/README: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /live14_deploy/README: -------------------------------------------------------------------------------- 1 | Arquivos da live 14 - Como Colocar um Modelo de Machine Learning em Produção 2 | https://youtu.be/1hdZ0AVbQcw 3 | 4 | 5 | Dataset: https://www.kaggle.com/rogeriochaves/clickbait-buzzfeed-brasil 6 | -------------------------------------------------------------------------------- /live14_deploy/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request 2 | import joblib as jb 3 | import json 4 | 5 | app = Flask(__name__) 6 | 7 | mdl = jb.load("mdl.pkl.z") 8 | 9 | @app.route("/") # decorator 10 | def main(): 11 | 12 | print(request.args) 13 | 14 | title = request.args.get("titulo", default='') 15 | res = {"titulo": title, "p": mdl.predict_proba([title])[0][1]} 16 | return json.dumps(res) 17 | 18 | if __name__ == "__main__": 19 | app.run() 20 | 21 | 22 | #https://gunicorn.org/#quickstart -------------------------------------------------------------------------------- /live14_deploy/mdl.pkl.z: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ledmaster/notebooks_tutoriais/616c87c44f79bda5e1b0a2d33a2d5f7c31b86a08/live14_deploy/mdl.pkl.z -------------------------------------------------------------------------------- /live15_covid/Countries Longitude and Latitude.csv: -------------------------------------------------------------------------------- 1 | ,longitude,latitude,name 2 | 0,33.791638,-84.389488, 3 | 1,33.791638,-84.389488, 4 | 2,33.93911,67.709953,Afghanistan 5 | 3,41.153332,20.168331,Albania 6 | 4,28.033886,1.659626,Algeria 7 | 5,-14.270972,-170.132217,American Samoa 8 | 6,42.506285,1.521801,Andorra 9 | 7,-11.202692,17.873887,Angola 10 | 8,18.220554,-63.06861499999999,Anguilla 11 | 9,-82.862752,135.0,Antarctica 12 | 10,17.060816,-61.796428,Antigua & Barbuda 13 | 11,-38.416097,-63.61667199999999,Argentina 14 | 12,40.069099,45.038189,Armenia 15 | 13,12.52111,-69.968338,Aruba 16 | 14,-25.274398,133.775136,Australia 17 | 15,47.516231,14.550072,Austria 18 | 16,40.143105,47.576927,Azerbaijan 19 | 17,25.03428,-77.39627999999999,Bahamas 20 | 18,26.0667,50.5577,Bahrain 21 | 19,23.684994,90.356331,Bangladesh 22 | 20,13.193887,-59.543198,Barbados 23 | 21,53.709807,27.953389,Belarus 24 | 22,50.503887,4.469936,Belgium 25 | 23,17.189877,-88.49765,Belize 26 | 24,9.30769,2.315834,Benin 27 | 25,32.3078,-64.7505,Bermuda 28 | 26,27.514162,90.433601,Bhutan 29 | 27,-16.290154,-63.58865299999999,Bolivia 30 | 28,43.915886,17.679076,Bosnia 31 | 29,-22.328474,24.684866,Botswana 32 | 30,-54.4207915,3.3464497,Bouvet Island 33 | 31,-14.235004,-51.92528,Brazil 34 | 32,-6.343194,71.876519,British Indian Ocean Territory 35 | 33,18.420695,-64.639968,British Virgin Islands 36 | 34,4.535277,114.727669,Brunei 37 | 35,42.733883,25.48583,Bulgaria 38 | 36,12.238333,-1.561593,Burkina Faso 39 | 37,-3.373056,29.918886,Burundi 40 | 38,12.565679,104.990963,Cambodia 41 | 39,7.369721999999999,12.354722,Cameroon 42 | 
40,56.130366,-106.346771,Canada 43 | 41,16.5388,-23.0418,Cape Verde 44 | 42,12.1783611,-68.2385339,Caribbean Netherlands 45 | 43,19.3133,-81.2546,Cayman Islands 46 | 44,6.611110999999999,20.939444,Central African Republic 47 | 45,15.454166,18.732207,Chad 48 | 46,-35.675147,-71.542969,Chile 49 | 47,35.86166,104.195397,China 50 | 48,-10.447525,105.690449,Christmas Island 51 | 49,-12.164165,96.87095599999999,Cocos (Keeling) Islands 52 | 50,4.570868,-74.297333,Colombia 53 | 51,-11.6455,43.3333,Comoros 54 | 52,-0.228021,15.827659,Congo - Brazzaville 55 | 53,-4.038333,21.758664,Congo - Kinshasa 56 | 54,-21.236736,-159.777671,Cook Islands 57 | 55,9.748916999999999,-83.753428,Costa Rica 58 | 56,45.1,15.2000001,Croatia 59 | 57,21.521757,-77.781167,Cuba 60 | 58,12.16957,-68.99002,Curaçao 61 | 59,35.126413,33.429859,Cyprus 62 | 60,49.81749199999999,15.472962,Czech Republic 63 | 61,7.539988999999999,-5.547079999999999,Côte d’Ivoire 64 | 62,56.26392,9.501785,Denmark 65 | 63,11.825138,42.590275,Djibouti 66 | 64,15.414999,-61.37097600000001,Dominica 67 | 65,18.735693,-70.162651,Dominican Republic 68 | 66,-1.831239,-78.18340599999999,Ecuador 69 | 67,26.820553,30.802498,Egypt 70 | 68,13.794185,-88.89653,El Salvador 71 | 69,1.650801,10.267895,Equatorial Guinea 72 | 70,15.179384,39.782334,Eritrea 73 | 71,58.595272,25.0136071,Estonia 74 | 72,9.145000000000001,40.489673,Ethiopia 75 | 73,-51.796253,-59.523613,Falkland Islands 76 | 74,61.89263500000001,-6.9118061,Faroe Islands 77 | 75,-17.713371,178.065032,Fiji 78 | 76,61.92410999999999,25.7481511,Finland 79 | 77,46.227638,2.213749,France 80 | 78,3.933889,-53.125782,French Guiana 81 | 79,-17.679742,-149.406843,French Polynesia 82 | 80,-49.280366,69.3485571,French Southern Territories 83 | 81,-0.803689,11.609444,Gabon 84 | 82,13.443182,-15.310139,Gambia 85 | 83,32.1656221,-82.9000751,Georgia 86 | 84,51.165691,10.451526,Germany 87 | 85,7.946527,-1.023194,Ghana 88 | 86,36.140751,-5.353585,Gibraltar 89 | 87,39.074208,21.824312,Greece 90 | 
88,71.706936,-42.604303,Greenland 91 | 89,12.1165,-61.67899999999999,Grenada 92 | 90,16.265,-61.55099999999999,Guadeloupe 93 | 91,13.444304,144.793731,Guam 94 | 92,15.783471,-90.23075899999999,Guatemala 95 | 93,49.465691,-2.585278,Guernsey 96 | 94,9.945587,-9.696645,Guinea 97 | 95,11.803749,-15.180413,Guinea-Bissau 98 | 96,4.860416,-58.93018,Guyana 99 | 97,18.971187,-72.285215,Haiti 100 | 98,-53.08181,73.50415799999999,Heard & McDonald Islands 101 | 99,15.199999,-86.241905,Honduras 102 | 100,22.396428,114.109497,Hong Kong 103 | 101,47.162494,19.5033041,Hungary 104 | 102,64.963051,-19.020835,Iceland 105 | 103,20.593684,78.96288,India 106 | 104,-0.789275,113.921327,Indonesia 107 | 105,32.427908,53.688046,Iran 108 | 106,33.223191,43.679291,Iraq 109 | 107,53.1423672,-7.692053599999999,Ireland 110 | 108,54.236107,-4.548056,Isle of Man 111 | 109,31.046051,34.851612,Israel 112 | 110,41.87194,12.56738,Italy 113 | 111,18.109581,-77.297508,Jamaica 114 | 112,36.204824,138.252924,Japan 115 | 113,49.214439,-2.13125,Jersey 116 | 114,30.585164,36.238414,Jordan 117 | 115,48.019573,66.923684,Kazakhstan 118 | 116,-0.023559,37.906193,Kenya 119 | 117,-3.370417,-168.734039,Kiribati 120 | 118,29.31166,47.481766,Kuwait 121 | 119,41.20438,74.766098,Kyrgyzstan 122 | 120,19.85627,102.495496,Laos 123 | 121,56.879635,24.603189,Latvia 124 | 122,33.854721,35.862285,Lebanon 125 | 123,-29.609988,28.233608,Lesotho 126 | 124,6.428055,-9.429499000000002,Liberia 127 | 125,26.3351,17.228331,Libya 128 | 126,47.166,9.555373,Liechtenstein 129 | 127,55.169438,23.881275,Lithuania 130 | 128,49.815273,6.129582999999999,Luxembourg 131 | 129,22.198745,113.543873,Macau 132 | 130,41.608635,21.745275,Macedonia 133 | 131,-18.766947,46.869107,Madagascar 134 | 132,-13.254308,34.301525,Malawi 135 | 133,4.210484,101.975766,Malaysia 136 | 134,3.202778,73.22068,Maldives 137 | 135,17.570692,-3.996166,Mali 138 | 136,35.937496,14.375416,Malta 139 | 137,7.131474,171.184478,Marshall Islands 140 | 
138,14.641528,-61.024174,Martinique 141 | 139,21.00789,-10.940835,Mauritania 142 | 140,-20.348404,57.55215200000001,Mauritius 143 | 141,-12.8275,45.166244,Mayotte 144 | 142,23.634501,-102.552784,Mexico 145 | 143,7.425554,150.550812,Micronesia 146 | 144,47.411631,28.369885,Moldova 147 | 145,43.73841760000001,7.424615799999999,Monaco 148 | 146,46.862496,103.846656,Mongolia 149 | 147,42.708678,19.37439,Montenegro 150 | 148,16.742498,-62.187366,Montserrat 151 | 149,31.791702,-7.092619999999999,Morocco 152 | 150,-18.665695,35.529562,Mozambique 153 | 151,21.916221,95.955974,Myanmar 154 | 152,-22.95764,18.49041,Namibia 155 | 153,-0.522778,166.931503,Nauru 156 | 154,28.394857,84.12400799999999,Nepal 157 | 155,52.132633,5.291265999999999,Netherlands 158 | 156,-20.904305,165.618042,New Caledonia 159 | 157,-40.900557,174.885971,New Zealand 160 | 158,12.865416,-85.207229,Nicaragua 161 | 159,17.607789,8.081666,Niger 162 | 160,9.081999,8.675277,Nigeria 163 | 161,-19.054445,-169.867233,Niue 164 | 162,-29.040835,167.954712,Norfolk Island 165 | 163,40.339852,127.510093,North Korea 166 | 164,15.0979,145.6739,Northern Mariana Islands 167 | 165,60.47202399999999,8.468945999999999,Norway 168 | 166,21.4735329,55.975413,Oman 169 | 167,30.375321,69.34511599999999,Pakistan 170 | 168,7.514979999999999,134.58252,Palau 171 | 169,31.952162,35.233154,Palestine 172 | 170,8.537981,-80.782127,Panama 173 | 171,-6.314992999999999,143.95555,Papua New Guinea 174 | 172,-23.442503,-58.443832,Paraguay 175 | 173,-9.189967,-75.015152,Peru 176 | 174,12.879721,121.774017,Philippines 177 | 175,-24.3767537,-128.3242376,Pitcairn Islands 178 | 176,51.919438,19.145136,Poland 179 | 177,39.39987199999999,-8.224454,Portugal 180 | 178,18.220833,-66.590149,Puerto Rico 181 | 179,25.354826,51.183884,Qatar 182 | 180,45.943161,24.96676,Romania 183 | 181,61.52401,105.318756,Russia 184 | 182,-1.940278,29.873888,Rwanda 185 | 183,-21.115141,55.536384,Réunion 186 | 184,-13.759029,-172.104629,Samoa 187 | 
185,43.94236,12.457777,San Marino 188 | 186,23.885942,45.079162,Saudi Arabia 189 | 187,14.497401,-14.452362,Senegal 190 | 188,44.016521,21.005859,Serbia 191 | 189,-4.679574,55.491977,Seychelles 192 | 190,8.460555,-11.779889,Sierra Leone 193 | 191,1.352083,103.819836,Singapore 194 | 192,18.04248,-63.05483,Sint Maarten 195 | 193,48.669026,19.699024,Slovakia 196 | 194,46.151241,14.995463,Slovenia 197 | 195,-9.64571,160.156194,Solomon Islands 198 | 196,5.152149,46.199616,Somalia 199 | 197,-30.559482,22.937506,South Africa 200 | 198,-54.429579,-36.587909,South Georgia & South Sandwich Islands 201 | 199,35.907757,127.766922,South Korea 202 | 200,6.876991899999999,31.3069788,South Sudan 203 | 201,40.46366700000001,-3.74922,Spain 204 | 202,7.873053999999999,80.77179699999999,Sri Lanka 205 | 203,17.9,-62.833333,St. Barthélemy 206 | 204,-15.9650104,-5.7089241,St. Helena 207 | 205,17.357822,-62.782998,St. Kitts & Nevis 208 | 206,13.909444,-60.978893,St. Lucia 209 | 207,18.0708298,-63.0500809,St. Martin 210 | 208,46.8852,-56.3159,St. Pierre & Miquelon 211 | 209,12.984305,-61.287228,St. 
Vincent & Grenadines 212 | 210,12.862807,30.217636,Sudan 213 | 211,3.919305,-56.027783,Suriname 214 | 212,77.55360399999999,23.6702719,Svalbard & Jan Mayen 215 | 213,-26.522503,31.465866,Swaziland 216 | 214,60.12816100000001,18.643501,Sweden 217 | 215,46.818188,8.227511999999999,Switzerland 218 | 216,34.80207499999999,38.996815,Syria 219 | 217,0.18636,6.613080999999999,São Tomé & Príncipe 220 | 218,23.69781,120.960515,Taiwan 221 | 219,38.861034,71.276093,Tajikistan 222 | 220,-6.369028,34.888822,Tanzania 223 | 221,15.870032,100.992541,Thailand 224 | 222,-8.874217,125.727539,Timor-Leste 225 | 223,8.619543,0.824782,Togo 226 | 224,-9.200199999999999,-171.8484,Tokelau 227 | 225,-21.178986,-175.198242,Tonga 228 | 226,10.691803,-61.222503,Trinidad & Tobago 229 | 227,33.886917,9.537499,Tunisia 230 | 228,38.963745,35.243322,Turkey 231 | 229,38.969719,59.556278,Turkmenistan 232 | 230,21.694025,-71.797928,Turks & Caicos Islands 233 | 231,-7.109534999999999,177.64933,Tuvalu 234 | 232,19.2823192,166.647047,U.S. Outlying Islands 235 | 233,18.335765,-64.896335,U.S. 
Virgin Islands 236 | 234,55.378051,-3.435973,UK 237 | 235,40.7605367,-73.9788903,US 238 | 236,1.373333,32.290275,Uganda 239 | 237,48.379433,31.1655799,Ukraine 240 | 238,23.424076,53.847818,United Arab Emirates 241 | 239,-32.522779,-55.765835,Uruguay 242 | 240,41.377491,64.585262,Uzbekistan 243 | 241,-15.376706,166.959158,Vanuatu 244 | 242,41.902916,12.453389,Vatican City 245 | 243,6.42375,-66.58973,Venezuela 246 | 244,14.058324,108.277199,Vietnam 247 | 245,-14.2938,-178.1165,Wallis & Futuna 248 | 246,24.215527,-12.885834,Western Sahara 249 | 247,15.552727,48.516388,Yemen 250 | 248,-13.133897,27.849332,Zambia 251 | 249,-19.015438,29.154857,Zimbabwe 252 | 250,60.1785247,19.9156105,Åland Islands 253 | -------------------------------------------------------------------------------- /live15_covid/README: -------------------------------------------------------------------------------- 1 | Arquivos da live "Coronavírus: Prevendo Próximos Países a Descobrirem Casos Confirmados - Live de Data Science #15" 2 | https://youtu.be/zg_Y8qNCKto 3 | -------------------------------------------------------------------------------- /live15_covid/countries and continents.csv: -------------------------------------------------------------------------------- 1 | name,official_name_en,official_name_fr,ISO3166-1-Alpha-2,ISO3166-1-Alpha-3,M49,ITU,MARC,WMO,DS,Dial,FIFA,FIPS,GAUL,IOC,ISO4217-currency_alphabetic_code,ISO4217-currency_country_name,ISO4217-currency_minor_unit,ISO4217-currency_name,ISO4217-currency_numeric_code,is_independent,Capital,Continent,TLD,Languages,Geoname ID,EDGAR 2 | ,Channel Islands,Îles Anglo-Normandes,,,830,,,,,,,,,,,,,,,,,,,,, 3 | ,Sark,Sercq,,,680,,,,,,,,,,,,,,,,,,,,, 4 | Afghanistan,Afghanistan,Afghanistan,AF,AFG,4,AFG,af,AF,AFG,93,AFG,AF,1,AFG,AFN,AFGHANISTAN,2,Afghani,971,Yes,Kabul,AS,.af,"fa-AF,ps,uz-AF,tk",1149361,B2 5 | Albania,Albania,Albanie,AL,ALB,8,ALB,aa,AB,AL,355,ALB,AL,3,ALB,ALL,ALBANIA,2,Lek,8,Yes,Tirana,EU,.al,"sq,el",783754,B3 6 | 
Algeria,Algeria,Algérie,DZ,DZA,12,ALG,ae,AL,DZ,213,ALG,AG,4,ALG,DZD,ALGERIA,2,Algerian Dinar,12,Yes,Algiers,AF,.dz,ar-DZ,2589581,B4 7 | American Samoa,American Samoa,Samoa américaines,AS,ASM,16,SMA,as,,USA,1-684,ASA,AQ,5,ASA,USD,AMERICAN SAMOA,2,US Dollar,840,Territory of US,Pago Pago,OC,.as,"en-AS,sm,to",5880801,B5 8 | Andorra,Andorra,Andorre,AD,AND,20,AND,an,,AND,376,AND,AN,7,AND,EUR,ANDORRA,2,Euro,978,Yes,Andorra la Vella,EU,.ad,ca,3041565,B6 9 | Angola,Angola,Angola,AO,AGO,24,AGL,ao,AN,AO,244,ANG,AO,8,ANG,AOA,ANGOLA,2,Kwanza,973,Yes,Luanda,AF,.ao,pt-AO,3351879,B7 10 | Anguilla,Anguilla,Anguilla,AI,AIA,660,AIA,am,,,1-264,AIA,AV,9,AIA,XCD,ANGUILLA,2,East Caribbean Dollar,951,Territory of GB,The Valley,NA,.ai,en-AI,3573511,1A 11 | Antarctica,,,AQ,ATA,10,,ay,AA,,672,ROS,AY,10,,,,,,,International,,AN,.aq,,6697173, 12 | Antigua & Barbuda,Antigua and Barbuda,Antigua-et-Barbuda,AG,ATG,28,ATG,aq,AT,,1-268,ATG,AC,11,ANT,XCD,ANTIGUA AND BARBUDA,2,East Caribbean Dollar,951,Yes,St. John's,NA,.ag,en-AG,3576396,B9 13 | Argentina,Argentina,Argentine,AR,ARG,32,ARG,ag,AG,RA,54,ARG,AR,12,ARG,ARS,ARGENTINA,2,Argentine Peso,32,Yes,Buenos Aires,SA,.ar,"es-AR,en,it,de,fr,gn",3865483,C1 14 | Armenia,Armenia,Arménie,AM,ARM,51,ARM,ai,AY,AM,374,ARM,AM,13,ARM,AMD,ARMENIA,2,Armenian Dram,51,Yes,Yerevan,AS,.am,hy,174982,1B 15 | Aruba,Aruba,Aruba,AW,ABW,533,ABW,aw,NU,AW,297,ARU,AA,14,ARU,AWG,ARUBA,2,Aruban Florin,533,Part of NL,Oranjestad,NA,.aw,"nl-AW,es,en",3577279,1C 16 | Australia,Australia,Australie,AU,AUS,36,AUS,at,AU,AUS,61,AUS,AS,17,AUS,AUD,AUSTRALIA,2,Australian Dollar,36,Yes,Canberra,OC,.au,en-AU,2077456,C3 17 | Austria,Austria,Autriche,AT,AUT,40,AUT,au,OS,A,43,AUT,AU,18,AUT,EUR,AUSTRIA,2,Euro,978,Yes,Vienna,EU,.at,"de-AT,hr,hu,sl",2782113,C4 18 | Azerbaijan,Azerbaijan,Azerbaïdjan,AZ,AZE,31,AZE,aj,AJ,AZ,994,AZE,AJ,19,AZE,AZN,AZERBAIJAN,2,Azerbaijanian Manat,944,Yes,Baku,AS,.az,"az,ru,hy",587116,1D 19 | 
Bahamas,Bahamas,Bahamas,BS,BHS,44,BAH,bf,BA,BS,1-242,BAH,BF,20,BAH,BSD,BAHAMAS,2,Bahamian Dollar,44,Yes,Nassau,NA,.bs,en-BS,3572887,C5 20 | Bahrain,Bahrain,Bahreïn,BH,BHR,48,BHR,ba,BN,BRN,973,BHR,BA,21,BRN,BHD,BAHRAIN,3,Bahraini Dinar,48,Yes,Manama,AS,.bh,"ar-BH,en,fa,ur",290291,C6 21 | Bangladesh,Bangladesh,Bangladesh,BD,BGD,50,BGD,bg,BW,BD,880,BAN,BG,23,BAN,BDT,BANGLADESH,2,Taka,50,Yes,Dhaka,AS,.bd,"bn-BD,en",1210997,C7 22 | Barbados,Barbados,Barbade,BB,BRB,52,BRB,bb,BR,BDS,1-246,BRB,BB,24,BAR,BBD,BARBADOS,2,Barbados Dollar,52,Yes,Bridgetown,NA,.bb,en-BB,3374084,C8 23 | Belarus,Belarus,Bélarus,BY,BLR,112,BLR,bw,BY,BY,375,BLR,BO,26,BLR,BYR,BELARUS,0,Belarussian Ruble,974,Yes,Minsk,EU,.by,"be,ru",630336,1F 24 | Belgium,Belgium,Belgique,BE,BEL,56,BEL,be,BX,B,32,BEL,BE,27,BEL,EUR,BELGIUM,2,Euro,978,Yes,Brussels,EU,.be,"nl-BE,fr-BE,de-BE",2802361,C9 25 | Belize,Belize,Belize,BZ,BLZ,84,BLZ,bh,BH,BH,501,BLZ,BH,28,BIZ,BZD,BELIZE,2,Belize Dollar,84,Yes,Belmopan,NA,.bz,"en-BZ,es",3582678,D1 26 | Benin,Benin,Bénin,BJ,BEN,204,BEN,dm,BJ,DY,229,BEN,BN,29,BEN,XOF,BENIN,0,CFA Franc BCEAO,952,Yes,Porto-Novo,AF,.bj,fr-BJ,2395170,G6 27 | Bermuda,Bermuda,Bermudes,BM,BMU,60,BER,bm,BE,BM,1-441,BER,BD,30,BER,BMD,BERMUDA,2,Bermudian Dollar,60,Territory of GB,Hamilton,NA,.bm,"en-BM,pt",3573345,D0 28 | Bhutan,Bhutan,Bhoutan,BT,BTN,64,BTN,bt,,BT,975,BHU,BT,31,BHU,INR,BHUTAN,2,Indian Rupee,356,Yes,Thimphu,AS,.bt,dz,1252634,D2 29 | Bolivia,Bolivia (Plurinational State of),Bolivie (État plurinational de),BO,BOL,68,BOL,bo,BO,BOL,591,BOL,BL,33,BOL,BOB,"BOLIVIA, PLURINATIONAL STATE OF",2,Boliviano,68,Yes,Sucre,SA,.bo,"es-BO,qu,ay",3923057, 30 | Bosnia,Bosnia and Herzegovina,Bosnie-Herzégovine,BA,BIH,70,BIH,bn,BG,BIH,387,BIH,BK,34,BIH,BAM,BOSNIA AND HERZEGOVINA,2,Convertible Mark,977,Yes,Sarajevo,EU,.ba,"bs,hr-BA,sr-BA",3277605,1E 31 | Botswana,Botswana,Botswana,BW,BWA,72,BOT,bs,BC,BW,267,BOT,BC,35,BOT,BWP,BOTSWANA,2,Pula,72,Yes,Gaborone,AF,.bw,"en-BW,tn-BW",933860,B1 32 | Bouvet 
Island,,,BV,BVT,74,,bv,BV,BV,47,,BV,36,,,,,,,Territory of NO,,AN,.bv,,3371123, 33 | Brazil,Brazil,Brésil,BR,BRA,76,B,bl,BZ,BR,55,BRA,BR,37,BRA,BRL,BRAZIL,2,Brazilian Real,986,Yes,Brasilia,SA,.br,"pt-BR,es,en,fr",3469034,D5 34 | British Indian Ocean Territory,,,IO,IOT,86,BIO,bi,,,246,,IO,38,,,,,,,Territory of GB,Diego Garcia,AS,.io,en-IO,1282588, 35 | British Virgin Islands,British Virgin Islands,Îles Vierges britanniques,VG,VGB,92,VRG,vb,VI,BVI,1-284,VGB,VI,39,IVB,USD,VIRGIN ISLANDS (BRITISH),2,US Dollar,840,Territory of GB,Road Town,NA,.vg,en-VG,3577718, 36 | Brunei,Brunei Darussalam,Brunéi Darussalam,BN,BRN,96,BRU,bx,BD,BRU,673,BRU,BX,40,BRU,BND,BRUNEI DARUSSALAM,2,Brunei Dollar,96,Yes,Bandar Seri Begawan,AS,.bn,"ms-BN,en-BN",1820814,D9 37 | Bulgaria,Bulgaria,Bulgarie,BG,BGR,100,BUL,bu,BU,BG,359,BUL,BU,41,BUL,BGN,BULGARIA,2,Bulgarian Lev,975,Yes,Sofia,EU,.bg,"bg,tr-BG,rom",732800,E0 38 | Burkina Faso,Burkina Faso,Burkina Faso,BF,BFA,854,BFA,uv,HV,BF,226,BFA,UV,42,BUR,XOF,BURKINA FASO,0,CFA Franc BCEAO,952,Yes,Ouagadougou,AF,.bf,fr-BF,2361809,X2 39 | Burundi,Burundi,Burundi,BI,BDI,108,BDI,bd,BI,RU,257,BDI,BY,43,BDI,BIF,BURUNDI,0,Burundi Franc,108,Yes,Bujumbura,AF,.bi,"fr-BI,rn",433561,E2 40 | Cambodia,Cambodia,Cambodge,KH,KHM,116,CBG,cb,KP,K,855,CAM,CB,44,CAM,KHR,CAMBODIA,2,Riel,116,Yes,Phnom Penh,AS,.kh,"km,fr,en",1831722,E3 41 | Cameroon,Cameroon,Cameroun,CM,CMR,120,CME,cm,CM,CAM,237,CMR,CM,45,CMR,XAF,CAMEROON,0,CFA Franc BEAC,950,Yes,Yaounde,AF,.cm,"en-CM,fr-CM",2233387,E4 42 | Canada,Canada,Canada,CA,CAN,124,CAN,xxc,CN,CDN,1,CAN,CA,46,CAN,CAD,CANADA,2,Canadian Dollar,124,Yes,Ottawa,NA,.ca,"en-CA,fr-CA,iu",6251999, 43 | Cape Verde,Cabo Verde,Cabo Verde,CV,CPV,132,CPV,cv,CV,CV,238,CPV,CV,47,CPV,CVE,CABO VERDE,2,Cabo Verde Escudo,132,Yes,Praia,AF,.cv,pt-CV,3374766, 44 | Caribbean Netherlands,"Bonaire, Sint Eustatius and Saba","Bonaire, Saint-Eustache et Saba",BQ,BES,535,ATN,ca,NU,NA,599,ANT,NL,176,AHO,USD,"BONAIRE, SINT EUSTATIUS AND SABA",2,US Dollar,840,Part of 
NL,,NA,.bq,"nl,pap,en",7626844, 45 | Cayman Islands,Cayman Islands,Îles Caïmanes,KY,CYM,136,CYM,cj,GC,KY,1-345,CAY,CJ,48,CAY,KYD,CAYMAN ISLANDS,2,Cayman Islands Dollar,136,Territory of GB,George Town,NA,.ky,en-KY,3580718,E9 46 | Central African Republic,Central African Republic,République centrafricaine,CF,CAF,140,CAF,cx,CE,RCA,236,CTA,CT,49,CAF,XAF,CENTRAL AFRICAN REPUBLIC,0,CFA Franc BEAC,950,Yes,Bangui,AF,.cf,"fr-CF,sg,ln,kg",239880,F0 47 | Chad,Chad,Tchad,TD,TCD,148,TCD,cd,CD,TCH,235,CHA,CD,50,CHA,XAF,CHAD,0,CFA Franc BEAC,950,Yes,N'Djamena,AF,.td,"fr-TD,ar-TD,sre",2434508,F2 48 | Chile,Chile,Chili,CL,CHL,152,CHL,cl,CH,RCH,56,CHI,CI,51,CHI,CLP,CHILE,0,Chilean Peso,152,Yes,Santiago,SA,.cl,es-CL,3895114,F3 49 | China,China,Chine,CN,CHN,156,CHN,cc,CI,CN,86,CHN,CH,53,CHN,CNY,CHINA,2,Yuan Renminbi,156,Yes,Beijing,AS,.cn,"zh-CN,yue,wuu,dta,ug,za",1814991,F4 50 | Christmas Island,,,CX,CXR,162,CHR,xa,KI,AUS,61,CXR,KT,54,,,,,,,Territory of AU,Flying Fish Cove,AS,.cx,"en,zh,ms-CC",2078138, 51 | Cocos (Keeling) Islands,,,CC,CCK,166,ICO,xb,KK,AUS,61,CCK,CK,56,,,,,,,Territory of AU,West Island,AS,.cc,"ms-CC,en",1547376, 52 | Colombia,Colombia,Colombie,CO,COL,170,CLM,ck,CO,CO,57,COL,CO,57,COL,COP,COLOMBIA,2,Colombian Peso,170,Yes,Bogota,SA,.co,es-CO,3686110,F8 53 | Comoros,Comoros,Comores,KM,COM,174,COM,cq,IC,KM,269,COM,CN,58,COM,KMF,COMOROS,0,Comoro Franc,174,Yes,Moroni,AF,.km,"ar,fr-KM",921929,F9 54 | Congo - Brazzaville,Congo,Congo,CG,COG,178,COG,cf,CG,RCB,242,CGO,CF,59,CGO,XAF,CONGO,0,CFA Franc BEAC,950,Yes,Brazzaville,AF,.cg,"fr-CG,kg,ln-CG",2260494,G0 55 | Congo - Kinshasa,Democratic Republic of the Congo,République démocratique du Congo,CD,COD,180,COD,cg,ZR,ZRE,243,COD,CG,68,COD,,,,,,Yes,Kinshasa,AF,.cd,"fr-CD,ln,kg",203312, 56 | Cook Islands,Cook Islands,Îles Cook,CK,COK,184,CKH,cw,KU,NZ,682,COK,CW,60,COK,NZD,COOK ISLANDS,2,New Zealand Dollar,554,Associated with NZ,Avarua,OC,.ck,"en-CK,mi",1899402,G1 57 | Costa Rica,Costa Rica,Costa 
Rica,CR,CRI,188,CTR,cr,CS,CR,506,CRC,CS,61,CRC,CRC,COSTA RICA,2,Costa Rican Colon,188,Yes,San Jose,NA,.cr,"es-CR,en",3624060,G2 58 | Croatia,Croatia,Croatie,HR,HRV,191,HRV,ci,RH,HR,385,CRO,HR,62,CRO,HRK,CROATIA,2,Croatian Kuna,191,Yes,Zagreb,EU,.hr,"hr-HR,sr",3202326,1M 59 | Cuba,Cuba,Cuba,CU,CUB,192,CUB,cu,CU,C,53,CUB,CU,63,CUB,CUP,CUBA,2,Cuban Peso,192,Yes,Havana,NA,.cu,es-CU,3562981,G3 60 | Curaçao,Curaçao,Curaçao,CW,CUW,531,,co,,,599,,UC,,,ANG,CURAÇAO,2,Netherlands Antillean Guilder,532,Part of NL,Willemstad,NA,.cw,"nl,pap",7626836, 61 | Cyprus,Cyprus,Chypre,CY,CYP,196,CYP,cy,CY,CY,357,CYP,CY,64,CYP,EUR,CYPRUS,2,Euro,978,Yes,Nicosia,EU,.cy,"el-CY,tr-CY,en",146669,G4 62 | Czech Republic,Czechia,Tchéquie,CZ,CZE,203,CZE,xr,CZ,CZ,420,CZE,EZ,65,CZE,,,,,,Yes,Prague,EU,.cz,"cs,sk",3077311, 63 | Côte d’Ivoire,Côte d'Ivoire,Côte d'Ivoire,CI,CIV,384,CTI,iv,IV,CI,225,CIV,IV,66,CIV,XOF,CÔTE D'IVOIRE,0,CFA Franc BCEAO,952,Yes,Yamoussoukro,AF,.ci,fr-CI,2287781, 64 | Denmark,Denmark,Danemark,DK,DNK,208,DNK,dk,DN,DK,45,DEN,DA,69,DEN,DKK,DENMARK,2,Danish Krone,208,Yes,Copenhagen,EU,.dk,"da-DK,en,fo,de-DK",2623032,G7 65 | Djibouti,Djibouti,Djibouti,DJ,DJI,262,DJI,ft,DJ,F,253,DJI,DJ,70,DJI,DJF,DJIBOUTI,0,Djibouti Franc,262,Yes,Djibouti,AF,.dj,"fr-DJ,ar,so-DJ,aa",223816,1G 66 | Dominica,Dominica,Dominique,DM,DMA,212,DMA,dq,DO,WD,1-767,DMA,DO,71,DMA,XCD,DOMINICA,2,East Caribbean Dollar,951,Yes,Roseau,NA,.dm,en-DM,3575830,G9 67 | Dominican Republic,Dominican Republic,République dominicaine,DO,DOM,214,DOM,dr,DR,DOM,"1-809,1-829,1-849",DOM,DR,72,DOM,DOP,DOMINICAN REPUBLIC,2,Dominican Peso,214,Yes,Santo Domingo,NA,.do,es-DO,3508796,G8 68 | Ecuador,Ecuador,Équateur,EC,ECU,218,EQA,ec,EQ,EC,593,ECU,EC,73,ECU,USD,ECUADOR,2,US Dollar,840,Yes,Quito,SA,.ec,es-EC,3658394,H1 69 | Egypt,Egypt,Égypte,EG,EGY,818,EGY,ua,EG,ET,20,EGY,EG,40765,EGY,EGP,EGYPT,2,Egyptian Pound,818,Yes,Cairo,AF,.eg,"ar-EG,en,fr",357994,H2 70 | El Salvador,El Salvador,El 
Salvador,SV,SLV,222,SLV,es,ES,ES,503,SLV,ES,75,ESA,USD,EL SALVADOR,2,US Dollar,840,Yes,San Salvador,NA,.sv,es-SV,3585968,H3 71 | Equatorial Guinea,Equatorial Guinea,Guinée équatoriale,GQ,GNQ,226,GNE,eg,GQ,EQ,240,EQG,EK,76,GEQ,XAF,EQUATORIAL GUINEA,0,CFA Franc BEAC,950,Yes,Malabo,AF,.gq,"es-GQ,fr",2309096,H4 72 | Eritrea,Eritrea,Érythrée,ER,ERI,232,ERI,ea,,ER,291,ERI,ER,77,ERI,ERN,ERITREA,2,Nakfa,232,Yes,Asmara,AF,.er,"aa-ER,ar,tig,kun,ti-ER",338010,1J 73 | Estonia,Estonia,Estonie,EE,EST,233,EST,er,EO,EST,372,EST,EN,78,EST,EUR,ESTONIA,2,Euro,978,Yes,Tallinn,EU,.ee,"et,ru",453733,1H 74 | Ethiopia,Ethiopia,Éthiopie,ET,ETH,231,ETH,et,ET,ETH,251,ETH,ET,79,ETH,ETB,ETHIOPIA,2,Ethiopian Birr,230,Yes,Addis Ababa,AF,.et,"am,en-ET,om-ET,ti-ET,so-ET,sid",337996,H5 75 | Falkland Islands,Falkland Islands (Malvinas),Îles Falkland (Malvinas),FK,FLK,238,FLK,fk,FK,,500,FLK,FK,81,FLK,FKP,FALKLAND ISLANDS (MALVINAS),2,Falkland Islands Pound,238,Territory of GB,Stanley,SA,.fk,en-FK,3474414,H7 76 | Faroe Islands,Faeroe Islands,Îles Féroé,FO,FRO,234,FRO,fa,FA,FO,298,FRO,FO,82,FAR,,,,,,Part of DK,Torshavn,EU,.fo,"fo,da-FO",2622320, 77 | Fiji,Fiji,Fidji,FJ,FJI,242,FJI,fj,FJ,FJI,679,FIJ,FJ,83,FIJ,FJD,FIJI,2,Fiji Dollar,242,Yes,Suva,OC,.fj,"en-FJ,fj",2205218,H8 78 | Finland,Finland,Finlande,FI,FIN,246,FIN,fi,FI,FIN,358,FIN,FI,84,FIN,EUR,FINLAND,2,Euro,978,Yes,Helsinki,EU,.fi,"fi-FI,sv-FI,smn",660013,H9 79 | France,France,France,FR,FRA,250,F,fr,FR,F,33,FRA,FR,85,FRA,EUR,FRANCE,2,Euro,978,Yes,Paris,EU,.fr,"fr-FR,frp,br,co,ca,eu,oc",3017382,I0 80 | French Guiana,French Guiana,Guyane française,GF,GUF,254,GUF,fg,FG,F,594,GUF,FG,86,FGU,EUR,FRENCH GUIANA,2,Euro,978,Part of FR,Cayenne,SA,.gf,fr-GF,3381670,I3 81 | French Polynesia,French Polynesia,Polynésie française,PF,PYF,258,OCE,fp,PF,F,689,TAH,FP,87,FPO,XPF,FRENCH POLYNESIA,0,CFP Franc,953,Territory of FR,Papeete,OC,.pf,"fr-PF,ty",4030656,I4 82 | French Southern Territories,,,TF,ATF,260,,fs,,F,262,,FS,88,,,,,,,Territory of 
FR,Port-aux-Francais,AN,.tf,fr,1546748, 83 | Gabon,Gabon,Gabon,GA,GAB,266,GAB,go,GO,G,241,GAB,GB,89,GAB,XAF,GABON,0,CFA Franc BEAC,950,Yes,Libreville,AF,.ga,fr-GA,2400553,I5 84 | Gambia,Gambia,Gambie,GM,GMB,270,GMB,gm,GB,WAG,220,GAM,GA,90,GAM,GMD,GAMBIA,2,Dalasi,270,Yes,Banjul,AF,.gm,"en-GM,mnk,wof,wo,ff",2413451,I6 85 | Georgia,Georgia,Géorgie,GE,GEO,268,GEO,gs,GG,GE,995,GEO,GG,92,GEO,GEL,GEORGIA,2,Lari,981,Yes,Tbilisi,AS,.ge,"ka,ru,hy,az",614540,2Q 86 | Germany,Germany,Allemagne,DE,DEU,276,D,gw,DL,D,49,GER,GM,93,GER,EUR,GERMANY,2,Euro,978,Yes,Berlin,EU,.de,de,2921044,2M 87 | Ghana,Ghana,Ghana,GH,GHA,288,GHA,gh,GH,GH,233,GHA,GH,94,GHA,GHS,GHANA,2,Ghana Cedi,936,Yes,Accra,AF,.gh,"en-GH,ak,ee,tw",2300660,J0 88 | Gibraltar,Gibraltar,Gibraltar,GI,GIB,292,GIB,gi,GI,GBZ,350,GBZ,GI,95,GIB,GIP,GIBRALTAR,2,Gibraltar Pound,292,Territory of GB,Gibraltar,EU,.gi,"en-GI,es,it,pt",2411586,J1 89 | Greece,Greece,Grèce,GR,GRC,300,GRC,gr,GR,GR,30,GRE,GR,97,GRE,EUR,GREECE,2,Euro,978,Yes,Athens,EU,.gr,"el-GR,en,fr",390903,J3 90 | Greenland,Greenland,Groenland,GL,GRL,304,GRL,gl,GL,DK,299,GRL,GL,98,GRL,DKK,GREENLAND,2,Danish Krone,208,Part of DK,Nuuk,NA,.gl,"kl,da-GL,en",3425505,J4 91 | Grenada,Grenada,Grenade,GD,GRD,308,GRD,gd,GD,WG,1-473,GRN,GJ,99,GRN,XCD,GRENADA,2,East Caribbean Dollar,951,Yes,St. 
George's,NA,.gd,en-GD,3580239,J5 92 | Guadeloupe,Guadeloupe,Guadeloupe,GP,GLP,312,GDL,gp,MF,F,590,GLP,GP,100,GUD,EUR,GUADELOUPE,2,Euro,978,Part of FR,Basse-Terre,NA,.gp,fr-GP,3579143,J6 93 | Guam,Guam,Guam,GU,GUM,316,GUM,gu,GM,USA,1-671,GUM,GQ,101,GUM,USD,GUAM,2,US Dollar,840,Territory of US,Hagatna,OC,.gu,"en-GU,ch-GU",4043988,GU 94 | Guatemala,Guatemala,Guatemala,GT,GTM,320,GTM,gt,GU,GCA,502,GUA,GT,103,GUA,GTQ,GUATEMALA,2,Quetzal,320,Yes,Guatemala City,NA,.gt,es-GT,3595528,J8 95 | Guernsey,Guernsey,Guernesey,GG,GGY,831,,uik,,GBG,44,GBG,GK,104,,GBP,GUERNSEY,2,Pound Sterling,826,Crown dependency of GB,St Peter Port,EU,.gg,"en,fr",3042362,Y7 96 | Guinea,Guinea,Guinée,GN,GIN,324,GUI,gv,GN,RG,224,GUI,GV,106,GUI,GNF,GUINEA,0,Guinea Franc,324,Yes,Conakry,AF,.gn,fr-GN,2420477,J9 97 | Guinea-Bissau,Guinea-Bissau,Guinée-Bissau,GW,GNB,624,GNB,pg,GW,GW,245,GNB,PU,105,GBS,XOF,GUINEA-BISSAU,0,CFA Franc BCEAO,952,Yes,Bissau,AF,.gw,"pt-GW,pov",2372248,S0 98 | Guyana,Guyana,Guyana,GY,GUY,328,GUY,gy,GY,GUY,592,GUY,GY,107,GUY,GYD,GUYANA,2,Guyana Dollar,328,Yes,Georgetown,SA,.gy,en-GY,3378535,K0 99 | Haiti,Haiti,Haïti,HT,HTI,332,HTI,ht,HA,RH,509,HAI,HA,108,HAI,USD,HAITI,2,US Dollar,840,Yes,Port-au-Prince,NA,.ht,"ht,fr-HT",3723988,K1 100 | Heard & McDonald Islands,,,HM,HMD,334,,hm,,AUS,672,,HM,109,,,,,,,Territory of AU,,AN,.hm,,1547314, 101 | Honduras,Honduras,Honduras,HN,HND,340,HND,ho,HO,,504,HON,HO,111,HON,HNL,HONDURAS,2,Lempira,340,Yes,Tegucigalpa,NA,.hn,es-HN,3608932,K2 102 | Hong Kong,"China, Hong Kong Special Administrative Region","Chine, région administrative spéciale de Hong Kong",HK,HKG,344,HKG,,HK,HK,852,HKG,HK,33364,HKG,,,,,,Part of CN,Hong Kong,AS,.hk,"zh-HK,yue,zh,en",1819730, 103 | Hungary,Hungary,Hongrie,HU,HUN,348,HNG,hu,HU,H,36,HUN,HU,113,HUN,HUF,HUNGARY,2,Forint,348,Yes,Budapest,EU,.hu,hu-HU,719819,K5 104 | Iceland,Iceland,Islande,IS,ISL,352,ISL,ic,IL,IS,354,ISL,IC,114,ISL,ISK,ICELAND,0,Iceland Krona,352,Yes,Reykjavik,EU,.is,"is,en,de,da,sv,no",2629691,K6 105 | 
India,India,Inde,IN,IND,356,IND,ii,IN,IND,91,IND,IN,115,IND,INR,INDIA,2,Indian Rupee,356,Yes,New Delhi,AS,.in,"en-IN,hi,bn,te,mr,ta,ur,gu,kn,ml,or,pa,as,bh,sat,ks,ne,sd,kok,doi,mni,sit,sa,fr,lus,inc",1269750,K7 106 | Indonesia,Indonesia,Indonésie,ID,IDN,360,INS,io,ID,RI,62,IDN,ID,116,INA,IDR,INDONESIA,2,Rupiah,360,Yes,Jakarta,AS,.id,"id,en,nl,jv",1643084,K8 107 | Iran,Iran (Islamic Republic of),Iran (République islamique d'),IR,IRN,364,IRN,ir,IR,IR,98,IRN,IR,117,IRI,IRR,"IRAN, ISLAMIC REPUBLIC OF",2,Iranian Rial,364,Yes,Tehran,AS,.ir,"fa-IR,ku",130758,K9 108 | Iraq,Iraq,Iraq,IQ,IRQ,368,IRQ,iq,IQ,IRQ,964,IRQ,IZ,118,IRQ,IQD,IRAQ,3,Iraqi Dinar,368,Yes,Baghdad,AS,.iq,"ar-IQ,ku,hy",99237,L0 109 | Ireland,Ireland,Irlande,IE,IRL,372,IRL,ie,IE,IRL,353,IRL,EI,119,IRL,EUR,IRELAND,2,Euro,978,Yes,Dublin,EU,.ie,"en-IE,ga-IE",2963597,L2 110 | Isle of Man,Isle of Man,Île de Man,IM,IMN,833,,uik,,GBM,44,GBM,IM,120,,GBP,ISLE OF MAN,2,Pound Sterling,826,Crown dependency of GB,Douglas,EU,.im,"en,gv",3042225,Y8 111 | Israel,Israel,Israël,IL,ISR,376,ISR,is,IS,IL,972,ISR,IS,121,ISR,ILS,ISRAEL,2,New Israeli Sheqel,376,Yes,Jerusalem,AS,.il,"he,ar-IL,en-IL,",294640,L3 112 | Italy,Italy,Italie,IT,ITA,380,I,it,IY,I,39,ITA,IT,122,ITA,EUR,ITALY,2,Euro,978,Yes,Rome,EU,.it,"it-IT,de-IT,fr-IT,sc,ca,co,sl",3175395,L6 113 | Jamaica,Jamaica,Jamaïque,JM,JAM,388,JMC,jm,JM,JA,1-876,JAM,JM,123,JAM,JMD,JAMAICA,2,Jamaican Dollar,388,Yes,Kingston,NA,.jm,en-JM,3489940,L8 114 | Japan,Japan,Japon,JP,JPN,392,J,ja,JP,J,81,JPN,JA,126,JPN,JPY,JAPAN,0,Yen,392,Yes,Tokyo,AS,.jp,ja,1861060,M0 115 | Jersey,Jersey,Jersey,JE,JEY,832,,uik,,GBJ,44,GBJ,JE,128,,GBP,JERSEY,2,Pound Sterling,826,Crown dependency of GB,Saint Helier,EU,.je,"en,pt",3042142,Y9 116 | Jordan,Jordan,Jordanie,JO,JOR,400,JOR,jo,JD,HKJ,962,JOR,JO,130,JOR,JOD,JORDAN,3,Jordanian Dinar,400,Yes,Amman,AS,.jo,"ar-JO,en",248816,M2 117 | 
Kazakhstan,Kazakhstan,Kazakhstan,KZ,KAZ,398,KAZ,kz,KZ,KZ,7,KAZ,KZ,132,KAZ,KZT,KAZAKHSTAN,2,Tenge,398,Yes,Astana,AS,.kz,"kk,ru",1522867, 118 | Kenya,Kenya,Kenya,KE,KEN,404,KEN,ke,KN,EAK,254,KEN,KE,133,KEN,KES,KENYA,2,Kenyan Shilling,404,Yes,Nairobi,AF,.ke,"en-KE,sw-KE",192950,M3 119 | Kiribati,Kiribati,Kiribati,KI,KIR,296,KIR,gb,KB,,686,KIR,KR,135,KIR,AUD,KIRIBATI,2,Australian Dollar,36,Yes,Tarawa,OC,.ki,"en-KI,gil",4030945,J2 120 | Kuwait,Kuwait,Koweït,KW,KWT,414,KWT,ku,KW,KWT,965,KUW,KU,137,KUW,KWD,KUWAIT,3,Kuwaiti Dinar,414,Yes,Kuwait City,AS,.kw,"ar-KW,en",285570,M6 121 | Kyrgyzstan,Kyrgyzstan,Kirghizistan,KG,KGZ,417,KGZ,kg,KG,KS,996,KGZ,KG,138,KGZ,KGS,KYRGYZSTAN,2,Som,417,Yes,Bishkek,AS,.kg,"ky,uz,ru",1527747,1N 122 | Laos,Lao People's Democratic Republic,République démocratique populaire lao,LA,LAO,418,LAO,ls,LA,LAO,856,LAO,LA,139,LAO,LAK,LAO PEOPLE’S DEMOCRATIC REPUBLIC,2,Kip,418,Yes,Vientiane,AS,.la,"lo,fr,en",1655842, 123 | Latvia,Latvia,Lettonie,LV,LVA,428,LVA,lv,LV,LV,371,LVA,LG,140,LAT,EUR,LATVIA,2,Euro,978,Yes,Riga,EU,.lv,"lv,ru,lt",458258,1R 124 | Lebanon,Lebanon,Liban,LB,LBN,422,LBN,le,LB,RL,961,LIB,LE,141,LIB,LBP,LEBANON,2,Lebanese Pound,422,Yes,Beirut,AS,.lb,"ar-LB,fr-LB,en,hy",272103,M8 125 | Lesotho,Lesotho,Lesotho,LS,LSO,426,LSO,lo,LS,LS,266,LES,LT,142,LES,ZAR,LESOTHO,2,Rand,710,Yes,Maseru,AF,.ls,"en-LS,st,zu,xh",932692,M9 126 | Liberia,Liberia,Libéria,LR,LBR,430,LBR,lb,LI,LB,231,LBR,LI,144,LBR,LRD,LIBERIA,2,Liberian Dollar,430,Yes,Monrovia,AF,.lr,en-LR,2275384,N0 127 | Libya,Libya,Libye,LY,LBY,434,LBY,ly,LY,LAR,218,LBY,LY,145,LBA,LYD,LIBYA,3,Libyan Dinar,434,Yes,Tripoli,AF,.ly,"ar-LY,it,en",2215636, 128 | Liechtenstein,Liechtenstein,Liechtenstein,LI,LIE,438,LIE,lh,,FL,423,LIE,LS,146,LIE,CHF,LIECHTENSTEIN,2,Swiss Franc,756,Yes,Vaduz,EU,.li,de-LI,3042058,N2 129 | Lithuania,Lithuania,Lituanie,LT,LTU,440,LTU,li,LT,LT,370,LTU,LH,147,LTU,EUR,LITHUANIA,2,Euro,978,Yes,Vilnius,EU,.lt,"lt,ru,pl",597427,1Q 130 | 
Luxembourg,Luxembourg,Luxembourg,LU,LUX,442,LUX,lu,BX,L,352,LUX,LU,148,LUX,EUR,LUXEMBOURG,2,Euro,978,Yes,Luxembourg,EU,.lu,"lb,de-LU,fr-LU",2960313,N4 131 | Macau,"China, Macao Special Administrative Region","Chine, région administrative spéciale de Macao",MO,MAC,446,MAC,,MU,MO,853,MAC,MC,149,MAC,MOP,MACAO,2,Pataca,446,Part of CN,Macao,AS,.mo,"zh,zh-MO,pt",1821275, 132 | Macedonia,The former Yugoslav Republic of Macedonia,Ex-République yougoslave de Macédoine,MK,MKD,807,MKD,xn,MJ,MK,389,MKD,MK,241,MKD,MKD,"MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF",2,Denar,807,Yes,Skopje,EU,.mk,"mk,sq,tr,rmm,sr",718075,1U 133 | Madagascar,Madagascar,Madagascar,MG,MDG,450,MDG,mg,MG,RM,261,MAD,MA,150,MAD,MGA,MADAGASCAR,2,Malagasy Ariary,969,Yes,Antananarivo,AF,.mg,"fr-MG,mg",1062947,N6 134 | Malawi,Malawi,Malawi,MW,MWI,454,MWI,mw,MW,MW,265,MWI,MI,152,MAW,MWK,MALAWI,2,Kwacha,454,Yes,Lilongwe,AF,.mw,"ny,yao,tum,swk",927384,N7 135 | Malaysia,Malaysia,Malaisie,MY,MYS,458,MLA,my,MS,MAL,60,MAS,MY,153,MAS,MYR,MALAYSIA,2,Malaysian Ringgit,458,Yes,Kuala Lumpur,AS,.my,"ms-MY,en,zh,ta,te,ml,pa,th",1733045,N8 136 | Maldives,Maldives,Maldives,MV,MDV,462,MLD,xc,MV,MV,960,MDV,MV,154,MDV,MVR,MALDIVES,2,Rufiyaa,462,Yes,Male,AS,.mv,"dv,en",1282028,N9 137 | Mali,Mali,Mali,ML,MLI,466,MLI,ml,MI,RMM,223,MLI,ML,155,MLI,XOF,MALI,0,CFA Franc BCEAO,952,Yes,Bamako,AF,.ml,"fr-ML,bm",2453866,O0 138 | Malta,Malta,Malte,MT,MLT,470,MLT,mm,ML,M,356,MLT,MT,156,MLT,EUR,MALTA,2,Euro,978,Yes,Valletta,EU,.mt,"mt,en-MT",2562770,O1 139 | Marshall Islands,Marshall Islands,Îles Marshall,MH,MHL,584,MHL,xe,MH,,692,MHL,RM,157,MSH,USD,MARSHALL ISLANDS,2,US Dollar,840,Yes,Majuro,OC,.mh,"mh,en-MH",2080185,1T 140 | Martinique,Martinique,Martinique,MQ,MTQ,474,MRT,mq,MR,F,596,MTQ,MB,158,MRT,EUR,MARTINIQUE,2,Euro,978,Part of FR,Fort-de-France,NA,.mq,fr-MQ,3570311,O2 141 | Mauritania,Mauritania,Mauritanie,MR,MRT,478,MTN,mu,MT,RIM,222,MTN,MR,159,MTN,MRO,MAURITANIA,2,Ouguiya,478,Yes,Nouakchott,AF,.mr,"ar-MR,fuc,snk,fr,mey,wo",2378080,O3 
142 | Mauritius,Mauritius,Maurice,MU,MUS,480,MAU,mf,MA,MS,230,MRI,MP,160,MRI,MUR,MAURITIUS,2,Mauritius Rupee,480,Yes,Port Louis,AF,.mu,"en-MU,bho,fr",934292,O4 143 | Mayotte,Mayotte,Mayotte,YT,MYT,175,MYT,ot,,,262,MYT,MF,161,MAY,EUR,MAYOTTE,2,Euro,978,Part of FR,Mamoudzou,AF,.yt,fr-YT,1024031,2P 144 | Mexico,Mexico,Mexique,MX,MEX,484,MEX,mx,MX,MEX,52,MEX,MX,162,MEX,MXN,MEXICO,2,Mexican Peso,484,Yes,Mexico City,NA,.mx,es-MX,3996063,O5 145 | Micronesia,Micronesia (Federated States of),Micronésie (États fédérés de),FM,FSM,583,FSM,fm,,,691,FSM,FM,163,FSM,USD,"MICRONESIA, FEDERATED STATES OF",2,US Dollar,840,Yes,Palikir,OC,.fm,"en-FM,chk,pon,yap,kos,uli,woe,nkr,kpg",2081918,1K 146 | Moldova,Republic of Moldova,République de Moldova,MD,MDA,498,MDA,mv,RM,MD,373,MDA,MD,165,MDA,MDL,"MOLDOVA, REPUBLIC OF",2,Moldovan Leu,498,Yes,Chisinau,EU,.md,"ro,ru,gag,tr",617790,1S 147 | Monaco,Monaco,Monaco,MC,MCO,492,MCO,mc,,MC,377,MON,MN,166,MON,EUR,MONACO,2,Euro,978,Yes,Monaco,EU,.mc,"fr-MC,en,it",2993457,O9 148 | Mongolia,Mongolia,Mongolie,MN,MNG,496,MNG,mp,MO,MGL,976,MNG,MG,167,MGL,MNT,MONGOLIA,2,Tugrik,496,Yes,Ulan Bator,AS,.mn,"mn,ru",2029969,P0 149 | Montenegro,Montenegro,Monténégro,ME,MNE,499,MNE,mo,,MNE,382,MNE,MJ,2647,MGO,EUR,MONTENEGRO,2,Euro,978,Yes,Podgorica,EU,.me,"sr,hu,bs,sq,hr,rom",3194884,Z5 150 | Montserrat,Montserrat,Montserrat,MS,MSR,500,MSR,mj,,,1-664,MSR,MH,168,MNT,XCD,MONTSERRAT,2,East Caribbean Dollar,951,Territory of GB,Plymouth,NA,.ms,en-MS,3578097,P1 151 | Morocco,Morocco,Maroc,MA,MAR,504,MRC,mr,MC,MA,212,MAR,MO,169,MAR,MAD,MOROCCO,2,Moroccan Dirham,504,Yes,Rabat,AF,.ma,"ar-MA,ber,fr",2542007,P2 152 | Mozambique,Mozambique,Mozambique,MZ,MOZ,508,MOZ,mz,MZ,MOC,258,MOZ,MZ,170,MOZ,MZN,MOZAMBIQUE,2,Mozambique Metical,943,Yes,Maputo,AF,.mz,"pt-MZ,vmw",1036973,P3 153 | Myanmar,Myanmar,Myanmar,MM,MMR,104,MYA,br,BM,BUR,95,MYA,BM,171,MYA,MMK,MYANMAR,2,Kyat,104,Yes,Nay Pyi Taw,AS,.mm,my,1327865,E1 154 | 
Namibia,Namibia,Namibie,NA,NAM,516,NMB,sx,NM,NAM,264,NAM,WA,172,NAM,ZAR,NAMIBIA,2,Rand,710,Yes,Windhoek,AF,.na,"en-NA,af,de,hz,naq",3355338,T6 155 | Nauru,Nauru,Nauru,NR,NRU,520,NRU,nu,NW,NAU,674,NRU,NR,173,NRU,AUD,NAURU,2,Australian Dollar,36,Yes,Yaren,OC,.nr,"na,en-NR",2110425,P5 156 | Nepal,Nepal,Népal,NP,NPL,524,NPL,np,NP,NEP,977,NEP,NP,175,NEP,NPR,NEPAL,2,Nepalese Rupee,524,Yes,Kathmandu,AS,.np,"ne,en",1282988,P6 157 | Netherlands,Netherlands,Pays-Bas,NL,NLD,528,HOL,ne,NL,NL,31,NED,NL,177,NED,EUR,NETHERLANDS,2,Euro,978,Yes,Amsterdam,EU,.nl,"nl-NL,fy-NL",2750405,P7 158 | New Caledonia,New Caledonia,Nouvelle-Calédonie,NC,NCL,540,NCL,nl,NC,F,687,NCL,NC,178,NCD,XPF,NEW CALEDONIA,0,CFP Franc,953,Territory of FR,Noumea,OC,.nc,fr-NC,2139685,1W 159 | New Zealand,New Zealand,Nouvelle-Zélande,NZ,NZL,554,NZL,nz,NZ,NZ,64,NZL,NZ,179,NZL,NZD,NEW ZEALAND,2,New Zealand Dollar,554,Yes,Wellington,OC,.nz,"en-NZ,mi",2186224,Q2 160 | Nicaragua,Nicaragua,Nicaragua,NI,NIC,558,NCG,nq,NK,NIC,505,NCA,NU,180,NCA,NIO,NICARAGUA,2,Cordoba Oro,558,Yes,Managua,NA,.ni,"es-NI,en",3617476,Q3 161 | Niger,Niger,Niger,NE,NER,562,NGR,ng,NR,RN,227,NIG,NG,181,NIG,XOF,NIGER,0,CFA Franc BCEAO,952,Yes,Niamey,AF,.ne,"fr-NE,ha,kr,dje",2440476,Q4 162 | Nigeria,Nigeria,Nigéria,NG,NGA,566,NIG,nr,NI,WAN,234,NGA,NI,182,NGR,NGN,NIGERIA,2,Naira,566,Yes,Abuja,AF,.ng,"en-NG,ha,yo,ig,ff",2328926,Q5 163 | Niue,Niue,Nioué,NU,NIU,570,NIU,xh,,NZ,683,NIU,NE,183,NIU,NZD,NIUE,2,New Zealand Dollar,554,Associated with NZ,Alofi,OC,.nu,"niu,en-NU",4036232,Q6 164 | Norfolk Island,Norfolk Island,Île Norfolk,NF,NFK,574,NFK,nx,NF,AUS,672,NFK,NF,184,NFI,AUD,NORFOLK ISLAND,2,Australian Dollar,36,Territory of AU,Kingston,OC,.nf,en-NF,2155115,Q7 165 | North Korea,Democratic People's Republic of Korea,République populaire démocratique de Corée,KP,PRK,408,KRE,kn,KR,,850,PRK,KN,67,PRK,KPW,"KOREA, DEMOCRATIC PEOPLE’S REPUBLIC OF",2,North Korean Won,408,Yes,Pyongyang,AS,.kp,ko-KP,1873107, 166 | Northern Mariana Islands,Northern Mariana 
Islands,Îles Mariannes septentrionales,MP,MNP,580,MRA,nw,MY,USA,1-670,NMI,CQ,185,NMA,USD,NORTHERN MARIANA ISLANDS,2,US Dollar,840,Commonwealth of US,Saipan,OC,.mp,"fil,tl,zh,ch-MP,en-MP",4041468,1V 167 | Norway,Norway,Norvège,NO,NOR,578,NOR,no,NO,N,47,NOR,NO,186,NOR,NOK,NORWAY,2,Norwegian Krone,578,Yes,Oslo,EU,.no,"no,nb,nn,se,fi",3144096,Q8 168 | Oman,Oman,Oman,OM,OMN,512,OMA,mk,OM,,968,OMA,MU,187,OMA,OMR,OMAN,3,Rial Omani,512,Yes,Muscat,AS,.om,"ar-OM,en,bal,ur",286963,P4 169 | Pakistan,Pakistan,Pakistan,PK,PAK,586,PAK,pk,PK,PK,92,PAK,PK,188,PAK,PKR,PAKISTAN,2,Pakistan Rupee,586,Yes,Islamabad,AS,.pk,"ur-PK,en-PK,pa,sd,ps,brh",1168579,R0 170 | Palau,Palau,Palaos,PW,PLW,585,PLW,pw,,,680,PLW,PS,189,PLW,USD,PALAU,2,US Dollar,840,Yes,Melekeok,OC,.pw,"pau,sov,en-PW,tox,ja,fil,zh",1559582,1Y 171 | Palestine,State of Palestine,État de Palestine,PS,PSE,275,,"gz,wj",,,970,PLE,"GZ,WE","91,267",PLE,,"PALESTINE, STATE OF",,No universal currency,,In contention,East Jerusalem,AS,.ps,ar-PS,6254930, 172 | Panama,Panama,Panama,PA,PAN,591,PNR,pn,PM,PA,507,PAN,PM,191,PAN,USD,PANAMA,2,US Dollar,840,Yes,Panama City,NA,.pa,"es-PA,en",3703430,R1 173 | Papua New Guinea,Papua New Guinea,Papouasie-Nouvelle-Guinée,PG,PNG,598,PNG,pp,NG,PNG,675,PNG,PP,192,PNG,PGK,PAPUA NEW GUINEA,2,Kina,598,Yes,Port Moresby,OC,.pg,"en-PG,ho,meu,tpi",2088628,R2 174 | Paraguay,Paraguay,Paraguay,PY,PRY,600,PRG,py,PY,PY,595,PAR,PA,194,PAR,PYG,PARAGUAY,0,Guarani,600,Yes,Asuncion,SA,.py,"es-PY,gn",3437598,R4 175 | Peru,Peru,Pérou,PE,PER,604,PRU,pe,PR,PE,51,PER,PE,195,PER,PEN,PERU,2,Nuevo Sol,604,Yes,Lima,SA,.pe,"es-PE,qu,ay",3932488,R5 176 | Philippines,Philippines,Philippines,PH,PHL,608,PHL,ph,PH,RP,63,PHI,RP,196,PHI,PHP,PHILIPPINES,2,Philippine Peso,608,Yes,Manila,AS,.ph,"tl,en-PH,fil",1694008,R6 177 | Pitcairn Islands,Pitcairn,Pitcairn,PN,PCN,612,PTC,pc,PT,,870,PCN,PC,197,,NZD,PITCAIRN,2,New Zealand Dollar,554,Territory of GB,Adamstown,OC,.pn,en-PN,4030699,R8 178 | 
Poland,Poland,Pologne,PL,POL,616,POL,pl,PL,PL,48,POL,PL,198,POL,PLN,POLAND,2,Zloty,985,Yes,Warsaw,EU,.pl,pl,798544,R9 179 | Portugal,Portugal,Portugal,PT,PRT,620,POR,po,PO,P,351,POR,PO,199,POR,EUR,PORTUGAL,2,Euro,978,Yes,Lisbon,EU,.pt,"pt-PT,mwl",2264397,S1 180 | Puerto Rico,Puerto Rico,Porto Rico,PR,PRI,630,PTR,pr,PU,USA,1,PUR,RQ,200,PUR,USD,PUERTO RICO,2,US Dollar,840,Commonwealth of US,San Juan,NA,.pr,"en-PR,es-PR",4566966,PR 181 | Qatar,Qatar,Qatar,QA,QAT,634,QAT,qa,QT,Q,974,QAT,QA,201,QAT,QAR,QATAR,2,Qatari Rial,634,Yes,Doha,AS,.qa,"ar-QA,es",289688,S3 182 | Romania,Romania,Roumanie,RO,ROU,642,ROU,rm,RO,RO,40,ROU,RO,203,ROU,RON,ROMANIA,2,New Romanian Leu,946,Yes,Bucharest,EU,.ro,"ro,hu,rom",798549,S5 183 | Russia,Russian Federation,Fédération de Russie,RU,RUS,643,RUS,ru,RS,RUS,7,RUS,RS,204,RUS,RUB,RUSSIAN FEDERATION,2,Russian Ruble,643,Yes,Moscow,EU,.ru,"ru,tt,xal,cau,ady,kv,ce,tyv,cv,udm,tut,mns,bua,myv,mdf,chm,ba,inh,tut,kbd,krc,ava,sah,nog",2017370,1Z 184 | Rwanda,Rwanda,Rwanda,RW,RWA,646,RRW,rw,RW,RWA,250,RWA,RW,205,RWA,RWF,RWANDA,0,Rwanda Franc,646,Yes,Kigali,AF,.rw,"rw,en-RW,fr-RW,sw",49518,S6 185 | Réunion,Réunion,Réunion,RE,REU,638,REU,re,RE,F,262,REU,RE,206,REU,EUR,RÉUNION,2,Euro,978,Part of FR,Saint-Denis,AF,.re,fr-RE,935317, 186 | Samoa,Samoa,Samoa,WS,WSM,882,SMO,ws,ZM,WS,685,SAM,WS,212,SAM,WST,SAMOA,2,Tala,882,Yes,Apia,OC,.ws,"sm,en-WS",4034894,Y0 187 | San Marino,San Marino,Saint-Marin,SM,SMR,674,SMR,sm,,RSM,378,SMR,SM,213,SMR,EUR,SAN MARINO,2,Euro,978,Yes,San Marino,EU,.sm,it-SM,3168068,S8 188 | Saudi Arabia,Saudi Arabia,Arabie saoudite,SA,SAU,682,ARS,su,SD,SA,966,KSA,SA,215,KSA,SAR,SAUDI ARABIA,2,Saudi Riyal,682,Yes,Riyadh,AS,.sa,ar-SA,102358,T0 189 | Senegal,Senegal,Sénégal,SN,SEN,686,SEN,sg,SG,SN,221,SEN,SG,217,SEN,XOF,SENEGAL,0,CFA Franc BCEAO,952,Yes,Dakar,AF,.sn,"fr-SN,wo,fuc,mnk",2245662,T1 190 | Serbia,Serbia,Serbie,RS,SRB,688,SRB,rb,YG,SRB,381 p,SRB,"RI,KV",2648,SRB,RSD,SERBIA,2,Serbian 
Dinar,941,Yes,Belgrade,EU,.rs,"sr,hu,bs,rom",6290252,Z2 191 | Seychelles,Seychelles,Seychelles,SC,SYC,690,SEY,se,SC,SY,248,SEY,SE,220,SEY,SCR,SEYCHELLES,2,Seychelles Rupee,690,Yes,Victoria,AF,.sc,"en-SC,fr-SC",241170,T2 192 | Sierra Leone,Sierra Leone,Sierra Leone,SL,SLE,694,SRL,sl,SL,WAL,232,SLE,SL,221,SLE,SLL,SIERRA LEONE,2,Leone,694,Yes,Freetown,AF,.sl,"en-SL,men,tem",2403846,T8 193 | Singapore,Singapore,Singapour,SG,SGP,702,SNG,si,SR,SGP,65,SIN,SN,222,SIN,SGD,SINGAPORE,2,Singapore Dollar,702,Yes,Singapore,AS,.sg,"cmn,en-SG,ms-SG,ta-SG,zh-SG",1880251,U0 194 | Sint Maarten,Sint Maarten (Dutch part),Saint-Martin (partie néerlandaise),SX,SXM,534,,sn,,,1-721,,NN,,,ANG,SINT MAARTEN (DUTCH PART),2,Netherlands Antillean Guilder,532,Part of NL,Philipsburg,NA,.sx,"nl,en",7609695, 195 | Slovakia,Slovakia,Slovaquie,SK,SVK,703,SVK,xo,SQ,SK,421,SVK,LO,223,SVK,EUR,SLOVAKIA,2,Euro,978,Yes,Bratislava,EU,.sk,"sk,hu",3057568,2B 196 | Slovenia,Slovenia,Slovénie,SI,SVN,705,SVN,xv,LJ,SLO,386,SVN,SI,224,SLO,EUR,SLOVENIA,2,Euro,978,Yes,Ljubljana,EU,.si,"sl,sh",3190538,2A 197 | Solomon Islands,Solomon Islands,Îles Salomon,SB,SLB,90,SLM,bp,SO,SB,677,SOL,BP,225,SOL,SBD,SOLOMON ISLANDS,2,Solomon Islands Dollar,90,Yes,Honiara,OC,.sb,"en-SB,tpi",2103350,D7 198 | Somalia,Somalia,Somalie,SO,SOM,706,SOM,so,SI,SO,252,SOM,SO,226,SOM,SOS,SOMALIA,2,Somali Shilling,706,Yes,Mogadishu,AF,.so,"so-SO,ar-SO,it,en-SO",51537,U1 199 | South Africa,South Africa,Afrique du Sud,ZA,ZAF,710,AFS,sa,ZA,ZA,27,RSA,SF,227,RSA,ZAR,SOUTH AFRICA,2,Rand,710,Yes,Pretoria,AF,.za,"zu,xh,af,nso,en-ZA,tn,st,ts,ss,ve,nr",953987,T3 200 | South Georgia & South Sandwich Islands,,,GS,SGS,239,,xs,,,500,,SX,228,,,,,,,Territory of GB,Grytviken,AN,.gs,en,3474415, 201 | South Korea,Republic of Korea,République de Corée,KR,KOR,410,KOR,ko,KO,ROK,82,KOR,KS,202,KOR,KRW,"KOREA, REPUBLIC OF",0,Won,410,Yes,Seoul,AS,.kr,"ko-KR,en",1835841,M5 202 | South Sudan,South Sudan,Soudan du Sud,SS,SSD,728,SSD,sd,,,211,,OD,,,SSP,SOUTH SUDAN,2,South 
Sudanese Pound,728,Yes,Juba,AF,,en,7909807, 203 | Spain,Spain,Espagne,ES,ESP,724,E,sp,SP,E,34,ESP,SP,229,ESP,EUR,SPAIN,2,Euro,978,Yes,Madrid,EU,.es,"es-ES,ca,gl,eu,oc",2510769,U3 204 | Sri Lanka,Sri Lanka,Sri Lanka,LK,LKA,144,CLN,ce,SB,CL,94,SRI,CE,231,SRI,LKR,SRI LANKA,2,Sri Lanka Rupee,144,Yes,Colombo,AS,.lk,"si,ta,en",1227603,F1 205 | St. Barthélemy,Saint Barthélemy,Saint-Barthélemy,BL,BLM,652,,sc,,,590,,TB,,,EUR,SAINT BARTHÉLEMY,2,Euro,978,Part of FR,Gustavia,NA,.gp,fr,3578476, 206 | St. Helena,Saint Helena,Sainte-Hélène,SH,SHN,654,SHN,xj,HE,SH,290 n,SHN,SH,207,HEL,SHP,"SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA",2,Saint Helena Pound,654,Territory of GB,Jamestown,AF,.sh,en-SH,3370751, 207 | St. Kitts & Nevis,Saint Kitts and Nevis,Saint-Kitts-et-Nevis,KN,KNA,659,KNA,xd,AT,KN,1-869,SKN,SC,208,SKN,XCD,SAINT KITTS AND NEVIS,2,East Caribbean Dollar,951,Yes,Basseterre,NA,.kn,en-KN,3575174,U7 208 | St. Lucia,Saint Lucia,Sainte-Lucie,LC,LCA,662,LCA,xk,LC,WL,1-758,LCA,ST,209,LCA,XCD,SAINT LUCIA,2,East Caribbean Dollar,951,Yes,Castries,NA,.lc,en-LC,3576468,U9 209 | St. Martin,Saint Martin (French part),Saint-Martin (partie française),MF,MAF,663,,st,,,590,,RN,,,EUR,SAINT MARTIN (FRENCH PART),2,Euro,978,Part of FR,Marigot,NA,.gp,fr,3578421, 210 | St. Pierre & Miquelon,Saint Pierre and Miquelon,Saint-Pierre-et-Miquelon,PM,SPM,666,SPM,xl,FP,F,508,SPM,SB,210,SPM,EUR,SAINT PIERRE AND MIQUELON,2,Euro,978,Part of FR,Saint-Pierre,NA,.pm,fr-PM,3424932,V0 211 | St. 
Vincent & Grenadines,Saint Vincent and the Grenadines,Saint-Vincent-et-les Grenadines,VC,VCT,670,VCT,xm,VG,WV,1-784,VIN,VC,211,VIN,XCD,SAINT VINCENT AND THE GRENADINES,2,East Caribbean Dollar,951,Yes,Kingstown,NA,.vc,"en-VC,fr",3577815,V1 212 | Sudan,Sudan,Soudan,SD,SDN,729,SDN,sj,SU,SUD,249,SUD,SU,40764,SUD,SDG,SUDAN,2,Sudanese Pound,938,Yes,Khartoum,AF,.sd,"ar-SD,en,fia",366755,V2 213 | Suriname,Suriname,Suriname,SR,SUR,740,SUR,sr,SM,SME,597,SUR,NS,233,SUR,SRD,SURINAME,2,Surinam Dollar,968,Yes,Paramaribo,SA,.sr,"nl-SR,en,srn,hns,jv",3382998,V3 214 | Svalbard & Jan Mayen,Svalbard and Jan Mayen Islands,Îles Svalbard-et-Jan Mayen,SJ,SJM,744,NOR,,SZ,,47,,"SV,JN",234,,NOK,SVALBARD AND JAN MAYEN,2,Norwegian Krone,578,Territory of NO,Longyearbyen,EU,.sj,"no,ru",607072,L9 215 | Swaziland,Swaziland,Swaziland,SZ,SWZ,748,SWZ,sq,SV,SD,268,SWZ,WZ,235,SWZ,SZL,SWAZILAND,2,Lilangeni,748,Yes,Mbabane,AF,.sz,"en-SZ,ss-SZ",934841,V6 216 | Sweden,Sweden,Suède,SE,SWE,752,S,sw,SN,S,46,SWE,SW,236,SWE,SEK,SWEDEN,2,Swedish Krona,752,Yes,Stockholm,EU,.se,"sv-SE,se,sma,fi-SE",2661886,V7 217 | Switzerland,Switzerland,Suisse,CH,CHE,756,SUI,sz,SW,CH,41,SUI,SZ,237,SUI,CHF,SWITZERLAND,2,Swiss Franc,756,Yes,Bern,EU,.ch,"de-CH,fr-CH,it-CH,rm",2658434,V8 218 | Syria,Syrian Arab Republic,République arabe syrienne,SY,SYR,760,SYR,sy,SY,SYR,963,SYR,SY,238,SYR,SYP,SYRIAN ARAB REPUBLIC,2,Syrian Pound,760,Yes,Damascus,AS,.sy,"ar-SY,ku,hy,arc,fr,en",163843,V9 219 | São Tomé & Príncipe,Sao Tome and Principe,Sao Tomé-et-Principe,ST,STP,678,STP,sf,TP,ST,239,STP,TP,214,STP,STD,SAO TOME AND PRINCIPE,2,Dobra,678,Yes,Sao Tome,AF,.st,pt-ST,2410758,S9 220 | Taiwan,,,TW,TWN,158,,ch,,RC,886,TPE,TW,925,TPE,,,,,,Yes,Taipei,AS,.tw,"zh-TW,zh,nan,hak",1668284, 221 | Tajikistan,Tajikistan,Tadjikistan,TJ,TJK,762,TJK,ta,TA,TJ,992,TJK,TI,239,TJK,TJS,TAJIKISTAN,2,Somoni,972,Yes,Dushanbe,AS,.tj,"tg,ru",1220409,2D 222 | Tanzania,United Republic of Tanzania,République-Unie de 
Tanzanie,TZ,TZA,834,TZA,tz,TN,EAT,255,TAN,TZ,257,TAN,TZS,"TANZANIA, UNITED REPUBLIC OF",2,Tanzanian Shilling,834,Yes,Dodoma,AF,.tz,"sw-TZ,en,ar",149590,W0 223 | Thailand,Thailand,Thaïlande,TH,THA,764,THA,th,TH,T,66,THA,TH,240,THA,THB,THAILAND,2,Baht,764,Yes,Bangkok,AS,.th,"th,en",1605651,W1 224 | Timor-Leste,Timor-Leste,Timor-Leste,TL,TLS,626,TLS,em,TM,RI,670,TLS,TT,242,TLS,USD,TIMOR-LESTE,2,US Dollar,840,Yes,Dili,OC,.tl,"tet,pt-TL,id,en",1966436,Z3 225 | Togo,Togo,Togo,TG,TGO,768,TGO,tg,TG,TG,228,TOG,TO,243,TOG,XOF,TOGO,0,CFA Franc BCEAO,952,Yes,Lome,AF,.tg,"fr-TG,ee,hna,kbp,dag,ha",2363686,W2 226 | Tokelau,Tokelau,Tokelau,TK,TKL,772,TKL,tl,TK,NZ,690,TKL,TL,244,,NZD,TOKELAU,2,New Zealand Dollar,554,Territory of NZ,,OC,.tk,"tkl,en-TK",4031074,W3 227 | Tonga,Tonga,Tonga,TO,TON,776,TON,to,TO,TO,676,TGA,TN,245,TGA,TOP,TONGA,2,Pa’anga,776,Yes,Nuku'alofa,OC,.to,"to,en-TO",4032283,W4 228 | Trinidad & Tobago,Trinidad and Tobago,Trinité-et-Tobago,TT,TTO,780,TRD,tr,TD,TT,1-868,TRI,TD,246,TTO,TTD,TRINIDAD AND TOBAGO,2,Trinidad and Tobago Dollar,780,Yes,Port of Spain,NA,.tt,"en-TT,hns,fr,es,zh",3573591,W5 229 | Tunisia,Tunisia,Tunisie,TN,TUN,788,TUN,ti,TS,TN,216,TUN,TS,248,TUN,TND,TUNISIA,3,Tunisian Dinar,788,Yes,Tunis,AF,.tn,"ar-TN,fr",2464461,W6 230 | Turkey,Turkey,Turquie,TR,TUR,792,TUR,tu,TU,TR,90,TUR,TU,249,TUR,TRY,TURKEY,2,Turkish Lira,949,Yes,Ankara,AS,.tr,"tr-TR,ku,diq,az,av",298795,W8 231 | Turkmenistan,Turkmenistan,Turkménistan,TM,TKM,795,TKM,tk,TR,TM,993,TKM,TX,250,TKM,TMT,TURKMENISTAN,2,Turkmenistan New Manat,934,Yes,Ashgabat,AS,.tm,"tk,ru,uz",1218197,2E 232 | Turks & Caicos Islands,Turks and Caicos Islands,Îles Turques-et-Caïques,TC,TCA,796,TCA,tc,TI,,1-649,TCA,TK,251,TKS,USD,TURKS AND CAICOS ISLANDS,2,US Dollar,840,Territory of GB,Cockburn Town,NA,.tc,en-TC,3576916,W7 233 | Tuvalu,Tuvalu,Tuvalu,TV,TUV,798,TUV,tv,TV,TV,688,TUV,TV,252,TUV,AUD,TUVALU,2,Australian Dollar,36,Yes,Funafuti,OC,.tv,"tvl,en,sm,gil",2110297,2G 234 | U.S. 
Outlying Islands,,,UM,UMI,581,,"ji,xf,wk,uc,up",,USA,,,"FQ,HQ,DQ,JQ,KQ,MQ,BQ,LQ,WQ",,,,,,,,Territories of US,,OC,.um,en-UM,5854968, 235 | U.S. Virgin Islands,United States Virgin Islands,Îles Vierges américaines,VI,VIR,850,VIR,vi,VI,USA,1-340,VIR,VQ,258,ISV,USD,VIRGIN ISLANDS (U.S.),2,US Dollar,840,Territory of US,Charlotte Amalie,NA,.vi,en-VI,4796775, 236 | UK,United Kingdom of Great Britain and Northern Ireland,Royaume-Uni de Grande-Bretagne et d'Irlande du Nord,GB,GBR,826,G,xxk,UK,GB,44,"ENG,NIR,SCO,WAL",UK,256,GBR,GBP,UNITED KINGDOM,2,Pound Sterling,826,Yes,London,EU,.uk,"en-GB,cy-GB,gd",2635167,X0 237 | US,United States of America,États-Unis d'Amérique,US,USA,840,USA,xxu,US,USA,1,USA,US,259,USA,USD,UNITED STATES,2,US Dollar,840,Yes,Washington,NA,.us,"en-US,es-US,haw,fr",6252001, 238 | Uganda,Uganda,Ouganda,UG,UGA,800,UGA,ug,UG,EAU,256,UGA,UG,253,UGA,UGX,UGANDA,0,Uganda Shilling,800,Yes,Kampala,AF,.ug,"en-UG,lg,sw,ar",226074,W9 239 | Ukraine,Ukraine,Ukraine,UA,UKR,804,UKR,un,UR,UA,380,UKR,UP,254,UKR,UAH,UKRAINE,2,Hryvnia,980,Yes,Kiev,EU,.ua,"uk,ru-UA,rom,pl,hu",690791,2H 240 | United Arab Emirates,United Arab Emirates,Émirats arabes unis,AE,ARE,784,UAE,ts,ER,,971,UAE,AE,255,UAE,AED,UNITED ARAB EMIRATES,2,UAE Dirham,784,Yes,Abu Dhabi,AS,.ae,"ar-AE,fa,en,hi,ur",290557,C0 241 | Uruguay,Uruguay,Uruguay,UY,URY,858,URG,uy,UY,ROU,598,URU,UY,260,URU,UYU,URUGUAY,2,Peso Uruguayo,858,Yes,Montevideo,SA,.uy,es-UY,3439705,X3 242 | Uzbekistan,Uzbekistan,Ouzbékistan,UZ,UZB,860,UZB,uz,UZ,UZ,998,UZB,UZ,261,UZB,UZS,UZBEKISTAN,2,Uzbekistan Sum,860,Yes,Tashkent,AS,.uz,"uz,ru,tg",1512440,2K 243 | Vanuatu,Vanuatu,Vanuatu,VU,VUT,548,VUT,nn,NV,VU,678,VAN,NH,262,VAN,VUV,VANUATU,0,Vatu,548,Yes,Port Vila,OC,.vu,"bi,en-VU,fr-VU",2134431,2L 244 | Vatican City,Holy See,Saint-Siège,VA,VAT,336,CVA,vc,,V,39-06,VAT,VT,110,,EUR,HOLY SEE (VATICAN CITY STATE),2,Euro,978,Yes,Vatican City,EU,.va,"la,it,fr",3164670,X4 245 | Venezuela,Venezuela (Bolivarian Republic of),Venezuela (République 
bolivarienne du),VE,VEN,862,VEN,ve,VN,YV,58,VEN,VE,263,VEN,VEF,"VENEZUELA, BOLIVARIAN REPUBLIC OF",2,Bolivar,937,Yes,Caracas,SA,.ve,es-VE,3625428, 246 | Vietnam,Viet Nam,Viet Nam,VN,VNM,704,VTN,vm,VS,VN,84,VIE,VM,264,VIE,VND,VIET NAM,0,Dong,704,Yes,Hanoi,AS,.vn,"vi,en,fr,zh,km",1562822,Q1 247 | Wallis & Futuna,Wallis and Futuna Islands,Îles Wallis-et-Futuna,WF,WLF,876,WAL,wf,FW,F,681,WLF,WF,266,WAF,XPF,WALLIS AND FUTUNA,0,CFP Franc,953,Territory of FR,Mata Utu,OC,.wf,"wls,fud,fr-WF",4034749,X8 248 | Western Sahara,Western Sahara,Sahara occidental,EH,ESH,732,AOE,ss,,,212,SAH,WI,268,,MAD,WESTERN SAHARA,2,Moroccan Dirham,504,In contention,El-Aaiun,AF,.eh,"ar,mey",2461445,U5 249 | Yemen,Yemen,Yémen,YE,YEM,887,YEM,ye,YE,YAR,967,YEM,YM,269,YEM,YER,YEMEN,2,Yemeni Rial,886,Yes,Sanaa,AS,.ye,ar-YE,69543,T7 250 | Zambia,Zambia,Zambie,ZM,ZMB,894,ZMB,za,ZB,Z,260,ZAM,ZA,270,ZAM,ZMW,ZAMBIA,2,Zambian Kwacha,967,Yes,Lusaka,AF,.zm,"en-ZM,bem,loz,lun,lue,ny,toi",895949,Y4 251 | Zimbabwe,Zimbabwe,Zimbabwe,ZW,ZWE,716,ZWE,rh,ZW,ZW,263,ZIM,ZI,271,ZIM,ZWL,ZIMBABWE,2,Zimbabwe Dollar,932,Yes,Harare,AF,.zw,"en-ZW,sn,nr,nd",878675,Y5 252 | Åland Islands,Åland Islands,Îles d'Åland,AX,ALA,248,,,,FIN,358,ALD,,1242,,EUR,ÅLAND ISLANDS,2,Euro,978,Part of FI,Mariehamn,EU,.ax,sv-AX,661882, 253 | -------------------------------------------------------------------------------- /live16_clustering_texto/README: -------------------------------------------------------------------------------- 1 | Material para a Live 16 2 | Como Descobrir Padrões em Textos Usando Clustering 3 | https://youtu.be/liQJHtxf-nE 4 | -------------------------------------------------------------------------------- /live16_clustering_texto/nb1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 33, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | 
"%matplotlib inline\n", 12 | "\n", 13 | "from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n", 14 | "from sklearn.metrics import silhouette_score\n", 15 | "from sklearn.cluster import KMeans, DBSCAN, SpectralClustering" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "# Quer aprender como eu faço um projeto de Data Science da ideia ao produto final? Acesse CursoDeDataScience.com" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "https://www.kaggle.com/lukeimurfather/tweets" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "tweets = pd.read_csv(\"nCoV_tweets.csv\", index_col=0, parse_dates=['dt'])" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 129, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "docs = [\"curso de data, dAta science\", \n", 48 | " \"tutorial de data analysis\", \n", 49 | " \"não sei mais o que escrever analysis\"]" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 133, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "data": { 59 | "text/html": [ 60 | "
\n", 61 | "\n", 74 | "\n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | "
analysisdata
curso de data, dAta science02
tutorial de data analysis11
não sei mais o que escrever analysis10
\n", 100 | "
" 101 | ], 102 | "text/plain": [ 103 | " analysis data\n", 104 | "curso de data, dAta science 0 2\n", 105 | "tutorial de data analysis 1 1\n", 106 | "não sei mais o que escrever analysis 1 0" 107 | ] 108 | }, 109 | "execution_count": 133, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "bag_of_words_transformer = CountVectorizer(binary=False, analyzer='word', stop_words=['de', 'que'], ngram_range=(1,1), min_df=2)\n", 116 | "mx = bag_of_words_transformer.fit_transform(docs).todense()\n", 117 | "terms = bag_of_words_transformer.get_feature_names()\n", 118 | "pd.DataFrame(mx, columns=terms, index=docs)\n", 119 | "\n", 120 | "\n", 121 | "#lowercase\n", 122 | "#analyzer='char'\n", 123 | "#strip_accents='unicode'\n", 124 | "#binary=True\n", 125 | "\n", 126 | "#stop_words=['de']\n", 127 | "#ngram_range=(1,1)\n", 128 | "#min_df\n", 129 | "\n", 130 | "#unigrama, bigrama, trigrama, " 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "tf-idf ~= frequencia do palavra no documento * inverso da frequencia da palavra em todos os documentos" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 137, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "data": { 149 | "text/html": [ 150 | "
\n", 151 | "\n", 164 | "\n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | "
analysiscursodatadeescrevermaisnãoquescienceseitutorial
curso de data, dAta science0.0000000.4521230.6877030.3438510.0000000.0000000.0000000.0000000.4521230.0000000.000000
tutorial de data analysis0.4598540.0000000.4598540.4598540.0000000.0000000.0000000.0000000.0000000.0000000.604652
não sei mais o que escrever analysis0.3220020.0000000.0000000.0000000.4233940.4233940.4233940.4233940.0000000.4233940.000000
\n", 226 | "
" 227 | ], 228 | "text/plain": [ 229 | " analysis curso data de \\\n", 230 | "curso de data, dAta science 0.000000 0.452123 0.687703 0.343851 \n", 231 | "tutorial de data analysis 0.459854 0.000000 0.459854 0.459854 \n", 232 | "não sei mais o que escrever analysis 0.322002 0.000000 0.000000 0.000000 \n", 233 | "\n", 234 | " escrever mais não que \\\n", 235 | "curso de data, dAta science 0.000000 0.000000 0.000000 0.000000 \n", 236 | "tutorial de data analysis 0.000000 0.000000 0.000000 0.000000 \n", 237 | "não sei mais o que escrever analysis 0.423394 0.423394 0.423394 0.423394 \n", 238 | "\n", 239 | " science sei tutorial \n", 240 | "curso de data, dAta science 0.452123 0.000000 0.000000 \n", 241 | "tutorial de data analysis 0.000000 0.000000 0.604652 \n", 242 | "não sei mais o que escrever analysis 0.000000 0.423394 0.000000 " 243 | ] 244 | }, 245 | "execution_count": 137, 246 | "metadata": {}, 247 | "output_type": "execute_result" 248 | } 249 | ], 250 | "source": [ 251 | "bag_of_words_transformer = TfidfVectorizer()\n", 252 | "mx = bag_of_words_transformer.fit_transform(docs).todense()\n", 253 | "terms = bag_of_words_transformer.get_feature_names()\n", 254 | "pd.DataFrame(mx, columns=terms, index=docs)\n", 255 | "\n", 256 | "\n", 257 | "# norm = Each output row will have unit norm, either: * ‘l2’: Sum of squares of vector elements is 1.\n", 258 | "#The cosine similarity between two vectors is their dot product when l2 norm has been applied. \n", 259 | "#* ‘l1’: Sum of absolute values of vector elements is 1. 
See preprocessing.normalize.\n", 260 | "\n", 261 | "#use_idf = False \n", 262 | "#use_idf = False e norm = l1, frequência simples" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 6, 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [ 271 | "from nltk.stem import SnowballStemmer" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 139, 277 | "metadata": {}, 278 | "outputs": [ 279 | { 280 | "data": { 281 | "text/plain": [ 282 | "'não sei mais o que escrever analys'" 283 | ] 284 | }, 285 | "execution_count": 139, 286 | "metadata": {}, 287 | "output_type": "execute_result" 288 | } 289 | ], 290 | "source": [ 291 | "stemmer.stem(docs[2])" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 7, 297 | "metadata": {}, 298 | "outputs": [ 299 | { 300 | "data": { 301 | "text/plain": [ 302 | "('analis', 'analis')" 303 | ] 304 | }, 305 | "execution_count": 7, 306 | "metadata": {}, 307 | "output_type": "execute_result" 308 | } 309 | ], 310 | "source": [ 311 | "stemmer = SnowballStemmer(language='portuguese')\n", 312 | "stemmer.stem(\"analisado\"), stemmer.stem(\"analise\")" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 153, 318 | "metadata": {}, 319 | "outputs": [ 320 | { 321 | "data": { 322 | "text/plain": [ 323 | "(6706, 618)" 324 | ] 325 | }, 326 | "execution_count": 153, 327 | "metadata": {}, 328 | "output_type": "execute_result" 329 | } 330 | ], 331 | "source": [ 332 | "bag_of_words_transformer = CountVectorizer(min_df=4, stop_words='english', ngram_range=(3,3))\n", 333 | "#ngram range 2,2, 3,3\n", 334 | "# stem\n", 335 | "mx = bag_of_words_transformer.fit_transform(tweets['txt'])#.todense()\n", 336 | "mx.shape" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 154, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [ 345 | "from sklearn.pipeline import make_pipeline\n", 346 | "from sklearn.preprocessing import MaxAbsScaler" 
347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": 155, 352 | "metadata": {}, 353 | "outputs": [ 354 | { 355 | "name": "stdout", 356 | "output_type": "stream", 357 | "text": [ 358 | "K = 2 - Silhouette: 0.7849594690794792\n", 359 | "K = 3 - Silhouette: 0.7872649732846221\n", 360 | "K = 4 - Silhouette: 0.7899806458433944\n", 361 | "K = 5 - Silhouette: 0.7916851880937629\n", 362 | "K = 6 - Silhouette: 0.7917029047299742\n", 363 | "K = 7 - Silhouette: 0.7967897372340083\n", 364 | "K = 8 - Silhouette: 0.7965854032241488\n", 365 | "K = 9 - Silhouette: 0.7980160269702951\n" 366 | ] 367 | } 368 | ], 369 | "source": [ 370 | "for k in range(2,10):\n", 371 | " cluster = make_pipeline(MaxAbsScaler(), KMeans(n_clusters=k, random_state=0))\n", 372 | " cluster.fit(mx)\n", 373 | " p = cluster.predict(mx)\n", 374 | " \n", 375 | " sil = silhouette_score(mx, p)\n", 376 | " print(\"K = {} - Silhouette: {}\".format(k, sil))" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": 156, 382 | "metadata": {}, 383 | "outputs": [], 384 | "source": [ 385 | "terms = bag_of_words_transformer.get_feature_names()" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": 157, 391 | "metadata": {}, 392 | "outputs": [ 393 | { 394 | "name": "stdout", 395 | "output_type": "stream", 396 | "text": [ 397 | "\n", 398 | "Cluster 0 - Size 6453\n", 399 | "coronavirus update wuhan 0.002015\n", 400 | "coronavirus death toll 0.002170\n", 401 | "30 hours birth 0.002325\n", 402 | "accidentally leaked real 0.002479\n", 403 | "guan zhuang bing 0.002634\n", 404 | "tencent accidentally leaked 0.002634\n", 405 | "zhuang bing du 0.002634\n", 406 | "just 30 hours 0.002789\n", 407 | "novel coronavirus 2019 0.002944\n", 408 | "cruise ship japan 0.003099\n", 409 | "2019 novel coronavirus 0.003254\n", 410 | "news china coronavirus 0.003409\n", 411 | "world health organization 0.003719\n", 412 | "coronavirus coronaoutbreak coronanews 0.004184\n", 413 | 
"coronaoutbreak coronanews ncov2019 0.004184\n", 414 | "coronavirus asiannetwalking https 0.004339\n", 415 | "health coronavirus asiannetwalking 0.004339\n", 416 | "amid coronavirus outbreak 0.005114\n", 417 | "coronavirus 2019 ncov 0.005114\n", 418 | "coronavirus outbreak https 0.007438\n", 419 | "dtype: float64\n", 420 | "\n", 421 | "Cluster 1 - Size 14\n", 422 | "coronavirus vaccine https 0.000000\n", 423 | "coronavirus wuhancoronavirus wuhanvirus 0.000000\n", 424 | "coronavirus wuhan 2019ncov 0.000000\n", 425 | "coronavirus wuhan https 0.000000\n", 426 | "gt https sdfcrodiom 0.071429\n", 427 | "efzbdv4cot details gt 0.071429\n", 428 | "https efzbdv4cot details 0.071429\n", 429 | "https n8owlcko0x https 0.142857\n", 430 | "gt https n8owlcko0x 0.142857\n", 431 | "gt https 9orx4j6buu 0.214286\n", 432 | "https 9orx4j6buu https 0.214286\n", 433 | "answered survive details 0.428571\n", 434 | "outbreak answered survive 0.428571\n", 435 | "survive details gt 0.428571\n", 436 | "outbreak answered details 0.500000\n", 437 | "answered details gt 0.500000\n", 438 | "biggest questions outbreak 1.000000\n", 439 | "questions outbreak answered 1.000000\n", 440 | "details gt https 1.000000\n", 441 | "coronavirus biggest questions 1.000000\n", 442 | "dtype: float64\n", 443 | "\n", 444 | "Cluster 2 - Size 9\n", 445 | "coronavirus vaccine breakthrough 0.000000\n", 446 | "coronaviruswuhan coronavirusoutbreak https 0.000000\n", 447 | "coronavirus vaccine https 0.000000\n", 448 | "coronavirus wuhan 2019ncov 0.000000\n", 449 | "zone china sure 0.000000\n", 450 | "coronavirus wuhan https 0.000000\n", 451 | "coronavirus wuhancoronavirus wuhanvirus 0.000000\n", 452 | "coronavirusoutbreak china coronavirus 0.000000\n", 453 | "coronavirusoutbreak coronavirus https 0.000000\n", 454 | "details click https 0.444444\n", 455 | "anti pollution clean 1.000000\n", 456 | "pollution clean air 1.000000\n", 457 | "oxybreath pro highly 1.000000\n", 458 | "breathing mask details 1.000000\n", 459 | 
"clean air breathing 1.000000\n", 460 | "pro highly effective 1.000000\n", 461 | "highly effective anti 1.000000\n", 462 | "air breathing mask 1.000000\n", 463 | "effective anti pollution 1.000000\n", 464 | "mask details click 1.000000\n", 465 | "dtype: float64\n", 466 | "\n", 467 | "Cluster 3 - Size 14\n", 468 | "coronavirus survive details 0.0\n", 469 | "coronavirus wuhan https 0.0\n", 470 | "coronavirus wuhan 2019ncov 0.0\n", 471 | "coronavirus vaccine https 0.0\n", 472 | "coronavirus vaccine breakthrough 0.0\n", 473 | "coronavirus updates live 0.0\n", 474 | "coronavirusoutbreak safety tips 0.0\n", 475 | "coronavirus transmitted people 0.0\n", 476 | "coronavirus travel ban 0.0\n", 477 | "coronavirus update china 0.0\n", 478 | "year old woman 1.0\n", 479 | "woman 15th person 1.0\n", 480 | "coronavirus fifth queensland 1.0\n", 481 | "37 year old 1.0\n", 482 | "diagnosed coronavirus fifth 1.0\n", 483 | "person australia diagnosed 1.0\n", 484 | "15th person australia 1.0\n", 485 | "australia diagnosed coronavirus 1.0\n", 486 | "fifth queensland https 1.0\n", 487 | "old woman 15th 1.0\n", 488 | "dtype: float64\n", 489 | "\n", 490 | "Cluster 4 - Size 18\n", 491 | "coronavirus survive details 0.000000\n", 492 | "coronavirus transmitted people 0.000000\n", 493 | "coronavirus travel ban 0.000000\n", 494 | "coronavirusoutbreak coronavirus https 0.000000\n", 495 | "coronavirus update wuhan 0.000000\n", 496 | "coronavirus update china 0.000000\n", 497 | "coronavirus vaccine breakthrough 0.000000\n", 498 | "coronavirus vaccine https 0.000000\n", 499 | "coronavirus wuhan 2019ncov 0.000000\n", 500 | "coronavirus wuhan https 0.000000\n", 501 | "coronavirus wuhancoronavirus wuhanvirus 0.000000\n", 502 | "coronavirus updates live 0.000000\n", 503 | "zone china sure 0.000000\n", 504 | "whatsapp 0555171905 https 0.777778\n", 505 | "days vals surprise 0.777778\n", 506 | "0205414305or whatsapp 0555171905 0.944444\n", 507 | "reach 0205414305or whatsapp 1.000000\n", 508 | "special 
reach 0205414305or 1.000000\n", 509 | "surprise special reach 1.000000\n", 510 | "vals surprise special 1.000000\n", 511 | "dtype: float64\n", 512 | "\n", 513 | "Cluster 5 - Size 12\n", 514 | "cruelty stop eating 0.0\n", 515 | "coronavirus vaccine https 0.0\n", 516 | "coronavirus wuhan 2019ncov 0.0\n", 517 | "coronavirus wuhan https 0.0\n", 518 | "coronavirus wuhancoronavirus wuhanvirus 0.0\n", 519 | "coronavirusoutbreak china coronavirus 0.0\n", 520 | "coronavirusoutbreak coronavirus https 0.0\n", 521 | "coronavirusoutbreak safety tips 0.0\n", 522 | "coronaviruswuhan coronavirusoutbreak https 0.0\n", 523 | "coronavirus epidemic https 0.0\n", 524 | "help publishing link 1.0\n", 525 | "publishing link help 1.0\n", 526 | "syrie est dur 1.0\n", 527 | "est dur https 1.0\n", 528 | "awareness suffering syria 1.0\n", 529 | "link help raise 1.0\n", 530 | "suffering syria syrie 1.0\n", 531 | "syria syrie est 1.0\n", 532 | "raise awareness suffering 1.0\n", 533 | "help raise awareness 1.0\n", 534 | "dtype: float64\n", 535 | "\n", 536 | "Cluster 6 - Size 11\n", 537 | "coronavirus updates live 0.000000\n", 538 | "coronavirus vaccine https 0.000000\n", 539 | "coronavirus wuhan 2019ncov 0.000000\n", 540 | "coronavirus vaccine breakthrough 0.000000\n", 541 | "coronavirus wuhancoronavirus wuhanvirus 0.000000\n", 542 | "coronavirusoutbreak china coronavirus 0.000000\n", 543 | "coronavirusoutbreak coronavirus https 0.000000\n", 544 | "coronavirusoutbreak safety tips 0.000000\n", 545 | "coronavirus wuhan https 0.000000\n", 546 | "bing du https 0.181818\n", 547 | "guan zhuang bing 0.181818\n", 548 | "zhuang bing du 0.181818\n", 549 | "coronavirus statistics https 0.454545\n", 550 | "leak true terrifying 0.909091\n", 551 | "terrifying coronavirus statistics 0.909091\n", 552 | "true terrifying coronavirus 0.909091\n", 553 | "china tencent accidentally 1.000000\n", 554 | "tencent accidentally leak 1.000000\n", 555 | "did china tencent 1.000000\n", 556 | "accidentally leak true 
1.000000\n", 557 | "dtype: float64\n", 558 | "\n", 559 | "Cluster 7 - Size 168\n", 560 | "kits virus update 0.071429\n", 561 | "test lab opens 0.071429\n", 562 | "opens cdc ships 0.071429\n", 563 | "lab opens cdc 0.071429\n", 564 | "wuhan test lab 0.071429\n", 565 | "https 9orx4j6buu https 0.077381\n", 566 | "virus coronavirus https 0.083333\n", 567 | "efzbdv4cot details gt 0.083333\n", 568 | "https efzbdv4cot details 0.083333\n", 569 | "n8owlcko0x virus https 0.101190\n", 570 | "gt https qvezftkwkq 0.113095\n", 571 | "coronavirus sars flu 0.119048\n", 572 | "9orx4j6buu virus https 0.142857\n", 573 | "https n8owlcko0x virus 0.154762\n", 574 | "virus coronavirus sars 0.160714\n", 575 | "gt https n8owlcko0x 0.196429\n", 576 | "https 9orx4j6buu virus 0.196429\n", 577 | "survive details gt 0.244048\n", 578 | "gt https 9orx4j6buu 0.273810\n", 579 | "details gt https 1.000000\n", 580 | "dtype: float64\n", 581 | "\n", 582 | "Cluster 8 - Size 7\n", 583 | "coronavirus update china 0.000000\n", 584 | "coronavirus update wuhan 0.000000\n", 585 | "coronavirus vaccine breakthrough 0.000000\n", 586 | "coronaviruswuhan coronavirusoutbreak https 0.000000\n", 587 | "coronavirus wuhan 2019ncov 0.000000\n", 588 | "zone china sure 0.000000\n", 589 | "coronavirus wuhan https 0.000000\n", 590 | "coronavirus wuhancoronavirus wuhanvirus 0.000000\n", 591 | "coronavirusoutbreak china coronavirus 0.000000\n", 592 | "coronavirusoutbreak coronavirus https 0.000000\n", 593 | "coronavirusoutbreak safety tips 0.000000\n", 594 | "rate recovery rate 0.714286\n", 595 | "fully automated live 0.857143\n", 596 | "mortality rate recovery 0.857143\n", 597 | "automated live coronavirus 0.857143\n", 598 | "live statistics mortality 1.000000\n", 599 | "coronavirus updates live 1.000000\n", 600 | "updates live statistics 1.000000\n", 601 | "statistics mortality rate 1.000000\n", 602 | "live coronavirus updates 1.000000\n", 603 | "dtype: float64\n" 604 | ] 605 | } 606 | ], 607 | "source": [ 608 | "k = 9\n", 
609 | "cluster = make_pipeline(MaxAbsScaler(), KMeans(n_clusters=k, random_state=0))\n", 610 | "cluster.fit(mx)\n", 611 | "p = cluster.predict(mx)\n", 612 | "\n", 613 | "for c in np.unique(p):\n", 614 | " print(\"\\nCluster {} - Size {}\".format(c, (p == c).sum()))\n", 615 | " rank = pd.Series(np.array(mx[p==c].mean(axis=0)).squeeze(), index=terms).sort_values().tail(20)\n", 616 | " print(rank)" 617 | ] 618 | }, 619 | { 620 | "cell_type": "code", 621 | "execution_count": 147, 622 | "metadata": {}, 623 | "outputs": [ 624 | { 625 | "name": "stdout", 626 | "output_type": "stream", 627 | "text": [ 628 | "\n", 629 | "Cluster 0\n", 630 | "test 0.058252\n", 631 | "lab 0.058252\n", 632 | "china 0.058252\n", 633 | "experts 0.063107\n", 634 | "sars 0.065534\n", 635 | "symptoms 0.067961\n", 636 | "questions 0.067961\n", 637 | "answered 0.067961\n", 638 | "biggest 0.067961\n", 639 | "efzbdv4cot 0.087379\n", 640 | "qvezftkwkq 0.092233\n", 641 | "outbreak 0.094660\n", 642 | "survive 0.114078\n", 643 | "n8owlcko0x 0.169903\n", 644 | "virus 0.174757\n", 645 | "9orx4j6buu 0.237864\n", 646 | "coronavirus 0.260922\n", 647 | "gt 0.294498\n", 648 | "https 0.588997\n", 649 | "details 1.000000\n", 650 | "dtype: float64\n", 651 | "\n", 652 | "Cluster 1\n", 653 | "amp 0.015000\n", 654 | "case 0.015846\n", 655 | "thanks 0.016000\n", 656 | "good 0.016308\n", 657 | "just 0.017462\n", 658 | "cases 0.017590\n", 659 | "virus 0.017692\n", 660 | "2020 0.018000\n", 661 | "chinese 0.018154\n", 662 | "coronavirusoutbreak 0.018308\n", 663 | "health 0.019077\n", 664 | "quarantine 0.020000\n", 665 | "spread 0.020615\n", 666 | "people 0.021538\n", 667 | "wuhan 0.022359\n", 668 | "latest 0.024462\n", 669 | "outbreak 0.024769\n", 670 | "china 0.058000\n", 671 | "coronavirus 0.152808\n", 672 | "https 0.272308\n", 673 | "dtype: float64\n" 674 | ] 675 | } 676 | ], 677 | "source": [ 678 | "centroids = cluster.named_steps['kmeans'].cluster_centers_\n", 679 | "for c in range(centroids.shape[0]):\n", 680 | " 
print(\"\\nCluster {}\".format(c))\n", 681 | " rank = pd.Series(centroids[c, :], index=terms).sort_values().tail(20)\n", 682 | " print(rank)" 683 | ] 684 | }, 685 | { 686 | "cell_type": "code", 687 | "execution_count": 158, 688 | "metadata": {}, 689 | "outputs": [ 690 | { 691 | "data": { 692 | "text/plain": [ 693 | "array([[0.0243835 , 2.2801271 , 3.1933573 , ..., 2.60482705, 1.23336436,\n", 694 | " 2.7774603 ],\n", 695 | " [0.0243835 , 2.2801271 , 3.1933573 , ..., 2.60482705, 1.23336436,\n", 696 | " 2.7774603 ],\n", 697 | " [0.0243835 , 2.2801271 , 3.1933573 , ..., 2.60482705, 1.23336436,\n", 698 | " 2.7774603 ],\n", 699 | " ...,\n", 700 | " [1.73034273, 2.86338604, 3.63284061, ..., 3.12811828, 2.12630845,\n", 701 | " 3.27326835],\n", 702 | " [0.0243835 , 2.2801271 , 3.1933573 , ..., 2.60482705, 1.23336436,\n", 703 | " 2.7774603 ],\n", 704 | " [0.0243835 , 2.2801271 , 3.1933573 , ..., 2.60482705, 1.23336436,\n", 705 | " 2.7774603 ]])" 706 | ] 707 | }, 708 | "execution_count": 158, 709 | "metadata": {}, 710 | "output_type": "execute_result" 711 | } 712 | ], 713 | "source": [ 714 | "cluster.transform(mx)" 715 | ] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": 159, 720 | "metadata": {}, 721 | "outputs": [], 722 | "source": [ 723 | "tweets['cluster'] = p" 724 | ] 725 | }, 726 | { 727 | "cell_type": "code", 728 | "execution_count": 160, 729 | "metadata": {}, 730 | "outputs": [ 731 | { 732 | "name": "stdout", 733 | "output_type": "stream", 734 | "text": [ 735 | "Cluster 0 = what the actual -\n", 736 | "\n", 737 | "Cluster 1 = What is coronavirus? The biggest questions about the outbreak, answered. SURVIVE SEE DETAILS AT ==>... https://t.co/21AGGnBj58\n", 738 | "\n", 739 | "Cluster 2 = @IsChinar OxyBreath Pro\n", 740 | "Highly Effective Anti-Pollution Clean Air Breathing Mask.\n", 741 | "Full details please click on a li... 
https://t.co/uWq9DZ9XI9\n", 742 | "\n", 743 | "Cluster 3 = A 37-year-old woman has become the 15th person in Australia diagnosed with coronavirus - the fifth in Queensland.... https://t.co/UtfPvHwjvR\n", 744 | "\n", 745 | "Cluster 4 = Vals Is Here Surprise That Special Someone Now\n", 746 | "\n", 747 | "You can reach Us On 0205414305or WhatsApp 0555171905 \n", 748 | "For The Bes... https://t.co/E9CSZvxFCT\n", 749 | "\n", 750 | "Cluster 5 = \n", 751 | "\n", 752 | "Can you help us by publishing this link to help raise Awareness of the suffering in #Syria\n", 753 | "\n", 754 | "#Syrie\n", 755 | "C'est dur a r... https://t.co/FnS9hjdXFo\n", 756 | "\n", 757 | "Cluster 6 = Did China's Tencent Accidentally Leak The True Terrifying #Coronavirus Statistics https://t.co/gyd8C00Pxg\n", 758 | "\n", 759 | "Cluster 7 = Coronavirus Latest Updates: Everything You Need to Know SEE DETAILS AT ==> https://t.co/9orX4j6BuU #virus... https://t.co/EZkVAQuI3G\n", 760 | "\n", 761 | "Cluster 8 = @ABSCBNNews @raphbosano Fully Automated Live #CoronaVirus Updates. \n", 762 | "* Live Statistics: Mortality rate, recovery rat... 
https://t.co/RXbHWuWM58\n", 763 | "\n" 764 | ] 765 | } 766 | ], 767 | "source": [ 768 | "for c in np.unique(p):\n", 769 | " print('Cluster {} = {}'.format(c, tweets[tweets['cluster'] == c]['txt'].iloc[0]))\n", 770 | " print()\n", 771 | " " 772 | ] 773 | }, 774 | { 775 | "cell_type": "code", 776 | "execution_count": null, 777 | "metadata": {}, 778 | "outputs": [], 779 | "source": [] 780 | } 781 | ], 782 | "metadata": { 783 | "kernelspec": { 784 | "display_name": "Python 3", 785 | "language": "python", 786 | "name": "python3" 787 | }, 788 | "language_info": { 789 | "codemirror_mode": { 790 | "name": "ipython", 791 | "version": 3 792 | }, 793 | "file_extension": ".py", 794 | "mimetype": "text/x-python", 795 | "name": "python", 796 | "nbconvert_exporter": "python", 797 | "pygments_lexer": "ipython3", 798 | "version": "3.7.3" 799 | } 800 | }, 801 | "nbformat": 4, 802 | "nbformat_minor": 4 803 | } 804 | -------------------------------------------------------------------------------- /live17_stacking/README: -------------------------------------------------------------------------------- 1 | Arquivos para a live 17 - Extraia o poder máximo dos seus modelos usando Stacking Ensembles 2 | https://youtu.be/TSoQGRhhHBE 3 | -------------------------------------------------------------------------------- /live17_stacking/nb1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Procurando um curso prático de Data Science que cabe no seu bolso?\n", 18 | "# CursoDeDataScience.com\n", 19 | "# Começa 20:05 - não pule!" 
20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 87, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "train = pd.read_csv(\"train.csv\")\n", 29 | "test = pd.read_csv(\"test.csv\")\n", 30 | "train['Sex_binario'] = train['Sex'].map({\"male\": 0, \"female\": 1})\n", 31 | "\n", 32 | "test['Sex_binario'] = test['Sex'].map({\"male\": 0, \"female\": 1})" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 88, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "data": { 42 | "text/html": [ 43 | "
\n", 44 | "\n", 57 | "\n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedSex_binario
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS0
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C1
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS1
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S1
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS0
\n", 159 | "
" 160 | ], 161 | "text/plain": [ 162 | " PassengerId Survived Pclass \\\n", 163 | "0 1 0 3 \n", 164 | "1 2 1 1 \n", 165 | "2 3 1 3 \n", 166 | "3 4 1 1 \n", 167 | "4 5 0 3 \n", 168 | "\n", 169 | " Name Sex Age SibSp \\\n", 170 | "0 Braund, Mr. Owen Harris male 22.0 1 \n", 171 | "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", 172 | "2 Heikkinen, Miss. Laina female 26.0 0 \n", 173 | "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", 174 | "4 Allen, Mr. William Henry male 35.0 0 \n", 175 | "\n", 176 | " Parch Ticket Fare Cabin Embarked Sex_binario \n", 177 | "0 0 A/5 21171 7.2500 NaN S 0 \n", 178 | "1 0 PC 17599 71.2833 C85 C 1 \n", 179 | "2 0 STON/O2. 3101282 7.9250 NaN S 1 \n", 180 | "3 0 113803 53.1000 C123 S 1 \n", 181 | "4 0 373450 8.0500 NaN S 0 " 182 | ] 183 | }, 184 | "execution_count": 88, 185 | "metadata": {}, 186 | "output_type": "execute_result" 187 | } 188 | ], 189 | "source": [ 190 | "train.head()" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 82, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "X = train.select_dtypes(include=np.number).drop([\"PassengerId\", 'Survived'], axis=1).fillna(0)\n", 200 | "y = train['Survived']" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 78, 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [ 209 | "from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier\n", 210 | "from sklearn.linear_model import LogisticRegression\n", 211 | "from sklearn.preprocessing import StandardScaler, MinMaxScaler\n", 212 | "from sklearn.model_selection import KFold\n", 213 | "from sklearn.metrics import log_loss, accuracy_score\n", 214 | "from sklearn.pipeline import make_pipeline" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 89, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "name": "stdout", 224 | "output_type": "stream", 225 | "text": [ 226 | "RF Accuracy: 
0.7869955156950673 - Log Loss: 0.8891877224352214\n", 227 | "ET Accuracy: 0.7802690582959642 - Log Loss: 1.7277767849421113\n", 228 | "LR StdScaler Accuracy: 0.7713004484304933 - Log Loss: 0.4548870233895985\n", 229 | "LR MinMax Accuracy: 0.773542600896861 - Log Loss: 0.45730271249069515\n", 230 | "\n", 231 | "RF Accuracy: 0.8292134831460675 - Log Loss: 0.6313360340802087\n", 232 | "ET Accuracy: 0.8089887640449438 - Log Loss: 2.533916920090966\n", 233 | "LR StdScaler Accuracy: 0.7955056179775281 - Log Loss: 0.4579224442106097\n", 234 | "LR MinMax Accuracy: 0.802247191011236 - Log Loss: 0.4610789905245168\n", 235 | "\n" 236 | ] 237 | } 238 | ], 239 | "source": [ 240 | "kf = KFold(n_splits=2, random_state=0, shuffle=True)\n", 241 | "\n", 242 | "\n", 243 | "second_level = np.zeros((X.shape[0], 4))\n", 244 | "\n", 245 | "for tr, ts in kf.split(X,y):\n", 246 | " Xtr, Xval = X.iloc[tr], X.iloc[ts]\n", 247 | " ytr, yval = y.iloc[tr], y.iloc[ts]\n", 248 | " \n", 249 | " rf = RandomForestClassifier(n_estimators=100, n_jobs=6, random_state=10)\n", 250 | " rf.fit(Xtr, ytr)\n", 251 | " prf = rf.predict_proba(Xval)[:,1]\n", 252 | " prf_ = (prf > 0.5).astype(int)\n", 253 | " \n", 254 | " print(\"RF Accuracy: {} - Log Loss: {}\".format(accuracy_score(yval, prf_), log_loss(yval, prf)))\n", 255 | " \n", 256 | " et = ExtraTreesClassifier(n_estimators=100, n_jobs=6, random_state=10)\n", 257 | " et.fit(Xtr, ytr)\n", 258 | " pet = et.predict_proba(Xval)[:,1]\n", 259 | " pet_ = (pet > 0.5).astype(int)\n", 260 | " \n", 261 | " print(\"ET Accuracy: {} - Log Loss: {}\".format(accuracy_score(yval, pet_), log_loss(yval, pet)))\n", 262 | " \n", 263 | " lr1 = make_pipeline(StandardScaler(), LogisticRegression())\n", 264 | " lr1.fit(Xtr, ytr)\n", 265 | " plr1 = lr1.predict_proba(Xval)[:,1]\n", 266 | " plr1_ = (plr1 > 0.5).astype(int)\n", 267 | " \n", 268 | " print(\"LR StdScaler Accuracy: {} - Log Loss: {}\".format(accuracy_score(yval, plr1_), log_loss(yval, plr1)))\n", 269 | " \n", 270 | " lr2 
= make_pipeline(MinMaxScaler(), LogisticRegression())\n", 271 | " lr2.fit(Xtr, ytr)\n", 272 | " plr2 = lr2.predict_proba(Xval)[:,1]\n", 273 | " plr2_ = (plr2 > 0.5).astype(int)\n", 274 | " \n", 275 | " print(\"LR MinMax Accuracy: {} - Log Loss: {}\".format(accuracy_score(yval, plr2_), log_loss(yval, plr2)))\n", 276 | " \n", 277 | " second_level[ts, 0] = prf\n", 278 | " second_level[ts, 1] = pet\n", 279 | " second_level[ts, 2] = plr1\n", 280 | " second_level[ts, 3] = plr2\n", 281 | " \n", 282 | " print()\n", 283 | " \n", 284 | "# fatores de diversidade" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "# second_level.mean(axis=1)" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": null, 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "folds = [1, 2, 3]\n", 303 | "\n", 304 | "primeiro nivel\n", 305 | "\n", 306 | "ciclo 1 = [1,2] [3]\n", 307 | "ciclo 2 = [1,3] [2]\n", 308 | "ciclo 3 = [2,3] [1]\n", 309 | "\n", 310 | "segundo nivel\n", 311 | "\n", 312 | "ciclo 1 = [1,2] [3]\n", 313 | "ciclo 2 = [1,3] [2]\n", 314 | "ciclo 3 = [2,3] [1]\n", 315 | "\n", 316 | "\n", 317 | "\n", 318 | "\n" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [ 327 | "# modelos - gbm + rede neural, knn \n", 328 | "# features\n", 329 | "# exemplos \n", 330 | "# hiperparametros " 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 97, 336 | "metadata": {}, 337 | "outputs": [ 338 | { 339 | "data": { 340 | "text/plain": [ 341 | "array([[0.15 , 0.19 , 0.0947167 , 0.11582552],\n", 342 | " [0.95 , 0.99 , 0.90010202, 0.88974478],\n", 343 | " [0.54 , 0.76 , 0.65737866, 0.63540321],\n", 344 | " ...,\n", 345 | " [0.74 , 0.76 , 0.61083421, 0.59159169],\n", 346 | " [0.84 , 0.92 , 0.45575654, 0.45086285],\n", 347 | " [0.35583333, 0.4 , 0.10479333, 
0.11872796]])" 348 | ] 349 | }, 350 | "execution_count": 97, 351 | "metadata": {}, 352 | "output_type": "execute_result" 353 | } 354 | ], 355 | "source": [ 356 | "second_level" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 95, 362 | "metadata": {}, 363 | "outputs": [ 364 | { 365 | "name": "stdout", 366 | "output_type": "stream", 367 | "text": [ 368 | "Stack Accuracy: 0.8004484304932735 Log loss: 0.4379152913562436\n", 369 | "\n", 370 | "Stack Accuracy: 0.8157303370786517 Log loss: 0.42892608158477763\n", 371 | "\n" 372 | ] 373 | } 374 | ], 375 | "source": [ 376 | "for tr, ts in kf.split(X,y):\n", 377 | " \n", 378 | " Xtr, Xval = second_level[tr], second_level[ts]\n", 379 | " ytr, yval = y.iloc[tr], y.iloc[ts]\n", 380 | " \n", 381 | " lr_stack = LogisticRegression(C=1.)\n", 382 | " lr_stack.fit(Xtr, ytr)\n", 383 | " plr_stack = lr_stack.predict_proba(Xval)[:,1]\n", 384 | " plr_stack_ = (plr_stack > 0.5).astype(int)\n", 385 | " \n", 386 | " print(\"Stack Accuracy: {} Log loss: {}\".format(accuracy_score(yval, plr_stack_), log_loss(yval, plr_stack)))\n", 387 | " print()" 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": 75, 393 | "metadata": {}, 394 | "outputs": [ 395 | { 396 | "data": { 397 | "text/html": [ 398 | "
\n", 399 | "\n", 412 | "\n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | "
0123
01.0000000.9353800.5956840.578839
10.9353801.0000000.4973130.479914
20.5956840.4973131.0000000.987037
30.5788390.4799140.9870371.000000
\n", 453 | "
" 454 | ], 455 | "text/plain": [ 456 | " 0 1 2 3\n", 457 | "0 1.000000 0.935380 0.595684 0.578839\n", 458 | "1 0.935380 1.000000 0.497313 0.479914\n", 459 | "2 0.595684 0.497313 1.000000 0.987037\n", 460 | "3 0.578839 0.479914 0.987037 1.000000" 461 | ] 462 | }, 463 | "execution_count": 75, 464 | "metadata": {}, 465 | "output_type": "execute_result" 466 | } 467 | ], 468 | "source": [ 469 | "pd.DataFrame(np.corrcoef(second_level.T))" 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": null, 475 | "metadata": {}, 476 | "outputs": [], 477 | "source": [] 478 | } 479 | ], 480 | "metadata": { 481 | "kernelspec": { 482 | "display_name": "Python 3", 483 | "language": "python", 484 | "name": "python3" 485 | }, 486 | "language_info": { 487 | "codemirror_mode": { 488 | "name": "ipython", 489 | "version": 3 490 | }, 491 | "file_extension": ".py", 492 | "mimetype": "text/x-python", 493 | "name": "python", 494 | "nbconvert_exporter": "python", 495 | "pygments_lexer": "ipython3", 496 | "version": "3.7.3" 497 | } 498 | }, 499 | "nbformat": 4, 500 | "nbformat_minor": 4 501 | } 502 | -------------------------------------------------------------------------------- /live18_rede_neural/README: -------------------------------------------------------------------------------- 1 | Materiais da Live 18: Como Criar Sua Primeira Rede Neural com Tensorflow 2 | https://youtu.be/s0s6Q1GLJGo 3 | -------------------------------------------------------------------------------- /live20_covid_einstein/README: -------------------------------------------------------------------------------- 1 | Material da live sobre dados disponibilizados pelo hospital Albert Einstein sobre pacientes com suspeita de COVID-19 2 | https://www.kaggle.com/einsteindata4u/covid19 3 | https://youtu.be/Wg1RcYNf4Lg 4 | -------------------------------------------------------------------------------- /lives_ml_na_industria/README: 
-------------------------------------------------------------------------------- 1 | Arquivos para as Lives 009 e 010 2 | -------------------------------------------------------------------------------- /lives_ml_na_industria/notes_on_dataset.txt: -------------------------------------------------------------------------------- 1 | Description of physical setup: 2 | The data comes from a continuous flow process. 3 | Sample rate is 1 Hz. 4 | In the first stage, Machines 1, 2, and 3 operate in parallel, and feed their outputs into a step that combines the flows. 5 | Output from the combiner is measured in 15 locations. These measurements are the primary measurements to predict. 6 | Next, the output flows into a second stage, where Machines 4 and 5 process in series. 7 | Measurements are made again in the same 15 locations. These are the secondary measurements to predict. 8 | 9 | Measurements are noisy. 10 | Each measurement also has a target or Setpoint (setpoints are included in the first row of data). 11 | The goal is to predict the measurements (or the error versus setpoints) for as many of the 15 measurements as possible. 12 | Some measurements will be more predictable than others! 13 | Prediction of measurements after the first stage is the primary interest. 14 | Prediction of measurements after the second stage is nice-to-have, but the data is much noisier. 15 | 16 | Note on variable naming conventions 17 | ~.C.Setpoint Setpoint for Controlled variable 18 | ~.C.Actual Actual value of Controlled variable 19 | ~.U.Actual Actual value of Uncontrolled variable 20 | Others Environmental or raw material variables, States / events, etc. 
21 | 22 | Start col End col Description 23 | 0 0 Time stamp 24 | 1 2 Factory ambient conditions 25 | 3 6 First stage, Machine 1, raw material properties (material going in to Machine 1) 26 | 7 14 First stage, Machine 1 process variables 27 | 15 18 First stage, Machine 2, raw material properties (material going in to Machine 2) 28 | 19 26 First stage, Machine 2 process variables 29 | 27 30 First stage, Machine 3, raw material properties (material going in to Machine 3) 30 | 31 38 First stage, Machine 3 process variables 31 | 39 41 Combiner stage process parameters. Here we combine the outputs from Machines 1, 2, and 3. 32 | 42 71 PRIMARY OUTPUT TO CONTROL: Measurements of 15 features (in mm), along with setpoint or target for each 33 | 72 78 Second stage, Machine 4 process variables 34 | 79 85 Second stage, Machine 5 process variables 35 | 86 115 SECONDARY OUTPUT TO CONTROL: Measurements of 15 features (in mm), along with setpoint or target for each 36 | 37 | 38 | 39 | 40 | --------------------------------------------------------------------------------