├── Bases_Datos ├── Cuadernos ├── Datos │ ├── Datos.md │ ├── paises_estandar.xls │ ├── Listados_DIVIPOLA.xlsx │ ├── DIVIPOLA_Municipios.xlsx │ ├── DIVIPOLA_Departamentos.xlsx │ ├── EN_PAIS.csv │ ├── EN_DEPARTAMENTO.csv │ └── paises.csv ├── Notebooks │ ├── Cuadernos.md │ ├── Instalar_kernel_MariaDB.ipynb │ ├── Bases_Datos_SQL-3.ipynb │ └── Comando magico.ipynb └── Imagenes │ ├── Imagenes.md │ ├── JOIN.png │ ├── SGBD.jpeg │ ├── SGBD.png │ ├── indices.png │ ├── resol_n_n.jpeg │ ├── abstraccion.jpeg │ ├── autor_libro.jpeg │ ├── enum_atrib.jpeg │ ├── No_es_entidad.jpeg │ ├── atributos_e_r.jpeg │ ├── atributos_e_r.png │ ├── boxplots_region.png │ ├── instancias_n_m.jpeg │ ├── logo-final-ap.png │ ├── muchos_muchos.jpeg │ ├── recursiva_e_r.jpeg │ ├── Relacion_uno_uno.jpeg │ ├── dependencia_e_r.jpeg │ ├── entidad_correcta.jpeg │ ├── rel_ciudad_depto.jpeg │ ├── relacion_binaria.jpeg │ ├── sakila-skema_e_r.png │ ├── trajectory_plots.png │ ├── Relación_recursiva.jpeg │ ├── Representacion_1_N.jpeg │ ├── convenio_notacion.jpeg │ ├── multiple_relacion.jpeg │ ├── planta_perosna_1_1.jpeg │ ├── uno_muchos_entidad.jpeg │ ├── Relacion_entre_tablas.jpeg │ ├── resol_muchos_muchos_2.jpeg │ ├── Ecuación_cuadrática.svg.png │ └── Grafica_relacion_binaria.jpeg ├── Syllabus ├── reame.md ├── Cuadernos │ ├── reame.md │ └── Syllabus_Big Data.ipynb └── Imagenes │ ├── Imagenes.md │ └── logo-final-ap.png ├── Spark ├── Imagenes │ ├── Imagenes.md │ ├── roc.png │ ├── spark-shell.png │ ├── spark_logo.png │ ├── ejemplo_arbol.png │ ├── ejemplo_arbol_2.png │ ├── ejemplo_arbol_3.png │ ├── ejemplo_arbol_4.png │ ├── matriz_confusion.png │ ├── pyspark-command-shell.png │ ├── spark-workers-alive.png │ └── funcion_perdida_regresion.png └── Cuadernos │ └── Cuadernos.md ├── Python ├── Cuadernos │ ├── Cuadernos.md │ └── Decorators.ipynb ├── Imagenes │ ├── Imagenes.md │ └── threads_400.png └── Elementos_Python ├── Almacenamiento ├── reame.md ├── Imagenes │ ├── Imagenes.md │ ├── chunckshdf5.jpeg │ ├── tuberia_filtros.jpeg │ └── dataset_almacenamiento.jpeg └── Cuadernos │ └── Cuadernos.md ├── Dask ├── images │ ├── Imagenes.md │ ├── hdd.jpg │ ├── ui.png │ ├── array.png │ ├── mydask.png │ ├── tasks.png │ ├── fail-case.gif │ ├── Coiled_Logo.jpg │ ├── pandas_logo.png │ ├── embarrassing.gif │ ├── generic-dask.png │ ├── ml-dimensions.png │ ├── distributed_session.png │ ├── architecture-1536x947.png │ ├── grid_search_schedule.gif │ ├── dask_horizontal.svg │ ├── merged_grid_search_graph.svg │ ├── dask-dataframe.svg │ └── unmerged_grid_search_graph.svg ├── Datos.md └── Cuadernos │ ├── Cuadernos.md │ ├── sources.py │ ├── accounts.py │ ├── conf.py │ ├── 00_overview.ipynb │ ├── prep.py │ ├── 10_coiled_quickstart.ipynb │ └── 01_dask_Mejores_Practicas.ipynb └── README.md /Bases_Datos/Cuadernos: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Syllabus/reame.md: -------------------------------------------------------------------------------- 1 | Course syllabus 2 | -------------------------------------------------------------------------------- /Bases_Datos/Datos/Datos.md: -------------------------------------------------------------------------------- 1 | Sample data 2 | -------------------------------------------------------------------------------- /Spark/Imagenes/Imagenes.md: -------------------------------------------------------------------------------- 1 | Spark images 2 | 
-------------------------------------------------------------------------------- /Python/Cuadernos/Cuadernos.md: -------------------------------------------------------------------------------- 1 | Python notebooks 2 | -------------------------------------------------------------------------------- /Python/Imagenes/Imagenes.md: -------------------------------------------------------------------------------- 1 | Python images 2 | -------------------------------------------------------------------------------- /Spark/Cuadernos/Cuadernos.md: -------------------------------------------------------------------------------- 1 | Spark notebooks 2 | -------------------------------------------------------------------------------- /Almacenamiento/reame.md: -------------------------------------------------------------------------------- 1 | Storage notebooks 2 | -------------------------------------------------------------------------------- /Dask/images/Imagenes.md: -------------------------------------------------------------------------------- 1 | Images for the Dask tutorial 2 | -------------------------------------------------------------------------------- /Syllabus/Cuadernos/reame.md: -------------------------------------------------------------------------------- 1 | Syllabus contents 2 | -------------------------------------------------------------------------------- /Syllabus/Imagenes/Imagenes.md: -------------------------------------------------------------------------------- 1 | Syllabus images 2 | -------------------------------------------------------------------------------- /Python/Elementos_Python: -------------------------------------------------------------------------------- 1 | Essential Python lessons 2 | -------------------------------------------------------------------------------- /Almacenamiento/Imagenes/Imagenes.md: -------------------------------------------------------------------------------- 1 | Storage images 2 | -------------------------------------------------------------------------------- /Bases_Datos/Notebooks/Cuadernos.md: -------------------------------------------------------------------------------- 1 | Database notebooks 2 | -------------------------------------------------------------------------------- /Almacenamiento/Cuadernos/Cuadernos.md: -------------------------------------------------------------------------------- 1 | Storage notebooks 2 | -------------------------------------------------------------------------------- /Dask/Datos.md: -------------------------------------------------------------------------------- 1 | Notebooks on big data parallelization using the Dask engine 2 | -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/Imagenes.md: -------------------------------------------------------------------------------- 1 | Images associated with the database notebooks 2 | -------------------------------------------------------------------------------- /Dask/images/hdd.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Dask/images/hdd.jpg -------------------------------------------------------------------------------- /Dask/images/ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Dask/images/ui.png 
-------------------------------------------------------------------------------- /Dask/images/array.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Dask/images/array.png -------------------------------------------------------------------------------- /Dask/images/mydask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Dask/images/mydask.png -------------------------------------------------------------------------------- /Dask/images/tasks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Dask/images/tasks.png -------------------------------------------------------------------------------- /Spark/Imagenes/roc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Spark/Imagenes/roc.png -------------------------------------------------------------------------------- /Dask/images/fail-case.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Dask/images/fail-case.gif -------------------------------------------------------------------------------- /Dask/images/Coiled_Logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Dask/images/Coiled_Logo.jpg -------------------------------------------------------------------------------- /Dask/images/pandas_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Dask/images/pandas_logo.png -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/JOIN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/JOIN.png -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/SGBD.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/SGBD.jpeg -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/SGBD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/SGBD.png -------------------------------------------------------------------------------- /Dask/Cuadernos/Cuadernos.md: -------------------------------------------------------------------------------- 1 | These notebooks are translated from the tutorial for the Dask engine for parallelization and Big Data 2 | -------------------------------------------------------------------------------- /Dask/images/embarrassing.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Dask/images/embarrassing.gif -------------------------------------------------------------------------------- /Dask/images/generic-dask.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Dask/images/generic-dask.png -------------------------------------------------------------------------------- /Dask/images/ml-dimensions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Dask/images/ml-dimensions.png -------------------------------------------------------------------------------- /Spark/Imagenes/spark-shell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Spark/Imagenes/spark-shell.png -------------------------------------------------------------------------------- /Spark/Imagenes/spark_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Spark/Imagenes/spark_logo.png -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/indices.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/indices.png -------------------------------------------------------------------------------- /Python/Imagenes/threads_400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Python/Imagenes/threads_400.png -------------------------------------------------------------------------------- /Spark/Imagenes/ejemplo_arbol.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Spark/Imagenes/ejemplo_arbol.png -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/resol_n_n.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/resol_n_n.jpeg -------------------------------------------------------------------------------- /Dask/images/distributed_session.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Dask/images/distributed_session.png -------------------------------------------------------------------------------- /Spark/Imagenes/ejemplo_arbol_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Spark/Imagenes/ejemplo_arbol_2.png -------------------------------------------------------------------------------- /Spark/Imagenes/ejemplo_arbol_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Spark/Imagenes/ejemplo_arbol_3.png -------------------------------------------------------------------------------- /Spark/Imagenes/ejemplo_arbol_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Spark/Imagenes/ejemplo_arbol_4.png -------------------------------------------------------------------------------- 
/Spark/Imagenes/matriz_confusion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Spark/Imagenes/matriz_confusion.png -------------------------------------------------------------------------------- /Syllabus/Imagenes/logo-final-ap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Syllabus/Imagenes/logo-final-ap.png -------------------------------------------------------------------------------- /Bases_Datos/Datos/paises_estandar.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Datos/paises_estandar.xls -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/abstraccion.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/abstraccion.jpeg -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/autor_libro.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/autor_libro.jpeg -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/enum_atrib.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/enum_atrib.jpeg -------------------------------------------------------------------------------- /Dask/images/architecture-1536x947.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Dask/images/architecture-1536x947.png -------------------------------------------------------------------------------- /Dask/images/grid_search_schedule.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Dask/images/grid_search_schedule.gif -------------------------------------------------------------------------------- /Almacenamiento/Imagenes/chunckshdf5.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Almacenamiento/Imagenes/chunckshdf5.jpeg -------------------------------------------------------------------------------- /Bases_Datos/Datos/Listados_DIVIPOLA.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Datos/Listados_DIVIPOLA.xlsx -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/No_es_entidad.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/No_es_entidad.jpeg -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/atributos_e_r.jpeg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/atributos_e_r.jpeg -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/atributos_e_r.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/atributos_e_r.png -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/boxplots_region.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/boxplots_region.png -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/instancias_n_m.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/instancias_n_m.jpeg -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/logo-final-ap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/logo-final-ap.png -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/muchos_muchos.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/muchos_muchos.jpeg -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/recursiva_e_r.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/recursiva_e_r.jpeg -------------------------------------------------------------------------------- /Spark/Imagenes/pyspark-command-shell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Spark/Imagenes/pyspark-command-shell.png -------------------------------------------------------------------------------- /Spark/Imagenes/spark-workers-alive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Spark/Imagenes/spark-workers-alive.png -------------------------------------------------------------------------------- /Bases_Datos/Datos/DIVIPOLA_Municipios.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Datos/DIVIPOLA_Municipios.xlsx -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/Relacion_uno_uno.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/Relacion_uno_uno.jpeg -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/dependencia_e_r.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/dependencia_e_r.jpeg 
-------------------------------------------------------------------------------- /Bases_Datos/Imagenes/entidad_correcta.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/entidad_correcta.jpeg -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/rel_ciudad_depto.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/rel_ciudad_depto.jpeg -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/relacion_binaria.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/relacion_binaria.jpeg -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/sakila-skema_e_r.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/sakila-skema_e_r.png -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/trajectory_plots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/trajectory_plots.png -------------------------------------------------------------------------------- /Almacenamiento/Imagenes/tuberia_filtros.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Almacenamiento/Imagenes/tuberia_filtros.jpeg -------------------------------------------------------------------------------- /Bases_Datos/Datos/DIVIPOLA_Departamentos.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Datos/DIVIPOLA_Departamentos.xlsx -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/Relación_recursiva.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/Relación_recursiva.jpeg -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/Representacion_1_N.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/Representacion_1_N.jpeg -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/convenio_notacion.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/convenio_notacion.jpeg -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/multiple_relacion.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/multiple_relacion.jpeg 
-------------------------------------------------------------------------------- /Bases_Datos/Imagenes/planta_perosna_1_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/planta_perosna_1_1.jpeg -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/uno_muchos_entidad.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/uno_muchos_entidad.jpeg -------------------------------------------------------------------------------- /Spark/Imagenes/funcion_perdida_regresion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Spark/Imagenes/funcion_perdida_regresion.png -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/Relacion_entre_tablas.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/Relacion_entre_tablas.jpeg -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/resol_muchos_muchos_2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/resol_muchos_muchos_2.jpeg -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/Ecuación_cuadrática.svg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/Ecuación_cuadrática.svg.png -------------------------------------------------------------------------------- /Bases_Datos/Imagenes/Grafica_relacion_binaria.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Bases_Datos/Imagenes/Grafica_relacion_binaria.jpeg -------------------------------------------------------------------------------- /Almacenamiento/Imagenes/dataset_almacenamiento.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/BigData/main/Almacenamiento/Imagenes/dataset_almacenamiento.jpeg -------------------------------------------------------------------------------- /Dask/Cuadernos/sources.py: -------------------------------------------------------------------------------- 1 | flights_url = "https://storage.googleapis.com/dask-tutorial-data/nycflights.tar.gz" 2 | lazy_url = "http://www.google.com" 3 | bag_url = "s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv" 4 | -------------------------------------------------------------------------------- /Dask/Cuadernos/accounts.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank', 'George', 5 | 'Hannah', 'Ingrid', 'Jerry', 'Kevin', 'Laura', 'Michael', 'Norbert', 'Oliver', 6 | 'Patricia', 'Quinn', 'Ray', 'Sarah', 'Tim', 'Ursula', 'Victor', 'Wendy', 7 | 'Xavier', 'Yvonne', 'Zelda'] 8 | 9 | k = 100 10 | 11 | 12 | def 
account_params(k): 13 | ids = np.arange(k, dtype=int) 14 | names2 = np.random.choice(names, size=k, replace=True) 15 | wealth_mag = np.random.exponential(100, size=k) 16 | wealth_trend = np.random.normal(10, 10, size=k) 17 | freq = np.random.exponential(size=k) 18 | freq /= freq.sum() 19 | 20 | return ids, names2, wealth_mag, wealth_trend, freq 21 | 22 | def account_entries(n, ids, names, wealth_mag, wealth_trend, freq): 23 | indices = np.random.choice(ids, size=n, replace=True, p=freq) 24 | amounts = ((np.random.normal(size=n) + wealth_trend[indices]) 25 | * wealth_mag[indices]) 26 | 27 | return pd.DataFrame({'id': indices, 28 | 'names': names[indices], 29 | 'amount': amounts.astype('i4')}, 30 | columns=['id', 'names', 'amount']) 31 | 32 | 33 | def accounts(n, k): 34 | ids, names, wealth_mag, wealth_trend, freq = account_params(k) 35 | df = account_entries(n, ids, names, wealth_mag, wealth_trend, freq) 36 | return df 37 | 38 | 39 | def json_entries(n, *args): 40 | df = account_entries(n, *args) 41 | g = df.groupby(df.id).groups 42 | 43 | data = [] 44 | for k in g: 45 | sub = df.iloc[g[k]] 46 | d = dict(id=int(k), name=sub['names'].iloc[0], 47 | transactions=[{'transaction-id': int(i), 'amount': int(a)} 48 | for i, a in list(zip(sub.index, sub.amount))]) 49 | data.append(d) 50 | 51 | return data 52 | 53 | def accounts_json(n, k): 54 | args = account_params(k) 55 | return json_entries(n, *args) 56 | -------------------------------------------------------------------------------- /Dask/images/dask_horizontal.svg: -------------------------------------------------------------------------------- 1 | dask -------------------------------------------------------------------------------- /Bases_Datos/Datos/EN_PAIS.csv: -------------------------------------------------------------------------------- 1 | SGL_PAIS;TXT_NME_PAIS 2 | AFG;Afganistán 3 | AFS;Sudáfrica 4 | AHL;Antillas Holandesas 5 | ALB;Albania 6 | ANB;Antigua y Barbuda 7 | AND;Andorra 8 | ANG;Angola 9 | ARA;Arabia Saudita 10 | ARG;Argentina 11 | ARL;Argelia 12 | ARM;Armenia 13 | ATC;Antártica 14 | AUS;Australia 15 | AUT;Austria 16 | AZE;Azerbaiyán 17 | BAR;Bahrein 18 | BEA;Bielorrusia 19 | BEL;Bélgica 20 | BEN;Benín 21 | BER;Bermuda 22 | BGD;Bangladesh 23 | BHS;Bahamas 24 | BIR;Birmania(Myanmar) 25 | BKF;Burkina Faso 26 | BLZ;Belice 27 | BOL;Bolivia 28 | BOS;Bosnia Herzegovina 29 | BOT;Botzwana 30 | BRA;Brasil 31 | BRB;Barbados 32 | BRN;Brunei 33 | BUL;Bulgaria 34 | BUR;Burundi 35 | BUT;Bután 36 | CAM;Camerún 37 | CAN;Canadá 38 | CAT;Qatar 39 | CAZ;Kazajstán 40 | CBJ;Camboya 41 | CBV;Cabo Verde 42 | CHA;Chad 43 | CHL;Chile 44 | CHN;China 45 | CHP;Chipre 46 | CIN;Singapur 47 | CMF;Costa de Marfil 48 | COL;Colombia 49 | COM;Comores 50 | CON;Congo 51 | CRC;Costa Rica 52 | CRN;Corea del Norte 53 | CRO;Croacia 54 | CRS;Corea del Sur 55 | CUB;Cuba 56 | DIN;Dinamarca 57 | DOM;República Dominicana 58 | DON;Dominica 59 | EAU;Emiratos Arabes 60 | EGI;Egipto 61 | ELS;El Salvador 62 | EQU;Ecuador 63 | ESC;Escocia 64 | ESH;Sahara Occidental 65 | ESP;España 66 | EST;Estonia 67 | ETP;Etiopía 68 | EUA;Estados Unidos 69 | FIL;Filipinas 70 | FIN;Finlandia 71 | FJI;Fiji 72 | FRA;Francia 73 | GAB;Gabón 74 | GAL;Gales 75 | GAN;Ghana 76 | GBR;Gran Bretaña 77 | GEO;Georgia 78 | GFR;Guyana Francesa 79 | GNE;Guinea 80 | GRD;Grenada 81 | GRE;Grecia 82 | GUA;Guatemala 83 | GUI;Guyana 84 | HKG;Hong Kong 85 | HOL;Holanda 86 | HON;Honduras 87 | HTI;Haití 88 | HUN;Hungría 89 | IDN;Indonesia 90 | IND;India 91 | ING;Inglaterra 92 | IOT;Britsh Indian Ocean 93 | IRA;Irán 
94 | IRL;Irlanda 95 | IRN;Irlanda del Norte 96 | IRQ;Irak 97 | ISL;Islandia 98 | ISR;Israel 99 | ITA;Italia 100 | IUG;Yugoslavia 101 | JAM;Jamaica 102 | JAP;Japón 103 | JOR;Jordania 104 | KWT;Kuwait 105 | LBN;Líbano 106 | LET;Letonia (Latvia) 107 | LIB;Libia 108 | LIT;Lituania 109 | LUX;Luxemburgo 110 | MAC;Macao 111 | MAL;Malasia 112 | MAR;Marruecos 113 | MAU;Mauricio 114 | MBQ;Mozambique 115 | MEX;México 116 | MGL;Mongolia 117 | MLI;Malí 118 | MLT;Malta 119 | MLV;Malawi 120 | MMR;Myanmar 121 | MOL;Moldavia 122 | MON;Mónaco 123 | N/A;No Aplica 124 | NAM;Namibia 125 | NCL;Nueva Caledonia 126 | NGA;Nigeria 127 | NIC;Nicaragua 128 | NIG;Níger 129 | NOR;Noruega 130 | NPL;Nepal 131 | NRU;Nauru 132 | NZL;Nueva Zelanda 133 | PAN;Panamá 134 | PAQ;Pakistán 135 | PCN;Pitcairn 136 | PER;Perú 137 | POL;Polonia 138 | POR;Portugal 139 | PRG;Paraguay 140 | PTR;Puerto Rico 141 | QUE;Kenia 142 | REU;Reunión 143 | RFA;Alemania 144 | ROM;Rumania 145 | RSS;Rusia 146 | RUA;Ruanda 147 | SEN;Senegal 148 | SMR;San Marino 149 | SRI;Sri Lanka 150 | SUA;Suazilandia 151 | SUE;Suecia 152 | SUI;Suiza 153 | SUR;Surinam 154 | SVK;Eslovaquia 155 | SVN;Eslovenia 156 | SYC;Seychelles 157 | TAI;Tailandia 158 | TAN;Tanzania 159 | TCA;Islas Turks y Caicos 160 | TCH;República Checa 161 | TGO;Togo 162 | TRT;Trinidad Y Tobago 163 | TUN;Túnez 164 | TUR;Turquía 165 | TWD;Taiwan 166 | UCR;Ucrania 167 | UGA;Uganda 168 | URU;Uruguay 169 | UZB;Uzbekistán 170 | VAT;Vaticano 171 | VEN;Venezuela 172 | VTN;Vietnam 173 | ZAN;Zambia 174 | ZAR;Zaire 175 | ZIN;Zimbabue 176 | -------------------------------------------------------------------------------- /Dask/Cuadernos/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | # import os 16 | # import sys 17 | # sys.path.insert(0, os.path.abspath('.')) 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = 'Dask Tutorial' 23 | copyright = '2018, Dask Developers' 24 | author = 'Dask Developers' 25 | 26 | # The short X.Y version 27 | version = '' 28 | # The full version, including alpha/beta/rc tags 29 | release = '' 30 | 31 | 32 | # -- General configuration --------------------------------------------------- 33 | 34 | # If your documentation needs a minimal Sphinx version, state it here. 35 | # 36 | # needs_sphinx = '1.0' 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = [ 42 | "sphinx.ext.mathjax", 43 | 'nbsphinx', 44 | ] 45 | 46 | nbsphinx_timeout = 600 47 | # nbsphinx_execute = "always" 48 | 49 | 50 | nbsphinx_prolog = """ 51 | {% set docname = env.doc2path(env.docname, base=None) %} 52 | 53 | You can run this notebook in a `live session `_ |Binder| or view it `on Github `_. 55 | 56 | .. 
|Binder| image:: https://mybinder.org/badge.svg 57 | :target: https://mybinder.org/v2/gh/dask/dask-tutorial/main?urlpath=lab/tree/{{ docname }} 58 | """ 59 | 60 | 61 | # Add any paths that contain templates here, relative to this directory. 62 | templates_path = ['_templates'] 63 | 64 | # The suffix(es) of source filenames. 65 | # You can specify multiple suffix as a list of string: 66 | # 67 | # source_suffix = ['.rst', '.md'] 68 | source_suffix = '.rst' 69 | 70 | # The master toctree document. 71 | master_doc = 'index' 72 | 73 | # The language for content autogenerated by Sphinx. Refer to documentation 74 | # for a list of supported languages. 75 | # 76 | # This is also used if you do content translation via gettext catalogs. 77 | # Usually you set "language" from the command line for these cases. 78 | language = None 79 | 80 | # List of patterns, relative to source directory, that match files and 81 | # directories to ignore when looking for source files. 82 | # This pattern also affects html_static_path and html_extra_path . 83 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints'] 84 | 85 | # The name of the Pygments (syntax highlighting) style to use. 86 | pygments_style = 'sphinx' 87 | 88 | 89 | # -- Options for HTML output ------------------------------------------------- 90 | 91 | # The theme to use for HTML and HTML Help pages. See the documentation for 92 | # a list of builtin themes. 93 | # 94 | html_theme = 'dask_sphinx_theme' 95 | 96 | # Theme options are theme-specific and customize the look and feel of a theme 97 | # further. For a list of options available for each theme, see the 98 | # documentation. 99 | # 100 | # html_theme_options = {} 101 | 102 | # Add any paths that contain custom static files (such as style sheets) here, 103 | # relative to this directory. They are copied after the builtin static files, 104 | # so a file named "default.css" will overwrite the builtin "default.css". 105 | html_static_path = ['_static'] 106 | 107 | # Custom sidebar templates, must be a dictionary that maps document names 108 | # to template names. 109 | # 110 | # The default sidebars (for documents that don't match any pattern) are 111 | # defined by theme itself. Builtin themes are using these templates by 112 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 113 | # 'searchbox.html']``. 114 | # 115 | # html_sidebars = {} 116 | 117 | 118 | # -- Options for HTMLHelp output --------------------------------------------- 119 | 120 | # Output file base name for HTML help builder. 121 | htmlhelp_basename = 'DaskTutorialdoc' 122 | 123 | 124 | # -- Options for LaTeX output ------------------------------------------------ 125 | 126 | latex_elements = { 127 | # The paper size ('letterpaper' or 'a4paper'). 128 | # 129 | # 'papersize': 'letterpaper', 130 | 131 | # The font size ('10pt', '11pt' or '12pt'). 132 | # 133 | # 'pointsize': '10pt', 134 | 135 | # Additional stuff for the LaTeX preamble. 136 | # 137 | # 'preamble': '', 138 | 139 | # Latex figure (float) alignment 140 | # 141 | # 'figure_align': 'htbp', 142 | } 143 | 144 | # Grouping the document tree into LaTeX files. List of tuples 145 | # (source start file, target name, title, 146 | # author, documentclass [howto, manual, or own class]). 
147 | latex_documents = [ 148 | (master_doc, 'DaskTutorial.tex', 'Dask Tutorial Documentation', 149 | 'Dask Developers', 'manual'), 150 | ] 151 | 152 | 153 | # -- Options for manual page output ------------------------------------------ 154 | 155 | # One entry per manual page. List of tuples 156 | # (source start file, name, description, authors, manual section). 157 | man_pages = [ 158 | (master_doc, 'dasktutorial', 'Dask Tutorial Documentation', 159 | [author], 1) 160 | ] 161 | 162 | 163 | # -- Options for Texinfo output ---------------------------------------------- 164 | 165 | # Grouping the document tree into Texinfo files. List of tuples 166 | # (source start file, target name, title, author, 167 | # dir menu entry, description, category) 168 | texinfo_documents = [ 169 | (master_doc, 'DaskTutorial', 'Dask Tutorial Documentation', 170 | author, 'DaskTutorial', 'One line description of project.', 171 | 'Miscellaneous'), 172 | ] 173 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Big Data 2 | This repository contains the lessons for the Big Data course 3 | - Syllabus 4 | - [Syllabus, information systems](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Syllabus/Cuadernos/Syllabus_Big%20Data.ipynb) 5 | - Python Programming 6 | - [Python, first steps](https://nbviewer.jupyter.org/github/AprendizajeProfundo/Diplomado/blob/master/Temas/Módulo%202-%20Introducción%20a%20la%20programación/3.%20Programación%20en%20Python/Cuadernos/Intro_Python.ipynb) 7 | - [Functions in Python](https://nbviewer.jupyter.org/github/AprendizajeProfundo/Diplomado/blob/master/Temas/M%C3%B3dulo%202-%20Introducci%C3%B3n%20a%20la%20programaci%C3%B3n/3.%20Programaci%C3%B3n%20en%20Python/Cuadernos/Funciones.ipynb) 8 | - [Object Collections in Python](https://nbviewer.jupyter.org/github/AprendizajeProfundo/Diplomado/blob/master/Temas/M%C3%B3dulo%202-%20Introducci%C3%B3n%20a%20la%20programaci%C3%B3n/3.%20Programaci%C3%B3n%20en%20Python/Cuadernos/Colecciones.ipynb) 9 | - [Modules in Python](https://nbviewer.jupyter.org/github/AprendizajeProfundo/Diplomado/blob/master/Temas/M%C3%B3dulo%202-%20Introducci%C3%B3n%20a%20la%20programaci%C3%B3n/3.%20Programaci%C3%B3n%20en%20Python/Cuadernos/Paquetes.ipynb) 10 | - [The Class Concept in Python](https://nbviewer.jupyter.org/github/AprendizajeProfundo/Diplomado/blob/master/Temas/Módulo%202-%20Introducción%20a%20la%20programación/3.%20Programación%20en%20Python/Cuadernos/Intro_Clases_Python.ipynb) 11 | - [Decorators in Python](https://nbviewer.jupyter.org/github/AprendizajeProfundo/Diplomado/blob/master/Temas/M%C3%B3dulo%202-%20Introducci%C3%B3n%20a%20la%20programaci%C3%B3n/3.%20Programaci%C3%B3n%20en%20Python/Cuadernos/Decorators.ipynb) 12 | - Introduction to Pandas [Github](https://github.com/AprendizajeProfundo/Diplomado/blob/master/Temas/Módulo%202-%20Introducción%20a%20la%20programación/3.%20Programación%20en%20Python/Cuadernos/pandas_Intro.ipynb) 13 | - [Data handling with Pandas](https://nbviewer.jupyter.org/github/AprendizajeProfundo/Diplomado/blob/master/Temas/M%C3%B3dulo%2013-%20Talleres/Cuadernos/Taller_Pandas.ipynb) 14 | - [Introduction to tensors with Numpy](https://nbviewer.jupyter.org/github/AprendizajeProfundo/Diplomado/blob/master/Temas/Módulo%201-%20Matemáticas%20y%20Estadística/1.%20Matemáticas/Cuadernos/Intro_Tensores_I.ipynb) 15 | - [Multiprocessing and multithreading](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Python/Cuadernos/05_A_Mulitprocesos_Python_am.ipynb) 16 | 17 | - Relational Databases 18 | - [Introduction to databases](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Bases_Datos/Notebooks/Bases_Datos_Rel_Intro.ipynb) 19 | - [Entity-relationship model](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Bases_Datos/Notebooks/Bases_Datos_Rel_Modelo_E_R.ipynb) 20 | - [Database implementation](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Bases_Datos/Notebooks/Bases_Datos_Rel_Implementacion.ipynb) 21 | - [Relational algebra](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Bases_Datos/Notebooks/Bases_Datos_Rel_Algebra_Relacional.ipynb) 22 | - [SQL I](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Bases_Datos/Notebooks/Bases_Datos_SQL.ipynb) 23 | - [SQL II](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Bases_Datos/Notebooks/Bases_Datos_SQL-2.ipynb) 24 | - [SQL III - Sakila example](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Bases_Datos/Notebooks/Bases_Datos_SQL-3.ipynb) 25 | - SQLAlchemy for querying databases from Python-Pandas [Github](https://github.com/AprendizajeProfundo/BigData/blob/main/Bases_Datos/Notebooks/SQLAlquimia.ipynb) 26 | - The Dask engine for Big Data in Python 27 | - [Introduction to parallel processing with Dask](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/01_dask_delayed_am.ipynb) 28 | - [Delayed (lazy) execution](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/01x_lazy_am.ipynb) 29 | - [Best practices with delayed execution](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/01_dask_Mejores_Practicas.ipynb) 30 | - [Bag collections](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/02_bag_am.ipynb) 31 | - [Arrays](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/03_array_am.ipynb) 32 | - [Dataframes](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/04_dataframe_am.ipynb) 33 | - [Distributed execution](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/05_distributed_am.ipynb) 34 | - [Advanced distributed execution](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/06_distributed_advanced_am.ipynb) 35 | - [Running on a cloud cluster - Coiled](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/10_coiled_quickstart.ipynb) 36 | - [Efficient storage of dask-dataframes](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/07_dataframe_storage_am.ipynb) 37 | - [Parallel and distributed machine learning](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/08_machine_learning_am.ipynb) 38 | - [HDF5 storage](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Almacenamiento/Cuadernos/hdf5.ipynb) 39 | - Spark 40 | - [Installing Spark: Ubuntu, Jupyterlab](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Spark/Cuadernos/Spark_install.ipynb) 41 | - [Introduction to Spark](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Spark/Cuadernos/Spark_Introduccion.ipynb) 42 | - [Introduction to Machine Learning with Spark](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Spark/Cuadernos/Spark_Machine_Learning.ipynb) 43 | - [Introduction to decision and regression trees with Spark](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Spark/Cuadernos/Spark_Machine_Learning-tree.ipynb) 44 | -------------------------------------------------------------------------------- /Bases_Datos/Notebooks/Instalar_kernel_MariaDB.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "orange-local", 6 | "metadata": {}, 7 | "source": [ 8 | "## Installing the MariaDB kernel for Jupyter Lab (version 2.2.6)" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "adjustable-galaxy", 14 | "metadata": {}, 15 | "source": [ 16 | "Follow the instructions found at [MariaDB Jupyter Kernel Installation](https://mariadb.com/kb/en/mariadb-jupyter-kernel-installation/)" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "id": "popular-river", 22 | "metadata": {}, 23 | "source": [ 24 | "python3 -m pip install mariadb_kernel" 25 | ] 26 | }, 27 | { 28 | "cell_type": "raw", 29 | "id": "monthly-ireland", 30 | "metadata": {}, 31 | "source": [ 32 | "(bigdata_c) alvaro@Lenovo:~$ python3 -m pip install mariadb_kernel\n", 33 | "(bigdata_c) alvaro@Lenovo:~$ python3 -m mariadb_kernel.install" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "id": "endless-savannah", 39 | "metadata": {}, 40 | "source": [ 41 | "# Configuring MariaDB for the MariaDB kernel" 42 | ] 43 | }, 44 | { 45 | "cell_type": "raw", 46 | "id": "minor-undergraduate", 47 | "metadata": {}, 48 | "source": [ 49 | "Now go to the folder \n", 50 | "\n", 51 | "~/.jupyter \n", 52 | "\n", 53 | "and create the file mariadb_config.json. For example, if you have the gedit editor installed, type\n", 54 | "\n", 55 | "gedit mariadb_config.json\n", 56 | "\n", 57 | "Copy the following code, which defines a connection string that Jupyter Lab will use to connect to MariaDB. Put your root password in place of \"securepassword\" if you are going to connect as that user" 58 | ] 59 | }, 60 | { 61 | "cell_type": "raw", 62 | "id": "amateur-somalia", 63 | "metadata": {}, 64 | "source": [ 65 | "{\n", 66 | " \"user\": \"root\",\n", 67 | " \"host\": \"localhost\",\n", 68 | " \"port\": \"3306\",\n", 69 | " \"password\": \"securepassword\",\n", 70 | " \"start_server\": \"True\",\n", 71 | " \"client_bin\": \"/usr/bin/mariadb\",\n", 72 | " \"server_bin\": \"/usr/bin/mariadbd\"\n", 73 | "}\n" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "id": "determined-applicant", 79 | "metadata": {}, 80 | "source": [ 81 | "If you need to change the root password, follow the instructions at [Cambiar password en mariadb](https://blog.carreralinux.com.ar/2017/03/cambiar-la-clave-de-root-mysql-mariadb/)\n", 82 | "\n", 83 | "If you lost the root password, log in as the Linux superuser as shown below and follow the instructions in the link."
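,
 "\n",
 "As a quick optional check (a minimal sketch, assuming the sqlalchemy and pymysql packages are installed), you can open the same connection from Python, reusing the credentials defined in mariadb_config.json:\n",
 "\n",
 "```python\n",
 "# Minimal credential check; 'securepassword' is the placeholder from the config above\n",
 "from sqlalchemy import create_engine, text\n",
 "\n",
 "engine = create_engine('mysql+pymysql://root:securepassword@localhost:3306/mysql')\n",
 "with engine.connect() as conn:\n",
 "    for row in conn.execute(text('SHOW DATABASES')):\n",
 "        print(row)\n",
 "```"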
84 | ] 85 | }, 86 | { 87 | "cell_type": "raw", 88 | "id": "minor-kazakhstan", 89 | "metadata": {}, 90 | "source": [ 91 | "(bigdata_c) alvaro@Lenovo:~$ sudo mariadb" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "id": "social-timer", 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "# Test" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "id": "optical-dragon", 107 | "metadata": {}, 108 | "source": [ 109 | "Select the MariaDB kernel above, or launch a new kernel. As soon as you are in, type " 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 1, 115 | "id": "twelve-australia", 116 | "metadata": { 117 | "jupyter": { 118 | "source_hidden": true 119 | } 120 | }, 121 | "outputs": [], 122 | "source": [] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 1, 127 | "id": "established-backing", 128 | "metadata": { 129 | "jupyter": { 130 | "source_hidden": true 131 | } 132 | }, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/html": [ 137 | "
<table><tr><th>Database</th></tr><tr><td>birdwatchers</td></tr><tr><td>information_schema</td></tr><tr><td>mysql</td></tr><tr><td>performance_schema</td></tr><tr><td>rookery</td></tr><tr><td>test</td></tr></table>
" 138 | ] 139 | }, 140 | "metadata": {}, 141 | "output_type": "display_data" 142 | } 143 | ], 144 | "source": [ 145 | "show databases;" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "id": "cooperative-peeing", 151 | "metadata": {}, 152 | "source": [ 153 | "etc." 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 2, 159 | "id": "instant-decision", 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "data": { 164 | "text/html": [] 165 | }, 166 | "metadata": {}, 167 | "output_type": "display_data" 168 | } 169 | ], 170 | "source": [ 171 | "use test;" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 3, 177 | "id": "destroyed-fisher", 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/html": [ 183 | "
<table><tr><th>Tables_in_test</th></tr><tr><td>books</td></tr><tr><td>status_names</td></tr></table>
" 184 | ] 185 | }, 186 | "metadata": {}, 187 | "output_type": "display_data" 188 | } 189 | ], 190 | "source": [ 191 | "show tables;" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 4, 197 | "id": "painful-scotland", 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/html": [ 203 | "
<table><tr><th>book_id</th><th>title</th><th>status</th></tr><tr><td>100</td><td>The Catche in the Rye</td><td>1</td></tr><tr><td>200</td><td>The Catcher of the Rye</td><td>1</td></tr><tr><td>300</td><td>My Antonia</td><td>0</td></tr></table>
" 204 | ] 205 | }, 206 | "metadata": {}, 207 | "output_type": "display_data" 208 | } 209 | ], 210 | "source": [ 211 | "select * from books;" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "id": "rotary-victorian", 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [] 221 | } 222 | ], 223 | "metadata": { 224 | "kernelspec": { 225 | "display_name": "MariaDB", 226 | "language": "SQL", 227 | "name": "mariadb_kernel" 228 | }, 229 | "language_info": { 230 | "file_extension": ".sql", 231 | "mimetype": "text/plain", 232 | "name": "SQL" 233 | } 234 | }, 235 | "nbformat": 4, 236 | "nbformat_minor": 5 237 | } 238 | -------------------------------------------------------------------------------- /Dask/Cuadernos/00_overview.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# Introduction\n", 15 | "\n", 16 | "Welcome to the Dask Tutorial.\n", 17 | "\n", 18 | "Dask is a parallel computing library that scales the existing Python ecosystem. This tutorial will introduce Dask and parallel data analysis more generally.\n", 19 | "\n", 20 | "Dask can scale down to your laptop and up to a cluster. Here, we'll use an environment you setup on your laptop to analyze medium sized datasets in parallel locally." 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "## Overview" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "Dask provides multi-core and distributed parallel execution on larger-than-memory datasets.\n", 35 | "\n", 36 | "We can think of Dask at a high and a low level\n", 37 | "\n", 38 | "* **High level collections:** Dask provides high-level Array, Bag, and DataFrame\n", 39 | " collections that mimic NumPy, lists, and Pandas but can operate in parallel on\n", 40 | " datasets that don't fit into memory. Dask's high-level collections are\n", 41 | " alternatives to NumPy and Pandas for large datasets.\n", 42 | "* **Low Level schedulers:** Dask provides dynamic task schedulers that\n", 43 | " execute task graphs in parallel. These execution engines power the\n", 44 | " high-level collections mentioned above but can also power custom,\n", 45 | " user-defined workloads. These schedulers are low-latency (around 1ms) and\n", 46 | " work hard to run computations in a small memory footprint. Dask's\n", 47 | " schedulers are an alternative to direct use of `threading` or\n", 48 | " `multiprocessing` libraries in complex cases or other task scheduling\n", 49 | " systems like `Luigi` or `IPython parallel`.\n", 50 | "\n", 51 | "Different users operate at different levels but it is useful to understand\n", 52 | "both.\n", 53 | "\n", 54 | "The Dask [use cases](https://stories.dask.org/en/latest/) provides a number of sample workflows where Dask should be a good fit." 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## Prepare" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "You should clone this repository: \n", 69 | "\n", 70 | " git clone http://github.com/dask/dask-tutorial\n", 71 | "\n", 72 | "The included file `environment.yml` in the `binder` subdirectory contains a list of all of the packages needed to run this tutorial. 
To install them using `conda`, you can do\n", 73 | "\n", 74 | " conda env create -f binder/environment.yml\n", 75 | " conda activate dask-tutorial\n", 76 | " jupyter labextension install @jupyter-widgets/jupyterlab-manager\n", 77 | " jupyter labextension install @bokeh/jupyter_bokeh\n", 78 | " \n", 79 | "Do this *before* running this notebook." 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "## Links" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "* Reference\n", 94 | " * [Docs](https://dask.org/)\n", 95 | " * [Examples](https://examples.dask.org/)\n", 96 | " * [Code](https://github.com/dask/dask/)\n", 97 | " * [Blog](https://blog.dask.org/)\n", 98 | "* Ask for help\n", 99 | " * [`dask`](http://stackoverflow.com/questions/tagged/dask) tag on Stack Overflow, for usage questions\n", 100 | " * [github issues](https://github.com/dask/dask/issues/new) for bug reports and feature requests\n", 101 | " * [gitter chat](https://gitter.im/dask/dask) for general, non-bug, discussion\n", 102 | " * Attend a live tutorial" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "## Tutorial Structure\n", 110 | "\n", 111 | "Each section is a Jupyter notebook. There's a mixture of text, code, and exercises.\n", 112 | "\n", 113 | "If you haven't used Jupyterlab, it's similar to the Jupyter Notebook. If you haven't used the Notebook, the quick intro is\n", 114 | "\n", 115 | "1. There are two modes: command and edit\n", 116 | "2. From command mode, press `Enter` to edit a cell (like this markdown cell)\n", 117 | "3. From edit mode, press `Esc` to change to command mode\n", 118 | "4. Press `shift+enter` to execute a cell and move to the next cell.\n", 119 | "\n", 120 | "The toolbar has commands for executing, converting, and creating cells.\n", 121 | "\n", 122 | "The layout of the tutorial will be as follows:\n", 123 | "- Foundations: an explanation of what Dask is, how it works, and how to use lower-level primitives to set up computations. Casual users may wish to skip this section, although we consider it useful knowledge for all users.\n", 124 | "- Distributed: information on running Dask on the distributed scheduler, which enables scale-up to distributed settings and enhanced monitoring of task operations. The distributed scheduler is now generally the recommended engine for executing task work, even on single workstations or laptops.\n", 125 | "- Collections: convenient abstractions giving a familiar feel to big data\n", 126 | " - bag: Python iterators with a functional paradigm, such as found in func/iter-tools and toolz - generalize lists/generators to big data; this will seem very familiar to users of PySpark's [RDD](http://spark.apache.org/docs/2.1.0/api/python/pyspark.html#pyspark.RDD)\n", 127 | " - array: massive multi-dimensional numerical data, with Numpy functionality\n", 128 | " - dataframes: massive tabular data, with Pandas functionality\n", 129 | " \n", 130 | "Whereas there is a wealth of information in the documentation, linked above, here we aim to give practical advice to aid your understanding and application of Dask in everyday situations. This means that you should not expect every feature of Dask to be covered, but the examples hopefully are similar to the kinds of work-flows that you have in mind.\n", 131 | "\n", 132 | "## Exercise: Print `Hello, world!`\n", 133 | "Each notebook will have exercises for you to solve. 
139 |   {
140 |    "cell_type": "code",
141 |    "execution_count": null,
142 |    "metadata": {},
143 |    "outputs": [],
144 |    "source": [
145 |     "# Your code here"
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "markdown",
150 |    "metadata": {},
151 |    "source": [
152 |     "The next cell has the solution. Click the ellipses to expand the solution, and always make sure to run the solution cell,\n",
153 |     "in case later sections of the notebook depend on the output from the solution."
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "code",
158 |    "execution_count": null,
159 |    "metadata": {},
160 |    "outputs": [],
161 |    "source": [
162 |     "print(\"Hello, world!\")"
163 |    ]
164 |   }
165 |  ],
166 |  "metadata": {
167 |   "anaconda-cloud": {},
168 |   "kernelspec": {
169 |    "display_name": "Python 3",
170 |    "language": "python",
171 |    "name": "python3"
172 |   },
173 |   "language_info": {
174 |    "codemirror_mode": {
175 |     "name": "ipython",
176 |     "version": 3
177 |    },
178 |    "file_extension": ".py",
179 |    "mimetype": "text/x-python",
180 |    "name": "python",
181 |    "nbconvert_exporter": "python",
182 |    "pygments_lexer": "ipython3",
183 |    "version": "3.8.8"
184 |   }
185 |  },
186 |  "nbformat": 4,
187 |  "nbformat_minor": 4
188 | }
189 | 
-------------------------------------------------------------------------------- /Dask/Cuadernos/prep.py: --------------------------------------------------------------------------------
1 | import time
2 | import sys
3 | import argparse
4 | import os
5 | from glob import glob
6 | import json
7 | import gzip
8 | import tarfile
9 | import urllib.request
10 | 
11 | import h5py
12 | import numpy as np
13 | import pandas as pd
14 | from skimage.transform import resize
15 | 
16 | from accounts import account_entries, account_params, json_entries
17 | 
18 | import sources
19 | 
20 | DATASETS = ["random", "weather", "accounts", "flights", "all"]
21 | here = os.path.dirname(__file__)
22 | data_dir = os.path.abspath(os.path.join(here, '../data'))
23 | 
24 | 
25 | def parse_args(args=None):
26 |     parser = argparse.ArgumentParser(description='Downloads, generates and prepares data for the Dask tutorial.')
27 |     parser.add_argument('--no-ssl-verify', dest='no_ssl_verify', action='store_true',
28 |                         default=False, help='Disables SSL verification.')
29 |     parser.add_argument("--small", action="store_true", default=None,
30 |                         help="Whether to use smaller example datasets. Checks DASK_TUTORIAL_SMALL environment variable if not specified.")
31 |     parser.add_argument("-d", "--dataset", choices=DATASETS, help="Datasets to generate.", default="all")
32 | 
33 |     return parser.parse_args(args)
34 | 
35 | 
36 | 
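# Usage sketch (hypothetical invocations; the flags come from parse_args above):
#   python prep.py                      # generate all datasets
#   python prep.py --dataset flights    # only the NYC flights data
#   python prep.py --small              # smaller variants (same effect as DASK_TUTORIAL_SMALL=1)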
", end='', flush=True) 58 | url = sources.flights_url 59 | urllib.request.urlretrieve(url, flights_raw) 60 | print("done", flush=True) 61 | 62 | if not os.path.exists(flightdir): 63 | print("- Extracting flight data... ", end='', flush=True) 64 | tar_path = os.path.join(data_dir, 'nycflights.tar.gz') 65 | with tarfile.open(tar_path, mode='r:gz') as flights: 66 | flights.extractall('data/') 67 | 68 | if small: 69 | for path in glob(os.path.join(data_dir, "nycflights", "*.csv")): 70 | with open(path, 'r') as f: 71 | lines = f.readlines()[:1000] 72 | 73 | with open(path, 'w') as f: 74 | f.writelines(lines) 75 | 76 | print("done", flush=True) 77 | 78 | if not os.path.exists(jsondir): 79 | print("- Creating json data... ", end='', flush=True) 80 | os.mkdir(jsondir) 81 | for path in glob(os.path.join(data_dir, 'nycflights', '*.csv')): 82 | prefix = os.path.splitext(os.path.basename(path))[0] 83 | df = pd.read_csv(path, nrows=N) 84 | df.to_json(os.path.join(data_dir, 'flightjson', prefix + '.json'), 85 | orient='records', lines=True) 86 | print("done", flush=True) 87 | else: 88 | return 89 | 90 | end = time.time() 91 | print("** Created flights dataset! in {:0.2f}s**".format(end - start)) 92 | 93 | def random_array(small=None): 94 | if small is None: 95 | small = bool(os.environ.get("DASK_TUTORIAL_SMALL", False)) 96 | 97 | if small: 98 | blocksize = 5000 99 | else: 100 | blocksize = 1000000 101 | 102 | nblocks = 1000 103 | shape = nblocks * blocksize 104 | 105 | t0 = time.time() 106 | if os.path.exists(os.path.join(data_dir, 'random.hdf5')): 107 | return 108 | 109 | with h5py.File(os.path.join(data_dir, 'random.hdf5'), mode='w') as f: 110 | dset = f.create_dataset('/x', shape=(shape,), dtype='f4') 111 | for i in range(0, shape, blocksize): 112 | dset[i: i + blocksize] = np.random.exponential(size=blocksize) 113 | 114 | t1 = time.time() 115 | print("Created random data for array exercise in {:0.2f}s".format(t1 - t0)) 116 | 117 | 118 | def accounts_csvs(small=None): 119 | t0 = time.time() 120 | if small is None: 121 | small = bool(os.environ.get("DASK_TUTORIAL_SMALL", False)) 122 | 123 | if small: 124 | num_files, n, k = 3, 10000, 100 125 | else: 126 | num_files, n, k = 3, 1000000, 500 127 | 128 | fn = os.path.join(data_dir, 'accounts.%d.csv' % (num_files - 1)) 129 | 130 | if os.path.exists(fn): 131 | return 132 | 133 | args = account_params(k) 134 | 135 | for i in range(num_files): 136 | df = account_entries(n, *args) 137 | df.to_csv(os.path.join(data_dir, 'accounts.%d.csv' % i), 138 | index=False) 139 | 140 | t1 = time.time() 141 | print("Created CSV acccouts in {:0.2f}s".format(t1 - t0)) 142 | 143 | 144 | def accounts_json(small=None): 145 | t0 = time.time() 146 | if small is None: 147 | small = bool(os.environ.get("DASK_TUTORIAL_SMALL", False)) 148 | 149 | if small: 150 | num_files, n, k = 50, 10000, 250 151 | else: 152 | num_files, n, k = 50, 100000, 500 153 | fn = os.path.join(data_dir, 'accounts.%02d.json.gz' % (num_files - 1)) 154 | if os.path.exists(fn): 155 | return 156 | 157 | args = account_params(k) 158 | 159 | for i in range(num_files): 160 | seq = json_entries(n, *args) 161 | fn = os.path.join(data_dir, 'accounts.%02d.json.gz' % i) 162 | with gzip.open(fn, 'wb') as f: 163 | f.write(os.linesep.join(map(json.dumps, seq)).encode()) 164 | 165 | t1 = time.time() 166 | print("Created JSON acccouts in {:0.2f}s".format(t1 - t0)) 167 | 168 | 169 | def create_weather(small=None): 170 | t0 = time.time() 171 | if small is None: 172 | small = bool(os.environ.get("DASK_TUTORIAL_SMALL", False)) 173 
169 | def create_weather(small=None):
170 |     t0 = time.time()
171 |     if small is None:
172 |         small = bool(os.environ.get("DASK_TUTORIAL_SMALL", False))
173 | 
174 |     if small:
175 |         growth = 1
176 |     else:
177 |         growth = 32
178 |     filenames = sorted(glob(os.path.join(data_dir, 'weather-small', '*.hdf5')))
179 | 
180 |     if not filenames:
181 |         ws_dir = os.path.join(data_dir, 'weather-small')
182 |         raise ValueError('Did not find any hdf5 files in {}'.format(ws_dir))
183 | 
184 |     if not os.path.exists(os.path.join(data_dir, 'weather-big')):
185 |         os.mkdir(os.path.join(data_dir, 'weather-big'))
186 | 
187 |     if all(os.path.exists(fn.replace('small', 'big')) for fn in filenames):
188 |         return
189 | 
190 |     for fn in filenames:
191 |         with h5py.File(fn, mode='r') as f:
192 |             x = f['/t2m'][:]
193 | 
194 |         if small:
195 |             y = x
196 |             chunks = (180, 180)
197 |         else:
198 |             y = resize(x, (x.shape[0] * growth, x.shape[1] * growth), mode='constant')
199 |             chunks = (500, 500)
200 | 
201 |         out_fn = os.path.join(data_dir, 'weather-big', os.path.split(fn)[-1])
202 | 
203 |         with h5py.File(out_fn, mode='w') as f:
204 |             f.create_dataset('/t2m', data=y, chunks=chunks)
205 |     t1 = time.time()
206 |     print("Created weather dataset in {:0.2f}s".format(t1 - t0))
207 | 
208 | 
209 | def main(args=None):
210 |     args = parse_args(args)
211 | 
212 |     if args.no_ssl_verify:
213 |         print("- Disabling SSL Verification... ", end='', flush=True)
214 |         import ssl
215 |         ssl._create_default_https_context = ssl._create_unverified_context
216 |         print("done", flush=True)
217 | 
218 |     if args.dataset == "random" or args.dataset == "all":
219 |         random_array(args.small)
220 |     if args.dataset == "weather" or args.dataset == "all":
221 |         create_weather(args.small)
222 |     if args.dataset == "accounts" or args.dataset == "all":
223 |         accounts_csvs(args.small)
224 |         accounts_json(args.small)
225 |     if args.dataset == "flights" or args.dataset == "all":
226 |         flights(args.small)
227 | 
228 | 
229 | if __name__ == '__main__':
230 |     sys.exit(main())
231 | 
-------------------------------------------------------------------------------- /Dask/images/merged_grid_search_graph.svg: --------------------------------------------------------------------------------
[SVG figure, text labels only: the merged grid-search task graph — Training Data → CountVectorizer (ngram_range=(1, 1)) → TfidfTransformer (norm='l1' | norm='l2') → SGDClassifier (alpha=1e-3 | 1e-4 | 1e-5) → Choose Best Parameters.]
-------------------------------------------------------------------------------- /Dask/images/dask-dataframe.svg: --------------------------------------------------------------------------------
[SVG figure, text labels only: a Dask DataFrame as a stack of Pandas DataFrames partitioned along the index — January, 2016 through May, 2016; each partition is a Pandas DataFrame, the whole stack is a Dask DataFrame.]
-------------------------------------------------------------------------------- /Bases_Datos/Datos/EN_DEPARTAMENTO.csv: --------------------------------------------------------------------------------
1 | SGL_PAIS;SGL_DEPARTAMENTO;SGL_REGION;TXT_NME_DEPARTAMENTO;TXT_NME_DEPARTAMENTO_FILTRO 2 | AFG;1;;Badahsan;BADAHSAN 3 | AFG;10;;Gazni;GAZNI 4 | AFG;11;;Hawst;HAWST 5 | AFG;13;;Herat;HERAT 6 | AFG;18;;Kunar;KUNAR 7 | AFG;8;;Faryab;FARYAB 8 | AFG;N/A;;No Aplica;NO APLICA 9 | AFS;N/A;;No Aplica;NO APLICA 10 | AHL;N/A;;No Aplica;NO APLICA 11 | ALB;3;;Devoll;DEVOLL 12 | ARA;N/A;;No Aplica;NO APLICA 13 | ARG;N/A;;No Aplica;NO APLICA 14 | ARL;N/A;;No Aplica;NO APLICA 15 | ARM;N/A;;No Aplica;NO APLICA 16 | AUS;N/A;;No Aplica;NO APLICA 17 | AUT;N/A;;No Aplica;NO APLICA 18 | AZE;N/A;;No Aplica;NO APLICA 19 | BEA;N/A;;No Aplica;NO APLICA 20 | BEL;N/A;;No Aplica;NO APLICA 21 | BGD;N/A;;No Aplica;NO APLICA 22 | BOL;N/A;;No Aplica;NO APLICA 23 | BOS;N/A;;No Aplica;NO APLICA 24 | BRA;AL;;Alagoas;ALAGOAS 25 | BRA;AM;;Amazonas;AMAZONAS 26 | BRA;BA;;Bahia;BAHIA 27 | BRA;CE;;Ceará;CEARA 28 | BRA;DF;;Distrito Federal;DISTRITO FEDERAL 29 | BRA;ES;;Espírito Santo;ESPIRITO SANTO 30 | BRA;GO;;Goiás;GOIAS 31 | BRA;MA;;Maranhão;MARANHAO 32 | BRA;MG;;Minas Gerais;MINAS GERAIS 33 | BRA;MS;;Mato Grosso do Sul;MATO GROSSO DO SUL 34 | BRA;MT;;Mato Grosso;MATO GROSSO 35 | BRA;N/A;;No Aplica;NO APLICA 36 | BRA;PA;;Pará;PARA 37 | BRA;PB;;Paraíba;PARAIBA 38 | BRA;PE;;Pernambuco;PERNAMBUCO 39 | BRA;PI;;Piauí;PIAUI 40 | BRA;PR;;Paraná;PARANA 41 | BRA;RJ;;Rio de Janeiro;RIO DE JANEIRO 42 | BRA;RN;;Rio Grande do Norte;RIO GRANDE DO NORTE 43 | BRA;RO;;Rondônia;RONDONIA 44 | BRA;RR;;Roraima;RORAIMA 45 | BRA;RS;;Rio Grande do Sul;RIO GRANDE DO SUL 46 | BRA;SC;;Santa Catarina;SANTA CATARINA 47 | BRA;SE;;Sergipe;SERGIPE 48 | BRA;SP;;São Paulo;SAO PAULO 49 | BRA;TO;;Tocantins;TOCANTINS 50 | BRN;N/A;;No Aplica;NO APLICA 51 | BUL;N/A;;No Aplica;NO APLICA 52 | BUR;N/A;;No Aplica;NO APLICA 53 | CAM;N/A;;No Aplica;NO APLICA 54 | CAN;N/A;;No Aplica;NO APLICA 55 | CAZ;N/A;;No Aplica;NO APLICA 56 | CHA;N/A;;No Aplica;NO APLICA 57 | CHL;1;;Tarapacá;TARAPACA 58 | CHL;10;;Los lagos;LOS LAGOS 59 | CHL;11;;Aysén;AYSEN 60 | CHL;12;;Magallanes y Ant.Chilena;MAGALLANES Y ANTCHILENA 61 | CHL;13;;Metropolitana de Santiago;METROPOLITANA DE SANTIAGO 62 | CHL;2;;Antofagasta ;ANTOFAGASTA 63 | CHL;3;;Atacama;ATACAMA 64 | CHL;4;;Coquimbo ;COQUIMBO 65 | CHL;5;;Valparaíso;VALPARAISO 66 | CHL;6;;Libertador B. 
O Higgins;LIBERTADOR B OHIGGINS 67 | CHL;7;;Maule;MAULE 68 | CHL;8;;Biobio;BIOBIO 69 | CHL;9;;Araucanía;ARAUCANIA 70 | CHL;N/A;;No Aplica;NO APLICA 71 | CHN;N/A;;No Aplica;NO APLICA 72 | CHP;N/A;;No Aplica;NO APLICA 73 | CIN;N/A;;No Aplica;NO APLICA 74 | COL;AM;AM;Amazonas;AMAZONAS 75 | COL;AN;NW;Antioquia;ANTIOQUIA 76 | COL;AR;OR;Arauca;ARAUCA 77 | COL;AT;CA;Atlantico;ATLANTICO 78 | COL;BL;CA;Bolivar;BOLIVAR 79 | COL;BY;CE;Boyaca;BOYACA 80 | COL;CA;PA;Cauca;CAUCA 81 | COL;CE;CA;Cesar;CESAR 82 | COL;CH;PA;Choco;CHOCO 83 | COL;CL;NW;Caldas;CALDAS 84 | COL;CO;CA;Cordoba;CORDOBA 85 | COL;CQ;AM;Caqueta;CAQUETA 86 | COL;CS;OR;Casanare;CASANARE 87 | COL;CU;CE;Cundinamarca;CUNDINAMARCA 88 | COL;DC;DC;Distrito Capital;DISTRITO CAPITAL 89 | COL;GA;AM;Guaviare;GUAVIARE 90 | COL;GJ;CA;La Guajira;LA GUAJIRA 91 | COL;GN;AM;Guainia;GUAINIA 92 | COL;HU;CE;Huila;HUILA 93 | COL;MA;CA;Magdalena;MAGDALENA 94 | COL;ME;OR;Meta;META 95 | COL;N/A;;No Aplica;NO APLICA 96 | COL;NA;PA;Nariño;NARINO 97 | COL;NO;CE;Norte Santander;NORTE SANTANDER 98 | COL;PU;AM;Putumayo;PUTUMAYO 99 | COL;QU;NW;Quindio;QUINDIO 100 | COL;RI;NW;Risaralda;RISARALDA 101 | COL;SN;CE;Santander;SANTANDER 102 | COL;SP;CA;San Andres;SAN ANDRES 103 | COL;SU;CA;Sucre;SUCRE 104 | COL;TO;CE;Tolima;TOLIMA 105 | COL;VA;AM;Vaupes;VAUPES 106 | COL;VI;OR;Vichada;VICHADA 107 | COL;VL;PA;Valle;VALLE 108 | CON;N/A;;No Aplica;NO APLICA 109 | CRC;N/A;;No Aplica;NO APLICA 110 | CRO;N/A;;No Aplica;NO APLICA 111 | CRS;N/A;;No Aplica;NO APLICA 112 | CUB;CA;;Ciego de Avila;CIEGO DE AVILA 113 | CUB;CG;;Cienfuegos;CIENFUEGOS 114 | CUB;CH;;Ciudad de la Habana;CIUDAD DE LA HABANA 115 | CUB;CM;;Camaguey;CAMAGUEY 116 | CUB;GM;;Granma;GRANMA 117 | CUB;GT;;Guantánamo;GUANTANAMO 118 | CUB;H;;La Habana;LA HABANA 119 | CUB;HG;;Holguín;HOLGUIN 120 | CUB;LT;;Las Tunas;LAS TUNAS 121 | CUB;MZ;;Matanzas;MATANZAS 122 | CUB;N/A;;No Aplica;NO APLICA 123 | CUB;PR;;Pinar del Río;PINAR DEL RIO 124 | CUB;SC;;Santiago de Cuba;SANTIAGO DE CUBA 125 | CUB;SS;;Sancti Spiritus;SANCTI SPIRITUS 126 | CUB;VC;;Villa Clara;VILLA CLARA 127 | DIN;N/A;;No Aplica;NO APLICA 128 | DOM;N/A;;No Aplica;NO APLICA 129 | EAU;N/A;;No Aplica;NO APLICA 130 | EGI;N/A;;No Aplica;NO APLICA 131 | ELS;N/A;;No Aplica;NO APLICA 132 | EQU;N/A;;No Aplica;NO APLICA 133 | ESC;N/A;;No Aplica;NO APLICA 134 | ESP;N/A;;No Aplica;NO APLICA 135 | EST;N/A;;No Aplica;NO APLICA 136 | ETP;N/A;;No Aplica;NO APLICA 137 | EUA;N/A;;No Aplica;NO APLICA 138 | FIL;N/A;;No Aplica;NO APLICA 139 | FIN;N/A;;No Aplica;NO APLICA 140 | FRA;N/A;;No Aplica;NO APLICA 141 | GAB;N/A;;No Aplica;NO APLICA 142 | GAL;N/A;;No Aplica;NO APLICA 143 | GBR;N/A;;No Aplica;NO APLICA 144 | GEO;N/A;;No Aplica;NO APLICA 145 | GFR;N/A;;No Aplica;NO APLICA 146 | GRE;N/A;;No Aplica;NO APLICA 147 | GUA;N/A;;No Aplica;NO APLICA 148 | GUI;N/A;;No Aplica;NO APLICA 149 | HKG;N/A;;No Aplica;NO APLICA 150 | HOL;N/A;;No Aplica;NO APLICA 151 | HON;N/A;;No Aplica;NO APLICA 152 | HTI;N/A;;No Aplica;NO APLICA 153 | HUN;N/A;;No Aplica;NO APLICA 154 | IDN;N/A;;No Aplica;NO APLICA 155 | IND;N/A;;No Aplica;NO APLICA 156 | ING;N/A;;No Aplica;NO APLICA 157 | IRA;N/A;;No Aplica;NO APLICA 158 | IRL;N/A;;No Aplica;NO APLICA 159 | IRN;N/A;;No Aplica;NO APLICA 160 | IRQ;N/A;;No Aplica;NO APLICA 161 | ISL;N/A;;No Aplica;NO APLICA 162 | ISR;N/A;;No Aplica;NO APLICA 163 | ITA;N/A;;No Aplica;NO APLICA 164 | IUG;N/A;;No Aplica;NO APLICA 165 | JAM;N/A;;No Aplica;NO APLICA 166 | JAP;N/A;;No Aplica;NO APLICA 167 | KWT;N/A;;No Aplica;NO APLICA 168 | LBN;N/A;;No Aplica;NO APLICA 169 | LET;N/A;;No 
Aplica;NO APLICA 170 | LIT;N/A;;No Aplica;NO APLICA 171 | LUX;N/A;;No Aplica;NO APLICA 172 | MAC;N/A;;No Aplica;NO APLICA 173 | MAL;N/A;;No Aplica;NO APLICA 174 | MAR;N/A;;No Aplica;NO APLICA 175 | MAU;N/A;;No Aplica;NO APLICA 176 | MBQ;N/A;;No Aplica;NO APLICA 177 | MEX;AG;;Aguascalientes;AGUASCALIENTES 178 | MEX;AO;;Oaxaca;OAXACA 179 | MEX;BC;;Baja California;BAJA CALIFORNIA 180 | MEX;Bs;;Baja California Sur;BAJA CALIFORNIA SUR 181 | MEX;CH;;Chihuahua;CHIHUAHUA 182 | MEX;CL;;Colima;COLIMA 183 | MEX;CO;;Coahuila;COAHUILA 184 | MEX;CS;;Chiapas;CHIAPAS 185 | MEX;DF;;Ciudad de México;CIUDAD DE MEXICO 186 | MEX;DG;;Durango;DURANGO 187 | MEX;ED;;Estado de México;ESTADO DE MEXICO 188 | MEX;GR;;Guerrero;GUERRERO 189 | MEX;GT;;Guanajuato;GUANAJUATO 190 | MEX;HG;;Hidalgo;HIDALGO 191 | MEX;JA;;Jalisco;JALISCO 192 | MEX;MI;;Michoacán;MICHOACAN 193 | MEX;MO;;Morelos;MORELOS 194 | MEX;N/A;;No Aplica;NO APLICA 195 | MEX;NA;;Nayarit;NAYARIT 196 | MEX;NL;;Nuevo León;NUEVO LEON 197 | MEX;PU;;Puebla;PUEBLA 198 | MEX;QO;;Quintana Roo;QUINTANA ROO 199 | MEX;QR;;Querétaro;QUERETARO 200 | MEX;SI;;Sinaloa;SINALOA 201 | MEX;SL;;San Luis Potosí;SAN LUIS POTOSI 202 | MEX;SO;;Sonora;SONORA 203 | MEX;TA;;Tamaulipas;TAMAULIPAS 204 | MEX;TB;;Tabasco;TABASCO 205 | MEX;TL;;Tlaxcala;TLAXCALA 206 | MEX;VE;CE;Veracruz;VERACRUZ 207 | MEX;YU;;Yucatán;YUCATAN 208 | MEX;ZA;;Zacatecas;ZACATECAS 209 | MOL;N/A;;No Aplica;NO APLICA 210 | N/A;N/A;;No Aplica;NO APLICA 211 | NAM;N/A;;No Aplica;NO APLICA 212 | NGA;N/A;;No Aplica;NO APLICA 213 | NIC;N/A;;No Aplica;NO APLICA 214 | NOR;N/A;;No Aplica;NO APLICA 215 | NPL;N/A;;No Aplica;NO APLICA 216 | NZL;N/A;;No Aplica;NO APLICA 217 | PAN;3;;Colón;COLON 218 | PAN;4;;Chiriquí;CHIRIQUI 219 | PAN;6;;Herrera;HERRERA 220 | PAN;8;;Panamá;PANAMA 221 | PAN;9;;Veraguas;VERAGUAS 222 | PAN;N/A;;No Aplica;NO APLICA 223 | PAQ;N/A;;No Aplica;NO APLICA 224 | PER;N/A;;No Aplica;NO APLICA 225 | POL;N/A;;No Aplica;NO APLICA 226 | POR;N/A;;No Aplica;NO APLICA 227 | PRG;N/A;;No Aplica;NO APLICA 228 | PTR;N/A;;No Aplica;NO APLICA 229 | QUE;N/A;;No Aplica;NO APLICA 230 | RFA;1;BA;Aichach-Friedberg;AICHACH FRIEDBERG 231 | RFA;10;BA;Bad Tölz-Wolfratshausen;BAD TOLZ WOLFRATSHAUSEN 232 | RFA;11;BA;Baja Algovia;BAJA ALGOVIA 233 | RFA;14;BA;Berchtesgadener Land;BERCHTESGADENER LAND 234 | RFA;18;BA;Danubio-Ries;DANUBIO RIES 235 | RFA;19;BA;Deggendorf;DEGGENDORF 236 | RFA;2;BA;Algovia Oriental;ALGOVIA ORIENTAL 237 | RFA;25;BA;Garmisch-Partenkirchen;GARMISCH PARTENKIRCHEN 238 | RFA;27;BA;Kelheim;KELHEIM 239 | RFA;29;BA;Main-Spessart;MAIN SPESSART 240 | RFA;32;BA;Nuevo Ulm;NUEVO ULM 241 | RFA;36;BA;Rosenheim;ROSENHEIM 242 | RFA;39;BA;Starnberg;STARNBERG 243 | RFA;4;BA;Altötting;ALTOTTING 244 | RFA;41;BA;Weißenburg-Gunzenhausen;WEIBENBURG GUNZENHAUSEN 245 | RFA;43;BE;Charlottenburg-Wilmersdor;CHARLOTTENBURG WILMERSDOR 246 | RFA;49;BE;Treptow-Köpenick;TREPTOW KOPENICK 247 | RFA;50;BR;Barnim;BARNIM 248 | RFA;52;BR;Elbe-Elster;ELBE ELSTER 249 | RFA;54;BR;Märkisch-Oderland;MARKISCH ODERLAND 250 | RFA;57;BR;Oder-Spree;ODER SPREE 251 | RFA;59;BR;Potsdam-Mittelmark;POTSDAM MITTELMARK 252 | RFA;6;BA;Ansbach;ANSBACH 253 | RFA;61;BR;Spree-Neiße;SPREE NEIBE 254 | RFA;64;BW;Alb-Donau-Kreis;ALB DONAU KREIS 255 | RFA;67;BW;Bodensee;BODENSEE 256 | RFA;68;BW;Breisgau-Hochschwarzwald;BREISGAU HOCHSCHWARZWALD 257 | RFA;71;BW;Enz;ENZ 258 | RFA;75;BW;Heidenheim;HEIDENHEIM 259 | RFA;78;BW;Karlsruhe;KARLSRUHE 260 | RFA;79;BW;Konstanz;KONSTANZ 261 | RFA;8;BA;Augsburgo;AUGSBURGO 262 | RFA;81;BW;Ludwigsburg;LUDWIGSBURG 263 | 
RFA;82;BW;Ortenaukreis;ORTENAUKREIS 264 | RFA;84;BW;Rastatt;RASTATT 265 | RFA;85;BW;Schwäbisch Hall;SCHWABISCH HALL 266 | RFA;86;BW;Tubinga;TUBINGA 267 | RFA;9;BA;Bad Kissingen;BAD KISSINGEN 268 | RFA;N/A;;No Aplica;NO APLICA 269 | ROM;N/A;;No Aplica;NO APLICA 270 | RSS;N/A;;No Aplica;NO APLICA 271 | RUA;N/A;;No Aplica;NO APLICA 272 | SEN;N/A;;No Aplica;NO APLICA 273 | SUA;N/A;;No Aplica;NO APLICA 274 | SUE;N/A;;No Aplica;NO APLICA 275 | SUI;N/A;;No Aplica;NO APLICA 276 | SUR;N/A;;No Aplica;NO APLICA 277 | SVN;N/A;;No Aplica;NO APLICA 278 | TAI;N/A;;No Aplica;NO APLICA 279 | TCH;N/A;;No Aplica;NO APLICA 280 | TGO;N/A;;No Aplica;NO APLICA 281 | TRT;N/A;;No Aplica;NO APLICA 282 | TUN;N/A;;No Aplica;NO APLICA 283 | TUR;N/A;;No Aplica;NO APLICA 284 | UCR;N/A;;No Aplica;NO APLICA 285 | UGA;N/A;;No Aplica;NO APLICA 286 | URU;N/A;;No Aplica;NO APLICA 287 | UZB;N/A;;No Aplica;NO APLICA 288 | VEN;AN;;Anzoategui;ANZOATEGUI 289 | VEN;AP;;Apure;APURE 290 | VEN;AR;;Aragua;ARAGUA 291 | VEN;BA;;Barinas;BARINAS 292 | VEN;BO;;Bolivar;BOLIVAR 293 | VEN;CA;;Carabobo;CARABOBO 294 | VEN;CO;;Cojedes;COJEDES 295 | VEN;DF;;Distrito Federal;DISTRITO FEDERAL 296 | VEN;FA;;Falcon;FALCON 297 | VEN;LA;;Lara;LARA 298 | VEN;ME;;Merida;MERIDA 299 | VEN;MI;;Miranda;MIRANDA 300 | VEN;MO;;Monagas;MONAGAS 301 | VEN;N/A;;No Aplica;NO APLICA 302 | VEN;NE;;Nueva Esparta;NUEVA ESPARTA 303 | VEN;PO;;Portuguesa;PORTUGUESA 304 | VEN;SU;;Sucre;SUCRE 305 | VEN;TA;;Tachira;TACHIRA 306 | VEN;TR;;Trujillo;TRUJILLO 307 | VEN;ZU;;Zulia;ZULIA 308 | VTN;N/A;;No Aplica;NO APLICA 309 | ZAN;N/A;;No Aplica;NO APLICA 310 | ZAR;N/A;;No Aplica;NO APLICA 311 | ZIN;N/A;;No Aplica;NO APLICA 312 | -------------------------------------------------------------------------------- /Bases_Datos/Notebooks/Bases_Datos_SQL-3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "respective-miniature", 6 | "metadata": { 7 | "tags": [] 8 | }, 9 | "source": [ 10 | "
\n", 11 | " \n", 12 | "
\n", 13 | "\n", 14 | "# Aprendizaje Profundo" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "id": "blond-video", 20 | "metadata": {}, 21 | "source": [ 22 | "#
SQL -Base de datos Sakila
" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "id": "confident-aruba", 28 | "metadata": {}, 29 | "source": [ 30 | "## Profesores" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "id": "buried-raising", 36 | "metadata": {}, 37 | "source": [ 38 | "1. Alvaro Mauricio Montenegro Díaz, ammontenegrod@unal.edu.co\n", 39 | "2. Daniel Mauricio Montenegro Reyes, dextronomo@gmail.com " 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "id": "sized-scholar", 45 | "metadata": {}, 46 | "source": [ 47 | "## Asesora Medios y Marketing digital\n", 48 | " " 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "id": "convinced-enzyme", 54 | "metadata": {}, 55 | "source": [ 56 | "4. Maria del Pilar Montenegro, pmontenegro88@gmail.com " 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "id": "secondary-accountability", 62 | "metadata": {}, 63 | "source": [ 64 | "## Contenido" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "id": "boxed-jimmy", 70 | "metadata": {}, 71 | "source": [ 72 | "* [Introducción](#Introducción)\n", 73 | "* [Instalación](#Instalación)\n", 74 | "* [Inspección General de la base de datos](#Inspección-General-de-la-base-de-datos)\n" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "id": "prime-secret", 80 | "metadata": {}, 81 | "source": [ 82 | "## Introducción" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "id": "unauthorized-essence", 88 | "metadata": {}, 89 | "source": [ 90 | "La base de datos de muestra de Sakila fue desarrollada inicialmente por Mike Hillyer, un ex miembro del equipo de documentación de MySQL AB, y está destinada a proporcionar un esquema estándar que se puede utilizar para ejemplos en libros, tutoriales, artículos, muestras, etc. La base de datos de muestra de Sakila también sirve para resaltar características de MySQL como Vistas, Procedimientos almacenados y Disparadores." 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "id": "careful-diving", 96 | "metadata": {}, 97 | "source": [ 98 | "## Instalación" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "id": "indian-potato", 104 | "metadata": {}, 105 | "source": [ 106 | "La base de datos de muestra de Sakila está disponible en [sakila download](https://dev.mysql.com/doc/index-other.html). El archivo descargable está disponible en un archivo tar comprimido o en formato Zip. El archivo comprimido contiene tres archivos: sakila-schema.sql, sakila-data.sql, and sakila.mwb.\n", 107 | "\n", 108 | "Descarge la base Sakila en uno de los formatos TAR o ZIP." 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "id": "brutal-console", 114 | "metadata": {}, 115 | "source": [ 116 | "Sakila contiene comentarios específicos de la versión de MySQL, ya que el esquema y los datos de sakila dependen de la versión de su servidor MySQL. Por ejemplo, el servidor MySQL 5.7.5 agregó soporte para la indexación de datos espaciales a InnoDB, por lo que la tabla de direcciones incluirá una columna de ubicación con reconocimiento espacial para MySQL 5.7.5 y superior." 
117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "id": "institutional-damage", 122 | "metadata": {}, 123 | "source": [ 124 | "El archivo sakila-schema.sql contiene todas las declaraciones CREATE necesarias para crear la estructura de la base de datos de Sakila, incluidas tablas, vistas, procedimientos almacenados y desencadenadores.\n", 125 | "\n", 126 | "El archivo sakila-data.sql contiene las instrucciones INSERT necesarias para completar la estructura creada por el archivo sakila-schema.sql, junto con las definiciones de los desencadenantes que deben crearse después de la carga de datos inicial.\n", 127 | "\n", 128 | "El archivo sakila.mwb es un modelo de datos de MySQL Workbench que puede abrir dentro de MySQL Workbench para examinar la estructura de la base de datos. Para obtener más información, consulte MySQL Workbench.\n", 129 | "\n", 130 | "Para instalar la base de datos de muestra de Sakila, siga estos pasos:" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "id": "educated-austin", 136 | "metadata": {}, 137 | "source": [ 138 | "1. Extraiga el archivo de instalación en una ubicación temporal como C: \\ temp \\ o / tmp /. Cuando descomprime el archivo, crea un directorio llamado sakila-db que contiene los archivos sakila-schema.sql y sakila-data.sql.\n", 139 | "\n", 140 | "2. Conéctese al servidor MariaDB o MySQL desde la terminal, según sea su caso.\n" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "id": "occupational-bloom", 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "mysql -u root -p" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "id": "unnecessary-mentor", 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "mariadb -u root -p" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "id": "medical-channel", 166 | "metadata": {}, 167 | "source": [ 168 | "Ejecute el script sakila-schema.sql para crear la estructura de la base de datos y ejecute el script sakila-data.sql para completar la estructura de la base de datos, utilizando los siguientes comandos: (reemplace /temp/ por la carpeta ebn donde haya descargado y descomprimido los archivos)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "id": "packed-minority", 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "SOURCE /temp/sakila-db/sakila-schema.sql;" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "id": "monetary-theta", 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "SOURCE /temp/sakila-db/sakila-data.sql;" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "id": "angry-death", 194 | "metadata": {}, 195 | "source": [ 196 | "Ya estamos listos. 
Verifiquemos" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "id": "comic-council", 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "USE sakila;" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "id": "immune-philadelphia", 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "SHOW FULL TABLES;" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "id": "chinese-conflict", 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "SELECT COUNT(*) FROM film;" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "id": "herbal-welcome", 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "SELECT COUNT(*) FROM film_text;" 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "id": "activated-paste", 242 | "metadata": {}, 243 | "source": [ 244 | "### Esquema de la base de datos" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "id": "liable-elephant", 250 | "metadata": {}, 251 | "source": [ 252 | "El esquema de la base de datos se muestra en el siguiente diagrama producido por [worbench](https://www.mysql.com/products/workbench/)." 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "id": "naval-storage", 258 | "metadata": {}, 259 | "source": [ 260 | "![Esquema sakila](../Imagenes/sakila-skema_e_r.png)" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "id": "colonial-forty", 266 | "metadata": {}, 267 | "source": [ 268 | "### Ejercicio" 269 | ] 270 | }, 271 | { 272 | "cell_type": "markdown", 273 | "id": "existing-being", 274 | "metadata": {}, 275 | "source": [ 276 | "Use [DBeaver](https://dbeaver.io/) para reproducir el diagrama." 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "id": "sustained-egyptian", 282 | "metadata": {}, 283 | "source": [ 284 | "## Inspección General de la base de datos " 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "id": "returning-petersburg", 290 | "metadata": {}, 291 | "source": [ 292 | "Revise en este enlace los principales aspectos de la base de datos: [descripción de la base de datos](https://www3.ntu.edu.sg/home/ehchua/programming/sql/SampleDatabases.html#zz-2.)" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "id": "sustainable-minute", 298 | "metadata": {}, 299 | "source": [ 300 | "### Consultando la base de datos" 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "id": "greenhouse-belgium", 306 | "metadata": {}, 307 | "source": [ 308 | "1. Lista de nombre completo de actores ordenada por appelido\n", 309 | "2. Número de actores por categoría del film, orden descendiente\n", 310 | "3. Actor que más ventas genera por tienda (store)\n", 311 | "4. 
Ventas totales por tienda" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "id": "stretch-third", 318 | "metadata": {}, 319 | "outputs": [], 320 | "source": [ 321 | "SELECT CONCAT(s.first_name, _utf8' ', s.last_name) AS Actor \n", 322 | "FROM actor as s\n", 323 | "order by s.last_name\n", 324 | "limit 10;" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "id": "sorted-helen", 331 | "metadata": {}, 332 | "outputs": [], 333 | "source": [ 334 | "SELECT c.name as categoria, COUNT(actor_id) AS numero_actores\n", 335 | "FROM category AS c\n", 336 | "JOIN film_category as fc ON c.category_id = fc.category_id\n", 337 | "JOIN film as f ON fc.film_id = f.film_id\n", 338 | "JOIN film_actor as fa ON f.film_id = fa.film_id\n", 339 | "GROUP BY categoria\n", 340 | "ORDER BY numero_actores DESC;" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "id": "useful-distinction", 347 | "metadata": {}, 348 | "outputs": [], 349 | "source": [ 350 | "SELECT d.address as tienda,\n", 351 | " CONCAT(a.first_name, _utf8' ', a.last_name) AS actor_estrella,\n", 352 | " MAX(p.amount) as valor\n", 353 | "FROM actor AS a\n", 354 | "JOIN film_actor AS fa ON a.actor_id = fa.actor_id\n", 355 | "JOIN film AS f ON fa.film_id = f.film_id\n", 356 | "JOIN inventory AS i ON f.film_id = i.film_id\n", 357 | "JOIN rental AS r ON i.inventory_id = r.inventory_id\n", 358 | "JOIN payment AS p ON r.rental_id = p.rental_id\n", 359 | "JOIN store AS s ON i.store_id = s.store_id\n", 360 | "JOIN address AS d ON s.address_id = d.address_id\n", 361 | "GROUP BY tienda;" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": null, 367 | "id": "ranging-hearing", 368 | "metadata": {}, 369 | "outputs": [], 370 | "source": [ 371 | "select d.address as tienda,\n", 372 | " SUM(p.amount) as valor\n", 373 | "from address as d\n", 374 | "join store as s on s.address_id = d.address_id\n", 375 | "join inventory as i on i.store_id = s.store_id\n", 376 | "join rental as r on r.inventory_id = i.inventory_id\n", 377 | "join payment as p on p.rental_id = r.rental_id\n", 378 | "group by tienda;" 379 | ] 380 | }, 381 | { 382 | "cell_type": "markdown", 383 | "id": "passing-being", 384 | "metadata": {}, 385 | "source": [ 386 | "### Los actores que más actuan" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": null, 392 | "id": "derived-cradle", 393 | "metadata": {}, 394 | "outputs": [], 395 | "source": [ 396 | "select CONCAT(a.first_name, _utf8' ', a.last_name) as actor,\n", 397 | " count(a.actor_id) as numero_actuaciones\n", 398 | "from actor as a\n", 399 | "join film_actor as fa on a.actor_id = fa.actor_id\n", 400 | "group by (a.actor_id)\n", 401 | "having numero_actuaciones>15\n", 402 | "order by numero_actuaciones desc\n", 403 | ";" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": null, 409 | "id": "failing-digit", 410 | "metadata": {}, 411 | "outputs": [], 412 | "source": [ 413 | "select CONCAT(f.title,_utf8' ', f.description)\n", 414 | "from film as f\n", 415 | "limit 10;" 416 | ] 417 | } 418 | ], 419 | "metadata": { 420 | "kernelspec": { 421 | "display_name": "MariaDB", 422 | "language": "SQL", 423 | "name": "mariadb_kernel" 424 | }, 425 | "language_info": { 426 | "file_extension": ".sql", 427 | "mimetype": "text/plain", 428 | "name": "SQL" 429 | } 430 | }, 431 | "nbformat": 4, 432 | "nbformat_minor": 5 433 | } 434 | 
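A hedged aside: the same queries can also be issued from Python rather than the MariaDB kernel. The sketch below assumes the `mysql-connector-python` package is installed and uses illustrative credentials — adjust them to your own server.

```python
import mysql.connector  # pip install mysql-connector-python (assumed available)

# Credentials here are placeholders; match them to your MariaDB/MySQL setup.
cnx = mysql.connector.connect(user="root", password="secret", database="sakila")
cur = cnx.cursor()
cur.execute(
    "SELECT c.name, COUNT(fa.actor_id) AS numero_actores "
    "FROM category AS c "
    "JOIN film_category AS fc ON c.category_id = fc.category_id "
    "JOIN film_actor AS fa ON fc.film_id = fa.film_id "
    "GROUP BY c.name ORDER BY numero_actores DESC"
)
for category, n_actors in cur.fetchall():
    print(category, n_actors)
cur.close()
cnx.close()
```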
-------------------------------------------------------------------------------- /Dask/Cuadernos/10_coiled_quickstart.ipynb: --------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {
6 |     "tags": []
7 |    },
8 |    "source": [
9 |     "# Diplomado en Big Data"
10 |    ]
11 |   },
12 |   {
13 |    "cell_type": "markdown",
14 |    "metadata": {},
15 |    "source": [
16 |     "# Running on a cloud cluster: Coiled"
17 |    ]
18 |   },
19 |   {
20 |    "cell_type": "markdown",
21 |    "metadata": {},
22 |    "source": [
23 |     ""
24 |    ]
25 |   },
26 |   {
27 |    "cell_type": "markdown",
28 |    "metadata": {},
29 |    "source": [
30 |     "## Instructors"
31 |    ]
32 |   },
33 |   {
34 |    "cell_type": "markdown",
35 |    "metadata": {},
36 |    "source": [
37 |     "1. Alvaro Mauricio Montenegro Díaz, ammontenegrod@unal.edu.co\n",
38 |     "2. Daniel Mauricio Montenegro Reyes, dextronomo@gmail.com "
39 |    ]
40 |   },
41 |   {
42 |    "cell_type": "markdown",
43 |    "metadata": {},
44 |    "source": [
45 |     "## Media and Digital Marketing Advisor\n",
46 |     "  "
47 |    ]
48 |   },
49 |   {
50 |    "cell_type": "markdown",
51 |    "metadata": {},
52 |    "source": [
53 |     "4. Maria del Pilar Montenegro, pmontenegro88@gmail.com "
54 |    ]
55 |   },
56 |   {
57 |    "cell_type": "markdown",
58 |    "metadata": {},
59 |    "source": [
60 |     "## Contents"
61 |    ]
62 |   },
63 |   {
64 |    "cell_type": "markdown",
65 |    "metadata": {},
66 |    "source": [
67 |     "\n",
68 |     "* [Introduction](#Introduction)\n",
69 |     "* [Launching a cluster](#Launching-a-cluster)\n",
70 |     "* [Analyzing data in the cloud](#Analyzing-data-in-the-cloud)\n",
71 |     "* [Stopping a cluster](#Stopping-a-cluster)\n",
72 |     "* [Software environments](#Software-environments)\n",
73 |     "* [Creating a software environment](#Creating-a-software-environment)\n"
74 |    ]
75 |   },
76 |   {
77 |    "cell_type": "markdown",
78 |    "metadata": {},
79 |    "source": [
80 |     "## Source"
81 |    ]
82 |   },
83 |   {
84 |    "cell_type": "markdown",
85 |    "metadata": {},
86 |    "source": [
87 |     "This is a free translation of the tutorial available at [coiled-tutorial](https://cloud.coiled.io/examples/notebooks)."
88 |    ]
89 |   },
90 |   {
91 |    "cell_type": "markdown",
92 |    "metadata": {},
93 |    "source": [
94 |     "## Introduction"
95 |    ]
96 |   },
97 |   {
98 |    "cell_type": "markdown",
99 |    "metadata": {},
100 |    "source": [
101 |     "### Installing Coiled"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "raw",
106 |    "metadata": {},
107 |    "source": [
108 |     "conda create --name coiled python=3.8 jupyterlab\n",
109 |     "conda activate coiled\n",
110 |     "conda install -c conda-forge coiled dask\n",
111 |     "conda upgrade coiled dask distributed"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "markdown",
116 |    "metadata": {},
117 |    "source": [
118 |     "## Launching a cluster"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "markdown",
123 |    "metadata": {},
124 |    "source": [
125 |     "The first step is to spin up a Dask cluster. In Coiled this is done by creating a `coiled.Cluster` instance; there are [keyword arguments](https://docs.coiled.io/user_guide/api.html#coiled.Cluster) you can use to specify the cluster in more detail. Please read the [cluster creation documentation](https://docs.coiled.io/user_guide/cluster_creation.html) to learn more.\n",
126 |     "\n",
127 |     "Note that we will give this cluster a name; if you do not specify this keyword argument, clusters receive a randomly generated unique name."
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": 1,
133 |    "metadata": {},
134 |    "outputs": [
135 |     {
136 |      "name": "stderr",
137 |      "output_type": "stream",
138 |      "text": [
139 |       "/home/alvaro/anaconda3/envs/coiled/lib/python3.8/site-packages/rich/live.py:227: UserWarning: install \"ipywidgets\" for Jupyter support\n",
140 |       "  warnings.warn('install \"ipywidgets\" for Jupyter support')\n"
141 |      ]
142 |     },
143 |     {
144 |      "name": "stdout",
145 |      "output_type": "stream",
146 |      "text": [
147 |       "Found software environment build\n"
148 |      ]
149 |     },
150 |     {
151 |      "data": {
152 |       "text/html": [
153 |        "\n"
154 |       ],
155 |       "text/plain": []
156 |      },
157 |      "metadata": {},
158 |      "output_type": "display_data"
159 |     }
160 |    ],
161 |    "source": [
162 |     "import coiled\n",
163 |     "\n",
164 |     "cluster = coiled.Cluster(name=\"quickstart-example\", n_workers=10)"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "markdown",
169 |    "metadata": {},
170 |    "source": [
171 |     "Once a cluster has been created (you can check its status on your [Coiled dashboard](https://cloud.coiled.io/)), you can connect Dask to it by creating a *distributed client* instance."
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "code",
176 |    "execution_count": null,
177 |    "metadata": {},
178 |    "outputs": [],
179 |    "source": [
180 |     "from dask.distributed import Client\n",
181 |     "\n",
182 |     "client = Client(cluster)\n",
183 |     "client"
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "markdown",
188 |    "metadata": {},
189 |    "source": [
190 |     "## Analyzing data in the cloud"
191 |    ]
192 |   },
193 |   {
194 |    "cell_type": "markdown",
195 |    "metadata": {},
196 |    "source": [
197 |     "Now that our cluster is running and Dask is connected to it, let's run a computation. This example will compute over roughly 84 million rows."
198 |    ]
199 |   },
200 |   {
201 |    "cell_type": "code",
202 |    "execution_count": null,
203 |    "metadata": {},
204 |    "outputs": [],
205 |    "source": [
206 |     "import dask.dataframe as dd\n",
207 |     "\n",
208 |     "df = dd.read_csv(\n",
209 |     "    \"s3://nyc-tlc/trip data/yellow_tripdata_2019-*.csv\",\n",
210 |     "    dtype={\n",
211 |     "        \"payment_type\": \"UInt8\",\n",
212 |     "        \"VendorID\": \"UInt8\",\n",
213 |     "        \"passenger_count\": \"UInt8\",\n",
214 |     "        \"RatecodeID\": \"UInt8\",\n",
215 |     "    },\n",
216 |     "    storage_options={\"anon\": True},\n",
217 |     "    blocksize=\"16 MiB\",\n",
218 |     ").persist()\n",
219 |     "\n",
220 |     "df.groupby(\"passenger_count\").tip_amount.mean().compute()"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "markdown",
225 |    "metadata": {},
226 |    "source": [
227 |     "## Stopping a cluster"
228 |    ]
229 |   },
230 |   {
231 |    "cell_type": "markdown",
232 |    "metadata": {},
233 |    "source": [
234 |     "By default, clusters shut down after 20 minutes of inactivity. You can stop a cluster by pressing the stop button on the [Coiled dashboard](https://cloud.coiled.io/). Alternatively, we can get a list of all running clusters and use a cluster's name to stop it."
235 |    ]
236 |   },
237 |   {
238 |    "cell_type": "code",
239 |    "execution_count": 3,
240 |    "metadata": {},
241 |    "outputs": [
242 |     {
243 |      "data": {
244 |       "text/plain": [
245 |        "{'quickstart-example': {'id': 18813,\n",
246 |        "  'status': 'running',\n",
247 |        "  'account': 'alvaro-montenegro',\n",
248 |        "  'dashboard_address': 'http://ec2-100-26-244-131.compute-1.amazonaws.com:8787',\n",
249 |        "  'configuration': 310,\n",
250 |        "  'options': {'region': 'us-east-1',\n",
251 |        "   'credentials': {},\n",
252 |        "   'account_role': None,\n",
253 |        "   'target_environment': 'dev',\n",
254 |        "   'spot': True},\n",
255 |        "  'address': 'tls://ec2-100-26-244-131.compute-1.amazonaws.com:8786'}}"
256 |       ]
257 |      },
258 |      "execution_count": 3,
259 |      "metadata": {},
260 |      "output_type": "execute_result"
261 |     }
262 |    ],
263 |    "source": [
264 |     "coiled.list_clusters()"
265 |    ]
266 |   },
267 |   {
268 |    "cell_type": "markdown",
269 |    "metadata": {},
270 |    "source": [
271 |     "The `list_clusters` command returns a dictionary keyed by cluster name. We can take that and then call `coiled.delete_cluster()` to stop the running cluster, and `client.close()` to close the client."
272 |    ]
273 |   },
274 |   {
275 |    "cell_type": "code",
276 |    "execution_count": 4,
277 |    "metadata": {},
278 |    "outputs": [
279 |     {
280 |      "data": {
281 |       "text/html": [
282 |        "Cluster deleted successfully.\n",
283 |        "\n"
373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": null, 378 | "metadata": {}, 379 | "outputs": [], 380 | "source": [ 381 | "df = dd.read_csv(\n", 382 | " \"s3://nyc-tlc/trip data/yellow_tripdata_2019-*.csv\",\n", 383 | " dtype={\n", 384 | " \"payment_type\": \"UInt8\",\n", 385 | " \"VendorID\": \"UInt8\",\n", 386 | " \"passenger_count\": \"UInt8\",\n", 387 | " \"RatecodeID\": \"UInt8\",\n", 388 | " },\n", 389 | " storage_options={\"anon\": True},\n", 390 | " blocksize=\"16 MiB\",\n", 391 | ").persist()\n", 392 | "\n", 393 | "df.groupby(\"passenger_count\").tip_amount.mean().compute()" 394 | ] 395 | } 396 | ], 397 | "metadata": { 398 | "kernelspec": { 399 | "display_name": "Python 3", 400 | "language": "python", 401 | "name": "python3" 402 | }, 403 | "language_info": { 404 | "codemirror_mode": { 405 | "name": "ipython", 406 | "version": 3 407 | }, 408 | "file_extension": ".py", 409 | "mimetype": "text/x-python", 410 | "name": "python", 411 | "nbconvert_exporter": "python", 412 | "pygments_lexer": "ipython3", 413 | "version": "3.8.10" 414 | } 415 | }, 416 | "nbformat": 4, 417 | "nbformat_minor": 4 418 | } 419 | -------------------------------------------------------------------------------- /Python/Cuadernos/Decorators.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "tags": [] 7 | }, 8 | "source": [ 9 | "#
Diplomado en Inteligencia Artificial y Aprendizaje Profundo
" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "#
Introducción a Decoradores (Decorators) en Python
" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "## Profesores" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "1. Alvaro Mauricio Montenegro Díaz, ammontenegrod@unal.edu.co\n", 31 | "2. Daniel Mauricio Montenegro Reyes, dextronomo@gmail.com \n", 32 | "3. Campo Elías Pardo Turriago, cepardot@unal.edu.co " 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## Asesora Medios y Marketing digital\n", 40 | " " 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "4. Maria del Pilar Montenegro, pmontenegro88@gmail.com " 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "## Asistentes" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "5. Oleg Jarma, ojarmam@unal.edu.co \n", 62 | "6. Laura Lizarazo, ljlizarazore@unal.edu.co " 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "## Contenido" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "* [Introducción](#Introducción)\n", 77 | "* [Decoradores](#Decoradores)\n" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "## Introducción " 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "Los decorators constituyen un patrón de programación que se utiliza cuando es necesario incluir un comportamiento adcional a objetos específicos.\n", 92 | "\n", 93 | "Una forma de agregar tal comportamiento adicional es decorar los objetos creados con tipos que aportan la funcionalidad extra.\n", 94 | "\n", 95 | "Estos decoradores envuelven el objeto original pero presentan exactamente la misma interfaz para\n", 96 | "el usuario de ese objeto. \n", 97 | "\n", 98 | "Por lo tanto, el patrón de diseño del decorador extiende el comportamiento\n", 99 | "de un objeto sin utilizar subclassing. \n", 100 | "\n", 101 | "Esta decoración de un objeto es transparente a los clientes de los decoradores.\n", 102 | "\n", 103 | "En Python, los decoradores son funciones que toman otra función (u otro objeto invocable\n", 104 | "como un método) y devuelve una tercera función que representa el \n", 105 | "comportamiento decorado." 
106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "## Decoradores " 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "### Definición de un decorador" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "Para definir un decorador, debe definir un objeto invocable, como una función que\n", 127 | "toma otra función como parámetro y devuelve una nueva función.\n", 128 | "\n", 129 | "A continuación se da un ejemplo de la definición de una función decoradora de registro (logger) muy simple\n" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 5, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "def logger(func):\n", 139 | " def inner():\n", 140 | " print('llamando ', func.__name__)\n", 141 | " func()\n", 142 | " print('llamada', func.__name__)\n", 143 | " \n", 144 | " return inner" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "Observe que la función *logger* retorna una función, *inner*, la cual a su vez llamará a una tercera función *func*." 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "### Usando el decorador" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "Veamos ahora el efecto del decorador en acción. Usaremos la función *target* como la función que vamos a decorar." 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 7, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "def target(): \n", 175 | " print('Dentro de la función target')" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 8, 181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "name": "stdout", 185 | "output_type": "stream", 186 | "text": [ 187 | "llamando target\n", 188 | "Dentro de la función target\n", 189 | "llamada target\n" 190 | ] 191 | } 192 | ], 193 | "source": [ 194 | "t1 = logger(target)\n", 195 | "\n", 196 | "t1()" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "### Suavizando el trabajo" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "Python proporciona algo de azúcar sintáctico que permite decorar directamente la función desde su definición. Este es el uso más practico d elos decoradores." 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 4, 216 | "metadata": {}, 217 | "outputs": [ 218 | { 219 | "name": "stdout", 220 | "output_type": "stream", 221 | "text": [ 222 | "calling target\n", 223 | "Dentro de la función target\n", 224 | "called target\n" 225 | ] 226 | } 227 | ], 228 | "source": [ 229 | "@logger\n", 230 | "def target():\n", 231 | " print('Dentro de la función target')\n", 232 | "\n", 233 | "target()" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "### Funciones con parámetros" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "En este caso la función decoradora debe incluir los parámetros. Veamos el ejemplo." 
248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 10, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "def logger(func):\n", 257 | " def inner(x, y):\n", 258 | " print('llamando ', func.__name__, 'con ',x , 'y',y)\n", 259 | " func(x, y)\n", 260 | " print('regresando de ',func.__name__)\n", 261 | " return inner" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 11, 267 | "metadata": {}, 268 | "outputs": [ 269 | { 270 | "name": "stdout", 271 | "output_type": "stream", 272 | "text": [ 273 | "llamando mi_funcion con 5 y 6\n", 274 | "x + y = 11\n", 275 | "regresando de mi_funcion\n" 276 | ] 277 | } 278 | ], 279 | "source": [ 280 | "@logger\n", 281 | "def mi_funcion(x, y):\n", 282 | " print('x + y = ', x+y)\n", 283 | "\n", 284 | "mi_funcion(5,6)" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "### Decoradores apilados (stacked decorators)" 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": {}, 297 | "source": [ 298 | "Es posible apilar decoradores. Veamos el ejemplo. Vamos a imprimir un texto. Los decoradores agregaran negrilla (bold) e itálica (italic) al texto impreso." 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 15, 304 | "metadata": {}, 305 | "outputs": [ 306 | { 307 | "name": "stdout", 308 | "output_type": "stream", 309 | "text": [ 310 | "hola mundo\n" 311 | ] 312 | } 313 | ], 314 | "source": [ 315 | "# decoradores\n", 316 | "def make_bold(fn):\n", 317 | " def makebold_wrapper():\n", 318 | " return \"\" + fn() + \"\"\n", 319 | " return makebold_wrapper\n", 320 | "\n", 321 | "def make_italic(fn):\n", 322 | " def makeitalic_wrapper():\n", 323 | " return \"\" + fn() + \"\"\n", 324 | " return makeitalic_wrapper\n", 325 | "\n", 326 | "# aplica los decoradores\n", 327 | "\n", 328 | "@make_bold\n", 329 | "@make_italic\n", 330 | "def hello():\n", 331 | " return 'hola mundo'\n", 332 | "\n", 333 | "print(hello())" 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "### Decoradores para métodos de clases" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "En este caso, es importante recordar que los métodos toman el \n", 348 | "parámetro especial *self* como el primer parámetro que se utiliza para hacer referencia al objeto del que\n", 349 | "se está aplicando el método. \n", 350 | "\n", 351 | "Por lo tanto, es necesario que el decorador tome este\n", 352 | "parámetro en cuenta; es decir, la función envuelta interna debe tomar al menos un\n", 353 | "parámetro que representa a *self*. Veámos el ejemplo." 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 17, 359 | "metadata": {}, 360 | "outputs": [], 361 | "source": [ 362 | "def pretty_print(method):\n", 363 | " def method_wrapper(self):\n", 364 | " return \"
 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "### Decoradores para métodos de clases" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "En este caso, es importante recordar que los métodos toman el \n", 348 | "parámetro especial *self* como primer parámetro, el cual se utiliza para hacer referencia al objeto sobre el que\n", 349 | "se está aplicando el método. \n", 350 | "\n", 351 | "Por lo tanto, es necesario que el decorador tome este\n", 352 | "parámetro en cuenta; es decir, la función envuelta interna debe tomar al menos un\n", 353 | "parámetro que representa a *self*. Veamos el ejemplo." 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 17, 359 | "metadata": {}, 360 | "outputs": [], 361 | "source": [ 362 | "def pretty_print(method):\n", 363 | "    def method_wrapper(self):\n", 364 | "        return \"<p>\\n<b>{0}</b>\\n</p>\".format(method(self))\n", 365 | "    return method_wrapper" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 25, 371 | "metadata": {}, 372 | "outputs": [], 373 | "source": [ 374 | "class Persona:\n", 375 | "    def __init__(self, nombre, apellido, edad):\n", 376 | "        self.nombre = nombre\n", 377 | "        self.apellido = apellido\n", 378 | "        self.edad = edad\n", 379 | "    \n", 380 | "    def print_self(self):\n", 381 | "        print('Persona -', self.nombre, ',', self.edad)\n", 382 | "    \n", 383 | "    @pretty_print\n", 384 | "    def get_nombre_completo(self):\n", 385 | "        return self.nombre + \" \" + self.apellido" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": 26, 391 | "metadata": {}, 392 | "outputs": [ 393 | { 394 | "name": "stdout", 395 | "output_type": "stream", 396 | "text": [ 397 | "Comenzamos\n", 398 | "Persona - Alvaro , 61\n", 399 | "<p>\n", "<b>Alvaro Montenegro</b>\n", "</p>\n", 400 | "Hecho!\n" 401 | ] 402 | } 403 | ], 404 | "source": [ 405 | "print('Comenzamos')\n", 406 | "p = Persona('Alvaro', 'Montenegro', 61)\n", 407 | "p.print_self()\n", 408 | "print(p.get_nombre_completo())\n", 409 | "print('Hecho!')" 410 | ] 411 | },
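 { "cell_type": "markdown", "metadata": {}, "source": [ "Los decoradores anteriores fijan la firma de la función envuelta. Con `*args` y `**kwargs` puede escribirse un decorador genérico que sirve para cualquier firma, incluidos métodos (allí *self* llega como primer argumento posicional). Bosquejo mínimo, con nombres ilustrativos:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import functools\n", "\n", "def traza(func):\n", "    # acepta cualquier combinación de argumentos posicionales y por nombre\n", "    @functools.wraps(func)\n", "    def wrapper(*args, **kwargs):\n", "        print('llamando', func.__name__)\n", "        resultado = func(*args, **kwargs)\n", "        print('regresando de', func.__name__)\n", "        return resultado\n", "    return wrapper\n", "\n", "@traza\n", "def suma(x, y=0):\n", "    return x + y\n", "\n", "print(suma(3, y=4))  # imprime las trazas y luego 7" ] },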
\n", 400 | "Hecho!\n" 401 | ] 402 | } 403 | ], 404 | "source": [ 405 | "print('Comenzamos')\n", 406 | "p = Persona('Alvaro', 'Montenegro', 61)\n", 407 | "p.print_self()\n", 408 | "print(p.get_nombre_completo())\n", 409 | "print('Hecho!')" 410 | ] 411 | }, 412 | { 413 | "cell_type": "markdown", 414 | "metadata": {}, 415 | "source": [ 416 | "### Decoradores para métodos de clases con parámetros" 417 | ] 418 | }, 419 | { 420 | "cell_type": "markdown", 421 | "metadata": {}, 422 | "source": [ 423 | "Aquí convinamos las anteriores dos subsecciones. Veamos." 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": 28, 429 | "metadata": {}, 430 | "outputs": [], 431 | "source": [ 432 | "def trace(method): \n", 433 | " def method_wrapper(self, x, y):\n", 434 | " print('Llamando', method.__name__, 'con', x, y)\n", 435 | " method(self, x, y)\n", 436 | " print('Llamado', method.__name__, 'with', x, y)\n", 437 | " return method_wrapper" 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "execution_count": 29, 443 | "metadata": {}, 444 | "outputs": [], 445 | "source": [ 446 | "class Point:\n", 447 | " def __init__(self, x, y):\n", 448 | " self.x = x\n", 449 | " self.y = y\n", 450 | "\n", 451 | " @trace\n", 452 | " def move_to(self, x, y):\n", 453 | " self.x = x\n", 454 | " self.y = y\n", 455 | "\n", 456 | " def __str__(self):\n", 457 | " return 'Point - ' + str(self.x) + ',' + str(self.y)\n" 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": 31, 463 | "metadata": {}, 464 | "outputs": [ 465 | { 466 | "name": "stdout", 467 | "output_type": "stream", 468 | "text": [ 469 | "Point - 1,1\n", 470 | "Llamando move_to con 5 5\n", 471 | "Llamado move_to with 5 5\n", 472 | "Point - 5,5\n" 473 | ] 474 | } 475 | ], 476 | "source": [ 477 | "p = Point(1, 1)\n", 478 | "print(p)\n", 479 | "p.move_to(5,5)\n", 480 | "print(p)" 481 | ] 482 | }, 483 | { 484 | "cell_type": "markdown", 485 | "metadata": {}, 486 | "source": [ 487 | "[[Volver al inicio]](#Contenido)" 488 | ] 489 | } 490 | ], 491 | "metadata": { 492 | "kernelspec": { 493 | "display_name": "Python 3", 494 | "language": "python", 495 | "name": "python3" 496 | }, 497 | "language_info": { 498 | "codemirror_mode": { 499 | "name": "ipython", 500 | "version": 3 501 | }, 502 | "file_extension": ".py", 503 | "mimetype": "text/x-python", 504 | "name": "python", 505 | "nbconvert_exporter": "python", 506 | "pygments_lexer": "ipython3", 507 | "version": "3.8.8" 508 | } 509 | }, 510 | "nbformat": 4, 511 | "nbformat_minor": 4 512 | } 513 | -------------------------------------------------------------------------------- /Bases_Datos/Datos/paises.csv: -------------------------------------------------------------------------------- 1 | nombre, name, nom, iso2, iso3, phone_code 2 | "Afganistán","Afghanistan","Afghanistan","AF","AFG","93" 3 | "Albania","Albania","Albanie","AL","ALB","355" 4 | "Alemania","Germany","Allemagne","DE","DEU","49" 5 | "Andorra","Andorra","Andorra","AD","AND","376" 6 | "Angola","Angola","Angola","AO","AGO","244" 7 | "Anguila","Anguilla","Anguilla","AI","AIA","1 264" 8 | "Antártida","Antarctica","L'Antarctique","AQ","ATA","672" 9 | "Antigua y Barbuda","Antigua and Barbuda","Antigua et Barbuda","AG","ATG","1 268" 10 | "Arabia Saudita","Saudi Arabia","Arabie Saoudite","SA","SAU","966" 11 | "Argelia","Algeria","Algérie","DZ","DZA","213" 12 | "Argentina","Argentina","Argentine","AR","ARG","54" 13 | "Armenia","Armenia","L'Arménie","AM","ARM","374" 14 | "Aruba","Aruba","Aruba","AW","ABW","297" 15 | 
"Australia","Australia","Australie","AU","AUS","61" 16 | "Austria","Austria","Autriche","AT","AUT","43" 17 | "Azerbaiyán","Azerbaijan","L'Azerbaïdjan","AZ","AZE","994" 18 | "Bélgica","Belgium","Belgique","BE","BEL","32" 19 | "Bahamas","Bahamas","Bahamas","BS","BHS","1 242" 20 | "Bahrein","Bahrain","Bahreïn","BH","BHR","973" 21 | "Bangladesh","Bangladesh","Bangladesh","BD","BGD","880" 22 | "Barbados","Barbados","Barbade","BB","BRB","1 246" 23 | "Belice","Belize","Belize","BZ","BLZ","501" 24 | "Benín","Benin","Bénin","BJ","BEN","229" 25 | "Bhután","Bhutan","Le Bhoutan","BT","BTN","975" 26 | "Bielorrusia","Belarus","Biélorussie","BY","BLR","375" 27 | "Birmania","Myanmar","Myanmar","MM","MMR","95" 28 | "Bolivia","Bolivia","Bolivie","BO","BOL","591" 29 | "Bosnia y Herzegovina","Bosnia and Herzegovina","Bosnie-Herzégovine","BA","BIH","387" 30 | "Botsuana","Botswana","Botswana","BW","BWA","267" 31 | "Brasil","Brazil","Brésil","BR","BRA","55" 32 | "Brunéi","Brunei","Brunei","BN","BRN","673" 33 | "Bulgaria","Bulgaria","Bulgarie","BG","BGR","359" 34 | "Burkina Faso","Burkina Faso","Burkina Faso","BF","BFA","226" 35 | "Burundi","Burundi","Burundi","BI","BDI","257" 36 | "Cabo Verde","Cape Verde","Cap-Vert","CV","CPV","238" 37 | "Camboya","Cambodia","Cambodge","KH","KHM","855" 38 | "Camerún","Cameroon","Cameroun","CM","CMR","237" 39 | "Canadá","Canada","Canada","CA","CAN","1" 40 | "Chad","Chad","Tchad","TD","TCD","235" 41 | "Chile","Chile","Chili","CL","CHL","56" 42 | "China","China","Chine","CN","CHN","86" 43 | "Chipre","Cyprus","Chypre","CY","CYP","357" 44 | "Ciudad del Vaticano","Vatican City State","Cité du Vatican","VA","VAT","39" 45 | "Colombia","Colombia","Colombie","CO","COL","57" 46 | "Comoras","Comoros","Comores","KM","COM","269" 47 | "República del Congo","Republic of the Congo","République du Congo","CG","COG","242" 48 | "República Democrática del Congo","Democratic Republic of the Congo","République démocratique du Congo","CD","COD","243" 49 | "Corea del Norte","North Korea","Corée du Nord","KP","PRK","850" 50 | "Corea del Sur","South Korea","Corée du Sud","KR","KOR","82" 51 | "Costa de Marfil","Ivory Coast","Côte-d'Ivoire","CI","CIV","225" 52 | "Costa Rica","Costa Rica","Costa Rica","CR","CRI","506" 53 | "Croacia","Croatia","Croatie","HR","HRV","385" 54 | "Cuba","Cuba","Cuba","CU","CUB","53" 55 | "Curazao","Curaçao","Curaçao","CW","CWU","5999" 56 | "Dinamarca","Denmark","Danemark","DK","DNK","45" 57 | "Dominica","Dominica","Dominique","DM","DMA","1 767" 58 | "Ecuador","Ecuador","Equateur","EC","ECU","593" 59 | "Egipto","Egypt","Egypte","EG","EGY","20" 60 | "El Salvador","El Salvador","El Salvador","SV","SLV","503" 61 | "Emiratos Árabes Unidos","United Arab Emirates","Emirats Arabes Unis","AE","ARE","971" 62 | "Eritrea","Eritrea","Erythrée","ER","ERI","291" 63 | "Eslovaquia","Slovakia","Slovaquie","SK","SVK","421" 64 | "Eslovenia","Slovenia","Slovénie","SI","SVN","386" 65 | "España","Spain","Espagne","ES","ESP","34" 66 | "Estados Unidos de América","United States of America","États-Unis d'Amérique","US","USA","1" 67 | "Estonia","Estonia","L'Estonie","EE","EST","372" 68 | "Etiopía","Ethiopia","Ethiopie","ET","ETH","251" 69 | "Filipinas","Philippines","Philippines","PH","PHL","63" 70 | "Finlandia","Finland","Finlande","FI","FIN","358" 71 | "Fiyi","Fiji","Fidji","FJ","FJI","679" 72 | "Francia","France","France","FR","FRA","33" 73 | "Gabón","Gabon","Gabon","GA","GAB","241" 74 | "Gambia","Gambia","Gambie","GM","GMB","220" 75 | "Georgia","Georgia","Géorgie","GE","GEO","995" 76 | 
"Ghana","Ghana","Ghana","GH","GHA","233" 77 | "Gibraltar","Gibraltar","Gibraltar","GI","GIB","350" 78 | "Granada","Grenada","Grenade","GD","GRD","1 473" 79 | "Grecia","Greece","Grèce","GR","GRC","30" 80 | "Groenlandia","Greenland","Groenland","GL","GRL","299" 81 | "Guadalupe","Guadeloupe","Guadeloupe","GP","GLP","590" 82 | "Guam","Guam","Guam","GU","GUM","1 671" 83 | "Guatemala","Guatemala","Guatemala","GT","GTM","502" 84 | "Guayana Francesa","French Guiana","Guyane française","GF","GUF","594" 85 | "Guernsey","Guernsey","Guernesey","GG","GGY","44" 86 | "Guinea","Guinea","Guinée","GN","GIN","224" 87 | "Guinea Ecuatorial","Equatorial Guinea","Guinée Equatoriale","GQ","GNQ","240" 88 | "Guinea-Bissau","Guinea-Bissau","Guinée-Bissau","GW","GNB","245" 89 | "Guyana","Guyana","Guyane","GY","GUY","592" 90 | "Haití","Haiti","Haïti","HT","HTI","509" 91 | "Honduras","Honduras","Honduras","HN","HND","504" 92 | "Hong kong","Hong Kong","Hong Kong","HK","HKG","852" 93 | "Hungría","Hungary","Hongrie","HU","HUN","36" 94 | "India","India","Inde","IN","IND","91" 95 | "Indonesia","Indonesia","Indonésie","ID","IDN","62" 96 | "Irán","Iran","Iran","IR","IRN","98" 97 | "Irak","Iraq","Irak","IQ","IRQ","964" 98 | "Irlanda","Ireland","Irlande","IE","IRL","353" 99 | "Isla Bouvet","Bouvet Island","Bouvet Island","BV","BVT","" 100 | "Isla de Man","Isle of Man","Ile de Man","IM","IMN","44" 101 | "Isla de Navidad","Christmas Island","Christmas Island","CX","CXR","61" 102 | "Isla Norfolk","Norfolk Island","Île de Norfolk","NF","NFK","672" 103 | "Islandia","Iceland","Islande","IS","ISL","354" 104 | "Islas Bermudas","Bermuda Islands","Bermudes","BM","BMU","1 441" 105 | "Islas Caimán","Cayman Islands","Iles Caïmans","KY","CYM","1 345" 106 | "Islas Cocos (Keeling)","Cocos (Keeling) Islands","Cocos (Keeling","CC","CCK","61" 107 | "Islas Cook","Cook Islands","Iles Cook","CK","COK","682" 108 | "Islas de Åland","Åland Islands","Îles Åland","AX","ALA","358" 109 | "Islas Feroe","Faroe Islands","Iles Féro","FO","FRO","298" 110 | "Islas Georgias del Sur y Sandwich del Sur","South Georgia and the South Sandwich Islands","Géorgie du Sud et les Îles Sandwich du Sud","GS","SGS","500" 111 | "Islas Heard y McDonald","Heard Island and McDonald Islands","Les îles Heard et McDonald","HM","HMD","" 112 | "Islas Maldivas","Maldives","Maldives","MV","MDV","960" 113 | "Islas Malvinas","Falkland Islands (Malvinas)","Iles Falkland (Malvinas","FK","FLK","500" 114 | "Islas Marianas del Norte","Northern Mariana Islands","Iles Mariannes du Nord","MP","MNP","1 670" 115 | "Islas Marshall","Marshall Islands","Iles Marshall","MH","MHL","692" 116 | "Islas Pitcairn","Pitcairn Islands","Iles Pitcairn","PN","PCN","870" 117 | "Islas Salomón","Solomon Islands","Iles Salomon","SB","SLB","677" 118 | "Islas Turcas y Caicos","Turks and Caicos Islands","Iles Turques et Caïques","TC","TCA","1 649" 119 | "Islas Ultramarinas Menores de Estados Unidos","United States Minor Outlying Islands","États-Unis Îles mineures éloignées","UM","UMI","246" 120 | "Islas Vírgenes Británicas","Virgin Islands","Iles Vierges","VG","VGB","1 284" 121 | "Islas Vírgenes de los Estados Unidos","United States Virgin Islands","Îles Vierges américaines","VI","VIR","1 340" 122 | "Israel","Israel","Israël","IL","ISR","972" 123 | "Italia","Italy","Italie","IT","ITA","39" 124 | "Jamaica","Jamaica","Jamaïque","JM","JAM","1 876" 125 | "Japón","Japan","Japon","JP","JPN","81" 126 | "Jersey","Jersey","Maillot","JE","JEY","44" 127 | "Jordania","Jordan","Jordan","JO","JOR","962" 128 | 
"Kazajistán","Kazakhstan","Le Kazakhstan","KZ","KAZ","7" 129 | "Kenia","Kenya","Kenya","KE","KEN","254" 130 | "Kirguistán","Kyrgyzstan","Kirghizstan","KG","KGZ","996" 131 | "Kiribati","Kiribati","Kiribati","KI","KIR","686" 132 | "Kuwait","Kuwait","Koweït","KW","KWT","965" 133 | "Líbano","Lebanon","Liban","LB","LBN","961" 134 | "Laos","Laos","Laos","LA","LAO","856" 135 | "Lesoto","Lesotho","Lesotho","LS","LSO","266" 136 | "Letonia","Latvia","La Lettonie","LV","LVA","371" 137 | "Liberia","Liberia","Liberia","LR","LBR","231" 138 | "Libia","Libya","Libye","LY","LBY","218" 139 | "Liechtenstein","Liechtenstein","Liechtenstein","LI","LIE","423" 140 | "Lituania","Lithuania","La Lituanie","LT","LTU","370" 141 | "Luxemburgo","Luxembourg","Luxembourg","LU","LUX","352" 142 | "México","Mexico","Mexique","MX","MEX","52" 143 | "Mónaco","Monaco","Monaco","MC","MCO","377" 144 | "Macao","Macao","Macao","MO","MAC","853" 145 | "Macedônia","Macedonia","Macédoine","MK","MKD","389" 146 | "Madagascar","Madagascar","Madagascar","MG","MDG","261" 147 | "Malasia","Malaysia","Malaisie","MY","MYS","60" 148 | "Malawi","Malawi","Malawi","MW","MWI","265" 149 | "Mali","Mali","Mali","ML","MLI","223" 150 | "Malta","Malta","Malte","MT","MLT","356" 151 | "Marruecos","Morocco","Maroc","MA","MAR","212" 152 | "Martinica","Martinique","Martinique","MQ","MTQ","596" 153 | "Mauricio","Mauritius","Iles Maurice","MU","MUS","230" 154 | "Mauritania","Mauritania","Mauritanie","MR","MRT","222" 155 | "Mayotte","Mayotte","Mayotte","YT","MYT","262" 156 | "Micronesia","Estados Federados de","Federados Estados de","FM","FSM","691" 157 | "Moldavia","Moldova","Moldavie","MD","MDA","373" 158 | "Mongolia","Mongolia","Mongolie","MN","MNG","976" 159 | "Montenegro","Montenegro","Monténégro","ME","MNE","382" 160 | "Montserrat","Montserrat","Montserrat","MS","MSR","1 664" 161 | "Mozambique","Mozambique","Mozambique","MZ","MOZ","258" 162 | "Namibia","Namibia","Namibie","NA","NAM","264" 163 | "Nauru","Nauru","Nauru","NR","NRU","674" 164 | "Nepal","Nepal","Népal","NP","NPL","977" 165 | "Nicaragua","Nicaragua","Nicaragua","NI","NIC","505" 166 | "Niger","Niger","Niger","NE","NER","227" 167 | "Nigeria","Nigeria","Nigeria","NG","NGA","234" 168 | "Niue","Niue","Niou","NU","NIU","683" 169 | "Noruega","Norway","Norvège","NO","NOR","47" 170 | "Nueva Caledonia","New Caledonia","Nouvelle-Calédonie","NC","NCL","687" 171 | "Nueva Zelanda","New Zealand","Nouvelle-Zélande","NZ","NZL","64" 172 | "Omán","Oman","Oman","OM","OMN","968" 173 | "Países Bajos","Netherlands","Pays-Bas","NL","NLD","31" 174 | "Pakistán","Pakistan","Pakistan","PK","PAK","92" 175 | "Palau","Palau","Palau","PW","PLW","680" 176 | "Palestina","Palestine","La Palestine","PS","PSE","970" 177 | "Panamá","Panama","Panama","PA","PAN","507" 178 | "Papúa Nueva Guinea","Papua New Guinea","Papouasie-Nouvelle-Guinée","PG","PNG","675" 179 | "Paraguay","Paraguay","Paraguay","PY","PRY","595" 180 | "Perú","Peru","Pérou","PE","PER","51" 181 | "Polinesia Francesa","French Polynesia","Polynésie française","PF","PYF","689" 182 | "Polonia","Poland","Pologne","PL","POL","48" 183 | "Portugal","Portugal","Portugal","PT","PRT","351" 184 | "Puerto Rico","Puerto Rico","Porto Rico","PR","PRI","1" 185 | "Qatar","Qatar","Qatar","QA","QAT","974" 186 | "Reino Unido","United Kingdom","Royaume-Uni","GB","GBR","44" 187 | "República Centroafricana","Central African Republic","République Centrafricaine","CF","CAF","236" 188 | "República Checa","Czech Republic","République Tchèque","CZ","CZE","420" 189 | "República Dominicana","Dominican 
Republic","République Dominicaine","DO","DOM","1 809" 190 | "República de Sudán del Sur","South Sudan","Soudan du Sud","SS","SSD","211" 191 | "Reunión","Réunion","Réunion","RE","REU","262" 192 | "Ruanda","Rwanda","Rwanda","RW","RWA","250" 193 | "Rumanía","Romania","Roumanie","RO","ROU","40" 194 | "Rusia","Russia","La Russie","RU","RUS","7" 195 | "Sahara Occidental","Western Sahara","Sahara Occidental","EH","ESH","212" 196 | "Samoa","Samoa","Samoa","WS","WSM","685" 197 | "Samoa Americana","American Samoa","Les Samoa américaines","AS","ASM","1 684" 198 | "San Bartolomé","Saint Barthélemy","Saint-Barthélemy","BL","BLM","590" 199 | "San Cristóbal y Nieves","Saint Kitts and Nevis","Saint Kitts et Nevis","KN","KNA","1 869" 200 | "San Marino","San Marino","San Marino","SM","SMR","378" 201 | "San Martín (Francia)","Saint Martin (French part)","Saint-Martin (partie française)","MF","MAF","1 599" 202 | "San Pedro y Miquelón","Saint Pierre and Miquelon","Saint-Pierre-et-Miquelon","PM","SPM","508" 203 | "San Vicente y las Granadinas","Saint Vincent and the Grenadines","Saint-Vincent et Grenadines","VC","VCT","1 784" 204 | "Santa Elena","Ascensión y Tristán de Acuña","Ascensión y Tristan de Acuña","SH","SHN","290" 205 | "Santa Lucía","Saint Lucia","Sainte-Lucie","LC","LCA","1 758" 206 | "Santo Tomé y Príncipe","Sao Tome and Principe","Sao Tomé et Principe","ST","STP","239" 207 | "Senegal","Senegal","Sénégal","SN","SEN","221" 208 | "Serbia","Serbia","Serbie","RS","SRB","381" 209 | "Seychelles","Seychelles","Les Seychelles","SC","SYC","248" 210 | "Sierra Leona","Sierra Leone","Sierra Leone","SL","SLE","232" 211 | "Singapur","Singapore","Singapour","SG","SGP","65" 212 | "Sint Maarten","Sint Maarten","Saint-Martin","SX","SMX",1 721 213 | "Siria","Syria","Syrie","SY","SYR","963" 214 | "Somalia","Somalia","Somalie","SO","SOM","252" 215 | "Sri lanka","Sri Lanka","Sri Lanka","LK","LKA","94" 216 | "Sudáfrica","South Africa","Afrique du Sud","ZA","ZAF","27" 217 | "Sudán","Sudan","Soudan","SD","SDN","249" 218 | "Suecia","Sweden","Suède","SE","SWE","46" 219 | "Suiza","Switzerland","Suisse","CH","CHE","41" 220 | "Surinám","Suriname","Surinam","SR","SUR","597" 221 | "Svalbard y Jan Mayen","Svalbard and Jan Mayen","Svalbard et Jan Mayen","SJ","SJM","47" 222 | "Swazilandia","Swaziland","Swaziland","SZ","SWZ","268" 223 | "Tayikistán","Tajikistan","Le Tadjikistan","TJ","TJK","992" 224 | "Tailandia","Thailand","Thaïlande","TH","THA","66" 225 | "Taiwán","Taiwan","Taiwan","TW","TWN","886" 226 | "Tanzania","Tanzania","Tanzanie","TZ","TZA","255" 227 | "Territorio Británico del Océano Índico","British Indian Ocean Territory","Territoire britannique de l'océan Indien","IO","IOT","246" 228 | "Territorios Australes y Antárticas Franceses","French Southern Territories","Terres australes françaises","TF","ATF","" 229 | "Timor Oriental","East Timor","Timor-Oriental","TL","TLS","670" 230 | "Togo","Togo","Togo","TG","TGO","228" 231 | "Tokelau","Tokelau","Tokélaou","TK","TKL","690" 232 | "Tonga","Tonga","Tonga","TO","TON","676" 233 | "Trinidad y Tobago","Trinidad and Tobago","Trinidad et Tobago","TT","TTO","1 868" 234 | "Tunez","Tunisia","Tunisie","TN","TUN","216" 235 | "Turkmenistán","Turkmenistan","Le Turkménistan","TM","TKM","993" 236 | "Turquía","Turkey","Turquie","TR","TUR","90" 237 | "Tuvalu","Tuvalu","Tuvalu","TV","TUV","688" 238 | "Ucrania","Ukraine","L'Ukraine","UA","UKR","380" 239 | "Uganda","Uganda","Ouganda","UG","UGA","256" 240 | "Uruguay","Uruguay","Uruguay","UY","URY","598" 241 | 
"Uzbekistán","Uzbekistan","L'Ouzbékistan","UZ","UZB","998" 242 | "Vanuatu","Vanuatu","Vanuatu","VU","VUT","678" 243 | "Venezuela","Venezuela","Venezuela","VE","VEN","58" 244 | "Vietnam","Vietnam","Vietnam","VN","VNM","84" 245 | "Wallis y Futuna","Wallis and Futuna","Wallis et Futuna","WF","WLF","681" 246 | "Yemen","Yemen","Yémen","YE","YEM","967" 247 | "Yibuti","Djibouti","Djibouti","DJ","DJI","253" 248 | "Zambia","Zambia","Zambie","ZM","ZMB","260" 249 | "Zimbabue","Zimbabwe","Zimbabwe","ZW","ZWE","263" 250 | -------------------------------------------------------------------------------- /Bases_Datos/Notebooks/Comando magico.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "raw", 5 | "id": "parallel-solid", 6 | "metadata": {}, 7 | "source": [ 8 | "(bigdata) alvaro@Lenovo:~$ conda install -c conda-forge ipython-sql\n", 9 | "(bigdata) alvaro@Lenovo:~$ conda install -c anaconda sqlalchemy\n", 10 | "(bigdata) alvaro@Lenovo:~$ conda install -c anaconda pymysql\n", 11 | "(bigdata) alvaro@Lenovo:~$ conda install -c anaconda mysqlclient\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "id": "addressed-dominant", 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "data": { 22 | "text/plain": [ 23 | "'1.3.23'" 24 | ] 25 | }, 26 | "execution_count": 2, 27 | "metadata": {}, 28 | "output_type": "execute_result" 29 | } 30 | ], 31 | "source": [ 32 | "import sqlalchemy\n", 33 | "sqlalchemy.__version__ " 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "id": "danish-double", 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "from sqlalchemy import create_engine" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 3, 49 | "id": "selective-rogers", 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "%load_ext sql" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 12, 59 | "id": "celtic-relations", 60 | "metadata": {}, 61 | "outputs": [ 62 | { 63 | "data": { 64 | "text/plain": [ 65 | "Engine(mysql+pymysql://root:***@localhost:3306/test)" 66 | ] 67 | }, 68 | "execution_count": 12, 69 | "metadata": {}, 70 | "output_type": "execute_result" 71 | } 72 | ], 73 | "source": [ 74 | "create_engine(\"mysql+pymysql://root:Alvaro1960@localhost:3306/test\")" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 14, 80 | "id": "historic-compound", 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "'Connected: root@test'" 87 | ] 88 | }, 89 | "execution_count": 14, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "%sql mysql+pymysql://root:Alvaro1960@localhost:3306/test" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 20, 101 | "id": "sealed-champion", 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "name": "stdout", 106 | "output_type": "stream", 107 | "text": [ 108 | " * mysql+pymysql://root:***@localhost:3306/test\n", 109 | "3 rows affected.\n" 110 | ] 111 | }, 112 | { 113 | "data": { 114 | "text/html": [ 115 | "\n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | 
" \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | "
FieldTypeNullKeyDefaultExtra
book_idint(11)YESNone
titletextYESNone
statusint(11)YESNone
" 153 | ], 154 | "text/plain": [ 155 | "[('book_id', 'int(11)', 'YES', '', None, ''),\n", 156 | " ('title', 'text', 'YES', '', None, ''),\n", 157 | " ('status', 'int(11)', 'YES', '', None, '')]" 158 | ] 159 | }, 160 | "execution_count": 20, 161 | "metadata": {}, 162 | "output_type": "execute_result" 163 | } 164 | ], 165 | "source": [ 166 | "%%sql\n", 167 | "\n", 168 | "describe books;" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "id": "prescription-elimination", 174 | "metadata": {}, 175 | "source": [ 176 | "# Pasando el resultado de un query en una variable" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 21, 182 | "id": "square-worker", 183 | "metadata": {}, 184 | "outputs": [ 185 | { 186 | "name": "stdout", 187 | "output_type": "stream", 188 | "text": [ 189 | " * mysql+pymysql://root:***@localhost:3306/test\n", 190 | "3 rows affected.\n" 191 | ] 192 | } 193 | ], 194 | "source": [ 195 | "libros = %sql select * from books" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 22, 201 | "id": "arranged-recognition", 202 | "metadata": {}, 203 | "outputs": [ 204 | { 205 | "data": { 206 | "text/html": [ 207 | "\n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | "
 198 | { 199 | "cell_type": "code", 200 | "execution_count": 22, 201 | "id": "arranged-recognition", 202 | "metadata": {}, 203 | "outputs": [ 204 | { 205 | "data": { 206 | "text/html": [ 207 | "<table>\n", " <tr><th>book_id</th><th>title</th><th>status</th></tr>\n", " <tr><td>100</td><td>The Catche in the Rye</td><td>1</td></tr>\n", " <tr><td>200</td><td>The Catcher of the Rye</td><td>1</td></tr>\n", " <tr><td>300</td><td>My Antonia</td><td>0</td></tr>\n", "</table>
" 233 | ], 234 | "text/plain": [ 235 | "[(100, 'The Catche in the Rye', 1),\n", 236 | " (200, 'The Catcher of the Rye', 1),\n", 237 | " (300, 'My Antonia', 0)]" 238 | ] 239 | }, 240 | "execution_count": 22, 241 | "metadata": {}, 242 | "output_type": "execute_result" 243 | } 244 | ], 245 | "source": [ 246 | "libros" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 23, 252 | "id": "heavy-rider", 253 | "metadata": {}, 254 | "outputs": [ 255 | { 256 | "data": { 257 | "text/plain": [ 258 | "sql.run.ResultSet" 259 | ] 260 | }, 261 | "execution_count": 23, 262 | "metadata": {}, 263 | "output_type": "execute_result" 264 | } 265 | ], 266 | "source": [ 267 | "type(libros)" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 24, 273 | "id": "competent-relaxation", 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "name": "stdout", 278 | "output_type": "stream", 279 | "text": [ 280 | "+---------+------------------------+--------+\n", 281 | "| book_id | title | status |\n", 282 | "+---------+------------------------+--------+\n", 283 | "| 100 | The Catche in the Rye | 1 |\n", 284 | "| 200 | The Catcher of the Rye | 1 |\n", 285 | "| 300 | My Antonia | 0 |\n", 286 | "+---------+------------------------+--------+\n" 287 | ] 288 | } 289 | ], 290 | "source": [ 291 | "print(libros)" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 26, 297 | "id": "advance-brush", 298 | "metadata": {}, 299 | "outputs": [ 300 | { 301 | "data": { 302 | "text/plain": [ 303 | "(100, 'The Catche in the Rye', 1)" 304 | ] 305 | }, 306 | "execution_count": 26, 307 | "metadata": {}, 308 | "output_type": "execute_result" 309 | } 310 | ], 311 | "source": [ 312 | "libros[0]" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 27, 318 | "id": "optimum-ordinary", 319 | "metadata": {}, 320 | "outputs": [ 321 | { 322 | "data": { 323 | "text/plain": [ 324 | "sqlalchemy.engine.result.RowProxy" 325 | ] 326 | }, 327 | "execution_count": 27, 328 | "metadata": {}, 329 | "output_type": "execute_result" 330 | } 331 | ], 332 | "source": [ 333 | "type(libros[0])" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 30, 339 | "id": "understood-booking", 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [ 343 | "z = []\n", 344 | "z.append(libros[0])" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": 31, 350 | "id": "peaceful-monitoring", 351 | "metadata": {}, 352 | "outputs": [ 353 | { 354 | "data": { 355 | "text/plain": [ 356 | "[(100, 'The Catche in the Rye', 1)]" 357 | ] 358 | }, 359 | "execution_count": 31, 360 | "metadata": {}, 361 | "output_type": "execute_result" 362 | } 363 | ], 364 | "source": [ 365 | "z" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 32, 371 | "id": "romance-motel", 372 | "metadata": {}, 373 | "outputs": [ 374 | { 375 | "data": { 376 | "text/plain": [ 377 | "(100, 'The Catche in the Rye', 1)" 378 | ] 379 | }, 380 | "execution_count": 32, 381 | "metadata": {}, 382 | "output_type": "execute_result" 383 | } 384 | ], 385 | "source": [ 386 | "tuple(z[0])" 387 | ] 388 | }, 389 | { 390 | "cell_type": "markdown", 391 | "id": "understanding-schema", 392 | "metadata": {}, 393 | "source": [ 394 | "## Pandas" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": 33, 400 | "id": "intensive-brother", 401 | "metadata": {}, 402 | "outputs": [], 403 | "source": [ 404 | "df = libros.DataFrame()" 405 | ] 406 | }, 407 | { 408 
| "cell_type": "code", 409 | "execution_count": 34, 410 | "id": "controlling-values", 411 | "metadata": {}, 412 | "outputs": [ 413 | { 414 | "data": { 415 | "text/html": [ 416 | "
\n", 417 | "\n", 430 | "\n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | "
book_idtitlestatus
0100The Catche in the Rye1
1200The Catcher of the Rye1
2300My Antonia0
\n", 460 | "
" 461 | ], 462 | "text/plain": [ 463 | " book_id title status\n", 464 | "0 100 The Catche in the Rye 1\n", 465 | "1 200 The Catcher of the Rye 1\n", 466 | "2 300 My Antonia 0" 467 | ] 468 | }, 469 | "execution_count": 34, 470 | "metadata": {}, 471 | "output_type": "execute_result" 472 | } 473 | ], 474 | "source": [ 475 | "df" 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": 35, 481 | "id": "sought-heater", 482 | "metadata": {}, 483 | "outputs": [ 484 | { 485 | "data": { 486 | "text/plain": [ 487 | "pandas.core.frame.DataFrame" 488 | ] 489 | }, 490 | "execution_count": 35, 491 | "metadata": {}, 492 | "output_type": "execute_result" 493 | } 494 | ], 495 | "source": [ 496 | "type(df)" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": 1, 502 | "id": "worldwide-sampling", 503 | "metadata": {}, 504 | "outputs": [ 505 | { 506 | "ename": "NameError", 507 | "evalue": "name 'df' is not defined", 508 | "output_type": "error", 509 | "traceback": [ 510 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 511 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 512 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdescribe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 513 | "\u001b[0;31mNameError\u001b[0m: name 'df' is not defined" 514 | ] 515 | } 516 | ], 517 | "source": [ 518 | "df.describe()" 519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": null, 524 | "id": "composite-elephant", 525 | "metadata": {}, 526 | "outputs": [], 527 | "source": [] 528 | } 529 | ], 530 | "metadata": { 531 | "kernelspec": { 532 | "display_name": "Python 3", 533 | "language": "python", 534 | "name": "python3" 535 | }, 536 | "language_info": { 537 | "codemirror_mode": { 538 | "name": "ipython", 539 | "version": 3 540 | }, 541 | "file_extension": ".py", 542 | "mimetype": "text/x-python", 543 | "name": "python", 544 | "nbconvert_exporter": "python", 545 | "pygments_lexer": "ipython3", 546 | "version": "3.8.8" 547 | } 548 | }, 549 | "nbformat": 4, 550 | "nbformat_minor": 5 551 | } 552 | -------------------------------------------------------------------------------- /Syllabus/Cuadernos/Syllabus_Big Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "tags": [] 7 | }, 8 | "source": [ 9 | "
\n", 10 | " \n", 11 | "
\n", 12 | "\n", 13 | "# Aprendizaje Profundo" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": { 19 | "tags": [] 20 | }, 21 | "source": [ 22 | "#
<center>Curso Sistemas de Información - Syllabus</center>
" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "
<center>Big data y bases de datos</center>
" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "## Profesor" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "1. Alvaro Mauricio Montenegro Díaz, ammontenegrod@unal.edu.co\n" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "## Diseño gráfico y Marketing digital\n", 51 | " " 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "1. Maria del Pilar Montenegro Reyes, pmontenegro88@gmail.com " 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "## Referencias " 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "1. [Alvaro Montenegro, Big Data, 2021](https://github.com/AprendizajeProfundo/BigData/blob/main/README.md)\n", 73 | "1. [Alvaro Montenegro, Daniel Montenegro, Campo Elías Pardo, Inteligencia Artificial y Aprendizaje Profundo, 2021](https://github.com/AprendizajeProfundo/Diplomado)\n", 74 | "1. [Alvaro Montenegro, Daniel Montenegro, Ciencia de Datos, 2021](https://github.com/AprendizajeProfundo/Ciencia-de-Datos)\n", 75 | "1. [Klassen, Mikhail Russell, Matthew A, Mining the Social Web, O'Reilly Media, Inc (2019)](http://library.lol/main/D1A74C6167E1AE37085CE0AABD71939F)\n", 76 | "1. [Kumar, Anant Singh, Ajit , Graph Database Modeling with neo4, 2020](http://library.lol/main/9703B3108E0745E342CC999B61A19DB0) \n", 77 | "1. [Big Data Specialization, U. California, 2018](https://www.coursera.org/specializations/big-data)\n", 78 | "1. [John Hunt, A Beginners Guide to Python 3 Programming, 2019](http://libgen.rs/search.php?req=A+Beginners+Guide+to+Python+3+Programming+hunt&open=0&res=25&view=simple&phrase=1&column=def)\n", 79 | "1. [John Hunt,Advanced Guide to Python 3 Programming, 2019](http://libgen.rs/search.php?req=Advanced+Guide+to+Python+3+Programming+hunt&open=0&res=25&view=simple&phrase=1&column=def)\n", 80 | "1. [Peter Ghavami, Big Data Analytics Methods, second edition, 2020](http://libgen.rs/search.php?req=big+data+analytics+methods&open=0&res=25&view=simple&phrase=1&column=def)\n", 81 | "1. [Sudeep Tanwar, Sudhanshu Tyagi, Neeraj Kumar Editors, Multimedia Big Data Computing for IoT Applications Concepts, Paradigms and Solutions, 2021](http://libgen.rs/search.php?req=+Multimedia+Big+Data+Computing&open=0&res=25&view=simple&phrase=1&column=def)\n", 82 | "1. [Amit Phaltankar, Juned Ahsan, Michael Harrison, and Liviu Nedov, MongoDB Fundamentals, 2020](http://libgen.rs/search.php?req=MongoDB+Fundamentals-Packt+Publishing+&open=0&res=25&view=simple&phrase=1&column=def)\n", 83 | "1. [Salahaldin Juba; Andrey Volkov, Learning PostgreSQL 11: A beginner’s guide to building high-performance PostgreSQL database solutions, 3rd Edition, 2019](ibgen.rs/search.php?req=PostgreSQL+11_+A+beginner’s+guide+to+building+high-performance+PostgreSQL+&open=0&res=25&view=simple&phrase=1&column=def)\n", 84 | "1. [Upom Malik, Matt Goldwasser, Benjamin Johnston, The Applied SQL Data Analytics Workshop: A Quick, Interactive Approach to Learning Analytics with SQL, 2nd Edition, 2019](http://libgen.rs/search.php?req=Applied+SQL+Data+Analytics+Workshop_+A+Quick%2C+Interactive+Approach+to+Learning+A&open=0&res=25&view=simple&phrase=1&column=def)\n", 85 | "1. [Athul Dev, Spark with Python, 2020](http://libgen.rs/search.php?req=spark+in+action&open=0&res=25&view=simple&phrase=1&column=def)\n", 86 | "1. 
[Jean-Georges Perrin, Spark in Action, Second Edition, 2020](http://libgen.rs/search.php?req=spark+in+action&open=0&res=25&view=simple&phrase=1&column=def)\n", 87 | "1. [Oleg Jarma, Instalación de Anaconda, 2020](https://www.youtube.com/watch?v=yKG-bzLZxyI&t)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "## Tecnologías del curso " 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "1. Python\n", 102 | "1. Jupyter Lab\n", 103 | "1. Dask, Spark\n", 104 | "1. PostgreSQL\n", 105 | "1. MariaDB\n", 106 | "1. MongoDB \n", 107 | "1. Neo4j\n", 108 | "1. GitHub\n", 109 | "1. Colab, Google\n", 110 | "1. AWS, Amazon\n", 111 | "1. Azure, Microsoft\n", 112 | "1. WordPress\n", 113 | "1. Django" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Fuentes de datos " 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "1. [Best Public Datasets for Machine Learning and Data Science](https://pub.towardsai.net/best-datasets-for-machine-learning-data-science-computer-vision-nlp-ai-c9541058cf4f)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "## Contenido " 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "- Programación Python\n", 142 | " - [Python, primeros pasos](https://nbviewer.jupyter.org/github/AprendizajeProfundo/Diplomado/blob/master/Temas/Módulo%202-%20Introducción%20a%20la%20programación/3.%20Programación%20en%20Python/Cuadernos/Intro_Python.ipynb)\n", 143 | " - [Funciones en Python](https://nbviewer.jupyter.org/github/AprendizajeProfundo/Diplomado/blob/master/Temas/M%C3%B3dulo%202-%20Introducci%C3%B3n%20a%20la%20programaci%C3%B3n/3.%20Programaci%C3%B3n%20en%20Python/Cuadernos/Funciones.ipynb)\n", 144 | " - [Colecciones de Objetos en Python](https://nbviewer.jupyter.org/github/AprendizajeProfundo/Diplomado/blob/master/Temas/M%C3%B3dulo%202-%20Introducci%C3%B3n%20a%20la%20programaci%C3%B3n/3.%20Programaci%C3%B3n%20en%20Python/Cuadernos/Colecciones.ipynb)\n", 145 | " - [Módulos en Python](https://nbviewer.jupyter.org/github/AprendizajeProfundo/Diplomado/blob/master/Temas/M%C3%B3dulo%202-%20Introducci%C3%B3n%20a%20la%20programaci%C3%B3n/3.%20Programaci%C3%B3n%20en%20Python/Cuadernos/Paquetes.ipynb)\n", 146 | " - [Concepto de Clase en Python](https://nbviewer.jupyter.org/github/AprendizajeProfundo/Diplomado/blob/master/Temas/Módulo%202-%20Introducción%20a%20la%20programación/3.%20Programación%20en%20Python/Cuadernos/Intro_Clases_Python.ipynb)\n", 147 | " - [Decoradores en Python](https://nbviewer.jupyter.org/github/AprendizajeProfundo/Diplomado/blob/master/Temas/M%C3%B3dulo%202-%20Introducci%C3%B3n%20a%20la%20programaci%C3%B3n/3.%20Programaci%C3%B3n%20en%20Python/Cuadernos/Decorators.ipynb)\n", 148 | " - [Tratamiento de datos con Pandas](https://nbviewer.jupyter.org/github/AprendizajeProfundo/Diplomado/blob/master/Temas/M%C3%B3dulo%2013-%20Talleres/Cuadernos/Taller_Pandas.ipynb)\n", 149 | " - [Introducción a tensores con Numpy](https://nbviewer.jupyter.org/github/AprendizajeProfundo/Diplomado/blob/master/Temas/Módulo%201-%20Matemáticas%20y%20Estadística/1.%20Matemáticas/Cuadernos/Intro_Tensores_I.ipynb)\n", 150 | " - [Multiprocesos y multihilos](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Python/Cuadernos/05_A_Mulitprocesos_Python_am.ipynb)\n", 151 | "\n", 152 | 
"\n", 153 | "- Bases de Datos Relacionales\n", 154 | " - [Introducción a bases de datos](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Bases_Datos/Notebooks/Bases_Datos_Rel_Intro.ipynb)\n", 155 | " - [Modelo entidad-relación](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Bases_Datos/Notebooks/Bases_Datos_Rel_Modelo_E_R.ipynb)\n", 156 | " - [Implementación de bases de datos]( https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Bases_Datos/Notebooks/Bases_Datos_Rel_Implementacion.ipynb)\n", 157 | " - [Algebra relacional](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Bases_Datos/Notebooks/Bases_Datos_Rel_Algebra_Relacional.ipynb )\n", 158 | " - [SQL I](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Bases_Datos/Notebooks/Bases_Datos_SQL.ipynb) \n", 159 | " - [SQL II](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Bases_Datos/Notebooks/Bases_Datos_SQL-2.ipynb)\n", 160 | " - [SQL III -Ejemplo Sakila](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Bases_Datos/Notebooks/Bases_Datos_SQL-3.ipynb)\n", 161 | " \n", 162 | " \n", 163 | "- Motor Dask para Big Data en Python\n", 164 | " - [Introducción la procesamiento paralelo con Dask](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/01_dask_delayed_am.ipynb)\n", 165 | " - [Ejecución retrasada (lazzy)](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/01x_lazy_am.ipynb)\n", 166 | " - [Mejores prácticas con ejecución retrasada](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/01_dask_Mejores_Practicas.ipynb)\n", 167 | " - [Colleciones bag](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/02_bag_am.ipynb)\n", 168 | " - [Arrays](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/03_array_am.ipynb)\n", 169 | " - [Dataframes](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/04_dataframe_am.ipynb)\n", 170 | " - [Ejecución distribuida](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/05_distributed_am.ipynb)\n", 171 | " - [Ejecución distribuida avanzada](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/06_distributed_advanced_am.ipynb)\n", 172 | " - [Ejecución con un cluster en la nube - Coiled](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/10_coiled_quickstart.ipynb)\n", 173 | " - [Almacenamiento eficiente de dask-dataframes](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/07_dataframe_storage_am.ipynb)\n", 174 | " - [Aprendizaje de Maquinas paralelo y distribuido](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Dask/Cuadernos/08_machine_learning_am.ipynb) \n", 175 | " - [Almacenamiento hdf5](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Almacenamiento/Cuadernos/hdf5.ipynb)\n", 176 | " \n", 177 | " \n", 178 | "- Spark\n", 179 | " - [Instalacion de Spark: Ubuntu, Jupyterlab](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Spark/Cuadernos/Spark_install.ipynb)\n", 180 | " - [Introducción a 
Spark](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Spark/Cuadernos/Spark_Introduccion.ipynb)\n", 181 | " - [Introducción a Máquinas de Aprendizaje con Spark](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Spark/Cuadernos/Spark_Machine_Learning.ipynb)\n", 182 | " - [Introducción a árboles de decisión y regresión con Spark](https://nbviewer.jupyter.org/github/AprendizajeProfundo/BigData/blob/main/Spark/Cuadernos/Spark_Machine_Learning-tree.ipynb)\n", 183 | "\n", 184 | "\n", 185 | "- Bases de datos no relacionales\n", 186 | " - Formatos de intercambio de datos: JSON, XML, CSV\n", 187 | " - Bases de datos orientadas a documentos: MongoDB\n", 188 | " - Minado de textos\n", 189 | " - Minado de correos electrónicos\n", 190 | " - Bases de datos basadas en grafos: Neo4j\n", 191 | "\n", 192 | "\n", 193 | "- Minería de redes sociales\n", 194 | " - Raspado web (web scraping)\n", 195 | " - Twitter\n", 196 | " - Facebook\n", 197 | " - Instagram\n", 198 | " - GitHub\n", 199 | " - LinkedIn\n", 200 | " - Minado de textos\n", 201 | " - Minado de correos electrónicos\n", 202 | " \n", 203 | " \n", 204 | "- Marcos de trabajo para desarrollo de aplicaciones web (Apps)\n", 205 | " - WordPress\n", 206 | " - Django\n" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "## Proyectos de curso" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "El curso se organiza en grupos de máximo tres personas. Cada grupo desarrolla un proyecto de minado de una red social determinada por el profesor.\n", 221 | "\n", 222 | "Cada grupo debe crear una cuenta de desarrollador, entender el uso de la respectiva API y proponer una línea de análisis.\n", 223 | "\n", 224 | "Para el desarrollo del proyecto, diseñará una base de datos, que alimentará con los datos necesarios para los análisis propuestos.\n", 225 | "\n", 226 | "Paralelamente desarrollará una aplicación, en la cual presentará los resultados propuestos.\n", 227 | "\n", 228 | "\n", 229 | "Entregables:\n", 230 | "\n", 231 | "1. Un documento desarrollado en Jupyter book, en donde describe explícitamente el proyecto.\n", 232 | "1. Los cuadernos de Jupyter lab con los códigos desarrollados para el proyecto.\n", 233 | "1. Un video promocional del proyecto, el cual debe ser colgado en la App del proyecto.\n", 234 | "1. La aplicación (App) desarrollada para presentar los resultados previstos del proyecto." 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "## Evaluación" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "1. Un proyecto que será desarrollado por máximo tres personas a lo largo del curso. Total 80%.\n", 249 | "2. Seguimiento del proyecto. Asistencias a asesorías extraclase. Total 5%.\n", 250 | "3. Video promocional de cinco minutos sobre el proyecto. Total 5%.\n", 251 | "4. Una exposición final sobre el proyecto. Total 5%.\n", 252 | "5. Exposición de un tema asignado. 
Total 5%" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [] 266 | } 267 | ], 268 | "metadata": { 269 | "kernelspec": { 270 | "display_name": "Python 3", 271 | "language": "python", 272 | "name": "python3" 273 | }, 274 | "language_info": { 275 | "codemirror_mode": { 276 | "name": "ipython", 277 | "version": 3 278 | }, 279 | "file_extension": ".py", 280 | "mimetype": "text/x-python", 281 | "name": "python", 282 | "nbconvert_exporter": "python", 283 | "pygments_lexer": "ipython3", 284 | "version": "3.8.8" 285 | } 286 | }, 287 | "nbformat": 4, 288 | "nbformat_minor": 4 289 | } 290 | -------------------------------------------------------------------------------- /Dask/images/unmerged_grid_search_graph.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 9 | 10 | unmerged 11 | 12 | 13 | data 14 | 15 | Training Data 16 | 17 | 18 | vect1 19 | 20 | CountVectorizer 21 | - ngram_range=(1, 1) 22 | 23 | 24 | data->vect1 25 | 26 | 27 | 28 | 29 | vect2 30 | 31 | CountVectorizer 32 | - ngram_range=(1, 1) 33 | 34 | 35 | data->vect2 36 | 37 | 38 | 39 | 40 | vect3 41 | 42 | CountVectorizer 43 | - ngram_range=(1, 1) 44 | 45 | 46 | data->vect3 47 | 48 | 49 | 50 | 51 | vect4 52 | 53 | CountVectorizer 54 | - ngram_range=(1, 1) 55 | 56 | 57 | data->vect4 58 | 59 | 60 | 61 | 62 | vect5 63 | 64 | CountVectorizer 65 | - ngram_range=(1, 1) 66 | 67 | 68 | data->vect5 69 | 70 | 71 | 72 | 73 | vect6 74 | 75 | CountVectorizer 76 | - ngram_range=(1, 1) 77 | 78 | 79 | data->vect6 80 | 81 | 82 | 83 | 84 | tfidf1 85 | 86 | TfidfTransformer 87 | - norm='l1' 88 | 89 | 90 | vect1->tfidf1 91 | 92 | 93 | 94 | 95 | tfidf2 96 | 97 | TfidfTransformer 98 | - norm='l1' 99 | 100 | 101 | vect2->tfidf2 102 | 103 | 104 | 105 | 106 | tfidf3 107 | 108 | TfidfTransformer 109 | - norm='l1' 110 | 111 | 112 | vect3->tfidf3 113 | 114 | 115 | 116 | 117 | tfidf4 118 | 119 | TfidfTransformer 120 | - norm='l2' 121 | 122 | 123 | vect4->tfidf4 124 | 125 | 126 | 127 | 128 | tfidf5 129 | 130 | TfidfTransformer 131 | - norm='l2' 132 | 133 | 134 | vect5->tfidf5 135 | 136 | 137 | 138 | 139 | tfidf6 140 | 141 | TfidfTransformer 142 | - norm='l2' 143 | 144 | 145 | vect6->tfidf6 146 | 147 | 148 | 149 | 150 | sgd1 151 | 152 | SGDClassifier 153 | - alpha=1e-3 154 | 155 | 156 | tfidf1->sgd1 157 | 158 | 159 | 160 | 161 | sgd2 162 | 163 | SGDClassifier 164 | - alpha=1e-4 165 | 166 | 167 | tfidf2->sgd2 168 | 169 | 170 | 171 | 172 | sgd3 173 | 174 | SGDClassifier 175 | - alpha=1e-5 176 | 177 | 178 | tfidf3->sgd3 179 | 180 | 181 | 182 | 183 | sgd4 184 | 185 | SGDClassifier 186 | - alpha=1e-3 187 | 188 | 189 | tfidf4->sgd4 190 | 191 | 192 | 193 | 194 | sgd5 195 | 196 | SGDClassifier 197 | - alpha=1e-4 198 | 199 | 200 | tfidf5->sgd5 201 | 202 | 203 | 204 | 205 | sgd6 206 | 207 | SGDClassifier 208 | - alpha=1e-5 209 | 210 | 211 | tfidf6->sgd6 212 | 213 | 214 | 215 | 216 | best 217 | 218 | Choose Best Parameters 219 | 220 | 221 | sgd1->best 222 | 223 | 224 | 225 | 226 | sgd2->best 227 | 228 | 229 | 230 | 231 | sgd3->best 232 | 233 | 234 | 235 | 236 | sgd4->best 237 | 238 | 239 | 240 | 241 | sgd5->best 242 | 243 | 244 | 245 | 246 | sgd6->best 247 | 248 | 249 | 250 | 251 | 252 | -------------------------------------------------------------------------------- /Dask/Cuadernos/01_dask_Mejores_Practicas.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "deadly-paper", 6 | "metadata": { 7 | "tags": [] 8 | }, 9 | "source": [ 10 | "#
<center>Diplomado en Big Data</center>
" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "necessary-female", 16 | "metadata": {}, 17 | "source": [ 18 | "#
<center>Procesamiento Paralelo. Mejores prácticas</center>
" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "id": "athletic-hawaiian", 24 | "metadata": {}, 25 | "source": [ 26 | "\"Dask\n", 30 | "\n", 31 | "# Mejores prácticas con `dask.delayed`" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "id": "sufficient-universal", 37 | "metadata": {}, 38 | "source": [ 39 | "## Profesores" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "id": "utility-cheat", 45 | "metadata": {}, 46 | "source": [ 47 | "1. Alvaro Mauricio Montenegro Díaz, ammontenegrod@unal.edu.co\n", 48 | "2. Daniel Mauricio Montenegro Reyes, dextronomo@gmail.com " 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "id": "liquid-practitioner", 54 | "metadata": {}, 55 | "source": [ 56 | "## Asesora Medios y Marketing digital\n", 57 | " " 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "id": "shaped-taylor", 63 | "metadata": {}, 64 | "source": [ 65 | "4. Maria del Pilar Montenegro, pmontenegro88@gmail.com " 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "id": "pressed-maple", 71 | "metadata": {}, 72 | "source": [ 73 | "## Contenido" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "id": "lesbian-spine", 79 | "metadata": {}, 80 | "source": [ 81 | "* [Introducción](#Introducción)\n", 82 | "* [Bases](Bases)\n", 83 | "* [Paralelización usando delayed](#Paralelización-usando-delayed)\n", 84 | "* [Ejemplo con Pandas groupby](#Ejemplo-con-Pandas-groupby)\n", 85 | "* [Usando decoradores directamente](#Usando-decoradores-directamente)\n", 86 | "* [Cerrar el cliente](#Cerrar-el-cliente)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "id": "declared-begin", 92 | "metadata": {}, 93 | "source": [ 94 | "## Fuente" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "id": "unique-kazakhstan", 100 | "metadata": {}, 101 | "source": [ 102 | "Esta es una traducción libre del tutorial disponible en [dask-tutorial](https://github.com/dask/dask-tutorial)." 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "id": "vital-cardiff", 108 | "metadata": {}, 109 | "source": [ 110 | "## Introducción" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "id": "perfect-deadline", 116 | "metadata": {}, 117 | "source": [ 118 | "Es fácil comenzar con Dask retrasado, pero usarlo bien requiere algo de experiencia. Esta lección contiene sugerencias de mejores prácticas e incluye soluciones a problemas comunes." 
 121 | { 122 | "cell_type": "markdown", 123 | "id": "apart-europe", 124 | "metadata": {}, 125 | "source": [ 126 | "## Llamada retrasada a la función, no al resultado" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "id": "earlier-rebel", 132 | "metadata": {}, 133 | "source": [ 134 | "    Incorrecto\n", "    \n", 135 | "    ```python\n", 136 | "    # This executes immediately\n", 137 | "    dask.delayed(f(x, y))\n", 138 | "    ```\n", 139 | "    Correcto\n", 140 | "    \n", 141 | "    ```python\n", 142 | "    # This executes delayed\n", 143 | "    dask.delayed(f)(x, y)\n", 144 | "    ```\n" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "id": "familiar-sapphire", 150 | "metadata": {}, 151 | "source": [ 152 | "## Programe muchos cálculos a la vez" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "id": "usual-relevance", 158 | "metadata": {}, 159 | "source": [ 160 | "    Incorrecto\n", "    \n", 161 | "    ```python\n", 162 | "    # Avoid calling compute repeatedly\n", 163 | "    results = []\n", 164 | "    for x in L:\n", 165 | "        y = dask.delayed(f)(x)\n", 166 | "        results.append(y.compute())\n", 167 | "    results\n", 168 | "    ```\n", 169 | "    Correcto\n", 170 | "    \n", 171 | "    ```python\n", "    # Collect many calls for one compute\n", 172 | "    results = []\n", 173 | "    for x in L:\n", 174 | "        y = dask.delayed(f)(x)\n", 175 | "        results.append(y)\n", 176 | "    results = dask.compute(*results)\n", 177 | "    ```\n" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "id": "thirty-christianity", 183 | "metadata": {}, 184 | "source": [ 185 | "## No modifique las entradas" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "id": "architectural-buffalo", 191 | "metadata": {}, 192 | "source": [ 193 | "    Incorrecto\n", "    \n", 194 | "    ```python\n", 195 | "    # Mutate inputs in functions\n", 196 | "    @dask.delayed\n", 197 | "    def f(x):\n", 198 | "        x += 1\n", 199 | "        return x\n", 200 | "    ```\n", 201 | "    Correcto\n", 202 | "    \n", 203 | "    ```python\n", "    # Return new values or copies\n", 204 | "    @dask.delayed\n", 205 | "    def f(x):\n", 206 | "        x = x + 1\n", 207 | "        return x\n", 208 | "    ```\n", 209 | "    También correcto\n", 210 | "\n", 211 | "    ```python\n", "    from copy import copy\n", "\n", 212 | "    @dask.delayed\n", 213 | "    def f(x):\n", 214 | "        x = copy(x)\n", 215 | "        x += 1\n", 216 | "        return x\n", 217 | "    ```" 218 | ] 219 | }, 253 | { 254 | "cell_type": "markdown", 255 | "id": "veterinary-serial", 256 | "metadata": {}, 257 | "source": [ 258 | "## Evite el estado global" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "id": "pediatric-surprise", 264 | "metadata": {}, 265 | "source": [ 266 | "Idealmente, sus operaciones no deberían depender del estado global. 
El uso del estado global puede funcionar si solo usa subprocesos, pero cuando se pasa a la computación distribuida o multiprocesamiento, es probable que encuentre errores confusos." 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "id": "particular-calvin", 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "L = []\n", 277 | "\n", 278 | "# This references global variable L\n", 279 | "\n", 280 | "@dask.delayed\n", 281 | "def f(x):\n", 282 | " L.append(x)" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "id": "mobile-ready", 288 | "metadata": {}, 289 | "source": [ 290 | "## Evite efectos colaterales" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "id": "minus-agency", 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "# Ensure delayed tasks are computed\n", 301 | "\n", 302 | "x = dask.delayed(f)(1, 2, 3)\n", 303 | "#...\n", 304 | "dask.compute(x, ...)" 305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "id": "corporate-trademark", 310 | "metadata": {}, 311 | "source": [ 312 | "## Divida los cálculos en muchas partes" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": null, 318 | "id": "atlantic-initial", 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "# Erróneo\n", 323 | "\n", 324 | "def load(filename):\n", 325 | " ...\n", 326 | "\n", 327 | "\n", 328 | "def process(filename):\n", 329 | " ...\n", 330 | "\n", 331 | "\n", 332 | "def save(filename):\n", 333 | " ...\n", 334 | "\n", 335 | "@dask.delayed\n", 336 | "def f(filenames):\n", 337 | " results = []\n", 338 | " for filename in filenames:\n", 339 | " data = load(filename)\n", 340 | " data = process(data)\n", 341 | " result = save(data)\n", 342 | "\n", 343 | " return results\n", 344 | "\n", 345 | "dask.compute(f(filenames))" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": null, 351 | "id": "competitive-passion", 352 | "metadata": {}, 353 | "outputs": [], 354 | "source": [ 355 | "# Correcto\n", 356 | "# Break up into many tasks\n", 357 | "\n", 358 | "@dask.delayed\n", 359 | "def load(filename):\n", 360 | " ...\n", 361 | "\n", 362 | "@dask.delayed\n", 363 | "def process(filename):\n", 364 | " ...\n", 365 | "\n", 366 | "@dask.delayed\n", 367 | "def save(filename):\n", 368 | " ...\n", 369 | "\n", 370 | "\n", 371 | "def f(filenames):\n", 372 | " results = []\n", 373 | " for filename in filenames:\n", 374 | " data = load(filename)\n", 375 | " data = process(data)\n", 376 | " result = save(data)\n", 377 | "\n", 378 | " return results\n", 379 | "\n", 380 | "dask.compute(f(filenames))" 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "id": "serious-screen", 386 | "metadata": {}, 387 | "source": [ 388 | "## Evite demasiadas tareas retrasadas" 389 | ] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "id": "trained-hearts", 394 | "metadata": {}, 395 | "source": [ 396 | "Cada tarea retrasada tiene una sobrecarga de unos cientos de microsegundos. Por lo general, esto está bien, pero puede convertirse en un problema si aplica dask.delayed demasiado finamente. En este caso, a menudo es mejor dividir sus muchas tareas en lotes o usar una de las colecciones de Dask para ayudarlo." 
434 | {
435 | "cell_type": "markdown",
436 | "id": "serious-screen",
437 | "metadata": {},
438 | "source": [
439 | "## Avoid too many delayed tasks"
440 | ]
441 | },
442 | {
443 | "cell_type": "markdown",
444 | "id": "trained-hearts",
445 | "metadata": {},
446 | "source": [
447 | "Every delayed task carries an overhead of a few hundred microseconds. That is usually fine, but it can become a problem if you apply dask.delayed too finely. In that case, it is often better to break your many tasks into batches, or to use one of the Dask collections to help you."
448 | ]
449 | },
450 | {
451 | "cell_type": "code",
452 | "execution_count": null,
453 | "id": "intended-harvey",
454 | "metadata": {},
455 | "outputs": [],
456 | "source": [
457 | "# Too many tasks\n",
458 | "\n",
459 | "results = []\n",
460 | "for x in range(10000000):\n",
461 | "    y = dask.delayed(f)(x)\n",
462 | "    results.append(y)\n",
463 | "\n",
464 | "\n",
465 | "# Use collections instead\n",
466 | "\n",
467 | "import dask.bag as db\n",
468 | "b = db.from_sequence(range(10000000), npartitions=1000)\n",
469 | "b = b.map(f)\n",
470 | "..."
471 | ]
472 | },
473 | {
474 | "cell_type": "code",
475 | "execution_count": null,
476 | "id": "secure-islam",
477 | "metadata": {},
478 | "outputs": [],
479 | "source": [
480 | "# Alternatively, batch by hand without the bag collection\n",
481 | "\n",
482 | "def batch(seq):\n",
483 | "    sub_results = []\n",
484 | "    for x in seq:\n",
485 | "        sub_results.append(f(x))\n",
486 | "    return sub_results\n",
487 | "\n",
488 | "batches = []\n",
489 | "for i in range(0, 10000000, 10000):\n",
490 | "    result_batch = dask.delayed(batch)(range(i, i + 10000))\n",
491 | "    batches.append(result_batch)"
492 | ]
493 | },
494 | {
495 | "cell_type": "markdown",
496 | "id": "retired-antigua",
497 | "metadata": {},
498 | "source": [
499 | "## Avoid calling delayed inside delayed functions"
500 | ]
501 | },
502 | {
503 | "cell_type": "markdown",
504 | "id": "confused-brooks",
505 | "metadata": {},
506 | "source": [
507 | "Often, if you are new to Dask delayed, you place `dask.delayed` calls everywhere and hope for the best. While this may actually work, it is usually slow and produces solutions that are hard to follow.\n",
508 | "\n",
509 | "As a rule, never call `dask.delayed` inside a `dask.delayed` function."
510 | ]
511 | },
512 | {
513 | "cell_type": "code",
514 | "execution_count": null,
515 | "id": "organic-assets",
516 | "metadata": {},
517 | "outputs": [],
518 | "source": [
519 | "# Avoid: a delayed function calling delayed\n",
520 | "\n",
521 | "@dask.delayed\n",
522 | "def process_all(L):\n",
523 | "    result = []\n",
524 | "    for x in L:\n",
525 | "        y = dask.delayed(f)(x)\n",
526 | "        result.append(y)\n",
527 | "    return result\n",
528 | "\n",
529 | "\n",
530 | "# Better: a normal function calling delayed\n",
531 | "\n",
532 | "def process_all(L):\n",
533 | "    result = []\n",
534 | "    for x in L:\n",
535 | "        y = dask.delayed(f)(x)\n",
536 | "        result.append(y)\n",
537 | "    return result"
538 | ]
539 | },
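540 | {
541 | "cell_type": "markdown",
542 | "id": "driver-demo-leadin",
543 | "metadata": {},
544 | "source": [
545 | "The cell below runs the recommended pattern end to end. It is a minimal sketch under stated assumptions: the toy function `inc` stands in for `f`, and the input `range(4)` is chosen only for illustration."
546 | ]
547 | },
548 | {
549 | "cell_type": "code",
550 | "execution_count": null,
551 | "id": "driver-demo",
552 | "metadata": {},
553 | "outputs": [],
554 | "source": [
555 | "# Hedged sketch: a plain driver builds delayed tasks, then one compute runs them\n",
556 | "import dask\n",
557 | "\n",
558 | "def inc(x):                      # toy stand-in for f\n",
559 | "    return x + 1\n",
560 | "\n",
561 | "def process_all(L):              # normal function, not delayed itself\n",
562 | "    return [dask.delayed(inc)(x) for x in L]\n",
563 | "\n",
564 | "results = dask.compute(*process_all(range(4)))\n",
565 | "print(results)                   # (1, 2, 3, 4)"
566 | ]
567 | },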
568 | {
569 | "cell_type": "markdown",
570 | "id": "documented-scholarship",
571 | "metadata": {},
572 | "source": [
573 | "## Don't call dask.delayed on other Dask collections"
574 | ]
575 | },
576 | {
577 | "cell_type": "markdown",
578 | "id": "wireless-starter",
579 | "metadata": {},
580 | "source": [
581 | "When you place a Dask array or Dask DataFrame into a delayed call, that function will receive the NumPy or Pandas equivalent.\n",
582 | "\n",
583 | "Beware: if your array is large, this could crash your workers.\n",
584 | "\n",
585 | "Instead, it is more common to use methods like `da.map_blocks` or `df.map_partitions`."
586 | ]
587 | },
588 | {
589 | "cell_type": "code",
590 | "execution_count": null,
591 | "id": "scientific-comfort",
592 | "metadata": {},
593 | "outputs": [],
594 | "source": [
595 | "# Wrong: call delayed functions on Dask collections\n",
596 | "\n",
597 | "import dask.dataframe as dd\n",
598 | "df = dd.read_csv('/path/to/*.csv')\n",
599 | "\n",
600 | "dask.delayed(train)(df)\n",
601 | "\n",
602 | "# Correct: use mapping methods if applicable\n",
603 | "\n",
604 | "import dask.dataframe as dd\n",
605 | "df = dd.read_csv('/path/to/*.csv')\n",
606 | "\n",
607 | "df.map_partitions(train)\n",
608 | "\n",
609 | "# Alternatively, if the procedure doesn't fit into a mapping, you can\n",
610 | "# always turn your arrays or dataframes into many delayed objects:\n",
611 | "\n",
612 | "import dask.dataframe as dd\n",
613 | "df = dd.read_csv('/path/to/*.csv')\n",
614 | "\n",
615 | "partitions = df.to_delayed()\n",
616 | "delayed_values = [dask.delayed(train)(part)\n",
617 | "                  for part in partitions]"
618 | ]
619 | },
620 | {
621 | "cell_type": "markdown",
622 | "id": "usual-riverside",
623 | "metadata": {},
624 | "source": [
625 | "## Avoid repeatedly putting large inputs into delayed calls"
626 | ]
627 | },
628 | {
629 | "cell_type": "markdown",
630 | "id": "regulation-pennsylvania",
631 | "metadata": {},
632 | "source": [
633 | "Every time you pass a concrete result (anything that is not delayed), Dask hashes it by default to give it a name. This is fairly fast (around 500 MB/s), but it becomes slow if you do it over and over again. Instead, it is better to delay your data as well.\n",
634 | "\n",
635 | "This is especially important on a distributed cluster, to avoid sending your data separately for every function call."
636 | ]
637 | },
638 | {
639 | "cell_type": "code",
640 | "execution_count": null,
641 | "id": "automatic-bobby",
642 | "metadata": {},
643 | "outputs": [],
644 | "source": [
645 | "# Don't do this\n",
646 | "\n",
647 | "x = np.array(...)  # some large array\n",
648 | "\n",
649 | "results = [dask.delayed(train)(x, i)\n",
650 | "           for i in range(1000)]\n",
651 | "\n",
652 | "# Do this instead\n",
653 | "\n",
654 | "x = np.array(...)  # some large array\n",
655 | "x = dask.delayed(x)  # delay the data once\n",
656 | "\n",
657 | "results = [dask.delayed(train)(x, i)\n",
658 | "           for i in range(1000)]"
659 | ]
660 | }
661 | ],
662 | "metadata": {
663 | "kernelspec": {
664 | "display_name": "Python 3",
665 | "language": "python",
666 | "name": "python3"
667 | },
668 | "language_info": {
669 | "codemirror_mode": {
670 | "name": "ipython",
671 | "version": 3
672 | },
673 | "file_extension": ".py",
674 | "mimetype": "text/x-python",
675 | "name": "python",
676 | "nbconvert_exporter": "python",
677 | "pygments_lexer": "ipython3",
678 | "version": "3.8.8"
679 | }
680 | },
681 | "nbformat": 4,
682 | "nbformat_minor": 5
683 | }
--------------------------------------------------------------------------------