├── images ├── ml_map.png ├── anaconda.png ├── cacheme.png ├── editors.png ├── ecosystem.png ├── python_org.png ├── python_xkcd.png ├── tshirt_quote.jpg ├── aeropython_logo.png ├── ecif_book_cover.jpg ├── logo_python_letras.png ├── libraries-vanderplas.png ├── DataScientist-Continuum.png └── jakevdp_python_data_science_handbook.jpg ├── style ├── FluxBold.ttf └── style.css ├── environment.yml ├── scripts └── mi_primer_script.py ├── utils └── empty_nb.py ├── LICENSE ├── .gitignore ├── README.md ├── notebooks_vacios ├── 0000_Plantilla.ipynb ├── 0103-Pandas-ejercicio.ipynb ├── 0102_Pandas-Operaciones-con-DataFrames.ipynb ├── 0001_Introduccion.ipynb ├── 0201-ScikitLearn-Intro-Clasificacion.ipynb └── 0101_Pandas-Carga-datos-y-manipulacion.ipynb ├── templates └── 0000_Plantilla.ipynb └── notebooks ├── 0301_Como_seguir_con_Python.ipynb ├── zz-AEMET-get_data.ipynb └── 0001_Introduccion.ipynb /images/ml_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CAChemE/python-analisis-modelado-datos/HEAD/images/ml_map.png -------------------------------------------------------------------------------- /images/anaconda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CAChemE/python-analisis-modelado-datos/HEAD/images/anaconda.png -------------------------------------------------------------------------------- /images/cacheme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CAChemE/python-analisis-modelado-datos/HEAD/images/cacheme.png -------------------------------------------------------------------------------- /images/editors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CAChemE/python-analisis-modelado-datos/HEAD/images/editors.png 
-------------------------------------------------------------------------------- /style/FluxBold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CAChemE/python-analisis-modelado-datos/HEAD/style/FluxBold.ttf -------------------------------------------------------------------------------- /images/ecosystem.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CAChemE/python-analisis-modelado-datos/HEAD/images/ecosystem.png -------------------------------------------------------------------------------- /images/python_org.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CAChemE/python-analisis-modelado-datos/HEAD/images/python_org.png -------------------------------------------------------------------------------- /images/python_xkcd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CAChemE/python-analisis-modelado-datos/HEAD/images/python_xkcd.png -------------------------------------------------------------------------------- /images/tshirt_quote.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CAChemE/python-analisis-modelado-datos/HEAD/images/tshirt_quote.jpg -------------------------------------------------------------------------------- /images/aeropython_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CAChemE/python-analisis-modelado-datos/HEAD/images/aeropython_logo.png -------------------------------------------------------------------------------- /images/ecif_book_cover.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CAChemE/python-analisis-modelado-datos/HEAD/images/ecif_book_cover.jpg -------------------------------------------------------------------------------- /images/logo_python_letras.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CAChemE/python-analisis-modelado-datos/HEAD/images/logo_python_letras.png -------------------------------------------------------------------------------- /images/libraries-vanderplas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CAChemE/python-analisis-modelado-datos/HEAD/images/libraries-vanderplas.png -------------------------------------------------------------------------------- /images/DataScientist-Continuum.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CAChemE/python-analisis-modelado-datos/HEAD/images/DataScientist-Continuum.png -------------------------------------------------------------------------------- /images/jakevdp_python_data_science_handbook.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CAChemE/python-analisis-modelado-datos/HEAD/images/jakevdp_python_data_science_handbook.jpg -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: cacheme-datos 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python=3.6 7 | - jupyter 8 | - pandas=0.22 9 | - scikit-learn=0.19 10 | - matplotlib=2 11 | - numpy 12 | - scipy 13 | - bokeh=0.12 14 | 15 | -------------------------------------------------------------------------------- /scripts/mi_primer_script.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 
print("Hola gente del curso de Python para análisis y modela de datos") 4 | print("¿Cuántos sois hoy en clase?") 5 | 6 | number = input() 7 | number = int(number) 8 | root = math.sqrt(number) 9 | 10 | print("Ufff! eso es un montón! espero que aprendáis mucho") 11 | print("Por cierto, la raiz de %i es %f" %(number, root)) 12 | -------------------------------------------------------------------------------- /utils/empty_nb.py: -------------------------------------------------------------------------------- 1 | import nbformat 2 | 3 | 4 | def empty_notebook(fname): 5 | with open(fname, 'r', encoding='utf-8') as fp: 6 | nb = nbformat.read(fp, as_version=4) 7 | 8 | for cell in nb.cells: 9 | if cell['cell_type'] == 'code': 10 | source = cell['source'].lower() 11 | if ('# preserve' in source) or ('#preserve' in source): 12 | continue 13 | else: 14 | # Don't preserve cell 15 | cell['outputs'].clear() 16 | cell['execution_count'] = None 17 | cell['source'] = '\n'.join([l for l in source.splitlines() if l.startswith('#')]) 18 | 19 | return nb 20 | 21 | 22 | if __name__ == '__main__': 23 | import glob 24 | import os.path 25 | 26 | for fname in glob.glob("notebooks/*.ipynb"): 27 | new_fname = os.path.join("notebooks_vacios", os.path.basename(fname)) 28 | with open(new_fname, 'w', encoding='utf-8') as fp: 29 | nbformat.write(empty_notebook(fname), fp) 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Computer Aided Chemical Engineering 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to 
whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # PyCharm 104 | .idea/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Curso de introducción al análisis y modelado de datos con Python 2 | 3 | ## pandas & scikit-learn 4 | [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/CAChemE/python-analisis-modelado-datos/master) 5 | or [Google CoLab](https://github.com/CAChemE/python-analisis-modelado-datos/wiki/Google-Colaboratory) 6 | 7 | La mejor forma de aprender a programar es haciendo algo útil, por lo que esta introducción a Python se centrará alrededor de una tarea común: solucionar problemas de análisis y modelado de datos comunes en el entorno académico e industrial. Python dispone de dos librerías principales para ello, pandas y scikit-learn. 
La primera, pandas, permite el manejo eficiente y sistemático de datos con formato de tabla. La segunda, scikit-learn, reúne los principales algoritmos de Machine Learning (aprendizaje automático) bajo una misma interfaz. Ambas librerías disponen de licencia totalmente libre y gratuita, incluso para empresas. No sorprende, por tanto, que empresas del calibre de Google o Microsoft no solo usen sino que financien dichos proyectos. 8 | 9 | [https://cacheme.org/curso-introduccion-datos-python/](https://cacheme.org/curso-introduccion-datos-python/) 10 | 11 | ## Organización: 12 | 13 | - Jueves 22 y viernes 23 de febrero de 2018 14 | - De 15:30 a 18:30 h 15 | - en el edificio Politécnica (Laboratorio L24, [ver mapa](https://www.sigua.ua.es/index.html?id=0016P2002)) 16 | 17 | ## Contenido: 18 | 19 | 1. Lectura y escritura desde distintos formatos con pandas 20 | 2. Manejo de DataFrames: acceso a datos, filtrado y operaciones 21 | 3. Visualización: líneas, scatter, histogramas, boxplots... 22 | 4. Operaciones de agrupación, reducción y pivotado 23 | 5. Introducción al Aprendizaje Automático: tipos de problemas 24 | 6. Funcionamiento básico de scikit-learn 25 | 7. 
Clasificación, reducción de dimensionalidad y clustering 26 | 27 | ## Instructores: 28 | 29 | * [Alejandro Sáez Mollejo](https://www.linkedin.com/in/alejandrosaezm/) saezm.alex@gmail.com 30 | * [Mabel Delgado Babiano](https://www.linkedin.com/in/mabeldelgadob/): mabeldb@gmail.com 31 | -------------------------------------------------------------------------------- /notebooks_vacios/0000_Plantilla.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Curso de introducción al análisis y modelado de datos con Python \n", 8 | "\n", 9 | "\"logo\"\n", 10 | "\"logo\"" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "---" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "# Título del notebook" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "_Descripción_\n", 32 | "\n", 33 | "---" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## Sección 1" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "explicación " 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "## sección 1.1" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "### sección 1.1.1" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "##### sección 1.1.1.1" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "## Sección 2" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "###### Autores" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "---\n", 90 | "_Las siguientes celdas contienen 
configuración del Notebook_\n", 91 | "\n", 92 | "_Para visualizar y utlizar los enlaces a Twitter el notebook debe ejecutarse como [seguro](http://ipython.org/ipython-doc/dev/notebook/security.html)_\n", 93 | "\n", 94 | " File > Trusted Notebook" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "# esta celda da el estilo al notebook" 104 | ] 105 | } 106 | ], 107 | "metadata": { 108 | "anaconda-cloud": {}, 109 | "kernelspec": { 110 | "display_name": "Python 3", 111 | "language": "python", 112 | "name": "python3" 113 | }, 114 | "language_info": { 115 | "codemirror_mode": { 116 | "name": "ipython", 117 | "version": 3 118 | }, 119 | "file_extension": ".py", 120 | "mimetype": "text/x-python", 121 | "name": "python", 122 | "nbconvert_exporter": "python", 123 | "pygments_lexer": "ipython3", 124 | "version": "3.6.4" 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 1 129 | } 130 | -------------------------------------------------------------------------------- /style/style.css: -------------------------------------------------------------------------------- 1 | /* This template is inspired in the one used by Lorena Barba 2 | in the numerical-mooc repository: https://github.com/numerical-mooc/numerical-mooc 3 | We thank her work and hope you also enjoy the look of the notobooks with this style */ 4 | 5 | 6 | 7 | 145 | 161 | -------------------------------------------------------------------------------- /notebooks_vacios/0103-Pandas-ejercicio.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Curso de introducción al análisis y modelado de datos con Python \n", 8 | "\n", 9 | "\"logo\"\n", 10 | "\"logo\"" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Ejercicios de análisis de datos 
con pandas\n", 18 | "\n", 19 | "*Fuente: https://github.com/PyDataMadrid2016/Conference-Info/tree/master/workshops_materials/20160408_1100_Pandas_for_beginners/tutorial por Kiko Correoso, licencia MIT*\n", 20 | "\n", 21 | "En la carpeta de datos tenemos un fichero que se llama *model.txt* que contiene datos de medidas de viento: velocidad, orientación, temperatura..." 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "## Ejercicios\n", 57 | "\n", 58 | "Sobre el conjunto de datos `model`:\n", 59 | "\n", 60 | "> 1. Representar la matriz `scatter` de la velocidad y orientación del viento de los primeros mil registros.\n", 61 | "2. Misma matriz scatter para los 1000 registros con mayor velocidad, ordenados.\n", 62 | "3. Histograma de la velocidad del viento con 36 particiones.\n", 63 | "4. Histórico de la velocidad media, con los datos agrupados por años y meses.\n", 64 | "5. Tabla de velocidades medias en función del año (filas) y del mes (columnas).\n", 65 | "6. Gráfica con los históricos de cada año, agrupados por meses, superpuestos." 
66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "Representamos la matriz _scatter_ de la velocidad y orientación del viento de los primeros mil registros:" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "Misma matriz _scatter_ para los 1000 registros con mayor velocidad:" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "Histórico de la velocidad media:" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "Media móvil de los datos agrupados por mes y año:" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 
164 | "source": [] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [] 172 | } 173 | ], 174 | "metadata": { 175 | "anaconda-cloud": {}, 176 | "kernelspec": { 177 | "display_name": "Python 3", 178 | "language": "python", 179 | "name": "python3" 180 | }, 181 | "language_info": { 182 | "codemirror_mode": { 183 | "name": "ipython", 184 | "version": 3 185 | }, 186 | "file_extension": ".py", 187 | "mimetype": "text/x-python", 188 | "name": "python", 189 | "nbconvert_exporter": "python", 190 | "pygments_lexer": "ipython3", 191 | "version": "3.6.4" 192 | }, 193 | "latex_metadata": { 194 | "author": "Synergic Partners", 195 | "title": "Conceptos Básicos" 196 | }, 197 | "name": "_merged" 198 | }, 199 | "nbformat": 4, 200 | "nbformat_minor": 1 201 | } 202 | -------------------------------------------------------------------------------- /templates/0000_Plantilla.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Curso de introducción al análisis y modelado de datos con Python \n", 8 | "\n", 9 | "\"logo\"\n", 10 | "\"logo\"" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "---" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "# Título del notebook" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "_Descripción_\n", 32 | "\n", 33 | "---" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## Sección 1" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "explicación " 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "## sección 1.1" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | 
"metadata": {}, 60 | "source": [ 61 | "### sección 1.1.1" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "##### sección 1.1.1.1" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "## Sección 2" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "###### Autores" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "---\n", 90 | "_Las siguientes celdas contienen configuración del Notebook_\n", 91 | "\n", 92 | "_Para visualizar y utlizar los enlaces a Twitter el notebook debe ejecutarse como [seguro](http://ipython.org/ipython-doc/dev/notebook/security.html)_\n", 93 | "\n", 94 | " File > Trusted Notebook" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 8, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/html": [ 105 | "/* This template is inspired in the one used by Lorena Barba\n", 106 | "in the numerical-mooc repository: https://github.com/numerical-mooc/numerical-mooc\n", 107 | "We thank her work and hope you also enjoy the look of the notobooks with this style */\n", 108 | "\n", 109 | "\n", 110 | "\n", 111 | "\n", 249 | "\n" 265 | ], 266 | "text/plain": [ 267 | "" 268 | ] 269 | }, 270 | "execution_count": 8, 271 | "metadata": {}, 272 | "output_type": "execute_result" 273 | } 274 | ], 275 | "source": [ 276 | "# Esta celda da el estilo al notebook\n", 277 | "from IPython.core.display import HTML\n", 278 | "css_file = '../style/style.css'\n", 279 | "HTML(open(css_file, \"r\").read())" 280 | ] 281 | } 282 | ], 283 | "metadata": { 284 | "anaconda-cloud": {}, 285 | "kernelspec": { 286 | "display_name": "Python 3", 287 | "language": "python", 288 | "name": "python3" 289 | }, 290 | "language_info": { 291 | "codemirror_mode": { 292 | "name": "ipython", 293 | "version": 3 294 | }, 295 | "file_extension": ".py", 296 | "mimetype": "text/x-python", 
297 | "name": "python", 298 | "nbconvert_exporter": "python", 299 | "pygments_lexer": "ipython3", 300 | "version": "3.6.4" 301 | } 302 | }, 303 | "nbformat": 4, 304 | "nbformat_minor": 1 305 | } 306 | -------------------------------------------------------------------------------- /notebooks/0301_Como_seguir_con_Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Curso de introducción al análisis y modelado de datos con Python \n", 8 | "\n", 9 | "\"logo\"\n", 10 | "\"logo\"" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "---" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "# ¿Cómo seguir con Python?" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "_A continuación, pretendemos dar algunas ideas sobre cómo seguir aprendiendo Python y sus posibles herramientas orientas a ciencia e ingeniería. \n", 32 | "También, hablaremos del ecosistema Python, de su comunidad y de cómo involucrarse y establecer contacto con otras personas interesadas en este lenguaje de programación.\n", 33 | "Por último, pondremos distintos enlaces a diferentes canales. 
_" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## Algunos libros y enlaces" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "#### Introducción a Python\n", 48 | "\n", 49 | "* [\"Automate the Boring Stuff with Python\" by Al Sweigart](http://automatetheboringstuff.com/) \n", 50 | "(Free to read under a Creative Commons license) \n", 51 | "* [Recopilatorio de recursos \"Grupo de Iniciación de Python Madrid\"](https://python-madrid.github.io/learn-doc/)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "#### Ciencia, Ingeniería, Data Science\n", 59 | "* [\"Effective Computation in Physics\" by Anthony Scopatz, Kathryn Huff](http://shop.oreilly.com/product/0636920033424)\n", 60 | "* [\"Python Data Science Handbook\" by Jake VanderPlas](http://shop.oreilly.com/product/0636920034919) \n", 61 | "(contenido disponible en [GitHub](https://github.com/jakevdp/PythonDataScienceHandbook))" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "## Ecosistema Python" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "\"logo\"" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "#### Webs interesantes\n", 83 | "* [Python Software Foundation](https://www.python.org/)\n", 84 | "* [Pybonacci](http://www.pybonacci.org/)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "#### Comunidades interesantes y/o cercanas\n", 92 | "* [Python España](https://www.es.python.org/)\n", 93 | "* [Python Alicante](https://www.meetup.com/python_alc/)\n", 94 | "* [Cacheme](https://cacheme.org/)\n", 95 | "* [AeroPython](https://github.com/AeroPython)\n", 96 | "* [PyLadies Madrid](https://www.meetup.com/PyLadiesMadrid/)\n", 97 | "* [Otras 
comunidades](https://www.es.python.org/pages/comunidades.html)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "#### Pythonistas relevantes en Twitter\n", 105 | "https://twitter.com/AeroPython/lists/relevant-pythonistas" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "#### Grupos de Telegram\n", 113 | "\n", 114 | "* Python España\n", 115 | "* Python Alicante\n", 116 | "* Python Científico\n", 117 | "* AeroPython" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "###### Juan Luis Cano, Álex Sáez, Mabel Delgado" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "---\n", 132 | "_Las siguientes celdas contienen configuración del Notebook_\n", 133 | "\n", 134 | "_Para visualizar y utlizar los enlaces a Twitter el notebook debe ejecutarse como [seguro](http://ipython.org/ipython-doc/dev/notebook/security.html)_\n", 135 | "\n", 136 | " File > Trusted Notebook" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 8, 142 | "metadata": {}, 143 | "outputs": [ 144 | { 145 | "data": { 146 | "text/html": [ 147 | "/* This template is inspired in the one used by Lorena Barba\n", 148 | "in the numerical-mooc repository: https://github.com/numerical-mooc/numerical-mooc\n", 149 | "We thank her work and hope you also enjoy the look of the notobooks with this style */\n", 150 | "\n", 151 | "\n", 152 | "\n", 153 | "\n", 291 | "\n" 307 | ], 308 | "text/plain": [ 309 | "" 310 | ] 311 | }, 312 | "execution_count": 8, 313 | "metadata": {}, 314 | "output_type": "execute_result" 315 | } 316 | ], 317 | "source": [ 318 | "# Esta celda da el estilo al notebook\n", 319 | "from IPython.core.display import HTML\n", 320 | "css_file = '../style/style.css'\n", 321 | "HTML(open(css_file, \"r\").read())" 322 | ] 323 | } 324 | ], 325 | "metadata": { 326 | 
"anaconda-cloud": {}, 327 | "kernelspec": { 328 | "display_name": "Python 3", 329 | "language": "python", 330 | "name": "python3" 331 | }, 332 | "language_info": { 333 | "codemirror_mode": { 334 | "name": "ipython", 335 | "version": 3 336 | }, 337 | "file_extension": ".py", 338 | "mimetype": "text/x-python", 339 | "name": "python", 340 | "nbconvert_exporter": "python", 341 | "pygments_lexer": "ipython3", 342 | "version": "3.6.4" 343 | } 344 | }, 345 | "nbformat": 4, 346 | "nbformat_minor": 1 347 | } 348 | -------------------------------------------------------------------------------- /notebooks_vacios/0102_Pandas-Operaciones-con-DataFrames.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Curso de introducción al análisis y modelado de datos con Python \n", 8 | "\n", 9 | "\"logo\"\n", 10 | "\"logo\"" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "---" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "# Pandas: Funciones rolling y operaciones con DataFrames" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "En este notebook trabajaremos sobre los mismos datos que la introducción a pandas. Veremos como usar el método rolling para obtener resultados como la media móvil, la desviación típica móvil... Además veremos como agrupar los datos usando `groupby` y cómo pivotar tablas." 
32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## Cargando los datos" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# carga de los datos: data_all\n", 55 | "# columnas: [0, 1, 3, 4, 5, 6, 8, 10, 11, 12, 13]\n", 56 | "# ['dir', 'date', 'name', 'precip', 'pmax', 'pmin', 'mag_max', 'tmax', 'tmed', 'tmin', 'mag_med']" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "
Para acordarnos de cómo parsear las fechas: http://strftime.org/
" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "# dejamos en data sólo alicante/alacant" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "### Funciones \"rolling\" " 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "Ya sabemos obtener representaciones gráficas de nuestros de datos. Visualicemos, por ejemplo, la evolución de la temperatura media a lo largo de los años 2015 y 2016:" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "# temperatua media en el 205-2016" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "Se puede apreciar la oscilación a lo largo del año. Sin embargo, las variaciones locales de temperatura de determinados días \"ensucian\" nuestra representación. 
Una opción es \"suavizar\" la curva utilizando una media móvil" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "Pandas proporciona métodos para calcular magnitudes como medias móviles usando el método `rolling`:" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "# calcular la media de la columna tmed" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "# media centrada (semanal/mensual/trimestral)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "# pintar media centrada (semanal/mensual/trimestral)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "## Creando agrupaciones " 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "En muchas ocasiones queremos realizar agrupaciones de datos en base a determinados valores como son fechas, o etiquetas (por ejemplo, datos que pertenecen a un mismo ensayo o lugar)." 
151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "# agrupar por estación metereológica" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "# media" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "# media y desviación estándar" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "### Creando agrupaciones: analizando el mes típico" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "En muchas ocasiones queremos realizar agrupaciones de datos en base a determinados valores como son fechas, o etiquetas (por ejemplo, datos que pertenecen a un mismo ensayo o lugar).\n", 192 | "\n", 193 | "En este caso, imaginemos que nos interesa obtener una representación del \"mes típico\" o \"día típico\"" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "Podemos agrupar nuestros datos utilizando `groupby`:" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [ 209 | "# agruparemos por año y mes: creemos dos columnas nuevas" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "# creamos la agrupación" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [ 227 | "# podemos ver los grupos que se han creado" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "Con estos grupos podemos hacer hacer varias cosas:\n", 235 
| "\n", 236 | "* Acceder a sus datos individualmente (por ejemplo, comprobar qué pasó cada día de marzo de 2016) \n", 237 | "* Realizar una reducción de datos, para comparar diversos grupos (por, ejemplo caracterizar el tiempo de cada mes a lo largo de los años)" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "# accedemos a un grupo" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [ 255 | "# hacemos una agregación de los datos:" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": {}, 261 | "source": [ 262 | "### Pivotando tablas" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "En ocasiones podemos querer ver nuestros datos de forma diferente o necesitamos organizarlos así para utilizar determinadas funciones de `pandas`. Una necesidad típica es la de pivotar una tabla.\n", 270 | "\n", 271 | " Imagina que queremos acceder a los mismos datos que en el caso anterior, pero que ahora queremos ver los años en las filas y para cada variable (TMAX, TMED...) los calores de cada mes en una columna. ¿Cómo lo harías?" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "# dejar los años como índices y ver la media mensual en cada columna" 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": {}, 286 | "source": [ 287 | "La línea anterior no es sencilla y no se escribe de una sola vez sin errores (sobre todo si estás empezando). Esto es una ejemplo de que `pandas` es una librería potente, pero que lleva tiempo aprender. 
Pasarás muchas horas peleando contra problemas de este tipo, pero afortunadamente mucha gente lo ha pasado mal antes y su experiencia ha quedado plasmada en cientos de **preguntas de `stack overflow`** y en la **documentación de `pandas`**" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": {}, 300 | "source": [ 301 | "#### Otro ejemplo" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": null, 307 | "metadata": {}, 308 | "outputs": [], 309 | "source": [] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "metadata": {}, 315 | "outputs": [], 316 | "source": [ 317 | "# seleccionando sólo una estación" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [ 326 | "# agrupando por estación y mes" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": null, 332 | "metadata": {}, 333 | "outputs": [], 334 | "source": [ 335 | "# pivotando para que el mes sea el índice" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 | "metadata": {}, 342 | "outputs": [], 343 | "source": [ 344 | "# pintándolo" 345 | ] 346 | }, 347 | { 348 | "cell_type": "markdown", 349 | "metadata": {}, 350 | "source": [ 351 | "### Visualizaciones especiales" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "#### scatter" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "# scatter_matrix" 368 | ] 369 | }, 370 | { 371 | "cell_type": "markdown", 372 | "metadata": {}, 373 | "source": [ 374 | "#### lag_plot " 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": null, 380 | 
"metadata": {}, 381 | "outputs": [], 382 | "source": [] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": null, 387 | "metadata": {}, 388 | "outputs": [], 389 | "source": [] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "metadata": {}, 394 | "source": [ 395 | "---\n", 396 | "\n", 397 | "Hemos aprendido:\n", 398 | "* A utilizar las funciones rolling\n", 399 | "* A agrupar datos de un DataFrame utilizando sus columnas:\n", 400 | " - A agrupar con más de una variable\n", 401 | " - A acceder a distintos grupos\n", 402 | " - A obtener una reducción de datos para cada grupo\n", 403 | "* A salvar nuestros datos\n", 404 | "* A utilizar algunas representaciones especiales" 405 | ] 406 | }, 407 | { 408 | "cell_type": "markdown", 409 | "metadata": {}, 410 | "source": [ 411 | "###### Juan Luis Cano, Alejandro Sáez, Mabel Delgado" 412 | ] 413 | }, 414 | { 415 | "cell_type": "markdown", 416 | "metadata": {}, 417 | "source": [ 418 | "---\n", 419 | "_Las siguientes celdas contienen configuración del Notebook_\n", 420 | "\n", 421 | "_Para visualizar y utlizar los enlaces a Twitter el notebook debe ejecutarse como [seguro](http://ipython.org/ipython-doc/dev/notebook/security.html)_\n", 422 | "\n", 423 | " File > Trusted Notebook" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": {}, 430 | "outputs": [], 431 | "source": [ 432 | "# esta celda da el estilo al notebook" 433 | ] 434 | } 435 | ], 436 | "metadata": { 437 | "anaconda-cloud": {}, 438 | "kernelspec": { 439 | "display_name": "Python 3", 440 | "language": "python", 441 | "name": "python3" 442 | }, 443 | "language_info": { 444 | "codemirror_mode": { 445 | "name": "ipython", 446 | "version": 3 447 | }, 448 | "file_extension": ".py", 449 | "mimetype": "text/x-python", 450 | "name": "python", 451 | "nbconvert_exporter": "python", 452 | "pygments_lexer": "ipython3", 453 | "version": "3.6.4" 454 | } 455 | }, 456 | "nbformat": 4, 457 | "nbformat_minor": 1 
458 | } 459 | -------------------------------------------------------------------------------- /notebooks_vacios/0001_Introduccion.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Curso de introducción al análisis y modelado de datos con Python \n", 8 | "\n", 9 | "\"logo\"\n", 10 | "\"logo\"" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "---" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "# Introducción" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "A continuación, haremos una rápida introducción al lenguaje Python y al intérprete IPython, así como a Jupyter Notebook, la herramienta que vamos a usar en este curso.\n", 32 | "Detallaremos cuáles son las bondades de Python frente a otros lenguajes, y veremos como ejecutar un script y cuáles son los tipos y estructuras básicas de este lenguaje.\n", 33 | "\n", 34 | "**¡Comenzamos!**\n", 35 | "\n", 36 | "---" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "
" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "## ¿Qué es Python?" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "* Lenguaje de programación interpretado, dinámico y fácil de aprender\n", 58 | "* Creado por [Guido van Rossum](https://es.wikipedia.org/wiki/Guido_van_Rossum) en 1991\n", 59 | "* Ampliamente utilizado en ciencia e ingeniería\n", 60 | "* Multitud de bibliotecas para realizar diferentes tareas." 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "### Ventajas de Python\n", 68 | "\n", 69 | "* **Libre y gratuito**:\n", 70 | " - Posibilidad de estudiar su funcionamiento y corregirlo o mejorarlo\n", 71 | " - Sin restricciones para su uso o distribución, incluso para uso comercial\n", 72 | " \n", 73 | "* **Multiplataforma**: Windows, Mac OS, Linux.\n", 74 | "\n", 75 | "* **Propósito general**:\n", 76 | " - Interfaz con otros lenguajes (glue language): reescritura de partes críticas en lenguajes compilados\n", 77 | " - Cálculo numérico y cálculo simbólico en el mismo lenguaje\n", 78 | " - Scripts de sistema operativo\n", 79 | " - Interfaces gráficas de usuario (GUIs)\n", 80 | " - Servicios web" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": { 87 | "collapsed": true 88 | }, 89 | "outputs": [], 90 | "source": [] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "![xkcd](../images/python_xkcd.png)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "### Desventajas de Python\n", 104 | "\n", 105 | "* Código accesible no significa código fácil de entender o arreglar\n", 106 | "* Algunas bibliotecas son mantenidas por equipos muy reducidos de voluntarios\n", 107 | "* Paradoja de la elección: muchas opciones disponibles, documentación y recursos dispersos\n", 108 | "* Poco material de 
aprendizaje en español (estamos trabajando en ello)\n", 109 | "* Debilidad en áreas como teoría de control" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "### Python en el ámbito científico ingenieril" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "![ecosystem](../images/ecosystem.png)\n" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "### Python para análisis de datos" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "![](../images/DataScientist-Continuum.png)\n", 138 | "Original: [Embracing Open Data Science in your Organization](https://speakerdeck.com/chdoig/embracing-open-data-science-in-your-organization) - Christine Doig (Continum Analytics)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "## Instalación de Python" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "Diferentes distribuciones:" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "* [PYTHON.ORG](https://www.python.org/)\n", 160 | "![python_org](../images/python_org.png)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "* [ANACONDA](https://anaconda.org/) \n", 168 | "![anaconda](../images/anaconda.png)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "Diferentes versiones:\n", 176 | "* 2.X\n", 177 | "* 3.X" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "## Entornos de desarrollo" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "El método más simple es usar un editor (tu preferido) y ejecutar el script desde la 
línea de comandos. Pero existen también __IDE__s (_integrated development environment_ pensados para facilitar la escritura de código y tener al alcance de la mano otras herramientas como _profilers_, _debuggers_, _explorador de variables_... Entre los más adecuados para la programación científica se encuentran [Spyder](http://code.google.com/p/spyderlib/) (instalado con Anaconda) y [PyCharm](https://www.jetbrains.com/pycharm/).\n", 192 | "\n", 193 | "![editors](../images/editors.png)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "#### Jupyter Notebook" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "__Será nuestra herramienta de trabajo durante el curso__. Esto que estás leyendo ahora no es más que un jupyter notebook, que como vemos, contiene celdas que además de código, pueden contener texto e imágenes. Pero veamos primero cómo funciona.\n" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "##### ¿Como trabajar?\n", 215 | "\n", 216 | "__Para iniciar el jupyter notebook a través de la terminal se utiliza el comando__ \n", 217 | "\n", 218 | "```jupyter notebook```\n", 219 | "\n", 220 | "en la pantalla principal podemos ver una ruta y una lista de notebooks. Cada notebook es un archivo que está almacenado en el ordenador en la ruta que aparece. Si en esa carpeta no hay notebooks, veremos un mensaje indicando que la lista de notebooks está vacía.\n", 221 | "\n", 222 | "\n", 223 | "\n", 224 | "Lo notebok están divididos en celdas. Cada celda de código está marcada por la palabra `In []` y están **numeradas**. Tan solo tenemos que escribir el código en ella y hacer click arriba en Cell -> Run, el triángulo (\"Run cell\") o usar el atajo ___`shift + Enter`___. El resultado de la celda se muestra en el campo `Out []`, también numerado y coincidiendo con la celda que acabamos de ejecutar. 
Esto es importante, como ya veremos luego.\n", 225 | "\n", 226 | "Si en la barra superior seleccionas Markdown (o usas el atajo `Esc-M`) en lugar de Code puedes escribir texto, si quieres volver a escribir código puedes volver a seleccionar Code (o usar el atajo `Esc-Y`)." 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "## Programando con Python" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "### El zen de Python" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "### Tipos de datos:" 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "##### integer" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [] 270 | }, 271 | { 272 | "cell_type": "markdown", 273 | "metadata": {}, 274 | "source": [ 275 | "##### float" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": null, 281 | "metadata": {}, 282 | "outputs": [], 283 | "source": [] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "##### string " 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": {}, 296 | "outputs": [], 297 | "source": [] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "##### boolean " 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": null, 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": {}, 316 | "source": [ 317 | "##### list " 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | 
"execution_count": null, 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "metadata": {}, 331 | "outputs": [], 332 | "source": [] 333 | }, 334 | { 335 | "cell_type": "markdown", 336 | "metadata": {}, 337 | "source": [ 338 | "##### tuple" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [] 347 | }, 348 | { 349 | "cell_type": "markdown", 350 | "metadata": {}, 351 | "source": [ 352 | "##### set " 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [] 361 | }, 362 | { 363 | "cell_type": "markdown", 364 | "metadata": {}, 365 | "source": [ 366 | "##### dict" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": null, 372 | "metadata": {}, 373 | "outputs": [], 374 | "source": [] 375 | }, 376 | { 377 | "cell_type": "markdown", 378 | "metadata": {}, 379 | "source": [ 380 | "### Operaciones básicas:" 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "metadata": {}, 386 | "source": [ 387 | "##### Operadores aritméticos (+, -, \\*, /, **, %, //)" 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": null, 393 | "metadata": {}, 394 | "outputs": [], 395 | "source": [] 396 | }, 397 | { 398 | "cell_type": "markdown", 399 | "metadata": {}, 400 | "source": [ 401 | "##### Operadores de comparación (==, !=, >, <, >=, <=)" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": null, 407 | "metadata": {}, 408 | "outputs": [], 409 | "source": [] 410 | }, 411 | { 412 | "cell_type": "markdown", 413 | "metadata": {}, 414 | "source": [ 415 | "##### Operadores de asignación (= , +=, -=, *=, /=, %=, **=, //=)" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": null, 421 | "metadata": {}, 422 | "outputs": [], 423 | 
"source": [] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": null, 428 | "metadata": {}, 429 | "outputs": [], 430 | "source": [] 431 | }, 432 | { 433 | "cell_type": "markdown", 434 | "metadata": {}, 435 | "source": [ 436 | "##### Otros operadores (and, or, in, not in, is, not is...)" 437 | ] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "execution_count": null, 442 | "metadata": {}, 443 | "outputs": [], 444 | "source": [] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": null, 449 | "metadata": {}, 450 | "outputs": [], 451 | "source": [] 452 | }, 453 | { 454 | "cell_type": "markdown", 455 | "metadata": {}, 456 | "source": [ 457 | "### Estructuras de control:" 458 | ] 459 | }, 460 | { 461 | "cell_type": "markdown", 462 | "metadata": {}, 463 | "source": [ 464 | "##### Condicionales - if/elif/else" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": null, 470 | "metadata": { 471 | "collapsed": true 472 | }, 473 | "outputs": [], 474 | "source": [] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": null, 479 | "metadata": {}, 480 | "outputs": [], 481 | "source": [] 482 | }, 483 | { 484 | "cell_type": "markdown", 485 | "metadata": {}, 486 | "source": [ 487 | "##### Bucles - for" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": null, 493 | "metadata": {}, 494 | "outputs": [], 495 | "source": [] 496 | }, 497 | { 498 | "cell_type": "markdown", 499 | "metadata": {}, 500 | "source": [ 501 | "##### Bucles - while" 502 | ] 503 | }, 504 | { 505 | "cell_type": "code", 506 | "execution_count": null, 507 | "metadata": {}, 508 | "outputs": [], 509 | "source": [] 510 | }, 511 | { 512 | "cell_type": "markdown", 513 | "metadata": {}, 514 | "source": [ 515 | "##### Otros comandos control de flujo - pass, continue, break" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "metadata": { 522 | "collapsed": true 523 | }, 524 | "outputs": 
[], 525 | "source": [] 526 | }, 527 | { 528 | "cell_type": "code", 529 | "execution_count": null, 530 | "metadata": {}, 531 | "outputs": [], 532 | "source": [] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": null, 537 | "metadata": {}, 538 | "outputs": [], 539 | "source": [] 540 | }, 541 | { 542 | "cell_type": "markdown", 543 | "metadata": {}, 544 | "source": [ 545 | "### Funciones" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": null, 551 | "metadata": { 552 | "collapsed": true 553 | }, 554 | "outputs": [], 555 | "source": [] 556 | }, 557 | { 558 | "cell_type": "code", 559 | "execution_count": null, 560 | "metadata": {}, 561 | "outputs": [], 562 | "source": [] 563 | }, 564 | { 565 | "cell_type": "markdown", 566 | "metadata": {}, 567 | "source": [ 568 | "---\n", 569 | "\n", 570 | "Hemos aprendido:\n", 571 | "* ¿Qué es Python? Ventajas e inconvenientes\n", 572 | "* Distribuciones y versiones disponibles\n", 573 | "* Entornos de desarrollo\n", 574 | "* Tipos de datos\n", 575 | "* Tipos de operaciones\n", 576 | "* Control de flujo (blucles, condicional)\n", 577 | "* Funciones" 578 | ] 579 | }, 580 | { 581 | "cell_type": "markdown", 582 | "metadata": {}, 583 | "source": [ 584 | "###### Juan Luis Cano, Alejandro Sáez, Mabel Delgado" 585 | ] 586 | }, 587 | { 588 | "cell_type": "markdown", 589 | "metadata": {}, 590 | "source": [ 591 | "---\n", 592 | "_Las siguientes celdas contienen configuración del Notebook_\n", 593 | "\n", 594 | "_Para visualizar y utlizar los enlaces a Twitter el notebook debe ejecutarse como [seguro](http://ipython.org/ipython-doc/dev/notebook/security.html)_\n", 595 | "\n", 596 | " File > Trusted Notebook" 597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": null, 602 | "metadata": {}, 603 | "outputs": [], 604 | "source": [ 605 | "# esta celda da el estilo al notebook" 606 | ] 607 | }, 608 | { 609 | "cell_type": "code", 610 | "execution_count": null, 611 | "metadata": { 612 | 
"collapsed": true 613 | }, 614 | "outputs": [], 615 | "source": [] 616 | } 617 | ], 618 | "metadata": { 619 | "anaconda-cloud": {}, 620 | "kernelspec": { 621 | "display_name": "Python 3", 622 | "language": "python", 623 | "name": "python3" 624 | }, 625 | "language_info": { 626 | "codemirror_mode": { 627 | "name": "ipython", 628 | "version": 3 629 | }, 630 | "file_extension": ".py", 631 | "mimetype": "text/x-python", 632 | "name": "python", 633 | "nbconvert_exporter": "python", 634 | "pygments_lexer": "ipython3", 635 | "version": "3.6.4" 636 | } 637 | }, 638 | "nbformat": 4, 639 | "nbformat_minor": 1 640 | } 641 | -------------------------------------------------------------------------------- /notebooks_vacios/0201-ScikitLearn-Intro-Clasificacion.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Curso de introducción al análisis y modelado de datos con Python \n", 8 | "\n", 9 | "\"logo\"\n", 10 | "\"logo\"" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "---" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "# Scikit-Learn: Introducción y Problema de Clasificación." 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "En los últimos tiempos se habla mucho de _machine learning_, _deep learning_, _reinforcement learning_, muchas más cosas que contienen la palabra _learning_ y, por supuesto, _Big Data_. Todo ello motivado por los avances en capacidad de cálculo de los últimos años, y la popularización de lenguajes de alto nivel, que han permitido entrar de lleno en la fiebre de hacer que las máquinas aprendan. 
\n", 32 | "\n", 33 | "En esta clase veremos una breve introducción al machine learning, y aprenderemos a utilizar el paquete `scikit-learn` de Python, con el objetivo de crear modelos predictivos a partir de nuestros datos de una manera rápida y sencilla. En concreto, veremos cómo resolver el problema de clasificación." 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## ¿En qué consiste el machine learning?" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "El machine learning es una rama de la inteligencia artificial, cuyo objetivo es desarrollar técnicas para enseñar a las máquinas a llevar a cabo ciertas tareas, mostrándoles previamente algunos ejemplos y cómo o cómo no llevar a cabo la tarea de forma exitosa. Por lo tanto, se busca crear programas capaces de generalizar comportamientos a partir de una información suministrada en forma de ejemplos, aprendiendo de ellos, y sin que hayan sido programados a mano punto a punto y detalle a detalle para realizar exitosamente esa tarea.\n", 48 | "\n", 49 | "Los diferentes algoritmos de aprendizaje automático, se agrupan en dos grandes grupos:\n", 50 | "\n", 51 | "* **Aprendizaje supervisado**, cuando tengo datos _etiquetados_, es decir: conozco la variable a predecir de un cierto número de observaciones. Pasándole esta información al algoritmo, este será capaz de predecir dicha variable cuando reciba observaciones nuevas. Por lo tanto, se produce una función que establece una correspondencia entre las entradas y las salidas deseadas del sistema. 
\n", 52 | "Además, dentro de este grupo, tendremos dos tipos de problemas dependiendo de la naturaleza de la variable a predecir:\n", 53 | " - **Clasificación**, si la variable a predecir es discreta o categórica (sí/no, color de ojos, etc)\n", 54 | " - **Regresión**, si la variable a predecir es continua.\n", 55 | "\n", 56 | " \n", 57 | "* **Aprendizaje no supervisado**, cuando no tenemos datos _etiquetados_ y por tanto no tengo ninguna información _a priori_ sobre las categorías de esos ejemplos. Por lo tanto, en este caso, los algoritmos deben ser capaces de descubrir patrones en los datos y agruparlos. Si bien, tendremos que manualmente inspeccionar el resultado después y ver qué sentido podemos darle a esos grupos.\n", 58 | "Dentro de este grupo podemos distinguir:\n", 59 | " - **Clustering**, agrupamiento automáticos de objetos similares en sets..\n", 60 | " - **Reducción de la dimensionalidad**, reducir el número de variables aleatorias a considerar.\n", 61 | "\n", 62 | "En función de la naturaleza de nuestro problema, `scikit-learn` proporciona una gran variedad de algoritmos que podemos elegir.\n", 63 | "\n", 64 | "![Machine Learning map](../images/ml_map.png)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "## ¿Qué es scikit-learn?" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "`scikit-learn` es una libreria que nos proporciona un conjunto de algoritmos de machine learning, que incluyen regresión, clasificación, reducción de la dimensionalidad y clustering.\n", 79 | "\n", 80 | "Se articula sobre la librería `NumPy` y `SciPy` y nos permite enfrentarnos a la resolución de estos problemas a través de un a APi limpia y bien hecha. 
En ese sentido, se trabaja igual que con SciPy, es decir, se importan explícitamente los módulos que se necesitan de la librería.\n", 81 | "\n", 82 | "Hay que indicar que no está especialmente diseñada para datasets super grandes, pero hay cada vez más mejoras en ese área. " 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 1, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/html": [ 93 | "" 94 | ], 95 | "text/plain": [ 96 | "" 97 | ] 98 | }, 99 | "execution_count": 1, 100 | "metadata": {}, 101 | "output_type": "execute_result" 102 | } 103 | ], 104 | "source": [ 105 | "# preserve\n", 106 | "from IPython.display import HTML\n", 107 | "HTML('')" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "#### ¿Cómo se trabaja con scikit-learn?" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "El proceso para usar `scikit-learn` es el siguiente:\n", 122 | "\n", 123 | "1. Separar los datos en matriz de características `features` y variable a predecir `target`\n", 124 | "2. Seleccionar el modelo `estimator`.\n", 125 | "3. Elegir los hiperparámetros\n", 126 | "4. Ajustar o entrenar el modelo (`model.fit`)\n", 127 | "5. Predecir con datos nuevos (`model.predict`)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "## Ejemplos" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "# importamos las librerías que usamos de forma habitual" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "## Problema de Regresión" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "En primer lugar vamos a resolver un problema muy sencillo de regresión, que consiste en ajustar una recta a unos datos. 
Esto difícilmente se puede llamar _machine learning_, pero nos servirá para ver cómo es la forma de trabajar con `scikit-learn`." 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "Como partida, fabricamos unos datos distribuidos a lo largo de una recta con un poco de ruido y los pintamos para ver el resultado." 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "A continuación, importamos el estimador de Regresión Lineal y creamos nuestro modelo." 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "
Tenemos que hacer este `reshape` para transformar nuestro vector en una matriz de columnas. Rara vez tendremos que repetir este paso, puesto que en la práctica siempre tendremos varias variables.
" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "El siguiente paso es ajustar nuestro modelo." 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "Y una vez hecho esto, ya podemos calcular predicciones para los mismos datos" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "Ahora, lo que vamos a hacer es calcular un tipo de error asocido a esta predicción, usando el módulo `sklearn.metrics`:" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "Y ahora predecimos con datos nuevos y vemos el resultado" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": { 290 | "scrolled": true 291 | }, 292 | "outputs": [], 293 | "source": [] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "### Problema de Clasificación" 300 | ] 301 | }, 302 | 
{ 303 | "cell_type": "markdown", 304 | "metadata": {}, 305 | "source": [ 306 | "En `scikit-learn` tenemos disponibles muchos datasets clásicos de ejemplo que podemos utilizar para practicar. Uno de ellos es el dataset MNIST, que consiste en imágenes escaneadas de números escritos a mano por funcionarios de los EEUU, y que pueden ser de 10 posibles clases diferentes." 307 | ] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "metadata": {}, 312 | "source": [ 313 | "Para cargarlo, importamos la función correspondiente de `sklearn.datasets`:" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "# importamos los datasets" 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "metadata": {}, 328 | "source": [ 329 | "Un dataset es un objeto parecido a un diccionario que almacena los datos y algunos metadatos asociados." 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "# cargamos el dataset the digits que es con el que vamos a trabajar." 339 | ] 340 | }, 341 | { 342 | "cell_type": "markdown", 343 | "metadata": {}, 344 | "source": [ 345 | "Los datos de las muestras, están almacenados en `.data`, que siempre es un array 2D de `n_samples` por `n_features`." 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": null, 351 | "metadata": {}, 352 | "outputs": [], 353 | "source": [ 354 | "# vemos el contenido de los datos" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": null, 360 | "metadata": {}, 361 | "outputs": [], 362 | "source": [ 363 | "# vemos cuantas muestras y características tenemos " 364 | ] 365 | }, 366 | { 367 | "cell_type": "markdown", 368 | "metadata": {}, 369 | "source": [ 370 | "Tenemos 1797 muestras, y cada una está caracterizada por 64 valores. 
En este caso, cada muestra original consiste en una imagen de (8,8), es decir, 64 características, a la que se puede acceder por índice, por ejemplo:" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": null, 376 | "metadata": {}, 377 | "outputs": [], 378 | "source": [ 379 | "# acceso a una muestra" 380 | ] 381 | }, 382 | { 383 | "cell_type": "markdown", 384 | "metadata": {}, 385 | "source": [ 386 | "Por otro lado, en el caso de problemas supervisados, se almacenan en `.target` una o más variables de respuesta, que en nuestro ejemplo consisten en un número para cada muestra, y que corresponde con el dígito que estamos intentando aprender." 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": null, 392 | "metadata": {}, 393 | "outputs": [], 394 | "source": [ 395 | "# vemos los targets" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": null, 401 | "metadata": {}, 402 | "outputs": [], 403 | "source": [ 404 | "# vemos cuantos targets tenemos (mismos que muestras)" 405 | ] 406 | }, 407 | { 408 | "cell_type": "markdown", 409 | "metadata": {}, 410 | "source": [ 411 | "Además, podemos ver cuáles son los posibles valores que toman estos targets." 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": null, 417 | "metadata": {}, 418 | "outputs": [], 419 | "source": [] 420 | }, 421 | { 422 | "cell_type": "markdown", 423 | "metadata": {}, 424 | "source": [ 425 | "Por último, podemos extraer información global sobre el dataset de la siguiente forma:" 426 | ] 427 | }, 428 | { 429 | "cell_type": "code", 430 | "execution_count": null, 431 | "metadata": {}, 432 | "outputs": [], 433 | "source": [] 434 | }, 435 | { 436 | "cell_type": "markdown", 437 | "metadata": {}, 438 | "source": [ 439 | "Ya tenemos los datos separados en matriz de características y vector de predicción. 
En este caso, tendremos 64 = 8x8 características (un valor numérico por cada pixel de la imagen) y una variable a predecir que será el número en sí y que irá de 0 a 9." 440 | ] 441 | }, 442 | { 443 | "cell_type": "markdown", 444 | "metadata": {}, 445 | "source": [ 446 | "Vamos a visualizar una de las imágenes como ejemplo para hacernos una idea. " 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": null, 452 | "metadata": {}, 453 | "outputs": [], 454 | "source": [ 455 | "# elegimos por ejemplo los datos asociados a la muestra 42" 456 | ] 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": null, 461 | "metadata": {}, 462 | "outputs": [], 463 | "source": [ 464 | "# vemos qué número sabemos que almacena esta muestra" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": null, 470 | "metadata": {}, 471 | "outputs": [], 472 | "source": [ 473 | "# hacemos un reshape a la muestra para poder representarla \n", 474 | "#(sabemos que viene en un array 1d, pero se corresponde con uno 2d de 8x8)" 475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": null, 480 | "metadata": {}, 481 | "outputs": [], 482 | "source": [ 483 | "# pintamos el resultado\n", 484 | "# (si todo ha ido bien, deberíamos ver el valor indicado por label_num_ej)" 485 | ] 486 | }, 487 | { 488 | "cell_type": "markdown", 489 | "metadata": {}, 490 | "source": [ 491 | "Ten en cuenta que nosotros sabemos qué número es cada imagen porque somos humanos y podemos leerlas. El ordenador lo sabe porque están etiquetadas, pero ¿qué pasa si viene una imagen nueva? \n", 492 | "\n", 493 | "El objetivo por lo tanto es, dada una imagen, predecir qué dígito representa, y como hemos indicado a la hora de explicar el proceso a seguir, el siguiente paso es construir un modelo de clasificación.\n", 494 | "\n", 495 | "Cada algoritmo está expuesto desde scikit-learn a través de un objeto `\"Estimador\"`. 
Por ejemplo, en este caso vamos a elegir un modelo de regresión logística:" 496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": null, 501 | "metadata": {}, 502 | "outputs": [], 503 | "source": [ 504 | "# importamos el modelo" 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": null, 510 | "metadata": {}, 511 | "outputs": [], 512 | "source": [ 513 | "# creamos la instancia del modelo" 514 | ] 515 | }, 516 | { 517 | "cell_type": "markdown", 518 | "metadata": {}, 519 | "source": [ 520 | "Una vez importado y creado, lo que hacemos es ajustar nuestro modelo con él, usando `fit`." 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": null, 526 | "metadata": {}, 527 | "outputs": [], 528 | "source": [ 529 | "# ajustamos el modelo" 530 | ] 531 | }, 532 | { 533 | "cell_type": "markdown", 534 | "metadata": {}, 535 | "source": [ 536 | "Y tras ajustar el modelo, vamos a calcular sus predicciones para los mismos datos de entrenamiento, usando `predict`." 537 | ] 538 | }, 539 | { 540 | "cell_type": "code", 541 | "execution_count": null, 542 | "metadata": {}, 543 | "outputs": [], 544 | "source": [] 545 | }, 546 | { 547 | "cell_type": "markdown", 548 | "metadata": {}, 549 | "source": [ 550 | "Por último, vamos a comparar esas predicciones con los datos reales, para ver qué tal ha sido el ajuste. Para ello usamos `sklearn.metrics` para medir la eficacia del algoritmo." 551 | ] 552 | }, 553 | { 554 | "cell_type": "code", 555 | "execution_count": null, 556 | "metadata": {}, 557 | "outputs": [], 558 | "source": [] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": null, 563 | "metadata": {}, 564 | "outputs": [], 565 | "source": [] 566 | }, 567 | { 568 | "cell_type": "markdown", 569 | "metadata": {}, 570 | "source": [ 571 | "¡Parece que hemos acertado prácticamente todas! Más tarde volveremos sobre este porcentaje de éxito, que bien podría ser engañoso. 
De momento, representemos otra medida de éxito que es la matriz de confusión, y que nos indica el número de observaciones Cij, que sabemos que tendrían que ir en el grupo i, pero que se ha predicho que están en el grupo j." 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": null, 577 | "metadata": {}, 578 | "outputs": [], 579 | "source": [] 580 | }, 581 | { 582 | "cell_type": "code", 583 | "execution_count": null, 584 | "metadata": {}, 585 | "outputs": [], 586 | "source": [] 587 | }, 588 | { 589 | "cell_type": "markdown", 590 | "metadata": {}, 591 | "source": [ 592 | "¡Y ya está! Lo básico de `scikit-learn` está aquí. Lo próximo será usar diferentes tipos de modelos y examinar con rigor su rendimiento para poder seleccionar el que mejor funcione para nuestros datos." 593 | ] 594 | }, 595 | { 596 | "cell_type": "markdown", 597 | "metadata": {}, 598 | "source": [ 599 | "---\n", 600 | "Hemos aprendido:\n", 601 | "* En qué consiste el machine learning.\n", 602 | "* Cómo empezar a usar la librería scikit-learn\n", 603 | "* Resolver un problema de clasificación y otro de regresión." 
604 | ] 605 | }, 606 | { 607 | "cell_type": "markdown", 608 | "metadata": {}, 609 | "source": [ 610 | "###### Juan Luis Cano, Alejandro Sáez, Mabel Delgado" 611 | ] 612 | }, 613 | { 614 | "cell_type": "markdown", 615 | "metadata": {}, 616 | "source": [ 617 | "---\n", 618 | "_Las siguientes celdas contienen configuración del Notebook_\n", 619 | "\n", 620 | "_Para visualizar y utlizar los enlaces a Twitter el notebook debe ejecutarse como [seguro](http://ipython.org/ipython-doc/dev/notebook/security.html)_\n", 621 | "\n", 622 | " File > Trusted Notebook" 623 | ] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": null, 628 | "metadata": {}, 629 | "outputs": [], 630 | "source": [ 631 | "# esta celda da el estilo al notebook" 632 | ] 633 | } 634 | ], 635 | "metadata": { 636 | "kernelspec": { 637 | "display_name": "Python 3", 638 | "language": "python", 639 | "name": "python3" 640 | }, 641 | "language_info": { 642 | "codemirror_mode": { 643 | "name": "ipython", 644 | "version": 3 645 | }, 646 | "file_extension": ".py", 647 | "mimetype": "text/x-python", 648 | "name": "python", 649 | "nbconvert_exporter": "python", 650 | "pygments_lexer": "ipython3", 651 | "version": "3.6.4" 652 | } 653 | }, 654 | "nbformat": 4, 655 | "nbformat_minor": 2 656 | } 657 | -------------------------------------------------------------------------------- /notebooks/zz-AEMET-get_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# API-REST: AEMET OPEN DATA " 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "En este notebook veremos otro ejemplo de uso de la api open data de AEMET. En este caso obtendremos parámetros medidos por una estación meteorológica." 
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import requests\n", 24 | "import datetime\n", 25 | "import time" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "import urllib3\n", 35 | "urllib3.disable_warnings()" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "# Cargamos la api key \n", 45 | "# Get API KEY in AEMET: https://opendata.aemet.es/centrodedescargas/inicio\n", 46 | "api_key = open(\"../../apikey-aemet.txt\").read().rstrip()\n", 47 | "querystring = {\"api_key\": api_key}" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "def generate_chunks(start_date, final_date, step):\n", 57 | " day_step = datetime.timedelta(days=1)\n", 58 | " \n", 59 | " next_date = start_date + step\n", 60 | " \n", 61 | " while next_date < final_date:\n", 62 | " yield (start_date, next_date)\n", 63 | " start_date = next_date + day_step\n", 64 | " next_date = start_date + step\n", 65 | " \n", 66 | " yield (start_date, final_date)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 5, 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "data": { 76 | "text/plain": [ 77 | "[(datetime.datetime(2017, 1, 1, 0, 0), datetime.datetime(2017, 1, 31, 0, 0)),\n", 78 | " (datetime.datetime(2017, 2, 1, 0, 0), datetime.datetime(2017, 3, 3, 0, 0)),\n", 79 | " (datetime.datetime(2017, 3, 4, 0, 0), datetime.datetime(2017, 3, 15, 0, 0))]" 80 | ] 81 | }, 82 | "execution_count": 5, 83 | "metadata": {}, 84 | "output_type": "execute_result" 85 | } 86 | ], 87 | "source": [ 88 | "start_date = datetime.datetime(2017, 1, 1, 0, 0, 0)\n", 89 | "final_date = datetime.datetime(2017, 3, 15, 0, 0, 0)\n", 90 | "step = 
datetime.timedelta(days=30)\n", 91 | "list(generate_chunks(start_date, final_date, step))" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 6, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "def get_daily_data_url(start, end, station):\n", 101 | " url = (\"https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/\"\n", 102 | " \"fechaini/{start}/fechafin/{end}/estacion/{station}\".format(\n", 103 | " start=start.strftime('%Y-%m-%dT%H:%M:%SUTC'),\n", 104 | " end=end.strftime('%Y-%m-%dT%H:%M:%SUTC'),\n", 105 | " station=station)\n", 106 | " )\n", 107 | " return url" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 7, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "def make_request(url):\n", 117 | " request_again = True\n", 118 | " sleep_secs = 1\n", 119 | " while request_again:\n", 120 | " r = requests.get(url, params=querystring, verify=False)\n", 121 | " print(r)\n", 122 | " if r.status_code == requests.codes.OK:\n", 123 | " request_again = False\n", 124 | " data_url = r.json()['datos']\n", 125 | " r_data = requests.get(data_url, params=querystring, verify=False)\n", 126 | " raw_data = r_data.json()\n", 127 | " else:\n", 128 | " print(f\"failed, sleep {sleep_secs}s and try again\")\n", 129 | " time.sleep(sleep_secs)\n", 130 | " sleep_secs += 1\n", 131 | " \n", 132 | " return raw_data" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 8, 138 | "metadata": {}, 139 | "outputs": [ 140 | { 141 | "data": { 142 | "text/plain": [ 143 | "'https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2017-01-01T00%3A00%3A00UTC/fechafin/2017-01-30T00%3A00%3A00UTC/estacion/8025'" 144 | ] 145 | }, 146 | "execution_count": 8, 147 | "metadata": {}, 148 | "output_type": "execute_result" 149 | } 150 | ], 151 | "source": [ 152 | "(\"https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/\"\n", 
153 | " \"fechaini/2017-01-01T00%3A00%3A00UTC/fechafin/2017-01-30T00%3A00%3A00UTC/estacion/8025\")" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 9, 159 | "metadata": {}, 160 | "outputs": [ 161 | { 162 | "name": "stdout", 163 | "output_type": "stream", 164 | "text": [ 165 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2015-01-01T00:00:00UTC/fechafin/2015-01-31T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 166 | "\n", 167 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2015-02-01T00:00:00UTC/fechafin/2015-03-03T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 168 | "\n", 169 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2015-03-04T00:00:00UTC/fechafin/2015-04-03T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 170 | "\n", 171 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2015-04-04T00:00:00UTC/fechafin/2015-05-04T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 172 | "\n", 173 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2015-05-05T00:00:00UTC/fechafin/2015-06-04T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 174 | "\n", 175 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2015-06-05T00:00:00UTC/fechafin/2015-07-05T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 176 | "\n", 177 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2015-07-06T00:00:00UTC/fechafin/2015-08-05T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 178 | "\n", 179 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2015-08-06T00:00:00UTC/fechafin/2015-09-05T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 180 | "\n", 181 | 
"https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2015-09-06T00:00:00UTC/fechafin/2015-10-06T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 182 | "\n", 183 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2015-10-07T00:00:00UTC/fechafin/2015-11-06T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 184 | "\n", 185 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2015-11-07T00:00:00UTC/fechafin/2015-12-07T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 186 | "\n", 187 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2015-12-08T00:00:00UTC/fechafin/2016-01-07T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 188 | "\n", 189 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2016-01-08T00:00:00UTC/fechafin/2016-02-07T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 190 | "\n", 191 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2016-02-08T00:00:00UTC/fechafin/2016-03-09T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 192 | "\n", 193 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2016-03-10T00:00:00UTC/fechafin/2016-04-09T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 194 | "\n", 195 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2016-04-10T00:00:00UTC/fechafin/2016-05-10T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 196 | "\n", 197 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2016-05-11T00:00:00UTC/fechafin/2016-06-10T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 198 | "\n", 199 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2016-06-11T00:00:00UTC/fechafin/2016-07-11T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 200 | "\n", 201 | 
"https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2016-07-12T00:00:00UTC/fechafin/2016-08-11T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 202 | "\n", 203 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2016-08-12T00:00:00UTC/fechafin/2016-09-11T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 204 | "\n", 205 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2016-09-12T00:00:00UTC/fechafin/2016-10-12T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 206 | "\n", 207 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2016-10-13T00:00:00UTC/fechafin/2016-11-12T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 208 | "\n", 209 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2016-11-13T00:00:00UTC/fechafin/2016-12-13T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 210 | "\n", 211 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2016-12-14T00:00:00UTC/fechafin/2017-01-13T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 212 | "\n", 213 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2017-01-14T00:00:00UTC/fechafin/2017-02-13T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 214 | "\n", 215 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2017-02-14T00:00:00UTC/fechafin/2017-03-16T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 216 | "\n", 217 | "failed, sleep 1s and try again\n", 218 | "\n", 219 | "failed, sleep 2s and try again\n", 220 | "\n", 221 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2017-03-17T00:00:00UTC/fechafin/2017-04-16T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 222 | "\n", 223 | 
"https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2017-04-17T00:00:00UTC/fechafin/2017-05-17T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 224 | "\n", 225 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2017-05-18T00:00:00UTC/fechafin/2017-06-17T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 226 | "\n", 227 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2017-06-18T00:00:00UTC/fechafin/2017-07-18T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 228 | "\n", 229 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2017-07-19T00:00:00UTC/fechafin/2017-08-18T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 230 | "\n", 231 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2017-08-19T00:00:00UTC/fechafin/2017-09-18T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 232 | "\n", 233 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2017-09-19T00:00:00UTC/fechafin/2017-10-19T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 234 | "\n", 235 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2017-10-20T00:00:00UTC/fechafin/2017-11-19T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 236 | "\n", 237 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2017-11-20T00:00:00UTC/fechafin/2017-12-20T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 238 | "\n", 239 | "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/2017-12-21T00:00:00UTC/fechafin/2017-12-31T00:00:00UTC/estacion/8025,8019,08370,7247X\n", 240 | "\n" 241 | ] 242 | } 243 | ], 244 | "source": [ 245 | "start_date = datetime.datetime(2015, 1, 1, 0, 0, 0)\n", 246 | "final_date = datetime.datetime(2017, 12, 31, 0, 0, 0)\n", 247 | "step = datetime.timedelta(days=30)\n", 248 | "\n", 249 | "chunks = 
generate_chunks(start_date, final_date, step)\n", 250 | "\n", 251 | "station='8025,8019,08370,7247X'\n", 252 | "\n", 253 | "data = []\n", 254 | "\n", 255 | "for start_date, final_date in chunks:\n", 256 | " url = get_daily_data_url(start_date, final_date, station)\n", 257 | " print(url)\n", 258 | " req_data = make_request(url)\n", 259 | " data = data + req_data\n", 260 | " \n", 261 | " time.sleep(1.5)\n", 262 | "\n", 263 | "# Convert to numeric\n", 264 | "for d in data:\n", 265 | " for param in ['prec', 'presMax', 'presMin', 'racha', 'sol', 'tmax', 'tmed', 'tmin', 'velmedia', 'altitud', 'dir']:\n", 266 | " try:\n", 267 | " d[param] = float(d[param].replace(',', '.'))\n", 268 | " except:\n", 269 | " d[param] = None" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 10, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "data": { 279 | "text/plain": [ 280 | "3262" 281 | ] 282 | }, 283 | "execution_count": 10, 284 | "metadata": {}, 285 | "output_type": "execute_result" 286 | } 287 | ], 288 | "source": [ 289 | "len(data)" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 11, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "data": { 299 | "text/plain": [ 300 | "{'altitud': 81.0,\n", 301 | " 'dir': 14.0,\n", 302 | " 'fecha': '2015-01-01',\n", 303 | " 'horaPresMax': 'Varias',\n", 304 | " 'horaPresMin': '00',\n", 305 | " 'horaracha': '14:00',\n", 306 | " 'horatmax': '13:20',\n", 307 | " 'horatmin': '07:30',\n", 308 | " 'indicativo': '8025',\n", 309 | " 'nombre': 'ALICANTE/ALACANT',\n", 310 | " 'prec': 0.0,\n", 311 | " 'presMax': 1027.2,\n", 312 | " 'presMin': 1019.8,\n", 313 | " 'provincia': 'ALICANTE',\n", 314 | " 'racha': 4.7,\n", 315 | " 'sol': 8.6,\n", 316 | " 'tmax': 16.0,\n", 317 | " 'tmed': 9.6,\n", 318 | " 'tmin': 3.2,\n", 319 | " 'velmedia': 1.4}" 320 | ] 321 | }, 322 | "execution_count": 11, 323 | "metadata": {}, 324 | "output_type": "execute_result" 325 | } 326 | ], 327 | "source": [ 328 | 
"data[0]" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 12, 334 | "metadata": {}, 335 | "outputs": [], 336 | "source": [ 337 | "import pandas as pd" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": 13, 343 | "metadata": {}, 344 | "outputs": [], 345 | "source": [ 346 | "df = pd.DataFrame(data)" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": 14, 352 | "metadata": {}, 353 | "outputs": [ 354 | { 355 | "data": { 356 | "text/html": [ 357 | "
\n", 358 | "\n", 371 | "\n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | "
altituddirfechahoraPresMaxhoraPresMinhorarachahoratmaxhoratminindicativonombreprecpresMaxpresMinprovinciarachasoltmaxtmedtminvelmedia
081.014.02015-01-01Varias0014:0013:2007:308025ALICANTE/ALACANT0.01027.21019.8ALICANTE4.78.616.09.63.21.4
1575.05.02015-01-01NaNNaN06:0015:0006:507247XPINOSO0.0NaNNaNALICANTE4.4NaN12.13.7-4.71.1
243.0NaN2015-01-01Varias00NaN12:4621:308019ALICANTE-ELCHE AEROPUERTO0.01033.51026.0ALICANTENaN6.815.910.85.71.9
381.019.02015-01-02110414:0014:0007:208025ALICANTE/ALACANT0.01028.81025.3ALICANTE4.28.818.110.02.01.1
4575.099.02015-01-02NaNNaNVarias14:3006:507247XPINOSO0.0NaNNaNALICANTE2.8NaN16.25.8-4.61.1
\n", 515 | "
" 516 | ], 517 | "text/plain": [ 518 | " altitud dir fecha horaPresMax horaPresMin horaracha horatmax \\\n", 519 | "0 81.0 14.0 2015-01-01 Varias 00 14:00 13:20 \n", 520 | "1 575.0 5.0 2015-01-01 NaN NaN 06:00 15:00 \n", 521 | "2 43.0 NaN 2015-01-01 Varias 00 NaN 12:46 \n", 522 | "3 81.0 19.0 2015-01-02 11 04 14:00 14:00 \n", 523 | "4 575.0 99.0 2015-01-02 NaN NaN Varias 14:30 \n", 524 | "\n", 525 | " horatmin indicativo nombre prec presMax presMin \\\n", 526 | "0 07:30 8025 ALICANTE/ALACANT 0.0 1027.2 1019.8 \n", 527 | "1 06:50 7247X PINOSO 0.0 NaN NaN \n", 528 | "2 21:30 8019 ALICANTE-ELCHE AEROPUERTO 0.0 1033.5 1026.0 \n", 529 | "3 07:20 8025 ALICANTE/ALACANT 0.0 1028.8 1025.3 \n", 530 | "4 06:50 7247X PINOSO 0.0 NaN NaN \n", 531 | "\n", 532 | " provincia racha sol tmax tmed tmin velmedia \n", 533 | "0 ALICANTE 4.7 8.6 16.0 9.6 3.2 1.4 \n", 534 | "1 ALICANTE 4.4 NaN 12.1 3.7 -4.7 1.1 \n", 535 | "2 ALICANTE NaN 6.8 15.9 10.8 5.7 1.9 \n", 536 | "3 ALICANTE 4.2 8.8 18.1 10.0 2.0 1.1 \n", 537 | "4 ALICANTE 2.8 NaN 16.2 5.8 -4.6 1.1 " 538 | ] 539 | }, 540 | "execution_count": 14, 541 | "metadata": {}, 542 | "output_type": "execute_result" 543 | } 544 | ], 545 | "source": [ 546 | "df.head()" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": 15, 552 | "metadata": {}, 553 | "outputs": [], 554 | "source": [ 555 | "interesting_cols = [\n", 556 | " \"dir\",\n", 557 | " \"fecha\",\n", 558 | " # \"horaPresMax\",\n", 559 | " # \"horaPresMin\",\n", 560 | " # \"horaracha\",\n", 561 | " # \"horatmax\",\n", 562 | " # \"horatmin\",\n", 563 | " \"indicativo\",\n", 564 | " \"nombre\",\n", 565 | " \"prec\",\n", 566 | " \"presMax\",\n", 567 | " \"presMin\",\n", 568 | " \"provincia\",\n", 569 | " \"racha\",\n", 570 | " \"sol\",\n", 571 | " \"tmax\",\n", 572 | " \"tmed\",\n", 573 | " \"tmin\",\n", 574 | " \"velmedia\"\n", 575 | "]" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": 16, 581 | "metadata": {}, 582 | "outputs": [], 583 | 
"source": [ 584 | "CSV_FILE = '../data/alicante_climate_AEMET.csv'" 585 | ] 586 | }, 587 | { 588 | "cell_type": "code", 589 | "execution_count": 17, 590 | "metadata": {}, 591 | "outputs": [], 592 | "source": [ 593 | "df.to_csv(CSV_FILE, index=False, sep='\\t', columns=interesting_cols)" 594 | ] 595 | }, 596 | { 597 | "cell_type": "code", 598 | "execution_count": 18, 599 | "metadata": {}, 600 | "outputs": [], 601 | "source": [ 602 | "with open(CSV_FILE) as f:\n", 603 | " lines = f.readlines()\n", 604 | " now = time.strftime(\"%c\")\n", 605 | " final = [\"created on: \" + now + '\\n'*2] + lines\n", 606 | "\n", 607 | "with open(CSV_FILE, mode='w') as f:\n", 608 | " f.writelines(final)" 609 | ] 610 | }, 611 | { 612 | "cell_type": "code", 613 | "execution_count": 19, 614 | "metadata": {}, 615 | "outputs": [], 616 | "source": [ 617 | "df = df.loc[df['indicativo'] == '8025']" 618 | ] 619 | }, 620 | { 621 | "cell_type": "code", 622 | "execution_count": 20, 623 | "metadata": {}, 624 | "outputs": [], 625 | "source": [ 626 | "CSV_FILE = '../data/alicante_city_climate_AEMET.csv'" 627 | ] 628 | }, 629 | { 630 | "cell_type": "code", 631 | "execution_count": 21, 632 | "metadata": {}, 633 | "outputs": [], 634 | "source": [ 635 | "df.to_csv(CSV_FILE, index=False, sep='\\t', columns=interesting_cols)" 636 | ] 637 | }, 638 | { 639 | "cell_type": "code", 640 | "execution_count": 22, 641 | "metadata": {}, 642 | "outputs": [], 643 | "source": [ 644 | "with open(CSV_FILE) as f:\n", 645 | " lines = f.readlines()\n", 646 | " now = time.strftime(\"%c\")\n", 647 | " final = [\"created on: \" + now + '\\n'*2] + lines\n", 648 | "\n", 649 | "with open(CSV_FILE, mode='w') as f:\n", 650 | " f.writelines(final)" 651 | ] 652 | } 653 | ], 654 | "metadata": { 655 | "kernelspec": { 656 | "display_name": "Python 3", 657 | "language": "python", 658 | "name": "python3" 659 | }, 660 | "language_info": { 661 | "codemirror_mode": { 662 | "name": "ipython", 663 | "version": 3 664 | }, 665 | "file_extension": 
".py", 666 | "mimetype": "text/x-python", 667 | "name": "python", 668 | "nbconvert_exporter": "python", 669 | "pygments_lexer": "ipython3", 670 | "version": "3.6.4" 671 | } 672 | }, 673 | "nbformat": 4, 674 | "nbformat_minor": 2 675 | } 676 | -------------------------------------------------------------------------------- /notebooks_vacios/0101_Pandas-Carga-datos-y-manipulacion.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Curso de introducción al análisis y modelado de datos con Python \n", 8 | "\n", 9 | "\"logo\"\n", 10 | "\"logo\"" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "---" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "# Pandas: Carga y manipulación básica de datos" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "_Hasta ahora hemos visto las diferentes estructuras para almacenamiento de datos que nos ofrece Python, como; integer, real, complex, boolen, list, tuple, dictionary... Sin embargo, también se pueden utilizar arrays a través del paquete `NumPy`, matrices dispersas que nos proporciona el paquete `sparse` de `SciPy`, y otros tipos de estructuras._\n", 32 | "\n", 33 | "_En este notebook, vamos a presentar y empezar a trabajar con el paquete `pandas`. En concreto, nos basaremos en algunos problemas para ver las características de sus estructuras de datos, y para aprender a cargar datos y empezar a manipularnos._\n", 34 | "\n", 35 | "---" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## ¿Qué es pandas?" 
43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "`pandas` es una librería que nos proporciona estructuras de datos y herramientas para realizar análisis de grandes volúmenes de datos de manera rápida.\n", 50 | "\n", 51 | "Se articula sobre la librería `NumPy`, y nos permite enfrentarnos a situaciones en las que tenemos que manejar datos reales, que requieren seguir un proceso de carga, limpieza, filtrado y reducción, y su posterior análisis y representación.\n", 52 | "\n", 53 | "Es de gran utilidad en la industria del Big Data, pues un grandísimo porcentaje del tiempo de trabajo de un Data Scientist, está asociado a la limpieza y preparación de los datos (ver [artículo](https://www.forbes.com/sites/gilpress/2016/03/23/data-preparation-most-time-consuming-least-enjoyable-data-science-task-survey-says/#a5231076f637)), y pandas nos ayuda mucho en esta tarea. " 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "De manera estándar y por convenio, pandas se importa de la siguiente forma:" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "## Cargando los datos" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "Trabajaremos sobre un fichero de datos meteorológicos de AEMET obtenido de su portal de datos abiertos a través de la API (ver notebook adjunto). 
https://opendata.aemet.es/centrodedescargas/inicio" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 2, 87 | "metadata": {}, 88 | "outputs": [ 89 | { 90 | "data": { 91 | "text/html": [ 92 | "" 93 | ], 94 | "text/plain": [ 95 | "" 96 | ] 97 | }, 98 | "execution_count": 2, 99 | "metadata": {}, 100 | "output_type": "execute_result" 101 | } 102 | ], 103 | "source": [ 104 | "# preserve\n", 105 | "from IPython.display import HTML\n", 106 | "HTML('')" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "Vemos que pinta tiene el fichero:" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "# en linux\n", 123 | "#!head ../data/alicante_city_climate_aemet.csv" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "# en windows\n", 133 | "# !more ..\\data\\alicante_city_climate_aemet.csv" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "Vemos que los datos no están en formato CSV, aunque sí que tienen algo de estructura.\n", 141 | "\n", 142 | "¿Qué sucede si intentamos cargarlos con pandas?" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "Tenemos que hacer los siguientes cambios:\n", 157 | "* Separar los campos por tabuladores.\n", 158 | "* Saltar las primeras lineas.\n", 159 | "* Descartar columnas que no nos interesan.\n", 160 | "* Dar nombre a las nuevas columnas.\n", 161 | "* Convertir las fechas al formato correcto.\n", 162 | "* Definir la fecha como índice." 
163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "
Para acordarnos de cómo parsear las fechas: http://strftime.org/
" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "## Explorando los datos" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "# recuperar los tipos de datos de cada columna" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "# recuperar los índices" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "# muestro solo las primers 4 líneas" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "# muestro sólo las últimas 6 líneas" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "# muestro sólo determinadas líneas (slicing)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "# ordeno de índice más antiguo a más moderno" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "# ordeno de mayor a menor la temperatura media" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": { 253 | "scrolled": true 254 | }, 255 | "outputs": [], 256 | "source": [ 257 | "# información general del dataset. 
\n", 258 | "# cuidado, para cada columna son las filas con elementos" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "# numero de filas y columnas en el dataset" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "# contamos cuantos elementos tenemos sin valor" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [ 285 | "# contamos cuantos elementos tenemos con valor" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": {}, 291 | "source": [ 292 | "### Descripción estadística" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "Se pueden pedir los datos estadísticos asociados al dataframe." 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [ 308 | "# descripción estadística" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": {}, 314 | "source": [ 315 | "Por defecto, los elementos con NA no se tienen en cuenta a la hora de calcular los valores. Se puede comprobar viendo como cambian los datos cuando se sustituyen con ceros." 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "metadata": {}, 322 | "outputs": [], 323 | "source": [ 324 | "# cambiar na por ceros y volver a ver la descripción estadística\n", 325 | "# recuerda que esto no cambia data en realidad, porque no lo hemos guardado" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "Otra forma de acceder a los datos estadísticos, es pedirlos de forma directa." 
333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": null, 338 | "metadata": {}, 339 | "outputs": [], 340 | "source": [ 341 | "# media" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "metadata": {}, 348 | "outputs": [], 349 | "source": [ 350 | "# cuantil" 351 | ] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "metadata": {}, 356 | "source": [ 357 | "## Accediendo a los datos." 358 | ] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": {}, 363 | "source": [ 364 | "Tenemos dos funciones principales para acceder a los datos, que son `.loc` que permite acceder por etiquetas, y `.iloc`que permite acceder por índices. " 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": {}, 370 | "source": [ 371 | "##### columnas" 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "metadata": {}, 377 | "source": [ 378 | "Hay varias formas de acceder a las columnas: por nombre o por atributo (si no contienen espacios ni caracteres especiales). Además, también se puede usar `.loc` (basado en etiquetas), `.iloc` (basado en posiciones enteras).\n" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": null, 384 | "metadata": {}, 385 | "outputs": [], 386 | "source": [ 387 | "# accediendo a una columna por el nombre (label) y obteniendo una serie" 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": null, 393 | "metadata": {}, 394 | "outputs": [], 395 | "source": [ 396 | "# accediendo a una columna por el nombre (label) y obteniendo un dataframe." 
397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": null, 402 | "metadata": {}, 403 | "outputs": [], 404 | "source": [ 405 | "# accediendo a varias columnas por el nombre (label)" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": null, 411 | "metadata": {}, 412 | "outputs": [], 413 | "source": [ 414 | "# accediendo a una columna por el atributo" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": null, 420 | "metadata": {}, 421 | "outputs": [], 422 | "source": [ 423 | "# accediendo a una columna por índice y obteniendo una serie" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": {}, 430 | "outputs": [], 431 | "source": [ 432 | "# accediendo a una columna por índice y obteniendo un dataframe" 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": null, 438 | "metadata": {}, 439 | "outputs": [], 440 | "source": [ 441 | "# accediendo a varias columnas por índice" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": null, 447 | "metadata": {}, 448 | "outputs": [], 449 | "source": [ 450 | "# accediendo a una columna por el label y obteniendo una serie" 451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": null, 456 | "metadata": {}, 457 | "outputs": [], 458 | "source": [ 459 | "# accediendo a una columna por el label y obteniendo un dataframe" 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": null, 465 | "metadata": {}, 466 | "outputs": [], 467 | "source": [ 468 | "# accediendo a varias columnas por le label" 469 | ] 470 | }, 471 | { 472 | "cell_type": "markdown", 473 | "metadata": {}, 474 | "source": [ 475 | "##### filas" 476 | ] 477 | }, 478 | { 479 | "cell_type": "markdown", 480 | "metadata": {}, 481 | "source": [ 482 | "Para acceder a las filas tenemos dos métodos: `.loc` (basado en etiquetas), `.iloc` (basado en posiciones enteras)." 
483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": null, 488 | "metadata": {}, 489 | "outputs": [], 490 | "source": [ 491 | "# accediendo a una fila por etiqueta y obteniendo una serie" 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": null, 497 | "metadata": {}, 498 | "outputs": [], 499 | "source": [ 500 | "# accediendo a una fila por etiqueta y obteniendo un dataframe" 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": null, 506 | "metadata": {}, 507 | "outputs": [], 508 | "source": [ 509 | "# accediendo a varias filas por etiqueta" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": null, 515 | "metadata": {}, 516 | "outputs": [], 517 | "source": [ 518 | "# accediendo a una fila por posición entera y obteniendo una serie" 519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": null, 524 | "metadata": {}, 525 | "outputs": [], 526 | "source": [ 527 | "# accediendo a una fila por posición entera y obteniendo un dataframe" 528 | ] 529 | }, 530 | { 531 | "cell_type": "code", 532 | "execution_count": null, 533 | "metadata": {}, 534 | "outputs": [], 535 | "source": [ 536 | "# accediendo a varias filas por posición entera" 537 | ] 538 | }, 539 | { 540 | "cell_type": "markdown", 541 | "metadata": {}, 542 | "source": [ 543 | "##### filas y columnas " 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": null, 549 | "metadata": {}, 550 | "outputs": [], 551 | "source": [ 552 | "# accediendo a filas y columnas por etiquetas" 553 | ] 554 | }, 555 | { 556 | "cell_type": "code", 557 | "execution_count": null, 558 | "metadata": {}, 559 | "outputs": [], 560 | "source": [ 561 | "# accediendo a filas y columnas por posiciones enteras" 562 | ] 563 | }, 564 | { 565 | "cell_type": "markdown", 566 | "metadata": {}, 567 | "source": [ 568 | "## Filtrado de datos" 569 | ] 570 | }, 571 | { 572 | "cell_type": "code", 573 | "execution_count": 
null, 574 | "metadata": {}, 575 | "outputs": [], 576 | "source": [ 577 | "# busco duplicados en las fechas" 578 | ] 579 | }, 580 | { 581 | "cell_type": "code", 582 | "execution_count": null, 583 | "metadata": {}, 584 | "outputs": [], 585 | "source": [ 586 | "# tmax > 37" 587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": null, 592 | "metadata": {}, 593 | "outputs": [], 594 | "source": [ 595 | "# 0 Trusted Notebook" 764 | ] 765 | }, 766 | { 767 | "cell_type": "code", 768 | "execution_count": 54, 769 | "metadata": {}, 770 | "outputs": [ 771 | { 772 | "data": { 773 | "text/html": [ 774 | "/* This template is inspired in the one used by Lorena Barba\n", 775 | "in the numerical-mooc repository: https://github.com/numerical-mooc/numerical-mooc\n", 776 | "We thank her work and hope you also enjoy the look of the notobooks with this style */\n", 777 | "\n", 778 | "\n", 779 | "\n", 780 | "\n", 918 | "\n" 934 | ], 935 | "text/plain": [ 936 | "" 937 | ] 938 | }, 939 | "execution_count": 54, 940 | "metadata": {}, 941 | "output_type": "execute_result" 942 | } 943 | ], 944 | "source": [ 945 | "#preserve\n", 946 | "# Esta celda da el estilo al notebook\n", 947 | "from IPython.core.display import HTML\n", 948 | "css_file = '../style/style.css'\n", 949 | "HTML(open(css_file, \"r\").read())" 950 | ] 951 | } 952 | ], 953 | "metadata": { 954 | "anaconda-cloud": {}, 955 | "kernelspec": { 956 | "display_name": "Python 3", 957 | "language": "python", 958 | "name": "python3" 959 | }, 960 | "language_info": { 961 | "codemirror_mode": { 962 | "name": "ipython", 963 | "version": 3 964 | }, 965 | "file_extension": ".py", 966 | "mimetype": "text/x-python", 967 | "name": "python", 968 | "nbconvert_exporter": "python", 969 | "pygments_lexer": "ipython3", 970 | "version": "3.6.4" 971 | } 972 | }, 973 | "nbformat": 4, 974 | "nbformat_minor": 1 975 | } 976 | -------------------------------------------------------------------------------- /notebooks/0001_Introduccion.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Curso de introducción al análisis y modelado de datos con Python \n", 8 | "\n", 9 | "\"logo\"\n", 10 | "\"logo\"" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "---" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "# Introducción" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "A continuación, haremos una rápida introducción al lenguaje Python y al intérprete IPython, así como a Jupyter Notebook, la herramienta que vamos a usar en este curso.\n", 32 | "Detallaremos cuáles son las bondades de Python frente a otros lenguajes, y veremos como ejecutar un script y cuáles son los tipos y estructuras básicas de este lenguaje.\n", 33 | "\n", 34 | "**¡Comenzamos!**\n", 35 | "\n", 36 | "---" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "
" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "## ¿Qué es Python?" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "* Lenguaje de programación interpretado, dinámico y fácil de aprender\n", 58 | "* Creado por [Guido van Rossum](https://es.wikipedia.org/wiki/Guido_van_Rossum) en 1991\n", 59 | "* Ampliamente utilizado en ciencia e ingeniería\n", 60 | "* Multitud de bibliotecas para realizar diferentes tareas." 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "### Ventajas de Python\n", 68 | "\n", 69 | "* **Libre y gratuito**:\n", 70 | " - Posibilidad de estudiar su funcionamiento y corregirlo o mejorarlo\n", 71 | " - Sin restricciones para su uso o distribución, incluso para uso comercial\n", 72 | " \n", 73 | "* **Multiplataforma**: Windows, Mac OS, Linux.\n", 74 | "\n", 75 | "* **Propósito general**:\n", 76 | " - Interfaz con otros lenguajes (glue language): reescritura de partes críticas en lenguajes compilados\n", 77 | " - Cálculo numérico y cálculo simbólico en el mismo lenguaje\n", 78 | " - Scripts de sistema operativo\n", 79 | " - Interfaces gráficas de usuario (GUIs)\n", 80 | " - Servicios web" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 1, 86 | "metadata": { 87 | "collapsed": true 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "import antigravity" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "![xkcd](../images/python_xkcd.png)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "### Desventajas de Python\n", 106 | "\n", 107 | "* Código accesible no significa código fácil de entender o arreglar\n", 108 | "* Algunas bibliotecas son mantenidas por equipos muy reducidos de voluntarios\n", 109 | "* Paradoja de la elección: muchas opciones disponibles, documentación y recursos dispersos\n", 110 
| "* Poco material de aprendizaje en español (estamos trabajando en ello)\n", 111 | "* Debilidad en áreas como teoría de control" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "### Python en el ámbito científico ingenieril" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "![ecosystem](../images/ecosystem.png)\n" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "### Python para análisis de datos" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "![](../images/DataScientist-Continuum.png)\n", 140 | "Original: [Embracing Open Data Science in your Organization](https://speakerdeck.com/chdoig/embracing-open-data-science-in-your-organization) - Christine Doig (Continum Analytics)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "## Instalación de Python" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "Diferentes distribuciones:" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "* [PYTHON.ORG](https://www.python.org/)\n", 162 | "![python_org](../images/python_org.png)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "* [ANACONDA](https://anaconda.org/) \n", 170 | "![anaconda](../images/anaconda.png)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "Diferentes versiones:\n", 178 | "* 2.X\n", 179 | "* 3.X" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "## Entornos de desarrollo" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "El método más simple es usar un editor (tu preferido) y 
ejecutar el script desde la línea de comandos. Pero existen también __IDE__s (_integrated development environment_ pensados para facilitar la escritura de código y tener al alcance de la mano otras herramientas como _profilers_, _debuggers_, _explorador de variables_... Entre los más adecuados para la programación científica se encuentran [Spyder](http://code.google.com/p/spyderlib/) (instalado con Anaconda) y [PyCharm](https://www.jetbrains.com/pycharm/).\n", 194 | "\n", 195 | "![editors](../images/editors.png)" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "#### Jupyter Notebook" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "__Será nuestra herramienta de trabajo durante el curso__. Esto que estás leyendo ahora no es más que un jupyter notebook, que como vemos, contiene celdas que además de código, pueden contener texto e imágenes. Pero veamos primero cómo funciona.\n" 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": {}, 215 | "source": [ 216 | "##### ¿Como trabajar?\n", 217 | "\n", 218 | "__Para iniciar el jupyter notebook a través de la terminal se utiliza el comando__ \n", 219 | "\n", 220 | "```jupyter notebook```\n", 221 | "\n", 222 | "en la pantalla principal podemos ver una ruta y una lista de notebooks. Cada notebook es un archivo que está almacenado en el ordenador en la ruta que aparece. Si en esa carpeta no hay notebooks, veremos un mensaje indicando que la lista de notebooks está vacía.\n", 223 | "\n", 224 | "\n", 225 | "\n", 226 | "Lo notebok están divididos en celdas. Cada celda de código está marcada por la palabra `In []` y están **numeradas**. Tan solo tenemos que escribir el código en ella y hacer click arriba en Cell -> Run, el triángulo (\"Run cell\") o usar el atajo ___`shift + Enter`___. 
El resultado de la celda se muestra en el campo `Out []`, también numerado y coincidiendo con la celda que acabamos de ejecutar. Esto es importante, como ya veremos luego.\n", 227 | "\n", 228 | "Si en la barra superior seleccionas Markdown (o usas el atajo `Esc-M`) en lugar de Code puedes escribir texto, si quieres volver a escribir código puedes volver a seleccionar Code (o usar el atajo `Esc-Y`)." 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "## Programando con Python" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "### El zen de Python" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 2, 248 | "metadata": {}, 249 | "outputs": [ 250 | { 251 | "name": "stdout", 252 | "output_type": "stream", 253 | "text": [ 254 | "The Zen of Python, by Tim Peters\n", 255 | "\n", 256 | "Beautiful is better than ugly.\n", 257 | "Explicit is better than implicit.\n", 258 | "Simple is better than complex.\n", 259 | "Complex is better than complicated.\n", 260 | "Flat is better than nested.\n", 261 | "Sparse is better than dense.\n", 262 | "Readability counts.\n", 263 | "Special cases aren't special enough to break the rules.\n", 264 | "Although practicality beats purity.\n", 265 | "Errors should never pass silently.\n", 266 | "Unless explicitly silenced.\n", 267 | "In the face of ambiguity, refuse the temptation to guess.\n", 268 | "There should be one-- and preferably only one --obvious way to do it.\n", 269 | "Although that way may not be obvious at first unless you're Dutch.\n", 270 | "Now is better than never.\n", 271 | "Although never is often better than *right* now.\n", 272 | "If the implementation is hard to explain, it's a bad idea.\n", 273 | "If the implementation is easy to explain, it may be a good idea.\n", 274 | "Namespaces are one honking great idea -- let's do more of those!\n" 275 | ] 276 | } 277 | ], 278 | "source": [ 279 | 
"import this" 280 | ] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": {}, 285 | "source": [ 286 | "### Tipos de datos:" 287 | ] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "metadata": {}, 292 | "source": [ 293 | "##### integer" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 3, 299 | "metadata": {}, 300 | "outputs": [ 301 | { 302 | "data": { 303 | "text/plain": [ 304 | "1" 305 | ] 306 | }, 307 | "execution_count": 3, 308 | "metadata": {}, 309 | "output_type": "execute_result" 310 | } 311 | ], 312 | "source": [ 313 | "a = 1\n", 314 | "a" 315 | ] 316 | }, 317 | { 318 | "cell_type": "markdown", 319 | "metadata": {}, 320 | "source": [ 321 | "##### float" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": 4, 327 | "metadata": {}, 328 | "outputs": [ 329 | { 330 | "data": { 331 | "text/plain": [ 332 | "3.5" 333 | ] 334 | }, 335 | "execution_count": 4, 336 | "metadata": {}, 337 | "output_type": "execute_result" 338 | } 339 | ], 340 | "source": [ 341 | "a = 3.5\n", 342 | "a" 343 | ] 344 | }, 345 | { 346 | "cell_type": "markdown", 347 | "metadata": {}, 348 | "source": [ 349 | "##### string " 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 5, 355 | "metadata": {}, 356 | "outputs": [ 357 | { 358 | "data": { 359 | "text/plain": [ 360 | "'mi curso'" 361 | ] 362 | }, 363 | "execution_count": 5, 364 | "metadata": {}, 365 | "output_type": "execute_result" 366 | } 367 | ], 368 | "source": [ 369 | "a = 'mi curso'\n", 370 | "a" 371 | ] 372 | }, 373 | { 374 | "cell_type": "markdown", 375 | "metadata": {}, 376 | "source": [ 377 | "##### boolean " 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": 6, 383 | "metadata": {}, 384 | "outputs": [ 385 | { 386 | "name": "stdout", 387 | "output_type": "stream", 388 | "text": [ 389 | "True False\n" 390 | ] 391 | } 392 | ], 393 | "source": [ 394 | "a = True\n", 395 | "b = False\n", 396 | "print(a, b)" 397 | ] 398 
| }, 399 | { 400 | "cell_type": "markdown", 401 | "metadata": {}, 402 | "source": [ 403 | "##### list " 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 7, 409 | "metadata": {}, 410 | "outputs": [ 411 | { 412 | "data": { 413 | "text/plain": [ 414 | "[1, '3', 1, 2, 'a', True]" 415 | ] 416 | }, 417 | "execution_count": 7, 418 | "metadata": {}, 419 | "output_type": "execute_result" 420 | } 421 | ], 422 | "source": [ 423 | "my_list = [1, '3', 1, 2, 'a', True] \n", 424 | "my_list" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": 8, 430 | "metadata": {}, 431 | "outputs": [ 432 | { 433 | "data": { 434 | "text/plain": [ 435 | "[1, '3', 1, 2, 'a', True, 99]" 436 | ] 437 | }, 438 | "execution_count": 8, 439 | "metadata": {}, 440 | "output_type": "execute_result" 441 | } 442 | ], 443 | "source": [ 444 | "my_list.append(99)\n", 445 | "my_list" 446 | ] 447 | }, 448 | { 449 | "cell_type": "markdown", 450 | "metadata": {}, 451 | "source": [ 452 | "##### tuple" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": 9, 458 | "metadata": {}, 459 | "outputs": [ 460 | { 461 | "data": { 462 | "text/plain": [ 463 | "(1, '3', 1, 2, 'a', True)" 464 | ] 465 | }, 466 | "execution_count": 9, 467 | "metadata": {}, 468 | "output_type": "execute_result" 469 | } 470 | ], 471 | "source": [ 472 | "my_tuple = (1, '3', 1, 2, 'a', True) \n", 473 | "my_tuple" 474 | ] 475 | }, 476 | { 477 | "cell_type": "markdown", 478 | "metadata": {}, 479 | "source": [ 480 | "##### set " 481 | ] 482 | }, 483 | { 484 | "cell_type": "code", 485 | "execution_count": 10, 486 | "metadata": {}, 487 | "outputs": [ 488 | { 489 | "data": { 490 | "text/plain": [ 491 | "{1, 2, '3', 'a'}" 492 | ] 493 | }, 494 | "execution_count": 10, 495 | "metadata": {}, 496 | "output_type": "execute_result" 497 | } 498 | ], 499 | "source": [ 500 | "my_set = {1, '3', 1, 2, 'a', True}\n", 501 | "my_set" 502 | ] 503 | }, 504 | { 505 | "cell_type": "markdown", 506 | 
"metadata": {}, 507 | "source": [ 508 | "##### dict" 509 | ] 510 | }, 511 | { 512 | "cell_type": "code", 513 | "execution_count": 11, 514 | "metadata": {}, 515 | "outputs": [ 516 | { 517 | "data": { 518 | "text/plain": [ 519 | "{'altura': 1.8, 'edad': 18, 'equipo': None, 'puesto': 'alero'}" 520 | ] 521 | }, 522 | "execution_count": 11, 523 | "metadata": {}, 524 | "output_type": "execute_result" 525 | } 526 | ], 527 | "source": [ 528 | "my_dic = {'edad': 18, 'altura': 1.80, 'puesto': 'alero', 'equipo' : None} \n", 529 | "my_dic" 530 | ] 531 | }, 532 | { 533 | "cell_type": "markdown", 534 | "metadata": {}, 535 | "source": [ 536 | "### Operaciones básicas:" 537 | ] 538 | }, 539 | { 540 | "cell_type": "markdown", 541 | "metadata": {}, 542 | "source": [ 543 | "##### Operadores aritméticos (+, -, \\*, /, **, %, //)" 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": 12, 549 | "metadata": {}, 550 | "outputs": [ 551 | { 552 | "name": "stdout", 553 | "output_type": "stream", 554 | "text": [ 555 | "1.5 aaaa 55.75\n" 556 | ] 557 | } 558 | ], 559 | "source": [ 560 | "a = 3/2\n", 561 | "b = 4 * 'a'\n", 562 | "c = (15**2 - 2)/4\n", 563 | "\n", 564 | "print(a, b, c)" 565 | ] 566 | }, 567 | { 568 | "cell_type": "markdown", 569 | "metadata": {}, 570 | "source": [ 571 | "##### Operadores de comparación (==, !=, >, <, >=, <=)" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": 13, 577 | "metadata": {}, 578 | "outputs": [ 579 | { 580 | "name": "stdout", 581 | "output_type": "stream", 582 | "text": [ 583 | "False True True\n" 584 | ] 585 | } 586 | ], 587 | "source": [ 588 | "a = 7\n", 589 | "b = 3\n", 590 | "\n", 591 | "print(a== b, a != b, a > b)" 592 | ] 593 | }, 594 | { 595 | "cell_type": "markdown", 596 | "metadata": {}, 597 | "source": [ 598 | "##### Operadores de asignación (= , +=, -=, *=, /=, %=, **=, //=)" 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": 14, 604 | "metadata": {}, 605 | "outputs": [ 
606 | { 607 | "name": "stdout", 608 | "output_type": "stream", 609 | "text": [ 610 | "a = None b = 15\n" 611 | ] 612 | } 613 | ], 614 | "source": [ 615 | "a = 15\n", 616 | "b = None \n", 617 | "a, b = b, a\n", 618 | "print('a = ', a, ' b =', b)" 619 | ] 620 | }, 621 | { 622 | "cell_type": "code", 623 | "execution_count": 15, 624 | "metadata": {}, 625 | "outputs": [ 626 | { 627 | "data": { 628 | "text/plain": [ 629 | "16" 630 | ] 631 | }, 632 | "execution_count": 15, 633 | "metadata": {}, 634 | "output_type": "execute_result" 635 | } 636 | ], 637 | "source": [ 638 | "c = 1\n", 639 | "c += 15\n", 640 | "c" 641 | ] 642 | }, 643 | { 644 | "cell_type": "markdown", 645 | "metadata": {}, 646 | "source": [ 647 | "##### Otros operadores (and, or, in, not in, is, not is...)" 648 | ] 649 | }, 650 | { 651 | "cell_type": "code", 652 | "execution_count": 16, 653 | "metadata": {}, 654 | "outputs": [ 655 | { 656 | "data": { 657 | "text/plain": [ 658 | "False" 659 | ] 660 | }, 661 | "execution_count": 16, 662 | "metadata": {}, 663 | "output_type": "execute_result" 664 | } 665 | ], 666 | "source": [ 667 | "a = 5\n", 668 | "a is None" 669 | ] 670 | }, 671 | { 672 | "cell_type": "code", 673 | "execution_count": 17, 674 | "metadata": {}, 675 | "outputs": [ 676 | { 677 | "data": { 678 | "text/plain": [ 679 | "True" 680 | ] 681 | }, 682 | "execution_count": 17, 683 | "metadata": {}, 684 | "output_type": "execute_result" 685 | } 686 | ], 687 | "source": [ 688 | "b = [10, 3, 5, 7]\n", 689 | "a in b" 690 | ] 691 | }, 692 | { 693 | "cell_type": "markdown", 694 | "metadata": {}, 695 | "source": [ 696 | "### Estructuras de control:" 697 | ] 698 | }, 699 | { 700 | "cell_type": "markdown", 701 | "metadata": {}, 702 | "source": [ 703 | "##### Condicionales - if/elif/else" 704 | ] 705 | }, 706 | { 707 | "cell_type": "code", 708 | "execution_count": 18, 709 | "metadata": { 710 | "collapsed": true 711 | }, 712 | "outputs": [], 713 | "source": [ 714 | "a = 15" 715 | ] 716 | }, 717 | { 718 | 
"cell_type": "code", 719 | "execution_count": 19, 720 | "metadata": {}, 721 | "outputs": [ 722 | { 723 | "name": "stdout", 724 | "output_type": "stream", 725 | "text": [ 726 | "a es mayor que 1\n" 727 | ] 728 | } 729 | ], 730 | "source": [ 731 | "if a > 1:\n", 732 | " print('a es mayor que 1')\n", 733 | "elif a < 1:\n", 734 | " print('a es menor que 1')\n", 735 | "else:\n", 736 | " print('a es igual a 1')" 737 | ] 738 | }, 739 | { 740 | "cell_type": "markdown", 741 | "metadata": {}, 742 | "source": [ 743 | "##### Bucles - for" 744 | ] 745 | }, 746 | { 747 | "cell_type": "code", 748 | "execution_count": 20, 749 | "metadata": {}, 750 | "outputs": [ 751 | { 752 | "name": "stdout", 753 | "output_type": "stream", 754 | "text": [ 755 | "El elemento 0 es: oxigeno\n", 756 | "El elemento 1 es: azufre\n", 757 | "El elemento 2 es: hidrogeno\n" 758 | ] 759 | } 760 | ], 761 | "source": [ 762 | "mis_elementos= ['oxigeno', 'azufre', 'hidrogeno']\n", 763 | "\n", 764 | "for i, elem in enumerate(mis_elementos):\n", 765 | " print('El elemento {0} es: {1}'.format(i, elem))" 766 | ] 767 | }, 768 | { 769 | "cell_type": "markdown", 770 | "metadata": {}, 771 | "source": [ 772 | "##### Bucles - while" 773 | ] 774 | }, 775 | { 776 | "cell_type": "code", 777 | "execution_count": 21, 778 | "metadata": {}, 779 | "outputs": [ 780 | { 781 | "name": "stdout", 782 | "output_type": "stream", 783 | "text": [ 784 | "0\n", 785 | "1\n", 786 | "2\n", 787 | "3\n", 788 | "4\n" 789 | ] 790 | } 791 | ], 792 | "source": [ 793 | "contador = 0\n", 794 | "while contador < 5:\n", 795 | " print(contador)\n", 796 | " contador += 1" 797 | ] 798 | }, 799 | { 800 | "cell_type": "markdown", 801 | "metadata": {}, 802 | "source": [ 803 | "##### Otros comandos control de flujo - pass, continue, break" 804 | ] 805 | }, 806 | { 807 | "cell_type": "code", 808 | "execution_count": 22, 809 | "metadata": { 810 | "collapsed": true 811 | }, 812 | "outputs": [], 813 | "source": [ 814 | "a = 25\n", 815 | "\n", 816 | "if a in {13, 
25}:\n", 817 | " pass" 818 | ] 819 | }, 820 | { 821 | "cell_type": "code", 822 | "execution_count": 23, 823 | "metadata": {}, 824 | "outputs": [ 825 | { 826 | "name": "stdout", 827 | "output_type": "stream", 828 | "text": [ 829 | "1\n", 830 | "2\n", 831 | "4\n", 832 | "5\n" 833 | ] 834 | } 835 | ], 836 | "source": [ 837 | "contador = 0\n", 838 | "while contador < 5:\n", 839 | " contador += 1\n", 840 | " if contador == 3:\n", 841 | " continue\n", 842 | " print(contador)" 843 | ] 844 | }, 845 | { 846 | "cell_type": "code", 847 | "execution_count": 24, 848 | "metadata": {}, 849 | "outputs": [ 850 | { 851 | "name": "stdout", 852 | "output_type": "stream", 853 | "text": [ 854 | "None\n" 855 | ] 856 | } 857 | ], 858 | "source": [ 859 | "my_list = [1, 2, None, 4]\n", 860 | "\n", 861 | "sum = 0\n", 862 | "for elem in my_list:\n", 863 | " \n", 864 | " if elem is None:\n", 865 | " sum = None\n", 866 | " break\n", 867 | " \n", 868 | " sum += elem\n", 869 | "\n", 870 | "print(sum) \n", 871 | " " 872 | ] 873 | }, 874 | { 875 | "cell_type": "markdown", 876 | "metadata": {}, 877 | "source": [ 878 | "### Funciones" 879 | ] 880 | }, 881 | { 882 | "cell_type": "code", 883 | "execution_count": 2, 884 | "metadata": { 885 | "collapsed": true 886 | }, 887 | "outputs": [], 888 | "source": [ 889 | "def sumale_tres(value):\n", 890 | " value += 3\n", 891 | " return value" 892 | ] 893 | }, 894 | { 895 | "cell_type": "code", 896 | "execution_count": 3, 897 | "metadata": {}, 898 | "outputs": [ 899 | { 900 | "data": { 901 | "text/plain": [ 902 | "18" 903 | ] 904 | }, 905 | "execution_count": 3, 906 | "metadata": {}, 907 | "output_type": "execute_result" 908 | } 909 | ], 910 | "source": [ 911 | "res = sumale_tres(15)\n", 912 | "res" 913 | ] 914 | }, 915 | { 916 | "cell_type": "markdown", 917 | "metadata": {}, 918 | "source": [ 919 | "---\n", 920 | "\n", 921 | "Hemos aprendido:\n", 922 | "* ¿Qué es Python? 
Ventajas e inconvenientes\n", 923 | "* Distribuciones y versiones disponibles\n", 924 | "* Entornos de desarrollo\n", 925 | "* Tipos de datos\n", 926 | "* Tipos de operaciones\n", 927 | "* Control de flujo (blucles, condicional)\n", 928 | "* Funciones" 929 | ] 930 | }, 931 | { 932 | "cell_type": "markdown", 933 | "metadata": {}, 934 | "source": [ 935 | "###### Juan Luis Cano, Alejandro Sáez, Mabel Delgado" 936 | ] 937 | }, 938 | { 939 | "cell_type": "markdown", 940 | "metadata": {}, 941 | "source": [ 942 | "---\n", 943 | "_Las siguientes celdas contienen configuración del Notebook_\n", 944 | "\n", 945 | "_Para visualizar y utlizar los enlaces a Twitter el notebook debe ejecutarse como [seguro](http://ipython.org/ipython-doc/dev/notebook/security.html)_\n", 946 | "\n", 947 | " File > Trusted Notebook" 948 | ] 949 | }, 950 | { 951 | "cell_type": "code", 952 | "execution_count": 25, 953 | "metadata": {}, 954 | "outputs": [ 955 | { 956 | "data": { 957 | "text/html": [ 958 | "/* This template is inspired in the one used by Lorena Barba\n", 959 | "in the numerical-mooc repository: https://github.com/numerical-mooc/numerical-mooc\n", 960 | "We thank her work and hope you also enjoy the look of the notobooks with this style */\n", 961 | "\n", 962 | "\n", 963 | "\n", 964 | "\n", 1102 | "\n" 1118 | ], 1119 | "text/plain": [ 1120 | "" 1121 | ] 1122 | }, 1123 | "execution_count": 25, 1124 | "metadata": {}, 1125 | "output_type": "execute_result" 1126 | } 1127 | ], 1128 | "source": [ 1129 | "# Esta celda da el estilo al notebook\n", 1130 | "from IPython.core.display import HTML\n", 1131 | "css_file = '../style/style.css'\n", 1132 | "HTML(open(css_file, \"r\").read())" 1133 | ] 1134 | }, 1135 | { 1136 | "cell_type": "code", 1137 | "execution_count": null, 1138 | "metadata": { 1139 | "collapsed": true 1140 | }, 1141 | "outputs": [], 1142 | "source": [] 1143 | } 1144 | ], 1145 | "metadata": { 1146 | "anaconda-cloud": {}, 1147 | "kernelspec": { 1148 | "display_name": "Python 3", 
1149 | "language": "python", 1150 | "name": "python3" 1151 | }, 1152 | "language_info": { 1153 | "codemirror_mode": { 1154 | "name": "ipython", 1155 | "version": 3 1156 | }, 1157 | "file_extension": ".py", 1158 | "mimetype": "text/x-python", 1159 | "name": "python", 1160 | "nbconvert_exporter": "python", 1161 | "pygments_lexer": "ipython3", 1162 | "version": "3.6.4" 1163 | } 1164 | }, 1165 | "nbformat": 4, 1166 | "nbformat_minor": 1 1167 | } 1168 | --------------------------------------------------------------------------------