├── runtime.txt
├── Cuadernos
│   ├── Datos
│   │   └── readme
│   ├── Imagenes
│   │   ├── readme
│   │   ├── GD_3D.png
│   │   ├── bird.jpg
│   │   ├── shell.jpg
│   │   ├── Padding.png
│   │   ├── ladron.png
│   │   ├── maxpool.png
│   │   ├── one_hot.png
│   │   ├── LSTM3-gate.png
│   │   ├── cnn-mnist.png
│   │   ├── coastline.png
│   │   ├── dilation.png
│   │   ├── keras_01.png
│   │   ├── maxpool.webp
│   │   ├── mlp-mnist.png
│   │   ├── perceptron.pdf
│   │   ├── Data-Science.jpg
│   │   ├── GAN_trainig.png
│   │   ├── Gan_esquema.png
│   │   ├── LSTM3-C-line.png
│   │   ├── LSTM3-chain.png
│   │   ├── RNC_Tipica.webp
│   │   ├── auto_encod_2.png
│   │   ├── latent_codes.png
│   │   ├── residual_NN.png
│   │   ├── time_series.png
│   │   ├── ANN_Capa_Oculta.pdf
│   │   ├── CNN_cats_dogs.png
│   │   ├── GRU_Arquitect.png
│   │   ├── LSTM3-SimpleRNN.png
│   │   ├── LSTM3-focus-C.png
│   │   ├── LSTM3-focus-f.png
│   │   ├── LSTM3-focus-i.png
│   │   ├── LSTM3-focus-o.png
│   │   ├── MnistExamples.png
│   │   ├── cnn-procedure.png
│   │   ├── filling_space.png
│   │   ├── mnist-samples.png
│   │   ├── reshape_input.png
│   │   ├── text_generation.png
│   │   ├── ANN_mnist_748_10.jpg
│   │   ├── ANN_mnist_784_10.pdf
│   │   ├── LSTM2-notation_1.png
│   │   ├── convolution_compu.jpg
│   │   ├── Convolution_discrete.png
│   │   ├── Convolution_kernel.png
│   │   ├── Data-Science-Process.jpg
│   │   ├── GRU_Arquit_interna.png
│   │   ├── 800px-Hilbert_curve.svg.png
│   │   ├── Re_Neuronal_capa_oculta.pdf
│   │   ├── reparametrizacion_truco.png
│   │   ├── text_generation_sampling.png
│   │   ├── text_generation_training.png
│   │   ├── Convolution_multiple_kernels.png
│   │   └── Analogy_of_Biological_Network_with_Artificial_Neural_Network.jpg
│   ├── Readme.txt
│   ├── output_18_0.png
│   ├── output_20_1.png
│   ├── output_6_0.png
│   ├── Hello_World_ML.md
│   ├── Auto_Encoder_clasifier.ipynb
│   ├── Intro_GRU.ipynb
│   ├── AutoDiff_tf_2.ipynb
│   ├── Auto_Encoders.ipynb
│   ├── Logistic_Reg_AutoDiff.ipynb
│   ├── Convolucion_Redes.ipynb
│   ├── Autodif_XOR_func.ipynb
│   └── Regresion_Lineal_tf_2.ipynb
├── Imagenes
│   ├── Imagenes.md
│   ├── SGBD.jpeg
│   ├── SGBD.png
│   ├── resol_n_n.jpeg
│   ├── abstraccion.jpeg
│   ├── autor_libro.jpeg
│   ├── enum_atrib.jpeg
│   ├── No_es_entidad.jpeg
│   ├── atributos_e_r.jpeg
│   ├── atributos_e_r.png
│   ├── boxplots_region.png
│   ├── instancias_n_m.jpeg
│   ├── muchos_muchos.jpeg
│   ├── recursiva_e_r.jpeg
│   ├── Relacion_uno_uno.jpeg
│   ├── dependencia_e_r.jpeg
│   ├── entidad_correcta.jpeg
│   ├── rel_ciudad_depto.jpeg
│   ├── relacion_binaria.jpeg
│   ├── trajectory_plots.png
│   ├── Relación_recursiva.jpeg
│   ├── Representacion_1_N.jpeg
│   ├── convenio_notacion.jpeg
│   ├── multiple_relacion.jpeg
│   ├── planta_perosna_1_1.jpeg
│   ├── uno_muchos_entidad.jpeg
│   ├── Relacion_entre_tablas.jpeg
│   ├── resol_muchos_muchos_2.jpeg
│   ├── Ecuación_cuadrática.svg.png
│   └── Grafica_relacion_binaria.jpeg
├── Codigos
│   ├── Readme.txt
│   ├── Keras_api_funcional_resnet
│   │   ├── readme
│   │   ├── sampler-cifar10-2.1.0.py
│   │   ├── cnn-functional-2.1.1.py
│   │   ├── cnn-y-network-2.1.2.py
│   │   ├── densenet-cifar10-2.4.1.py
│   │   └── resnet-cifar10-2.2.1.py
│   ├── Autoencoders
│   │   ├── readme
│   │   ├── autoencoder-mnist-3.2.1.py
│   │   ├── classifier-autoencoder-mnist-3.3.1.py
│   │   ├── denoising-autoencoder-mnist-3.3.1.py
│   │   ├── colorization-autoencoder-cifar10-3.4.1.py
│   │   └── autoencoder-2dim-mnist-3.2.2.py
│   ├── append_to_path.ipynb
│   └── optimizer.py
├── requirements.txt
├── README.md
├── Syllabus_Ciencia_de_datos.ipynb
└── Syllabus_Ciencia_Datos.ipynb
/runtime.txt:
--------------------------------------------------------------------------------
1 | python-3.7.*
2 |
--------------------------------------------------------------------------------
/Cuadernos/Datos/readme:
--------------------------------------------------------------------------------
1 | data
2 |
--------------------------------------------------------------------------------
/Imagenes/Imagenes.md:
-------------------------------------------------------------------------------- 1 | Images folder 2 | -------------------------------------------------------------------------------- /Cuadernos/Imagenes/readme: -------------------------------------------------------------------------------- 1 | The images are here 2 | -------------------------------------------------------------------------------- /Codigos/Readme.txt: -------------------------------------------------------------------------------- 1 | The programming modules go here. 2 | -------------------------------------------------------------------------------- /Cuadernos/Readme.txt: -------------------------------------------------------------------------------- 1 | The notebooks related to data science go here. 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jaxlib==0.1.* 2 | numpy==1.18.* 3 | matplotlib==3.* 4 | seaborn==0.8.* 5 | pandas 6 | -------------------------------------------------------------------------------- /Imagenes/SGBD.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/SGBD.jpeg -------------------------------------------------------------------------------- /Imagenes/SGBD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/SGBD.png -------------------------------------------------------------------------------- /Imagenes/resol_n_n.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/resol_n_n.jpeg -------------------------------------------------------------------------------- /Cuadernos/output_18_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/output_18_0.png -------------------------------------------------------------------------------- /Cuadernos/output_20_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/output_20_1.png -------------------------------------------------------------------------------- /Cuadernos/output_6_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/output_6_0.png -------------------------------------------------------------------------------- /Imagenes/abstraccion.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/abstraccion.jpeg -------------------------------------------------------------------------------- /Imagenes/autor_libro.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/autor_libro.jpeg -------------------------------------------------------------------------------- /Imagenes/enum_atrib.jpeg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/enum_atrib.jpeg -------------------------------------------------------------------------------- /Cuadernos/Imagenes/GD_3D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/GD_3D.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/bird.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/bird.jpg -------------------------------------------------------------------------------- /Cuadernos/Imagenes/shell.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/shell.jpg -------------------------------------------------------------------------------- /Imagenes/No_es_entidad.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/No_es_entidad.jpeg -------------------------------------------------------------------------------- /Imagenes/atributos_e_r.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/atributos_e_r.jpeg -------------------------------------------------------------------------------- /Imagenes/atributos_e_r.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/atributos_e_r.png -------------------------------------------------------------------------------- /Imagenes/boxplots_region.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/boxplots_region.png -------------------------------------------------------------------------------- /Imagenes/instancias_n_m.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/instancias_n_m.jpeg -------------------------------------------------------------------------------- /Imagenes/muchos_muchos.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/muchos_muchos.jpeg -------------------------------------------------------------------------------- /Imagenes/recursiva_e_r.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/recursiva_e_r.jpeg -------------------------------------------------------------------------------- /Cuadernos/Imagenes/Padding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/Padding.png -------------------------------------------------------------------------------- 
/Cuadernos/Imagenes/ladron.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/ladron.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/maxpool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/maxpool.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/one_hot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/one_hot.png -------------------------------------------------------------------------------- /Imagenes/Relacion_uno_uno.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/Relacion_uno_uno.jpeg -------------------------------------------------------------------------------- /Imagenes/dependencia_e_r.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/dependencia_e_r.jpeg -------------------------------------------------------------------------------- /Imagenes/entidad_correcta.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/entidad_correcta.jpeg -------------------------------------------------------------------------------- /Imagenes/rel_ciudad_depto.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/rel_ciudad_depto.jpeg -------------------------------------------------------------------------------- /Imagenes/relacion_binaria.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/relacion_binaria.jpeg -------------------------------------------------------------------------------- /Imagenes/trajectory_plots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/trajectory_plots.png -------------------------------------------------------------------------------- /Codigos/Keras_api_funcional_resnet/readme: -------------------------------------------------------------------------------- 1 | Code to understand the Keras functional API 2 | 3 | Code for ResNet residual networks 4 | -------------------------------------------------------------------------------- /Cuadernos/Imagenes/LSTM3-gate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/LSTM3-gate.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/cnn-mnist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/cnn-mnist.png 
-------------------------------------------------------------------------------- /Cuadernos/Imagenes/coastline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/coastline.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/dilation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/dilation.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/keras_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/keras_01.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/maxpool.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/maxpool.webp -------------------------------------------------------------------------------- /Cuadernos/Imagenes/mlp-mnist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/mlp-mnist.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/perceptron.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/perceptron.pdf -------------------------------------------------------------------------------- /Imagenes/Relación_recursiva.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/Relación_recursiva.jpeg -------------------------------------------------------------------------------- /Imagenes/Representacion_1_N.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/Representacion_1_N.jpeg -------------------------------------------------------------------------------- /Imagenes/convenio_notacion.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/convenio_notacion.jpeg -------------------------------------------------------------------------------- /Imagenes/multiple_relacion.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/multiple_relacion.jpeg -------------------------------------------------------------------------------- /Imagenes/planta_perosna_1_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/planta_perosna_1_1.jpeg -------------------------------------------------------------------------------- /Imagenes/uno_muchos_entidad.jpeg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/uno_muchos_entidad.jpeg -------------------------------------------------------------------------------- /Cuadernos/Imagenes/Data-Science.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/Data-Science.jpg -------------------------------------------------------------------------------- /Cuadernos/Imagenes/GAN_trainig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/GAN_trainig.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/Gan_esquema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/Gan_esquema.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/LSTM3-C-line.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/LSTM3-C-line.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/LSTM3-chain.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/LSTM3-chain.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/RNC_Tipica.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/RNC_Tipica.webp -------------------------------------------------------------------------------- /Cuadernos/Imagenes/auto_encod_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/auto_encod_2.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/latent_codes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/latent_codes.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/residual_NN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/residual_NN.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/time_series.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/time_series.png -------------------------------------------------------------------------------- /Imagenes/Relacion_entre_tablas.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/Relacion_entre_tablas.jpeg 
-------------------------------------------------------------------------------- /Imagenes/resol_muchos_muchos_2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/resol_muchos_muchos_2.jpeg -------------------------------------------------------------------------------- /Cuadernos/Imagenes/ANN_Capa_Oculta.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/ANN_Capa_Oculta.pdf -------------------------------------------------------------------------------- /Cuadernos/Imagenes/CNN_cats_dogs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/CNN_cats_dogs.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/GRU_Arquitect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/GRU_Arquitect.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/LSTM3-SimpleRNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/LSTM3-SimpleRNN.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/LSTM3-focus-C.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/LSTM3-focus-C.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/LSTM3-focus-f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/LSTM3-focus-f.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/LSTM3-focus-i.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/LSTM3-focus-i.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/LSTM3-focus-o.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/LSTM3-focus-o.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/MnistExamples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/MnistExamples.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/cnn-procedure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/cnn-procedure.png -------------------------------------------------------------------------------- 
/Cuadernos/Imagenes/filling_space.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/filling_space.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/mnist-samples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/mnist-samples.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/reshape_input.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/reshape_input.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/text_generation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/text_generation.png -------------------------------------------------------------------------------- /Imagenes/Ecuación_cuadrática.svg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/Ecuación_cuadrática.svg.png -------------------------------------------------------------------------------- /Imagenes/Grafica_relacion_binaria.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Imagenes/Grafica_relacion_binaria.jpeg -------------------------------------------------------------------------------- /Cuadernos/Imagenes/ANN_mnist_748_10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/ANN_mnist_748_10.jpg -------------------------------------------------------------------------------- /Cuadernos/Imagenes/ANN_mnist_784_10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/ANN_mnist_784_10.pdf -------------------------------------------------------------------------------- /Cuadernos/Imagenes/LSTM2-notation_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/LSTM2-notation_1.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/convolution_compu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/convolution_compu.jpg -------------------------------------------------------------------------------- /Cuadernos/Imagenes/Convolution_discrete.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/Convolution_discrete.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/Convolution_kernel.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/Convolution_kernel.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/Data-Science-Process.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/Data-Science-Process.jpg -------------------------------------------------------------------------------- /Cuadernos/Imagenes/GRU_Arquit_interna.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/GRU_Arquit_interna.png -------------------------------------------------------------------------------- /Codigos/Autoencoders/readme: -------------------------------------------------------------------------------- 1 | Autoencoder code. MNIST example 2 | Applications to noise removal in images 3 | Colorization of black-and-white images 4 | -------------------------------------------------------------------------------- /Cuadernos/Imagenes/800px-Hilbert_curve.svg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/800px-Hilbert_curve.svg.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/Re_Neuronal_capa_oculta.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/Re_Neuronal_capa_oculta.pdf -------------------------------------------------------------------------------- /Cuadernos/Imagenes/reparametrizacion_truco.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/reparametrizacion_truco.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/text_generation_sampling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/text_generation_sampling.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/text_generation_training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/text_generation_training.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/Convolution_multiple_kernels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/Convolution_multiple_kernels.png -------------------------------------------------------------------------------- /Cuadernos/Imagenes/Analogy_of_Biological_Network_with_Artificial_Neural_Network.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AprendizajeProfundo/Ciencia-de-Datos/HEAD/Cuadernos/Imagenes/Analogy_of_Biological_Network_with_Artificial_Neural_Network.jpg
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Ciencia de Datos
2 | A place to upload, contribute, and share materials designed for the deep, universal learning of data science.
3 |
4 | [Syllabus Ciencia de Datos](./Syllabus_Ciencia_de_datos.ipynb)
5 |
6 | Relational databases [[nbviewer]](https://nbviewer.jupyter.org/github/AprendizajeProfundo/Ciencia-de-Datos/blob/master/Cuadernos/Bases_Datos_Relacionales.ipynb) [[HTML]](https://htmlpreview.github.io/?https://github.com/AprendizajeProfundo/Ciencia-de-Datos/blob/master/Cuadernos/Bases_Datos_Relacionales.html)
7 |
8 | Tensors and probability [HTML](https://htmlpreview.github.io/?https://github.com/AprendizajeProfundo/Ciencia-de-Datos/blob/master/Cuadernos/Tensor_Distribucion_Prob.html)
9 |
--------------------------------------------------------------------------------
/Codigos/append_to_path.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 2,
6 |    "metadata": {},
7 |    "outputs": [
8 |     {
9 |      "name": "stdout",
10 |      "output_type": "stream",
11 |      "text": [
12 |       "/home/alvaro/modules\n",
13 |       "/home/alvaro/anaconda3/envs/m_learning/lib/python37.zip\n",
14 |       "/home/alvaro/anaconda3/envs/m_learning/lib/python3.7\n",
15 |       "/home/alvaro/anaconda3/envs/m_learning/lib/python3.7/lib-dynload\n",
16 |       "\n",
17 |       "/home/alvaro/anaconda3/envs/m_learning/lib/python3.7/site-packages\n",
18 |       "/home/alvaro/anaconda3/envs/m_learning/lib/python3.7/site-packages/IPython/extensions\n",
19 |       "/home/alvaro/.ipython\n"
20 |      ]
21 |     }
22 |    ],
23 |    "source": [
24 |     "import sys\n",
25 |     "# to append an element to the path\n",
26 |     "#sys.path.append('/home/alvaro/modules')\n",
27 |     "# to remove an element from the path\n",
28 |     "#sys.path.remove('/home/alvaro/Modules')\n",
29 |     "\n",
30 |     "# list the elements in path\n",
31 |     "for p in sys.path:\n",
32 |     "    print(p)"
33 |    ]
34 |   },
35 |   {
36 |    "cell_type": "code",
37 |    "execution_count": 9,
38 |    "metadata": {},
39 |    "outputs": [],
40 |    "source": []
41 |   },
42 |   {
43 |    "cell_type": "code",
44 |    "execution_count": null,
45 |    "metadata": {},
46 |    "outputs": [],
47 |    "source": []
48 |   }
49 |  ],
50 |  "metadata": {
51 |   "kernelspec": {
52 |    "display_name": "Python 3",
53 |    "language": "python",
54 |    "name": "python3"
55 |   },
56 |   "language_info": {
57 |    "codemirror_mode": {
58 |     "name": "ipython",
59 |     "version": 3
60 |    },
61 |    "file_extension": ".py",
62 |    "mimetype": "text/x-python",
63 |    "name": "python",
64 |    "nbconvert_exporter": "python",
65 |    "pygments_lexer": "ipython3",
66 |    "version": "3.7.4"
67 |   }
68 |  },
69 |  "nbformat": 4,
70 |  "nbformat_minor": 4
71 | }
72 |
--------------------------------------------------------------------------------
/Codigos/Keras_api_funcional_resnet/sampler-cifar10-2.1.0.py:
--------------------------------------------------------------------------------
1 | '''Demonstrates how to sample and plot CIFAR10 images
2 | using Keras API
3 | '''
4 |
5 | from __future__ import absolute_import
6 | from __future__ import division
7 | from __future__ import print_function
8 |
9 | # numpy package
10 | import numpy as np
11 | import math
12 |
13 | # keras cifar10 dataset module
14 | from keras.datasets import cifar10
15 |
16 | # for plotting
17 | import matplotlib.pyplot as plt
18 |
19 |
20 | # load dataset
21 | (x_train, y_train), (x_test, y_test) = cifar10.load_data()
22 |
23 | class_id = 0
24 | class_count = 0
25 | images = None
26 | for i in range(100):
27 |     while True:
28 |         index = np.random.randint(0, x_train.shape[0], size=1)
29 |         image = x_train[index]
30 |         if y_train[index] == class_id:
31 |             break
32 |
33 |     if images is None:
34 |         images = image
35 |     else:
36 |         images = np.concatenate([images, image], axis=0)
37 |     class_count += 1
38 |     if class_count == 10:
39 |         class_id += 1
40 |         class_count = 0
41 |
42 | print(images.shape)
43 |
44 | plt.figure(figsize=(10, 10))
45 | num_images = images.shape[0]
46 | image_size = images.shape[1]
47 | rows = int(math.sqrt(num_images))
48 | row_names = ['{}'.format(row) for row in ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']]
49 | index = 0
50 | for i in range(num_images):
51 |     ax = plt.subplot(rows, rows, i + 1)
52 |     image = images[i, :, :, :]
53 |     image = np.reshape(image, [image_size, image_size, 3])
54 |     plt.imshow(image)
55 |     # plt.axis('off')
56 |     ax.set_xticklabels([])
57 |     ax.set_yticklabels([])
58 |     ax.grid(False)
59 |     ax.xaxis.set_ticks_position('none')
60 |     ax.yaxis.set_ticks_position('none')
61 |     if (i % rows) == 0:
62 |         ax.set_ylabel(row_names[index], rotation=45, size='large')
63 |         ax.yaxis.labelpad = 20
64 |         print(row_names[index])
65 |         index += 1
66 |
67 | # plt.tight_layout()
68 | plt.savefig("cifar10-samples.png")
69 | plt.show()
70 | plt.close('all')
71 |
--------------------------------------------------------------------------------
/Codigos/Keras_api_funcional_resnet/cnn-functional-2.1.1.py:
--------------------------------------------------------------------------------
1 | ''' Using Functional API to build CNN
2 |
3 | ~99.3% test accuracy
4 | '''
5 |
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import numpy as np
11 | from tensorflow.keras.layers import Dense, Dropout, Input
12 | from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten
13 | from tensorflow.keras.models import Model
14 | from tensorflow.keras.datasets import mnist
15 | from tensorflow.keras.utils import to_categorical
16 |
17 |
18 | # load MNIST dataset
19 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
20 |
21 | # from sparse label to categorical
22 | num_labels = len(np.unique(y_train))
23 | y_train = to_categorical(y_train)
24 | y_test = to_categorical(y_test)
25 |
26 | # reshape and normalize input images
27 | image_size = x_train.shape[1]
28 | x_train = np.reshape(x_train,[-1, image_size, image_size, 1])
29 | x_test = np.reshape(x_test,[-1, image_size, image_size, 1])
30 | x_train = x_train.astype('float32') / 255
31 | x_test = x_test.astype('float32') / 255
32 |
33 | # network parameters
34 | input_shape = (image_size, image_size, 1)
35 | batch_size = 128
36 | kernel_size = 3
37 | filters = 64
38 | dropout = 0.3
39 |
40 | # use functional API to build cnn layers
41 | inputs = Input(shape=input_shape)
42 | y = Conv2D(filters=filters,
43 |            kernel_size=kernel_size,
44 |            activation='relu')(inputs)
45 | y = MaxPooling2D()(y)
46 | y = Conv2D(filters=filters,
47 |            kernel_size=kernel_size,
48 |            activation='relu')(y)
49 | y = MaxPooling2D()(y)
50 | y = Conv2D(filters=filters,
51 |            kernel_size=kernel_size,
52 |            activation='relu')(y)
53 | # image to vector before connecting to dense layer
54 | y = Flatten()(y)
55 | # dropout regularization
56 | y = Dropout(dropout)(y)
57 | outputs = Dense(num_labels, activation='softmax')(y)
58 |
59 | # build the model by supplying inputs/outputs
60 | model = Model(inputs=inputs, outputs=outputs)
61 | # network model in text
62 | model.summary()
63 |
64 | # classifier loss, Adam optimizer, classifier accuracy
65 | model.compile(loss='categorical_crossentropy',
66 |               optimizer='adam',
67 |               metrics=['accuracy'])
68 |
69 | # train the model with input images and labels
70 | model.fit(x_train,
71 |           y_train,
72 |           validation_data=(x_test, y_test),
73 |           epochs=20,
74 |           batch_size=batch_size)
75 |
76 | # model accuracy on test dataset
77 | score = model.evaluate(x_test,
78 |                        y_test,
79 |                        batch_size=batch_size,
80 |                        verbose=0)
81 | print("\nTest accuracy: %.1f%%" % (100.0 * score[1]))
82 |
--------------------------------------------------------------------------------
/Codigos/Keras_api_funcional_resnet/cnn-y-network-2.1.2.py:
--------------------------------------------------------------------------------
1 | '''Implements a Y-Network using Functional API
2 |
3 | ~99.3% test accuracy
4 | '''
5 |
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import numpy as np
11 |
12 | from tensorflow.keras.layers import Dense, Dropout, Input
13 | from tensorflow.keras.layers import Conv2D, MaxPooling2D
14 | from tensorflow.keras.layers import Flatten, concatenate
15 | from tensorflow.keras.models import Model
16 | from tensorflow.keras.datasets import mnist
17 | from tensorflow.keras.utils import to_categorical
18 | from tensorflow.keras.utils import plot_model
19 |
20 | # load MNIST dataset
21 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
22 |
23 | # from sparse label to categorical
24 | num_labels = len(np.unique(y_train))
25 | y_train = to_categorical(y_train)
26 | y_test = to_categorical(y_test)
27 |
28 | # reshape and normalize input images
29 | image_size = x_train.shape[1]
30 | x_train = np.reshape(x_train,[-1, image_size, image_size, 1])
31 | x_test = np.reshape(x_test,[-1, image_size, image_size, 1])
32 | x_train = x_train.astype('float32') / 255
33 | x_test = x_test.astype('float32') / 255
34 |
35 | # network parameters
36 | input_shape = (image_size, image_size, 1)
37 | batch_size = 32
38 | kernel_size = 3
39 | dropout = 0.4
40 | n_filters = 32
41 |
42 | # left branch of Y network
43 | left_inputs = Input(shape=input_shape)
44 | x = left_inputs
45 | filters = n_filters
46 | # 3 layers of Conv2D-Dropout-MaxPooling2D
47 | # number of filters doubles after each layer (32-64-128)
48 | for i in range(3):
49 |     x = Conv2D(filters=filters,
50 |                kernel_size=kernel_size,
51 |                padding='same',
52 |                activation='relu')(x)
53 |     x = Dropout(dropout)(x)
54 |     x = MaxPooling2D()(x)
55 |     filters *= 2
56 |
57 | # right branch of Y network
58 | right_inputs = Input(shape=input_shape)
59 | y = right_inputs
60 | filters = n_filters
61 | # 3 layers of Conv2D-Dropout-MaxPooling2D
62 | # number of filters doubles after each layer (32-64-128)
63 | for i in range(3):
64 |     y = Conv2D(filters=filters,
65 |                kernel_size=kernel_size,
66 |                padding='same',
67 |                activation='relu',
68 |                dilation_rate=2)(y)
69 |     y = Dropout(dropout)(y)
70 |     y = MaxPooling2D()(y)
71 |     filters *= 2
72 |
73 | # merge left and right branches outputs
74 | y = concatenate([x, y])
75 | # feature maps to vector before connecting to Dense
76 | y = Flatten()(y)
77 | y = Dropout(dropout)(y)
78 | outputs = Dense(num_labels, activation='softmax')(y)
79 |
80 | # build the model in functional API
81 | model = Model([left_inputs, right_inputs], outputs)
82 | # verify the model using graph
83 | plot_model(model, to_file='cnn-y-network.png', show_shapes=True)
84 | # verify the model using layer text description
85 | model.summary()
86 |
87 | # classifier loss, Adam optimizer, classifier accuracy
88 | model.compile(loss='categorical_crossentropy',
89 |               optimizer='adam',
90 |               metrics=['accuracy'])
91 |
92 | # train the model with input images and labels
93 | model.fit([x_train, x_train],
94 |           y_train,
95 |           validation_data=([x_test, x_test], y_test),
96 |           epochs=20,
97 |           batch_size=batch_size)
98 |
99 | # model accuracy on test dataset
100 | score = model.evaluate([x_test, x_test],
101 |                        y_test,
102 |                        batch_size=batch_size,
103 |                        verbose=0)
104 | print("\nTest accuracy: %.1f%%" % (100.0 * score[1]))
105 |
--------------------------------------------------------------------------------
/Codigos/optimizer.py:
--------------------------------------------------------------------------------
1 | import numpy as onp
2 |
3 | class optim():
4 |     ### learning rate schedules
5 |     def make_schedule(self,scalar_or_schedule):
6 |         #if callable(scalar_or_schedule):
7 |         if scalar_or_schedule == 'exponential_decay':
8 |             return self.exponential_decay
9 |         elif scalar_or_schedule =='inverse_time_decay':
10 |             return self.inverse_time_decay
11 |         elif scalar_or_schedule =='polynomial_decay':
12 |             return self.polynomial_decay
13 |         elif scalar_or_schedule =='piecewise_constant':
14 |             return self.piecewise_constant
15 |         #
16 |         elif onp.ndim(scalar_or_schedule) == 0:
17 |             return self.constant(scalar_or_schedule)
18 |         else:
19 |             raise TypeError(type(scalar_or_schedule))
20 |
21 |     def constant(self,step_size):
22 |         def schedule(i):
23 |             return step_size
24 |         return schedule
25 |
26 |     def exponential_decay(self,step_size, decay_steps, decay_rate):
27 |         def schedule(i):
28 |             return step_size * decay_rate ** (i / decay_steps)
29 |         return schedule
30 |
31 |     def inverse_time_decay(self,step_size, decay_steps, decay_rate, staircase=False):
32 |         if staircase:
33 |             def schedule(i):
34 |                 return step_size / (1 + decay_rate * onp.floor(i / decay_steps))
35 |         else:
36 |             def schedule(i):
37 |                 return step_size / (1 + decay_rate * i / decay_steps)
38 |         return schedule
39 |
40 |
41 |     def polynomial_decay(self, step_size, decay_steps, final_step_size, power=1.0):
42 |         def schedule(step_num):
43 |             step_num = onp.minimum(step_num, decay_steps)
44 |             step_mult = (1 - step_num / decay_steps) ** power
45 |             return step_mult * (step_size - final_step_size) + final_step_size
46 |         return schedule
47 |
48 |
49 |     def piecewise_constant(self, boundaries, values):
50 |         boundaries = onp.array(boundaries)
51 |         values = onp.array(values)
52 |         if not boundaries.ndim == values.ndim == 1:
53 |             raise ValueError("boundaries and values must be sequences")
54 |         if not boundaries.shape[0] == values.shape[0] - 1:
55 |             raise ValueError("values length must be one longer than boundaries length")
56 |
57 |         def schedule(i):
58 |             return values[onp.sum(i > boundaries)]
59 |         return schedule
60 |
61 |
62 |     ## main methods of the class
63 |     def __init__(self,initial_param, step_size = 1):
64 |         self.param = initial_param
65 |         self.step_size = self.make_schedule(step_size)
66 |         self.state = [self.param, self.step_size]
67 |
68 |     def set_params(self):
69 |         # to be overloaded
70 |         pass
71 |
72 |     def update(self,i, gradient):
73 |         # to be overloaded
74 |         pass
75 |
76 |     def get_params(self):
77 |         return self.param
78 |
79 |     def get_state(self):
80 |         # to be overloaded
81 |         return self.state
82 |
83 |
84 |
85 |
86 |
87 |
88 | # end class optimizer
89 |
90 |
91 |
92 |
93 | class adam(optim):
94 |     def __init__(self,initial_param,alpha=0.001):
95 |         optim.__init__(self,initial_param, alpha)
96 |         self.alpha = self.step_size  # a schedule function built by make_schedule
97 |         self.beta_1 = 0.9
98 |         self.beta_2 = 0.999
99 |         self.eps =1.0e-8
100 |         #self.param = initial_param
101 |         self.m = onp.zeros_like(initial_param)
102 |         self.v = onp.zeros_like(initial_param)
103 |
104 |     def set_params(self, alpha=0.001,beta_1=0.9, beta_2=0.999, eps =1.0e-8):
105 |         self.alpha = alpha
106 |         self.beta_1 = beta_1
107 |         self.beta_2 = beta_2
108 |         self.eps = eps
109 |
110 |     def update(self,i, gradient):
111 |         self.m = (1 - self.beta_1) * gradient + self.beta_1 * self.m # First moment estimate.
112 |         self.v = (1 - self.beta_2) * (gradient ** 2) + self.beta_2 * self.v # Second moment estimate.
113 |         mhat = self.m / (1 - self.beta_1 ** (i + 1)) # Bias correction.
114 |         vhat = self.v / (1 - self.beta_2 ** (i + 1))
115 |         # self.alpha is a schedule (callable) by default, or a scalar after set_params
116 |         alpha = self.alpha(i) if callable(self.alpha) else self.alpha
117 |         self.param = self.param - alpha * mhat / (onp.sqrt(vhat) + self.eps)
118 |
119 |
120 |     def get_state(self):
121 |         self.state = [self.param,self.m,self.v, self.step_size]
122 |         return self.state
123 |
124 | # end class adam
--------------------------------------------------------------------------------
/Codigos/Autoencoders/autoencoder-mnist-3.2.1.py:
--------------------------------------------------------------------------------
1 | '''Example of autoencoder model on MNIST dataset
2 |
3 | This autoencoder has a modular design. The encoder, decoder and autoencoder
4 | are 3 models that share weights. For example, after training the
5 | autoencoder, the encoder can be used to generate latent vectors
6 | of input data for low-dim visualization like PCA or TSNE.
7 | '''
8 |
9 | from __future__ import absolute_import
10 | from __future__ import division
11 | from __future__ import print_function
12 |
13 | from tensorflow.keras.layers import Dense, Input
14 | from tensorflow.keras.layers import Conv2D, Flatten
15 | from tensorflow.keras.layers import Reshape, Conv2DTranspose
16 | from tensorflow.keras.models import Model
17 | from tensorflow.keras.datasets import mnist
18 | from tensorflow.keras.utils import plot_model
19 | from tensorflow.keras import backend as K
20 |
21 | import numpy as np
22 | import matplotlib.pyplot as plt
23 |
24 | # load MNIST dataset
25 | (x_train, _), (x_test, _) = mnist.load_data()
26 |
27 | # reshape to (28, 28, 1) and normalize input images
28 | image_size = x_train.shape[1]
29 | x_train = np.reshape(x_train, [-1, image_size, image_size, 1])
30 | x_test = np.reshape(x_test, [-1, image_size, image_size, 1])
31 | x_train = x_train.astype('float32') / 255
32 | x_test = x_test.astype('float32') / 255
33 |
34 | # network parameters
35 | input_shape = (image_size, image_size, 1)
36 | batch_size = 32
37 | kernel_size = 3
38 | latent_dim = 16
39 | # encoder/decoder number of CNN layers and filters per layer
40 | layer_filters = [32, 64]
41 |
42 | # build the autoencoder model
43 | # first build the encoder model
44 | inputs = Input(shape=input_shape, name='encoder_input')
45 | x = inputs
46 | # stack of Conv2D(32)-Conv2D(64)
47 | for filters in layer_filters:
48 |     x = Conv2D(filters=filters,
49 |                kernel_size=kernel_size,
50 |                activation='relu',
51 |                strides=2,
52 |                padding='same')(x)
53 |
54 | # shape info needed to build decoder model
55 | # so we don't do hand computation
56 | # the input to the decoder's first
57 | # Conv2DTranspose will have this shape
58 | # shape is (7, 7, 64) which is processed by
59 | # the decoder back to (28, 28, 1)
60 | shape = K.int_shape(x)
61 |
62 | # generate latent vector
63 | x = Flatten()(x)
64 | latent = Dense(latent_dim, name='latent_vector')(x)
65 |
66 | # instantiate encoder model
67 | encoder = Model(inputs,
68 |                 latent,
69 |                 name='encoder')
70 | encoder.summary()
71 | plot_model(encoder,
72 |            to_file='encoder.png',
73 |            show_shapes=True)
74 |
75 | # build the decoder model
76 | latent_inputs = Input(shape=(latent_dim,), name='decoder_input')
77 | # use the shape (7, 7, 64) that was earlier saved
78 | x = Dense(shape[1] * shape[2] * shape[3])(latent_inputs)
79 | # from vector to suitable shape for transposed conv
80 | x = Reshape((shape[1], shape[2], shape[3]))(x)
81 |
82 | # stack of Conv2DTranspose(64)-Conv2DTranspose(32)
83 | for filters in layer_filters[::-1]:
84 |     x = Conv2DTranspose(filters=filters,
85 |                         kernel_size=kernel_size,
86 |                         activation='relu',
87 |                         strides=2,
88 |                         padding='same')(x)
89 |
90 | # reconstruct the input
91 | outputs = Conv2DTranspose(filters=1,
92 |                           kernel_size=kernel_size,
93 |                           activation='sigmoid',
94 |                           padding='same',
95 |                           name='decoder_output')(x)
96 |
97 | # instantiate decoder model
98 | decoder = Model(latent_inputs, outputs, name='decoder')
99 | decoder.summary()
100 | plot_model(decoder, to_file='decoder.png', show_shapes=True)
101 |
102 | # autoencoder = encoder + decoder
103 | # instantiate autoencoder model
104 | autoencoder = Model(inputs,
105 |                     decoder(encoder(inputs)),
106 |                     name='autoencoder')
107 | autoencoder.summary()
108 | plot_model(autoencoder,
109 |            to_file='autoencoder.png',
110 |            show_shapes=True)
111 |
112 | # Mean Square Error (MSE) loss function, Adam optimizer
113 | autoencoder.compile(loss='mse', optimizer='adam')
114 |
115 | # train the autoencoder
116 | autoencoder.fit(x_train,
117 |                 x_train,
118 |                 validation_data=(x_test, x_test),
119 |                 epochs=1,
120 |                 batch_size=batch_size)
121 |
122 | # predict the autoencoder output from test data
123 | x_decoded = autoencoder.predict(x_test)
124 |
125 | # display the 1st 8 test input and decoded images
126 | imgs = np.concatenate([x_test[:8], x_decoded[:8]])
127 | imgs = imgs.reshape((4, 4, image_size, image_size))
128 | imgs = np.vstack([np.hstack(i) for i in imgs])
129 | plt.figure()
130 | plt.axis('off')
131 | plt.title('Input: 1st 2 rows, Decoded: last 2 rows')
132 | plt.imshow(imgs, interpolation='none', cmap='gray')
133 | plt.savefig('input_and_decoded.png')
134 | plt.show()
135 |
--------------------------------------------------------------------------------
/Codigos/Autoencoders/classifier-autoencoder-mnist-3.3.1.py:
--------------------------------------------------------------------------------
1 | ''' Autoencoder with Classifier
2 | '''
3 |
4 | from __future__ import absolute_import
5 | from __future__ import division
6 | from __future__ import print_function
7 | import numpy as np
8 | import keras
9 | from keras.layers import Activation, Dense, Dropout, Input, BatchNormalization
10 | from keras.layers import Conv2D, MaxPooling2D, Flatten
11 | from keras.layers import Reshape, Conv2DTranspose, UpSampling2D
12 | from keras.models import Model
13 | from keras.datasets import mnist
14 | from keras.utils import to_categorical
15 | from keras.utils import plot_model
16 | from keras.callbacks import TensorBoard
17 | from keras import backend as K
18 |
19 | import math
20 | import matplotlib.pyplot as plt
21 |
22 | # MNIST dataset
23 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
24 |
25 | num_labels = np.amax(y_train) + 1
26 | y_train = to_categorical(y_train)
27 | y_test = to_categorical(y_test)
28 |
29 | image_size = x_train.shape[1]
30 | x_train = np.reshape(x_train, [-1, image_size, image_size, 1])
31 | x_test = np.reshape(x_test, [-1, image_size, image_size, 1])
32 | x_train = x_train.astype('float32') / 255
33 | x_test = x_test.astype('float32') / 255
34 |
35 | # Network parameters
36 | input_shape = (image_size, image_size, 1)
37 | batch_size = 128
38 | kernel_size = 3
39 | pool_size = 2
40 | dropout = 0.4
41 | filters = 16
42 | latent_dim = 16
43 |
44 | # Build the autoencoder model
45 | # First build the encoder model
46 | inputs = Input(shape=input_shape, name='encoder_input')
47 | x = inputs
48 | # Stack of BN-ReLU-Conv2D-MaxPooling blocks
49 | for i in range(2):
50 |     x = BatchNormalization()(x)
51 |     x = Activation('relu')(x)
52 |     filters = filters * 2
53 |     x = Conv2D(filters=filters, kernel_size=kernel_size,
54 |                padding='same')(x)
55 |     x = MaxPooling2D()(x)
56 |
57 | # Shape info needed to build decoder model
58 | shape = x.shape.as_list()
59 |
60 | # Generate a 16-dim latent vector
61 | x = Flatten()(x)
62 | latent = Dense(latent_dim, name='latent_vector')(x)
63 |
64 | # Instantiate encoder model
65 | encoder = Model(inputs, latent, name='encoder')
66 | encoder.summary()
67 | plot_model(encoder, to_file='classifier-encoder.png', show_shapes=True)
68 |
69 | # Build the Decoder model
70 | latent_inputs = Input(shape=(latent_dim,), name='decoder_input')
71 | x = Dense(shape[1]*shape[2]*shape[3])(latent_inputs)
72 | x = Reshape((shape[1], shape[2], shape[3]))(x)
73 |
74 | # Stack of BN-ReLU-Transposed Conv2D-UpSampling blocks
75 | for i in range(2):
76 |     x = BatchNormalization()(x)
77 |     x = Activation('relu')(x)
78 |     x = Conv2DTranspose(filters=filters, kernel_size=kernel_size,
79 |                         padding='same')(x)
80 |     x = UpSampling2D()(x)
81 |     filters = int(filters / 2)
82 |
83 | x = Conv2DTranspose(filters=1, kernel_size=kernel_size,
84 |                     padding='same')(x)
85 |
86 | outputs = Activation('sigmoid', name='decoder_output')(x)
87 |
88 | # Instantiate Decoder model
89 | decoder = Model(latent_inputs, outputs, name='decoder')
90 | decoder.summary()
91 | plot_model(decoder, to_file='classifier-decoder.png', show_shapes=True)
92 |
93 | # Classifier Model
94 | latent_inputs = Input(shape=(latent_dim,), name='classifier_input')
95 | x = Dense(512)(latent_inputs)
96 | x = Activation('relu')(x)
97 | x = Dropout(0.4)(x)
98 | x = Dense(256)(x)
99 | x = Activation('relu')(x)
100 | x = Dropout(0.4)(x)
101 | x = Dense(num_labels)(x)
102 | classifier_outputs = Activation('softmax', name='classifier_output')(x)
103 | classifier = Model(latent_inputs, classifier_outputs, name='classifier')
104 | classifier.summary()
105 | plot_model(classifier, to_file='classifier.png', show_shapes=True)
106 |
107 | # Autoencoder = Encoder + Classifier/Decoder
108 | # Instantiate autoencoder model
109 | autoencoder = Model(inputs,
110 |                     [classifier(encoder(inputs)), decoder(encoder(inputs))],
111 |                     name='autodecoder')
112 | autoencoder.summary()
113 | plot_model(autoencoder, to_file='classifier-autoencoder.png', show_shapes=True)
114 |
115 | # Categorical cross-entropy (classifier) + MSE (decoder) losses, Adam optimizer
116 | autoencoder.compile(loss=['categorical_crossentropy', 'mse'],
117 |                     optimizer='adam',
118 |                     metrics=['accuracy', 'mse'])
119 |
120 | # Train the autoencoder for 2 epochs
121 | autoencoder.fit(x_train, [y_train, x_train],
122 |                 validation_data=(x_test, [y_test, x_test]),
123 |                 epochs=2, batch_size=batch_size,
124 |                 callbacks=[TensorBoard(log_dir='/tmp/autoencoder')])
125 |
126 | # Predict the Autoencoder output from test data
127 | y_predicted, x_decoded = autoencoder.predict(x_test)
128 | print(np.argmax(y_predicted[:8], axis=1))
129 |
130 | # Display the 1st 8 input and decoded images
131 | imgs = np.concatenate([x_test[:8], x_decoded[:8]])
132 | imgs = imgs.reshape((4, 4, image_size, image_size))
133 | imgs = np.vstack([np.hstack(i) for i in imgs])
134 | plt.figure()
135 | plt.axis('off')
136 | plt.title('Input: 1st 2 rows, Decoded: last 2 rows')
137 | plt.imshow(imgs, interpolation='none', cmap='gray')
138 | plt.savefig('input_and_decoded.png')
139 | plt.show()
140 |
141 | # latent = encoder.predict(x_test)
142 | # print("Variance:", K.var(latent))
143 |
--------------------------------------------------------------------------------
/Codigos/Autoencoders/denoising-autoencoder-mnist-3.3.1.py:
--------------------------------------------------------------------------------
1 | '''Trains a denoising autoencoder on MNIST dataset.
2 |
3 | Denoising is one of the classic applications of autoencoders.
4 | The denoising process removes unwanted noise that corrupted the
5 | true data.
6 |
7 | Noise + Data ---> Denoising Autoencoder ---> Data
8 |
9 | Given a training dataset of corrupted data as input and
10 | true data as output, a denoising autoencoder can recover the
11 | hidden structure to generate clean data.
12 |
13 | This example has a modular design. The encoder, decoder and autoencoder
14 | are 3 models that share weights. For example, after training the
15 | autoencoder, the encoder can be used to generate latent vectors
16 | of input data for low-dim visualization like PCA or TSNE.
17 | ''' 18 | 19 | from __future__ import absolute_import 20 | from __future__ import division 21 | from __future__ import print_function 22 | 23 | from tensorflow.keras.layers import Dense, Input 24 | from tensorflow.keras.layers import Conv2D, Flatten 25 | from tensorflow.keras.layers import Reshape, Conv2DTranspose 26 | from tensorflow.keras.models import Model 27 | from tensorflow.keras import backend as K 28 | from tensorflow.keras.datasets import mnist 29 | import numpy as np 30 | import matplotlib.pyplot as plt 31 | from PIL import Image 32 | 33 | np.random.seed(1337) 34 | 35 | # load MNIST dataset 36 | (x_train, _), (x_test, _) = mnist.load_data() 37 | 38 | # reshape to (28, 28, 1) and normalize input images 39 | image_size = x_train.shape[1] 40 | x_train = np.reshape(x_train, [-1, image_size, image_size, 1]) 41 | x_test = np.reshape(x_test, [-1, image_size, image_size, 1]) 42 | x_train = x_train.astype('float32') / 255 43 | x_test = x_test.astype('float32') / 255 44 | 45 | # generate corrupted MNIST images by adding noise with normal dist 46 | # centered at 0.5 and std=0.5 47 | noise = np.random.normal(loc=0.5, scale=0.5, size=x_train.shape) 48 | x_train_noisy = x_train + noise 49 | noise = np.random.normal(loc=0.5, scale=0.5, size=x_test.shape) 50 | x_test_noisy = x_test + noise 51 | 52 | # adding noise may exceed normalized pixel values>1.0 or <0.0 53 | # clip pixel values >1.0 to 1.0 and <0.0 to 0.0 54 | x_train_noisy = np.clip(x_train_noisy, 0., 1.) 55 | x_test_noisy = np.clip(x_test_noisy, 0., 1.) 56 | 57 | # network parameters 58 | input_shape = (image_size, image_size, 1) 59 | batch_size = 32 60 | kernel_size = 3 61 | latent_dim = 16 62 | # encoder/decoder number of CNN layers and filters per layer 63 | layer_filters = [32, 64] 64 | 65 | # build the autoencoder model 66 | # first build the encoder model 67 | inputs = Input(shape=input_shape, name='encoder_input') 68 | x = inputs 69 | 70 | # stack of Conv2D(32)-Conv2D(64) 71 | for filters in layer_filters: 72 | x = Conv2D(filters=filters, 73 | kernel_size=kernel_size, 74 | strides=2, 75 | activation='relu', 76 | padding='same')(x) 77 | 78 | # shape info needed to build decoder model so we don't do hand computation 79 | # the input to the decoder's first Conv2DTranspose will have this shape 80 | # shape is (7, 7, 64) which can be processed by the decoder back to (28, 28, 1) 81 | shape = K.int_shape(x) 82 | 83 | # generate the latent vector 84 | x = Flatten()(x) 85 | latent = Dense(latent_dim, name='latent_vector')(x) 86 | 87 | # instantiate encoder model 88 | encoder = Model(inputs, latent, name='encoder') 89 | encoder.summary() 90 | 91 | # build the decoder model 92 | latent_inputs = Input(shape=(latent_dim,), name='decoder_input') 93 | # use the shape (7, 7, 64) that was earlier saved 94 | x = Dense(shape[1] * shape[2] * shape[3])(latent_inputs) 95 | # from vector to suitable shape for transposed conv 96 | x = Reshape((shape[1], shape[2], shape[3]))(x) 97 | 98 | # stack of Conv2DTranspose(64)-Conv2DTranspose(32) 99 | for filters in layer_filters[::-1]: 100 | x = Conv2DTranspose(filters=filters, 101 | kernel_size=kernel_size, 102 | strides=2, 103 | activation='relu', 104 | padding='same')(x) 105 | 106 | # reconstruct the denoised input 107 | outputs = Conv2DTranspose(filters=1, 108 | kernel_size=kernel_size, 109 | padding='same', 110 | activation='sigmoid', 111 | name='decoder_output')(x) 112 | 113 | # instantiate decoder model 114 | decoder = Model(latent_inputs, outputs, name='decoder') 115 | decoder.summary() 116 | 117 | 
# autoencoder = encoder + decoder 118 | # instantiate autoencoder model 119 | autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder') 120 | autoencoder.summary() 121 | 122 | # Mean Square Error (MSE) loss function, Adam optimizer 123 | autoencoder.compile(loss='mse', optimizer='adam') 124 | 125 | # train the autoencoder 126 | autoencoder.fit(x_train_noisy, 127 | x_train, 128 | validation_data=(x_test_noisy, x_test), 129 | epochs=10, 130 | batch_size=batch_size) 131 | 132 | # predict the autoencoder output from corrupted test images 133 | x_decoded = autoencoder.predict(x_test_noisy) 134 | 135 | # 3 sets of images with 9 MNIST digits 136 | # 1st rows - original images 137 | # 2nd rows - images corrupted by noise 138 | # 3rd rows - denoised images 139 | rows, cols = 3, 9 140 | num = rows * cols 141 | imgs = np.concatenate([x_test[:num], x_test_noisy[:num], x_decoded[:num]]) 142 | imgs = imgs.reshape((rows * 3, cols, image_size, image_size)) 143 | imgs = np.vstack(np.split(imgs, rows, axis=1)) 144 | imgs = imgs.reshape((rows * 3, -1, image_size, image_size)) 145 | imgs = np.vstack([np.hstack(i) for i in imgs]) 146 | imgs = (imgs * 255).astype(np.uint8) 147 | plt.figure() 148 | plt.axis('off') 149 | plt.title('Original images: top rows, ' 150 | 'Corrupted Input: middle rows, ' 151 | 'Denoised Input: third rows') 152 | plt.imshow(imgs, interpolation='none', cmap='gray') 153 | Image.fromarray(imgs).save('corrupted_and_denoised.png') 154 | plt.show() 155 | -------------------------------------------------------------------------------- /Cuadernos/Hello_World_ML.md: -------------------------------------------------------------------------------- 1 | 2 | # Tabla de Contenidos 3 | 4 | [Construyendo Red](#construye_red) 5 | 6 | # **Importando Librerías Necesarias** 7 | 8 | 9 | ```python 10 | import tensorflow as tf 11 | import numpy as np 12 | import pandas as pd 13 | import matplotlib.pyplot as plt 14 | from tensorflow import keras 15 | ``` 16 | 17 | # **Simulando Datos** 18 | 19 | 20 | ```python 21 | np.random.seed(23) 22 | # Crear Eje x 23 | x=np.linspace(-10,10,100) 24 | #Crear Observaciones 25 | y=2*x+3*np.random.normal(0,1,size=len(x)) 26 | 27 | #Crear DataFrame 28 | datos=pd.DataFrame(np.vstack((x,y)).T, columns=['x','y']) 29 | #Ver datos 30 | datos 31 | ``` 32 | 33 |
|    |          x |          y |
|---:|-----------:|-----------:|
|  0 | -10.000000 | -17.999036 |
|  1 |  -9.797980 | -19.518520 |
|  2 |  -9.595960 | -21.524777 |
|  3 |  -9.393939 | -15.941977 |
|  4 |  -9.191919 | -16.278823 |
| ... |       ... |        ... |
| 95 |   9.191919 |  18.960197 |
| 96 |   9.393939 |  13.618120 |
| 97 |   9.595960 |  20.575697 |
| 98 |   9.797980 |  16.339859 |
| 99 |  10.000000 |  25.470135 |

100 rows × 2 columns
115 | 116 | 117 | 118 | # **Visualizar Datos** 119 | 120 | 121 | ```python 122 | plt.figure(figsize=(15,8)) 123 | plt.title("Visualización de Datos",fontsize=25) 124 | plt.plot(x,y,'o') 125 | plt.legend(["Datos"],fontsize=15) 126 | plt.xlabel("$x$",fontsize=15) 127 | plt.ylabel("$y$",fontsize=15) 128 | plt.grid() 129 | plt.show() 130 | ``` 131 | 132 | 133 | ![png](output_6_0.png) 134 | 135 | 136 | 137 | # **Construyendo Red Neuronal (Modelamiento/Diseño)** 138 | [Contenido](#TOC) 139 | 140 | 141 | ```python 142 | # Crear Modelo (Diseño/Arquitectura) 143 | 144 | model = keras.Sequential([ 145 | keras.layers.Dense(units=1,input_shape=(1,)) 146 | ]) 147 | ``` 148 | 149 | # **Compilando Red Neuronal** 150 | 151 | 152 | ```python 153 | # Compilar Modelo 154 | 155 | model.compile(optimizer='sgd', loss='mean_squared_error') 156 | ``` 157 | 158 | # **Mirando el Resumen de la Red Neuronal** 159 | 160 | 161 | ```python 162 | # Resumen para tener control de los parámetros a "entrenar" 163 | model.summary() 164 | ``` 165 | 166 | Model: "sequential_3" 167 | _________________________________________________________________ 168 | Layer (type) Output Shape Param # 169 | ================================================================= 170 | dense_3 (Dense) (None, 1) 2 171 | ================================================================= 172 | Total params: 2 173 | Trainable params: 2 174 | Non-trainable params: 0 175 | _________________________________________________________________ 176 | 177 | 178 | # **Entrenando Red Neuronal (Fancy para "Hacer la Regresión")** 179 | 180 | 181 | ```python 182 | model.fit(x, y, epochs=100,verbose=0) 183 | ``` 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | # **Visualizar Resultados de la Red** 193 | 194 | 195 | ```python 196 | #Obtener Pesos de la Red 197 | w=model.get_weights() 198 | 199 | print("\nObjeto Pesos:",w) 200 | print('\nNumber of Weights -> '+ str(len(w))) 201 | print('\nw1 = ' + str(w[0][0]) +'(Weight)') 202 | print('w0 = ' + str(w[1])+'("Weight"->Bias)') 203 | print('\nThe Model is: y = w1*x+w0:') 204 | print('\ny = {}*x+{}'.format(w[0].item(),w[1].item())) 205 | 206 | # Crear Modelo a las Malas 207 | x = np.linspace(x[0],x[-1],100) 208 | # Anonymous Function 209 | reg = lambda x: np.transpose(w[0]*x+w[1]) 210 | ``` 211 | 212 | 213 | Objeto Pesos: [array([[1.967447]], dtype=float32), array([0.36357442], dtype=float32)] 214 | 215 | Number of Weights -> 2 216 | 217 | w1 = [1.967447](Weight) 218 | w0 = [0.36357442]("Weight"->Bias) 219 | 220 | The Model is: y = w1*x+w0: 221 | 222 | y = 1.96744704246521*x+0.36357441544532776 223 | 224 | 225 | # **Predecir valores DENTRO de los datos (Ver Regresión)** 226 | 227 | 228 | ```python 229 | plt.figure(figsize=(15,8)) 230 | plt.plot(x,reg(x),'r-',label='Modelo: y={:.2f}x{:.2f}'.format(w[0].item(),w[1].item())) 231 | plt.plot(x,y,'o', label='Datos') 232 | plt.title('Plotting Model vs Data',fontsize=20) 233 | plt.xlabel("$x$",fontsize=15) 234 | plt.ylabel("$y$",fontsize=15) 235 | plt.legend(loc=0,fontsize=15) 236 | plt.grid() 237 | plt.show() 238 | ``` 239 | 240 | 241 | ![png](output_18_0.png) 242 | 243 | 244 | # **Predecir valores FUERA de los datos (Usar Modelo)** 245 | 246 | 247 | ```python 248 | # Predicción 249 | 250 | #Punto fuera de los datos de entrenamiento (MUY IMPORTANTE) 251 | x_test=[11.0] 252 | 253 | # Predecir valor usando el Modelo generado del entrenamiento 254 | pred = model.predict(x_test) 255 | print(' La Predicción en x = 11 es '+str(pred)) 256 | 257 | # Aumentar Resolución del Modelo Fuera 
de los Datos 258 | x_model=np.linspace(-12,12,100) 259 | 260 | 261 | plt.figure(figsize=(15,8)) 262 | plt.plot(x_model,reg(x_model),'r-',label='Modelo') 263 | plt.plot(x,y,'o', label='Datos') 264 | plt.plot(x_test,pred,'ko', label='Predicción') 265 | plt.title('Prediciendo el Futuro',fontsize=20) 266 | plt.legend(loc=0,fontsize=15) 267 | plt.grid() 268 | plt.show() 269 | ``` 270 | 271 | La Predicción en x = 11 es [[22.005493]] 272 | 273 | 274 | 275 | ![png](output_20_1.png) 276 | 277 | -------------------------------------------------------------------------------- /Syllabus_Ciencia_de_datos.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "

# Curso de Ciencia de Datos

\n", 8 | "\n", 9 | "

## Autor

\n", 10 | "\n", 11 | "1. Alvaro Mauricio Montenegro Díaz, ammontenegrod@unal.edu.co\n", 12 | "2. Daniel Mauricio Montenegro Reyes, dextronomo@gmail.com \n", 13 | "\n", 14 | "

## Fork

\n", 15 | "\n", 16 | "

## Referencias

\n", 17 | "\n", 18 | "1. Christofer. M. Bishop, *Pattern Recognition and machine Learning*, first edition,Springer, 2006\n", 19 | "2. Solomon Kullback, *Information Theory and Statistics*, Dover Publications, Inc, 1968\n", 20 | "3. Robert B. Ash, *Information Theory*, Dover Publications, Inc, 1990.\n", 21 | "4. James V Stone, [Information Theory: A Tutorial Introduction](https://arxiv.org/pdf/1802.05968.pdf)\n", 22 | "\n", 23 | "\n", 24 | "

## Lenguajes de Programación

\n", 25 | "\n", 26 | "Python, R, Tensorflow, sobre JupyterLab." 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "

## Evaluación

\n", 34 | "\n", 35 | "1. Un proyecto que será desarrollado por máximo dos personas a lo largo del curso. Total 80%\n", 36 | "3. Una exposición final sobre el proyecto. Total 20%." 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "

## Contenido del curso

\n", 44 | "\n", 45 | "

### Preliminares

\n", 46 | "\n", 47 | "1. [Teoría de la Información.](./Cuadernos/Teoria_Informacion.ipynb)\n", 48 | "2. [Diferenciación automática](./Cuadernos/JAX_AutoDiff.ipynb), [Diferenciación automática usando Tensorflow 2.1](./Cuadernos/AutoDiff_tf_2.ipynb)\n", 49 | "3. [Métodos de Optimización estocástica modernos](./Cuadernos/Optimizacion_M_Aprendizaje.ipynb)\n", 50 | "4. [Tensores y Algebra de Tensores](./Cuadernos/Intro_tensores.ipynb), [Introducción a Numpy](./Cuadernos/Intro_Numpy.ipynb)\n", 51 | "\n", 52 | "

### Conceptos de dimensión

\n", 53 | "\n", 54 | "1. El curso de la dimensionalidad.\n", 55 | "2. Distribuciones de probabilidad en dimensiones grandes.\n", 56 | "3. Sobre transformaciones topológicas. Concepto de sumergimiento (embedding) en dimensiones más altas.\n", 57 | "3. Dimensión intrínsica de un conjunto de datos.\n", 58 | "4. Codificación y decodificación. Pérdida de información.\n", 59 | "5. Reducción de datos normales. Análisis de componentes Principales (ACP)\n", 60 | "6. PCA probabilístico.\n", 61 | "7. Reducción de datos no normales. Análisis de componentes Independientes (ICA)\n", 62 | "8. Isomap.\n", 63 | "\n", 64 | "

### Visualización de datos de altas dimensiones

\n", 65 | "\n", 66 | "1. Planos Factoriales\n", 67 | "2. Escalamiento Multidimensional (MDS)\n", 68 | "3. Mapas Auto Organizados. (SOM)\n", 69 | "\n", 70 | "

### Modelos de Regresión

\n", 71 | "\n", 72 | "1. [Modelos lineales de regresión](./Cuadernos/Regresion_Lineal_tf_2.ipynb), [Regresión Lineal Múltiple](./Cuadernos/Regresion_Lineal_Multiple_tf_2.ipynb)\n", 73 | "2. Estimación secuencial en modelos lineales de regresión\n", 74 | "3. Regularización\n", 75 | "4. Modelos lineales Bayesianos\n", 76 | "5. Modelos lineales Generalizados\n", 77 | "6. Modelos Gamlss\n", 78 | "\n", 79 | "

### Modelos de Clasificación

\n", 80 | "\n", 81 | "1. [Modelos Lineales de clasificación](./Cuadernos/Logistic_Reg_AutoDiff.ipynb), [Ejemplo: Red Neuronal XOR](./Cuadernos/Autodif_XOR_func.ipynb), [Regresión Logística con Tensorflow](./Cuadernos/Logistic_Reg_tf2.ipynb)\n", 82 | "2. Métodos Kernel\n", 83 | "\n", 84 | "

### Aprendizaje no supervisado

\n", 85 | "\n", 86 | "1. Reglas de asociación\n", 87 | "2. Análisis de clusters\n", 88 | "3. Mapas auto-organizados\n", 89 | "\n", 90 | "

### Aprendizaje supervisado

\n", 91 | "\n", 92 | "1. Máquinas de soporte vectorial (SVM)\n", 93 | "2. [Redes Neuronales](./Cuadernos/Intro_RNA.ipynb), [Red Neuronal desde cero usando keras](./Cuadernos/RNA_desde_cero_intro_keras.ipynb)\n", 94 | "3. Aprendizaje profundo\n", 95 | "4. [Redes convolucionadas](./Cuadernos/Convolucion_Redes.ipynb),[Visualizacion de filtros e imagenes filtradas](./Cuadernos/Convolucion_Redes_pretraining.ipynb), [Reconocimiento de digitos](./Cuadernos/Convolution_mnist.ipynb),[Reconocimiento de imagenes](Convolucion_mnist_fashion.ipynb)\n", 96 | "5. [Redes recurrentes LSTM](./Cuadernos/Intro_LSTM.ipynb), [Modelamiento de Series de tiempo](./Cuadernos/Times_series_dummy_intro.ipynb),\n", 97 | "[Predicción Valor acciones](./Cuadernos/Stock_Prices_Prediction.ipynb), [Predicción Apple](./Cuadernos/Stock_Prices_Apple.ipynb), [Series de Tiempo Multivariadas](./Cuadernos/Times_series_Multivariate.ipynb)\n", 98 | "6. [Redes recurrentes GRU](./Cuadernos/Intro_GRU.ipynb), [Predicción de texto 1](./Cuadernos/Char_generator_RNN.ipynb)\n", 99 | "6. [Autocodificadores Variacionales](./Cuadernos/VAI_Introduction.ipynb)\n", 100 | "7. [Introducción a Keras.sequential](./Cuadernos/Intro_Keras_Sequential.ipynb)\n", 101 | "8. [Introducción a la API funcional de Keras](./Cuadernos/Intro_Keras_Functional.ipynb)\n", 102 | "9. [Redes ResNet](./Cuadernos/ResNet.ipynb)\n", 103 | "10. [Redes neuronales generativas. Redes adversarias](./Cuadernos/GAN.ipynb)\n", 104 | "11. Aprendizaje reforzado\n", 105 | "12. [Clasificación usando Lenguaje Natural](./Cuadernos/Simpons_3.ipynb)\n", 106 | "13. [Autocodificadores](./Cuadernos/Auto_Encoders.ipynb), [Autocodficador-clasificador](./Cuadernos/Auto_Encoder_clasifier.ipynb)\n", 107 | "\n", 108 | "

### Introducción a Inteligencia Artificial

\n", 109 | "\n", 110 | "1. Agentes, ambientes, tareas\n", 111 | "2. Resolución de problemas\n", 112 | "3. Resolución de problemas bajo incertidumbre\n", 113 | "4. Aprendizaje" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "

## Inteligencia Artificial hoy

\n", 121 | "\n", 122 | "\n", 123 | "Los agentes de IA entrenados por OpenAI jugaron 481 millones de juegos de escondite, muestran algunos comportamientos sorprendentes.\n", 124 | "\n", 125 | "[Gym Open AI](https://gym.openai.com/)\n", 126 | "\n", 127 | "[Artículo y video](https://openai.com/blog/emergent-tool-use/)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "[Desarrollos Aterradores de la IA](https://www.youtube.com/watch?v=IEBMi_LuFzE)" 135 | ] 136 | } 137 | ], 138 | "metadata": { 139 | "kernelspec": { 140 | "display_name": "Python 3", 141 | "language": "python", 142 | "name": "python3" 143 | }, 144 | "language_info": { 145 | "codemirror_mode": { 146 | "name": "ipython", 147 | "version": 3 148 | }, 149 | "file_extension": ".py", 150 | "mimetype": "text/x-python", 151 | "name": "python", 152 | "nbconvert_exporter": "python", 153 | "pygments_lexer": "ipython3", 154 | "version": "3.7.7" 155 | } 156 | }, 157 | "nbformat": 4, 158 | "nbformat_minor": 4 159 | } 160 | -------------------------------------------------------------------------------- /Codigos/Autoencoders/colorization-autoencoder-cifar10-3.4.1.py: -------------------------------------------------------------------------------- 1 | '''Colorization autoencoder 2 | 3 | The autoencoder is trained with grayscale images as input 4 | and colored images as output. 5 | Colorization autoencoder can be treated like the opposite 6 | of denoising autoencoder. Instead of removing noise, colorization 7 | adds noise (color) to the grayscale image. 8 | 9 | Grayscale Images --> Colorization --> Color Images 10 | ''' 11 | 12 | from __future__ import absolute_import 13 | from __future__ import division 14 | from __future__ import print_function 15 | 16 | from tensorflow.keras.layers import Dense, Input 17 | from tensorflow.keras.layers import Conv2D, Flatten 18 | from tensorflow.keras.layers import Reshape, Conv2DTranspose 19 | from tensorflow.keras.models import Model 20 | from tensorflow.keras.callbacks import ReduceLROnPlateau 21 | from tensorflow.keras.callbacks import ModelCheckpoint 22 | from tensorflow.keras.datasets import cifar10 23 | from tensorflow.keras.utils import plot_model 24 | from tensorflow.keras import backend as K 25 | 26 | import numpy as np 27 | import matplotlib.pyplot as plt 28 | import os 29 | 30 | def rgb2gray(rgb): 31 | """Convert from color image (RGB) to grayscale. 
32 | Source: opencv.org 33 | grayscale = 0.299*red + 0.587*green + 0.114*blue 34 | Argument: 35 | rgb (tensor): rgb image 36 | Return: 37 | (tensor): grayscale image 38 | """ 39 | return np.dot(rgb[...,:3], [0.299, 0.587, 0.114]) 40 | 41 | 42 | # load the CIFAR10 data 43 | (x_train, _), (x_test, _) = cifar10.load_data() 44 | 45 | # input image dimensions 46 | # we assume data format "channels_last" 47 | img_rows = x_train.shape[1] 48 | img_cols = x_train.shape[2] 49 | channels = x_train.shape[3] 50 | 51 | # create saved_images folder 52 | imgs_dir = 'saved_images' 53 | save_dir = os.path.join(os.getcwd(), imgs_dir) 54 | if not os.path.isdir(save_dir): 55 | os.makedirs(save_dir) 56 | 57 | # display the 1st 100 input images (color and gray) 58 | imgs = x_test[:100] 59 | imgs = imgs.reshape((10, 10, img_rows, img_cols, channels)) 60 | imgs = np.vstack([np.hstack(i) for i in imgs]) 61 | plt.figure() 62 | plt.axis('off') 63 | plt.title('Test color images (Ground Truth)') 64 | plt.imshow(imgs, interpolation='none') 65 | plt.savefig('%s/test_color.png' % imgs_dir) 66 | plt.show() 67 | 68 | # convert color train and test images to gray 69 | x_train_gray = rgb2gray(x_train) 70 | x_test_gray = rgb2gray(x_test) 71 | 72 | # display grayscale version of test images 73 | imgs = x_test_gray[:100] 74 | imgs = imgs.reshape((10, 10, img_rows, img_cols)) 75 | imgs = np.vstack([np.hstack(i) for i in imgs]) 76 | plt.figure() 77 | plt.axis('off') 78 | plt.title('Test gray images (Input)') 79 | plt.imshow(imgs, interpolation='none', cmap='gray') 80 | plt.savefig('%s/test_gray.png' % imgs_dir) 81 | plt.show() 82 | 83 | 84 | # normalize output train and test color images 85 | x_train = x_train.astype('float32') / 255 86 | x_test = x_test.astype('float32') / 255 87 | 88 | # normalize input train and test grayscale images 89 | x_train_gray = x_train_gray.astype('float32') / 255 90 | x_test_gray = x_test_gray.astype('float32') / 255 91 | 92 | # reshape images to row x col x channel for CNN output/validation 93 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, channels) 94 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, channels) 95 | 96 | # reshape images to row x col x channel for CNN input 97 | x_train_gray = x_train_gray.reshape(x_train_gray.shape[0], img_rows, img_cols, 1) 98 | x_test_gray = x_test_gray.reshape(x_test_gray.shape[0], img_rows, img_cols, 1) 99 | 100 | # network parameters 101 | input_shape = (img_rows, img_cols, 1) 102 | batch_size = 32 103 | kernel_size = 3 104 | latent_dim = 256 105 | # encoder/decoder number of CNN layers and filters per layer 106 | layer_filters = [64, 128, 256] 107 | 108 | # build the autoencoder model 109 | # first build the encoder model 110 | inputs = Input(shape=input_shape, name='encoder_input') 111 | x = inputs 112 | # stack of Conv2D(64)-Conv2D(128)-Conv2D(256) 113 | for filters in layer_filters: 114 | x = Conv2D(filters=filters, 115 | kernel_size=kernel_size, 116 | strides=2, 117 | activation='relu', 118 | padding='same')(x) 119 | 120 | # shape info needed to build decoder model so we don't do hand computation 121 | # the input to the decoder's first Conv2DTranspose will have this shape 122 | # shape is (4, 4, 256) which is processed by the decoder back to (32, 32, 3) 123 | shape = K.int_shape(x) 124 | 125 | # generate a latent vector 126 | x = Flatten()(x) 127 | latent = Dense(latent_dim, name='latent_vector')(x) 128 | 129 | # instantiate encoder model 130 | encoder = Model(inputs, latent, name='encoder') 131 | encoder.summary() 132 | 
133 | # build the decoder model 134 | latent_inputs = Input(shape=(latent_dim,), name='decoder_input') 135 | x = Dense(shape[1]*shape[2]*shape[3])(latent_inputs) 136 | x = Reshape((shape[1], shape[2], shape[3]))(x) 137 | 138 | # stack of Conv2DTranspose(256)-Conv2DTranspose(128)-Conv2DTranspose(64) 139 | for filters in layer_filters[::-1]: 140 | x = Conv2DTranspose(filters=filters, 141 | kernel_size=kernel_size, 142 | strides=2, 143 | activation='relu', 144 | padding='same')(x) 145 | 146 | outputs = Conv2DTranspose(filters=channels, 147 | kernel_size=kernel_size, 148 | activation='sigmoid', 149 | padding='same', 150 | name='decoder_output')(x) 151 | 152 | # instantiate decoder model 153 | decoder = Model(latent_inputs, outputs, name='decoder') 154 | decoder.summary() 155 | 156 | # autoencoder = encoder + decoder 157 | # instantiate autoencoder model 158 | autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder') 159 | autoencoder.summary() 160 | 161 | # prepare model saving directory. 162 | save_dir = os.path.join(os.getcwd(), 'saved_models') 163 | model_name = 'colorized_ae_model.{epoch:03d}.h5' 164 | if not os.path.isdir(save_dir): 165 | os.makedirs(save_dir) 166 | filepath = os.path.join(save_dir, model_name) 167 | 168 | # reduce learning rate by sqrt(0.1) if the loss does not improve in 5 epochs 169 | lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), 170 | cooldown=0, 171 | patience=5, 172 | verbose=1, 173 | min_lr=0.5e-6) 174 | 175 | # save weights for future use (e.g. reload parameters w/o training) 176 | checkpoint = ModelCheckpoint(filepath=filepath, 177 | monitor='val_loss', 178 | verbose=1, 179 | save_best_only=True) 180 | 181 | # Mean Square Error (MSE) loss function, Adam optimizer 182 | autoencoder.compile(loss='mse', optimizer='adam') 183 | 184 | # called every epoch 185 | callbacks = [lr_reducer, checkpoint] 186 | 187 | # train the autoencoder 188 | autoencoder.fit(x_train_gray, 189 | x_train, 190 | validation_data=(x_test_gray, x_test), 191 | epochs=30, 192 | batch_size=batch_size, 193 | callbacks=callbacks) 194 | 195 | # predict the autoencoder output from test data 196 | x_decoded = autoencoder.predict(x_test_gray) 197 | 198 | # display the 1st 100 colorized images 199 | imgs = x_decoded[:100] 200 | imgs = imgs.reshape((10, 10, img_rows, img_cols, channels)) 201 | imgs = np.vstack([np.hstack(i) for i in imgs]) 202 | plt.figure() 203 | plt.axis('off') 204 | plt.title('Colorized test images (Predicted)') 205 | plt.imshow(imgs, interpolation='none') 206 | plt.savefig('%s/colorized.png' % imgs_dir) 207 | plt.show() 208 | -------------------------------------------------------------------------------- /Cuadernos/Auto_Encoder_clasifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "

# Auto Encoders

\n", 8 | "\n", 9 | "

## Autor

\n", 10 | "\n", 11 | "1. Alvaro Mauricio Montenegro Díaz, ammontenegrod@unal.edu.co\n", 12 | "2. Daniel Mauricio Montenegro Reyes, dextronomo@gmail.com \n", 13 | "\n", 14 | "

## Fork

\n", 15 | "\n", 16 | "

## Referencias

\n", 17 | "\n", 18 | "1. [Arvin Singh Kushwaha](https://towardsdatascience.com/how-to-make-an-autoencoder-2f2d99cd5103)\n", 19 | "2. [Gertjan vander Burg](https://gertjanvandenburg.com/blog/autoencoder/)" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "''' Autoencoder with Classifier\n", 29 | "'''\n", 30 | "\n", 31 | "from __future__ import absolute_import\n", 32 | "from __future__ import division\n", 33 | "from __future__ import print_function\n", 34 | "import numpy as np\n", 35 | "import keras\n", 36 | "from keras.layers import Activation, Dense, Dropout, Input, BatchNormalization\n", 37 | "from keras.layers import Conv2D, MaxPooling2D, Flatten\n", 38 | "from keras.layers import Reshape, Conv2DTranspose, UpSampling2D\n", 39 | "from keras.models import Model\n", 40 | "from keras.datasets import mnist\n", 41 | "from keras.utils import to_categorical\n", 42 | "from keras.utils import plot_model\n", 43 | "from keras.callbacks import TensorBoard\n", 44 | "from keras import backend as K\n", 45 | "\n", 46 | "import math\n", 47 | "import matplotlib.pyplot as plt\n", 48 | "\n", 49 | "# MNIST dataset\n", 50 | "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", 51 | "\n", 52 | "num_labels = np.amax(y_train) + 1\n", 53 | "y_train = to_categorical(y_train)\n", 54 | "y_test = to_categorical(y_test)\n", 55 | "\n", 56 | "image_size = x_train.shape[1]\n", 57 | "x_train = np.reshape(x_train, [-1, image_size, image_size, 1])\n", 58 | "x_test = np.reshape(x_test, [-1, image_size, image_size, 1])\n", 59 | "x_train = x_train.astype('float32') / 255\n", 60 | "x_test = x_test.astype('float32') / 255\n", 61 | "\n", 62 | "# Network parameters\n", 63 | "input_shape = (image_size, image_size, 1)\n", 64 | "batch_size = 128\n", 65 | "kernel_size = 3\n", 66 | "pool_size = 2\n", 67 | "dropout = 0.4\n", 68 | "filters = 16\n", 69 | "latent_dim = 16\n", 70 | "\n", 71 | "# Build the autoencoder model\n", 72 | "# First build the encoder model\n", 73 | "inputs = Input(shape=input_shape, name='encoder_input')\n", 74 | "x = inputs\n", 75 | "# Stack of BN-ReLU-Conv2D-MaxPooling blocks\n", 76 | "for i in range(2):\n", 77 | " x = BatchNormalization()(x)\n", 78 | " x = Activation('relu')(x)\n", 79 | " filters = filters * 2\n", 80 | " x = Conv2D(filters=filters, kernel_size=kernel_size,\n", 81 | " padding='same')(x)\n", 82 | " x = MaxPooling2D()(x)\n", 83 | "\n", 84 | "# Shape info needed to build decoder model\n", 85 | "shape = x.shape.as_list()\n", 86 | "\n", 87 | "# Generate a 16-dim latent vector\n", 88 | "x = Flatten()(x)\n", 89 | "latent = Dense(latent_dim, name='latent_vector')(x)\n", 90 | "\n", 91 | "# Instantiate encoder model\n", 92 | "encoder = Model(inputs, latent, name='encoder')\n", 93 | "encoder.summary()\n", 94 | "plot_model(encoder, to_file='classifier-encoder.png', show_shapes=True)\n", 95 | "\n", 96 | "# Build the Decoder model\n", 97 | "latent_inputs = Input(shape=(latent_dim,), name='decoder_input')\n", 98 | "x = Dense(shape[1]*shape[2]*shape[3])(latent_inputs)\n", 99 | "x = Reshape((shape[1], shape[2], shape[3]))(x)\n", 100 | "\n", 101 | "# Stack of BN-ReLU-Transposed Conv2D-UpSampling blocks\n", 102 | "for i in range(2):\n", 103 | " x = BatchNormalization()(x)\n", 104 | " x = Activation('relu')(x)\n", 105 | " x = Conv2DTranspose(filters=filters, kernel_size=kernel_size,\n", 106 | " padding='same')(x)\n", 107 | " x = UpSampling2D()(x)\n", 108 | " filters = int(filters / 2)\n", 109 | "\n", 110 | "x = 
Conv2DTranspose(filters=1, kernel_size=kernel_size,\n", 111 | " padding='same')(x)\n", 112 | "\n", 113 | "outputs = Activation('sigmoid', name='decoder_output')(x)\n", 114 | "\n", 115 | "# Instantiate Decoder model\n", 116 | "decoder = Model(latent_inputs, outputs, name='decoder')\n", 117 | "decoder.summary()\n", 118 | "plot_model(decoder, to_file='classifier-decoder.png', show_shapes=True)\n", 119 | "\n", 120 | "# Classifier Model\n", 121 | "latent_inputs = Input(shape=(latent_dim,), name='classifier_input')\n", 122 | "x = Dense(512)(latent_inputs)\n", 123 | "x = Activation('relu')(x)\n", 124 | "x = Dropout(0.4)(x)\n", 125 | "x = Dense(256)(x)\n", 126 | "x = Activation('relu')(x)\n", 127 | "x = Dropout(0.4)(x)\n", 128 | "x = Dense(num_labels)(x)\n", 129 | "classifier_outputs = Activation('softmax', name='classifier_output')(x)\n", 130 | "classifier = Model(latent_inputs, classifier_outputs, name='classifier')\n", 131 | "classifier.summary()\n", 132 | "plot_model(classifier, to_file='classifier.png', show_shapes=True)\n", 133 | "\n", 134 | "# Autoencoder = Encoder + Classifier/Decoder\n", 135 | "# Instantiate autoencoder model\n", 136 | "autoencoder = Model(inputs,\n", 137 | " [classifier(encoder(inputs)), decoder(encoder(inputs))],\n", 138 | " name='autodecoder')\n", 139 | "autoencoder.summary()\n", 140 | "plot_model(autoencoder, to_file='classifier-autoencoder.png', show_shapes=True)\n", 141 | "\n", 142 | "# Mean Square Error (MSE) loss function, Adam optimizer\n", 143 | "autoencoder.compile(loss=['categorical_crossentropy', 'mse'],\n", 144 | " optimizer='adam',\n", 145 | " metrics=['accuracy', 'mse'])\n", 146 | "\n", 147 | "# Train the autoencoder for 1 epoch\n", 148 | "autoencoder.fit(x_train, [y_train, x_train],\n", 149 | " validation_data=(x_test, [y_test, x_test]),\n", 150 | " epochs=2, batch_size=batch_size,\n", 151 | " callbacks=[TensorBoard(log_dir='/tmp/autoencoder')])\n", 152 | "\n", 153 | "# Predict the Autoencoder output from test data\n", 154 | "y_predicted, x_decoded = autoencoder.predict(x_test)\n", 155 | "print(np.argmax(y_predicted[:8], axis=1))\n", 156 | "\n", 157 | "# Display the 1st 8 input and decoded images\n", 158 | "imgs = np.concatenate([x_test[:8], x_decoded[:8]])\n", 159 | "imgs = imgs.reshape((4, 4, image_size, image_size))\n", 160 | "imgs = np.vstack([np.hstack(i) for i in imgs])\n", 161 | "plt.figure()\n", 162 | "plt.axis('off')\n", 163 | "plt.title('Input: 1st 2 rows, Decoded: last 2 rows')\n", 164 | "plt.imshow(imgs, interpolation='none', cmap='gray')\n", 165 | "plt.savefig('input_and_decoded.png')\n", 166 | "plt.show()\n", 167 | "\n", 168 | "# latent = encoder.predict(x_test)\n", 169 | "# print(\"Variance:\", K.var(latent))" 170 | ] 171 | } 172 | ], 173 | "metadata": { 174 | "kernelspec": { 175 | "display_name": "Python 3", 176 | "language": "python", 177 | "name": "python3" 178 | }, 179 | "language_info": { 180 | "codemirror_mode": { 181 | "name": "ipython", 182 | "version": 3 183 | }, 184 | "file_extension": ".py", 185 | "mimetype": "text/x-python", 186 | "name": "python", 187 | "nbconvert_exporter": "python", 188 | "pygments_lexer": "ipython3", 189 | "version": "3.7.7" 190 | } 191 | }, 192 | "nbformat": 4, 193 | "nbformat_minor": 4 194 | } 195 | -------------------------------------------------------------------------------- /Codigos/Autoencoders/autoencoder-2dim-mnist-3.2.2.py: -------------------------------------------------------------------------------- 1 | '''Example of autoencoder model on MNIST dataset using 2dim latent 2 | 3 | The autoencoder 
forces the encoder to discover 2-dim latent vector 4 | that the decoder can recover the original input. The 2-dim latent 5 | vector is projected on 2D space to analyze the distribution of code 6 | in the latent space. The latent space can be navigated by varying the 7 | values of latent vector to produce new MNIST digits. 8 | 9 | This autoencoder has modular design. The encoder, decoder and autoencoder 10 | are 3 models that share weights. For example, after training the 11 | autoencoder, the encoder can be used to generate latent vectors 12 | of input data for low-dim visualization like PCA or TSNE. 13 | ''' 14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | from tensorflow.keras.layers import Dense, Input 20 | from tensorflow.keras.layers import Conv2D, Flatten 21 | from tensorflow.keras.layers import Reshape, Conv2DTranspose 22 | from tensorflow.keras.models import Model 23 | from tensorflow.keras.datasets import mnist 24 | from tensorflow.keras.utils import plot_model 25 | from tensorflow.keras import backend as K 26 | 27 | import numpy as np 28 | import matplotlib.pyplot as plt 29 | import os 30 | 31 | 32 | def plot_results(models, 33 | data, 34 | batch_size=32, 35 | model_name="autoencoder_2dim"): 36 | """Plots 2-dim latent values as scatter plot of digits 37 | then, plot MNIST digits as function of 2-dim latent vector 38 | 39 | Arguments: 40 | models (list): encoder and decoder models 41 | data (list): test data and label 42 | batch_size (int): prediction batch size 43 | model_name (string): which model is using this function 44 | """ 45 | 46 | encoder, decoder = models 47 | x_test, y_test = data 48 | xmin = ymin = -4 49 | xmax = ymax = +4 50 | os.makedirs(model_name, exist_ok=True) 51 | 52 | filename = os.path.join(model_name, "latent_2dim.png") 53 | # display a 2D plot of the digit classes in the latent space 54 | z = encoder.predict(x_test, 55 | batch_size=batch_size) 56 | plt.figure(figsize=(12, 10)) 57 | 58 | # axes x and y ranges 59 | axes = plt.gca() 60 | axes.set_xlim([xmin,xmax]) 61 | axes.set_ylim([ymin,ymax]) 62 | 63 | # subsample to reduce density of points on the plot 64 | z = z[0::2] 65 | y_test = y_test[0::2] 66 | plt.scatter(z[:, 0], z[:, 1], marker="") 67 | for i, digit in enumerate(y_test): 68 | axes.annotate(digit, (z[i, 0], z[i, 1])) 69 | plt.xlabel("z[0]") 70 | plt.ylabel("z[1]") 71 | plt.savefig(filename) 72 | plt.show() 73 | 74 | filename = os.path.join(model_name, "digits_over_latent.png") 75 | # display a 30x30 2D manifold of the digits 76 | n = 30 77 | digit_size = 28 78 | figure = np.zeros((digit_size * n, digit_size * n)) 79 | # linearly spaced coordinates corresponding to the 2D plot 80 | # of digit classes in the latent space 81 | grid_x = np.linspace(xmin, xmax, n) 82 | grid_y = np.linspace(ymin, ymax, n)[::-1] 83 | 84 | for i, yi in enumerate(grid_y): 85 | for j, xi in enumerate(grid_x): 86 | z = np.array([[xi, yi]]) 87 | x_decoded = decoder.predict(z) 88 | digit = x_decoded[0].reshape(digit_size, digit_size) 89 | figure[i * digit_size: (i + 1) * digit_size, 90 | j * digit_size: (j + 1) * digit_size] = digit 91 | 92 | plt.figure(figsize=(10, 10)) 93 | start_range = digit_size // 2 94 | end_range = n * digit_size + start_range + 1 95 | pixel_range = np.arange(start_range, end_range, digit_size) 96 | sample_range_x = np.round(grid_x, 1) 97 | sample_range_y = np.round(grid_y, 1) 98 | plt.xticks(pixel_range, sample_range_x) 99 | plt.yticks(pixel_range, sample_range_y) 
100 | plt.xlabel("z[0]") 101 | plt.ylabel("z[1]") 102 | plt.imshow(figure, cmap='Greys_r') 103 | plt.savefig(filename) 104 | plt.show() 105 | 106 | 107 | # load MNIST dataset 108 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 109 | 110 | # reshape to (28, 28, 1) and normalize input images 111 | image_size = x_train.shape[1] 112 | x_train = np.reshape(x_train, [-1, image_size, image_size, 1]) 113 | x_test = np.reshape(x_test, [-1, image_size, image_size, 1]) 114 | x_train = x_train.astype('float32') / 255 115 | x_test = x_test.astype('float32') / 255 116 | 117 | # network parameters 118 | input_shape = (image_size, image_size, 1) 119 | batch_size = 32 120 | kernel_size = 3 121 | latent_dim = 2 122 | # encoder/decoder number of CNN layers and filters per layer 123 | layer_filters = [32, 64] 124 | 125 | # build the autoencoder model 126 | # first build the encoder model 127 | inputs = Input(shape=input_shape, name='encoder_input') 128 | x = inputs 129 | # stack of Conv2D(32)-Conv2D(64) 130 | for filters in layer_filters: 131 | x = Conv2D(filters=filters, 132 | kernel_size=kernel_size, 133 | activation='relu', 134 | strides=2, 135 | padding='same')(x) 136 | 137 | # shape info needed to build decoder model so we don't do hand computation 138 | # the input to the decoder's first Conv2DTranspose will have this shape 139 | # shape is (7, 7, 64) which is processed by the decoder back to (28, 28, 1) 140 | shape = K.int_shape(x) 141 | 142 | # generate latent vector 143 | x = Flatten()(x) 144 | latent = Dense(latent_dim, name='latent_vector')(x) 145 | 146 | # instantiate encoder model 147 | encoder = Model(inputs, latent, name='encoder') 148 | encoder.summary() 149 | plot_model(encoder, to_file='encoder.png', show_shapes=True) 150 | 151 | # build the decoder model 152 | latent_inputs = Input(shape=(latent_dim,), name='decoder_input') 153 | # use the shape (7, 7, 64) that was earlier saved 154 | x = Dense(shape[1] * shape[2] * shape[3])(latent_inputs) 155 | # from vector to suitable shape for transposed conv 156 | x = Reshape((shape[1], shape[2], shape[3]))(x) 157 | 158 | # stack of Conv2DTranspose(64)-Conv2DTranspose(32) 159 | for filters in layer_filters[::-1]: 160 | x = Conv2DTranspose(filters=filters, 161 | kernel_size=kernel_size, 162 | activation='relu', 163 | strides=2, 164 | padding='same')(x) 165 | 166 | # reconstruct the input 167 | outputs = Conv2DTranspose(filters=1, 168 | kernel_size=kernel_size, 169 | activation='sigmoid', 170 | padding='same', 171 | name='decoder_output')(x) 172 | 173 | # instantiate decoder model 174 | decoder = Model(latent_inputs, outputs, name='decoder') 175 | decoder.summary() 176 | plot_model(decoder, to_file='decoder.png', show_shapes=True) 177 | 178 | # autoencoder = encoder + decoder 179 | # instantiate autoencoder model 180 | autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder') 181 | autoencoder.summary() 182 | plot_model(autoencoder, to_file='autoencoder.png', show_shapes=True) 183 | 184 | # Mean Square Error (MSE) loss function, Adam optimizer 185 | autoencoder.compile(loss='mse', optimizer='adam') 186 | 187 | # train the autoencoder 188 | autoencoder.fit(x_train, 189 | x_train, 190 | validation_data=(x_test, x_test), 191 | epochs=20, 192 | batch_size=batch_size) 193 | 194 | # predict the autoencoder output from test data 195 | x_decoded = autoencoder.predict(x_test) 196 | 197 | # display the 1st 8 test input and decoded images 198 | imgs = np.concatenate([x_test[:8], x_decoded[:8]]) 199 | imgs = imgs.reshape((4, 4, 
image_size, image_size)) 200 | imgs = np.vstack([np.hstack(i) for i in imgs]) 201 | plt.figure() 202 | plt.axis('off') 203 | plt.title('Input: 1st 2 rows, Decoded: last 2 rows') 204 | plt.imshow(imgs, interpolation='none', cmap='gray') 205 | plt.savefig('input_and_decoded.png') 206 | plt.show() 207 | 208 | # project the 2-dim latent on 2D space 209 | models = (encoder, decoder) 210 | data = (x_test, y_test) 211 | plot_results(models, data, 212 | batch_size=batch_size, 213 | model_name="autoencoder-2dim") 214 | -------------------------------------------------------------------------------- /Codigos/Keras_api_funcional_resnet/densenet-cifar10-2.4.1.py: -------------------------------------------------------------------------------- 1 | """Trains a 100-Layer DenseNet on the CIFAR10 dataset. 2 | 3 | With data augmentation: 4 | Greater than 93.55% test accuracy in 200 epochs 5 | 225sec per epoch on GTX 1080Ti 6 | 7 | Densely Connected Convolutional Networks 8 | https://arxiv.org/pdf/1608.06993.pdf 9 | http://openaccess.thecvf.com/content_cvpr_2017/papers/ 10 | Huang_Densely_Connected_Convolutional_CVPR_2017_paper.pdf 11 | Network below is similar to 100-Layer DenseNet-BC (k=12) 12 | """ 13 | 14 | from __future__ import absolute_import 15 | from __future__ import division 16 | from __future__ import print_function 17 | 18 | from tensorflow.keras.layers import Dense, Conv2D, BatchNormalization 19 | from tensorflow.keras.layers import MaxPooling2D, AveragePooling2D 20 | from tensorflow.keras.layers import Input, Flatten, Dropout 21 | from tensorflow.keras.layers import concatenate, Activation 22 | from tensorflow.keras.optimizers import RMSprop 23 | from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau 24 | from tensorflow.keras.callbacks import LearningRateScheduler 25 | from tensorflow.keras.preprocessing.image import ImageDataGenerator 26 | from tensorflow.keras.models import Model 27 | from tensorflow.keras.datasets import cifar10 28 | from tensorflow.keras.utils import plot_model 29 | from tensorflow.keras.utils import to_categorical 30 | import os 31 | import numpy as np 32 | 33 | # training parameters 34 | batch_size = 32 35 | epochs = 200 36 | data_augmentation = True 37 | 38 | # network parameters 39 | num_classes = 10 40 | num_dense_blocks = 3 41 | use_max_pool = False 42 | 43 | # DenseNet-BC with dataset augmentation 44 | # Growth rate | Depth | Accuracy (paper)| Accuracy (this) | 45 | # 12 | 100 | 95.49% | 93.74% | 46 | # 24 | 250 | 96.38% | requires big mem GPU | 47 | # 40 | 190 | 96.54% | requires big mem GPU | 48 | growth_rate = 12 49 | depth = 100 50 | num_bottleneck_layers = (depth - 4) // (2 * num_dense_blocks) 51 | 52 | num_filters_bef_dense_block = 2 * growth_rate 53 | compression_factor = 0.5 54 | 55 | # load the CIFAR10 data 56 | (x_train, y_train), (x_test, y_test) = cifar10.load_data() 57 | 58 | # input image dimensions 59 | input_shape = x_train.shape[1:] 60 | 61 | # mormalize data 62 | x_train = x_train.astype('float32') / 255 63 | x_test = x_test.astype('float32') / 255 64 | print('x_train shape:', x_train.shape) 65 | print(x_train.shape[0], 'train samples') 66 | print(x_test.shape[0], 'test samples') 67 | print('y_train shape:', y_train.shape) 68 | 69 | # convert class vectors to binary class matrices. 70 | y_train = to_categorical(y_train, num_classes) 71 | y_test = to_categorical(y_test, num_classes) 72 | 73 | def lr_schedule(epoch): 74 | """Learning Rate Schedule 75 | 76 | Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs. 
77 | Called automatically every epoch as part of callbacks during training. 78 | 79 | # Arguments 80 | epoch (int): The number of epochs 81 | 82 | # Returns 83 | lr (float32): learning rate 84 | """ 85 | lr = 1e-3 86 | if epoch > 180: 87 | lr *= 0.5e-3 88 | elif epoch > 160: 89 | lr *= 1e-3 90 | elif epoch > 120: 91 | lr *= 1e-2 92 | elif epoch > 80: 93 | lr *= 1e-1 94 | print('Learning rate: ', lr) 95 | return lr 96 | 97 | 98 | # start model definition 99 | # densenet CNNs (composite function) are made of BN-ReLU-Conv2D 100 | inputs = Input(shape=input_shape) 101 | x = BatchNormalization()(inputs) 102 | x = Activation('relu')(x) 103 | x = Conv2D(num_filters_bef_dense_block, 104 | kernel_size=3, 105 | padding='same', 106 | kernel_initializer='he_normal')(x) 107 | x = concatenate([inputs, x]) 108 | 109 | # stack of dense blocks bridged by transition layers 110 | for i in range(num_dense_blocks): 111 | # a dense block is a stack of bottleneck layers 112 | for j in range(num_bottleneck_layers): 113 | y = BatchNormalization()(x) 114 | y = Activation('relu')(y) 115 | y = Conv2D(4 * growth_rate, 116 | kernel_size=1, 117 | padding='same', 118 | kernel_initializer='he_normal')(y) 119 | if not data_augmentation: 120 | y = Dropout(0.2)(y) 121 | y = BatchNormalization()(y) 122 | y = Activation('relu')(y) 123 | y = Conv2D(growth_rate, 124 | kernel_size=3, 125 | padding='same', 126 | kernel_initializer='he_normal')(y) 127 | if not data_augmentation: 128 | y = Dropout(0.2)(y) 129 | x = concatenate([x, y]) 130 | 131 | # no transition layer after the last dense block 132 | if i == num_dense_blocks - 1: 133 | continue 134 | 135 | # transition layer compresses num of feature maps and reduces the size by 2 136 | num_filters_bef_dense_block += num_bottleneck_layers * growth_rate 137 | num_filters_bef_dense_block = int(num_filters_bef_dense_block * compression_factor) 138 | y = BatchNormalization()(x) 139 | y = Conv2D(num_filters_bef_dense_block, 140 | kernel_size=1, 141 | padding='same', 142 | kernel_initializer='he_normal')(y) 143 | if not data_augmentation: 144 | y = Dropout(0.2)(y) 145 | x = AveragePooling2D()(y) 146 | 147 | 148 | # add classifier on top 149 | # after average pooling, size of feature map is 1 x 1 150 | x = AveragePooling2D(pool_size=8)(x) 151 | y = Flatten()(x) 152 | outputs = Dense(num_classes, 153 | kernel_initializer='he_normal', 154 | activation='softmax')(y) 155 | 156 | # instantiate and compile model 157 | # orig paper uses SGD but RMSprop works better for DenseNet 158 | model = Model(inputs=inputs, outputs=outputs) 159 | model.compile(loss='categorical_crossentropy', 160 | optimizer=RMSprop(1e-3), 161 | metrics=['accuracy']) 162 | model.summary() 163 | plot_model(model, to_file="cifar10-densenet.png", show_shapes=True) 164 | 165 | # prepare model model saving directory 166 | save_dir = os.path.join(os.getcwd(), 'saved_models') 167 | model_name = 'cifar10_densenet_model.{epoch:02d}.h5' 168 | if not os.path.isdir(save_dir): 169 | os.makedirs(save_dir) 170 | filepath = os.path.join(save_dir, model_name) 171 | 172 | # prepare callbacks for model saving and for learning rate reducer 173 | checkpoint = ModelCheckpoint(filepath=filepath, 174 | monitor='val_accuracy', 175 | verbose=1, 176 | save_best_only=True) 177 | 178 | lr_scheduler = LearningRateScheduler(lr_schedule) 179 | 180 | lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), 181 | cooldown=0, 182 | patience=5, 183 | min_lr=0.5e-6) 184 | 185 | callbacks = [checkpoint, lr_reducer, lr_scheduler] 186 | 187 | # run training, with or 
without data augmentation 188 | if not data_augmentation: 189 | print('Not using data augmentation.') 190 | model.fit(x_train, y_train, 191 | batch_size=batch_size, 192 | epochs=epochs, 193 | validation_data=(x_test, y_test), 194 | shuffle=True, 195 | callbacks=callbacks) 196 | else: 197 | print('Using real-time data augmentation.') 198 | # preprocessing and realtime data augmentation 199 | datagen = ImageDataGenerator( 200 | featurewise_center=False, # set input mean to 0 over the dataset 201 | samplewise_center=False, # set each sample mean to 0 202 | featurewise_std_normalization=False, # divide inputs by std of dataset 203 | samplewise_std_normalization=False, # divide each input by its std 204 | zca_whitening=False, # apply ZCA whitening 205 | rotation_range=0, # randomly rotate images in the range (deg 0 to 180) 206 | width_shift_range=0.1, # randomly shift images horizontally 207 | height_shift_range=0.1, # randomly shift images vertically 208 | horizontal_flip=True, # randomly flip images 209 | vertical_flip=False) # randomly flip images 210 | 211 | # compute quantities required for featurewise normalization 212 | # (std, mean, and principal components if ZCA whitening is applied) 213 | datagen.fit(x_train) 214 | 215 | # fit the model on the batches generated by datagen.flow() 216 | model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size), 217 | steps_per_epoch=x_train.shape[0] // batch_size, 218 | validation_data=(x_test, y_test), 219 | epochs=epochs, verbose=1, 220 | callbacks=callbacks) 221 | 222 | # score trained model 223 | scores = model.evaluate(x_test, y_test, verbose=0) 224 | print('Test loss:', scores[0]) 225 | print('Test accuracy:', scores[1]) 226 | -------------------------------------------------------------------------------- /Cuadernos/Intro_GRU.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Redes de unidad recurrente cerrada (Gated Recurrent Unit)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Autor\n", 15 | "\n", 16 | "1. Alvaro Mauricio Montenegro Díaz, ammontenegrod@unal.edu.co\n", 17 | "2. Daniel Mauricio Montenegro Reyes, dextronomo@gmail.com \n", 18 | "\n" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "## References\n", 26 | "\n", 27 | "1. J. Chung, C. Gulcehre, K. Cho, Y. Bengio, [Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling](https://arxiv.org/pdf/1412.3555v1.pdf)\n", 28 | "2. Karpathy, [*The Unreasonable Effectiveness of Recurrent Neural Networks*](http://karpathy.github.io/2015/05/21/rnn-effectiveness/)\n", 29 | "\n" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "# Introducción\n", 37 | "\n", 38 | "Una red neuronal recurrente (RNR) es una extensión de una red neuronal convencional,\n", 39 | "que es capaz de manejar una entrada de secuencia de longitud variable. El RNR maneja la longitud variable de las secuencias mediante un estado oculto recurrente cuya activación en cada momento depende del estado anterior. 
Más formalmente, dada una secuencia $\mathbf{x} = (x_1, x_2, \ldots, x_T)$, la RNR actualiza su estado oculto recurrente $h_t$ mediante \n", 40 | "\n", 41 | "\n", 42 | "$$\n", 43 | "h_t = \begin{cases}\n", 44 | "0, &\text{ si } t=0,\\\n", 45 | "\phi(h_{t-1},x_t), &\text{en otro caso}.\n", 46 | "\end{cases}\n", 47 | "$$\n", 48 | "\n", 49 | "\n", 50 | "Si $g$ es una función de activación suave, como un sigmoide o una tangente hiperbólica, es común definir\n", 51 | "\n", 52 | "$$\n", 53 | "h_t = g(Wh_{t-1} + Ux_t)\n", 54 | "$$\n", 55 | "\n", 56 | "Una RNR generativa genera una distribución de probabilidad sobre el siguiente elemento de la secuencia, dado su estado actual $h_t$, y este modelo generativo puede capturar una distribución sobre secuencias de longitud variable mediante el uso de un símbolo de salida especial que representa el final de la secuencia. La probabilidad de la secuencia se puede descomponer como\n", 57 | "\n", 58 | "$$\n", 59 | "p(x_1,\ldots, x_T) = p(x_1) p(x_2 | x_1) p(x_3 | x_1, x_2) \ldots p(x_T | x_1,\ldots, x_{T-1}), \n", 60 | "$$\n", 61 | "\n", 62 | "donde el último elemento es un valor especial de final de secuencia. Modelamos cada distribución de probabilidad condicional con\n", 63 | "\n", 64 | "$$\n", 65 | "p(x_t | x_1,\ldots, x_{t-1}) = g(h_t)\n", 66 | "$$\n", 67 | "\n", 68 | "El problema con este modelo es que el gradiente, calculado con el método de gradiente estocástico, tiende a desvanecerse (volverse cero) o a explotar. \n", 69 | "\n", 70 | "Esta situación impulsó dos líneas de trabajo. Por un lado se inició la búsqueda de nuevas técnicas para el uso del gradiente en el proceso de optimización de la función de costo y, por el otro, el desarrollo de nuevos modelos de redes neuronales. La primera línea ha producido nuevas técnicas de optimización estocástica basadas en el gradiente, que han sido usadas exitosamente en redes generales. \n", 71 | "\n", 72 | "La segunda línea llevó al desarrollo de las redes [LSTM](Intro_LSTM.ipynb), en las cuales la función de activación consiste en una transformación afín seguida de una no linealidad simple aplicada elemento a elemento, mediante el uso de unidades de compuerta, [Hochreiter y Schmidhuber, 1997](https://www.bioinf.jku.at/publications/older/2604.pdf). Más recientemente, otro tipo de unidad recurrente, a la que nos referimos como unidad recurrente cerrada (GRU), fue propuesta por [Cho et al. 2014](https://arxiv.org/pdf/1412.3555v1.pdf). De estas unidades recurrentes se ha demostrado que funcionan bien en tareas que requieren capturar dependencias a largo plazo. Esas tareas incluyen, entre otras, el reconocimiento de voz y el modelamiento de música.\n", 73 | "\n" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "# Funcionamiento de una red GRU" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "Una red de unidad recurrente cerrada (GRU) permite que cada unidad recurrente capture de forma adaptativa dependencias de diferentes escalas de tiempo. De manera similar a la unidad LSTM, la GRU tiene puertas que modulan el flujo de información dentro de la unidad. Sin embargo, a diferencia de las redes LSTM, no tiene celdas de memoria separadas." 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "
\n", 95 | "
\n", 96 | "\n", 97 | "
\n", 98 | "
\n", 99 | "

Arquitectura general de un red neuronal GRU

\n", 100 | "
\n", 101 | "
\n", 102 | "\n" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "El diagrama presenta la estructura general de una red GRU. A la entrada de cada unidad se presenta, la información de memoria procedente de la unida anterior y la infromación en la unidad de tiempo $t$. La siguente imagen muestra la estructura interna de una GRU." 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "
\n", 117 | "
\n", 118 | "\n", 119 | "
\n", 120 | "
\n", 121 | "

Arquitectura interna de una GRU

\n", 122 | "
\n", 123 | "
" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "La notación es bastante estándar. \n", 131 | "\n", 132 | "1. $+$ : indica suma de vectores\n", 133 | "2. $\\sigma$ : representa a la función de activación sigmoide\n", 134 | "3. $\\tanh$ : representa a la función de activación tangente hiperbólica.\n", 135 | "4.$\\odot$ : es producto componente a componente (producto de Hamard)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "Una GRU tien dos tipos de puerta: actualización (update) y reinicio (reset)." 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "## Puerta de actualización (update)\n", 150 | "\n", 151 | "\n", 152 | "Comenzamos calculando la puerta de actualización $z_t$ para el paso de tiempo $t$ usando la fórmula:\n", 153 | "\n", 154 | "$$\n", 155 | "z_t = \\sigma(W_z x_t + U_zh_{t-1})\n", 156 | "$$\n", 157 | "\n", 158 | "en donde $W_z$ y $U_z$ son pesos asociados $x_t$ (la nueva entrada) y $h_{t-1}$, (la información procedente de la unidad anterior).\n", 159 | "\n", 160 | "\n", 161 | "La puerta de actualización ayuda al modelo a determinar qué cantidad de la información pasada (de los pasos de tiempo anteriores) debe transmitirse al futuro. Eso es realmente poderoso porque el modelo puede decidir copiar toda la información del pasado y eliminar el riesgo de desvanecer del gradiente.\n", 162 | "\n" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "## Puerta reinicio\n", 170 | "\n", 171 | "Esencialmente, esta puerta se utiliza para que el modelo decida qué cantidad de información pasada debe olvidar. Para calcularlo, utilizamos:\n", 172 | "\n", 173 | "$$\n", 174 | "r_t = \\sigma(W_r x_t + U_rh_{t-1}),\n", 175 | "$$\n", 176 | "\n", 177 | "en donde $W_r$ y $U_r$ son pesos asociados $x_t$ (la nueva entrada) y $h_{t-1}$, (la información procedente de la unidad anterior)." 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "## Contenido de memoria actual\n", 185 | "\n", 186 | "Veamos cómo afectarán exactamente las puertas al resultado final. Primero, comenzamos con el uso de la puerta de reinicio. Introducimos un nuevo contenido de memoria que utilizará la puerta de reinicio para almacenar la información relevante del pasado. Se calcula de la siguiente manera:\n", 187 | "\n", 188 | "\n", 189 | "$$\n", 190 | "h_t' = \\sigma(Wx_t + r_t\\odot Uh_{t-1}),\n", 191 | "$$\n", 192 | "\n", 193 | "en donde $W$ y $U$ son pesos asociados a las $x$'s y a las $h$'s respectivamente." 194 | ] 195 | } 196 | ], 197 | "metadata": { 198 | "kernelspec": { 199 | "display_name": "Python 3", 200 | "language": "python", 201 | "name": "python3" 202 | }, 203 | "language_info": { 204 | "codemirror_mode": { 205 | "name": "ipython", 206 | "version": 3 207 | }, 208 | "file_extension": ".py", 209 | "mimetype": "text/x-python", 210 | "name": "python", 211 | "nbconvert_exporter": "python", 212 | "pygments_lexer": "ipython3", 213 | "version": "3.7.7" 214 | }, 215 | "toc-autonumbering": true 216 | }, 217 | "nbformat": 4, 218 | "nbformat_minor": 4 219 | } 220 | -------------------------------------------------------------------------------- /Cuadernos/AutoDiff_tf_2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "

Diferenciación Automática con Tensorflow 2.1

\n", 8 | "\n", 9 | "

Autores

\n", 10 | "\n", 11 | "1. Alvaro Mauricio Montenegro Díaz, ammontenegrod@unal.edu.co\n", 12 | "2. Daniel Mauricio Montenegro Reyes, dextronomo@gmail.com \n", 13 | "\n", 14 | "

Fork

\n", 15 | "\n", 16 | "

Referencias

\n", 17 | "\n" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "

1. Introducción

\n", 25 | "\n", 26 | "[La diferenciación automática](https://en.wikipedia.org/wiki/Automatic_differentiation)\n", 27 | "es una técnica clave para la optimización de modelos de aprendizaje de máquinas (machine learning). En este cuaderno se hace una breve descripción de tf.GradientTape la API para diferenicación automática en tensorflow 2.1." 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "

2. Gradient Tapes

\n", 35 | "\n", 36 | "Lo que hace el *administrador de contexto* **tf.GradientTape** es calcular el gradiente de un cálculo con respecto a sus variables de entrada. Tensorflow registra todas las operaciones ejecutadas dentro del contexto de un tf.GradientTape en una cinta. Tensorflow usa esa cinta y los gradientes asociados con cada operación registrada para calcular los gradientes de un cálculo registrado usando el [modo de diferenciación hacia automática atrás](https://en.wikipedia.org/wiki/Automatic_differentiation)\n", 37 | "\n", 38 | "\n", 39 | "Las operaciones se registran si se ejecutan dentro de este administrador de contexto y al menos una de sus entradas está siendo *observada* (watched). Si una variable es creada con *tf.Variable* es marcada como entrenable, será observada (registrada).\n", 40 | "\n", 41 | "Por ejemplo:" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "Versión de Tensorflow: 2.0.0\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "from __future__ import absolute_import, division, print_function, unicode_literals\n", 59 | "\n", 60 | "import tensorflow as tf\n", 61 | "print(\"Versión de Tensorflow: \", tf.__version__)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 4, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "x = tf.ones((2,2))\n", 71 | "\n", 72 | "# this is the GradientTape context\n", 73 | "with tf.GradientTape() as t:\n", 74 | " t.watch(x)\n", 75 | " y = tf.reduce_sum(x)\n", 76 | " z = tf.multiply(y,y)\n", 77 | " \n", 78 | "# Derivate of z with respect to the original input tensor x\n", 79 | "dz_dx = t.gradient(z,x)\n", 80 | "for i in [0,1]:\n", 81 | " for j in [0,1]:\n", 82 | " assert dz_dx[i][j].numpy() == 8.0\n" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "Note that matemáticamente lo que hicimos es lo siguiente:\n", 90 | "\n", 91 | "$$\n", 92 | "\\begin{equation}\n", 93 | "x = \\begin{pmatrix} 1 & 1\\\\1 & 1 \\end{pmatrix}\n", 94 | "\\end{equation}\n", 95 | "$$\n", 96 | "\n", 97 | "$$\n", 98 | "\\begin{align}\n", 99 | "y &= x_{11} + x_{12} + x_{21} + x_{22} = 4\\\\\n", 100 | "z &= y^2\n", 101 | "\\end{align}\n", 102 | "$$\n", 103 | "\n", 104 | "La derivada es calculada usando la regla de la cadena.\n", 105 | "\n", 106 | "$$\n", 107 | "\\begin{equation}\n", 108 | "\\frac{dz}{dx} = \\left( \\frac{dz}{dy}\\right) \\left( \\frac{dy}{dx}\\right) = 2yx= 8 x = \\begin{pmatrix} 8 & 8\\\\8 & 8 \\end{pmatrix}\n", 109 | "\\end{equation}\n", 110 | "$$" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 3, 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/plain": [ 121 | "" 124 | ] 125 | }, 126 | "execution_count": 3, 127 | "metadata": {}, 128 | "output_type": "execute_result" 129 | } 130 | ], 131 | "source": [ 132 | "dz_dx" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 4, 138 | "metadata": {}, 139 | "outputs": [ 140 | { 141 | "data": { 142 | "text/plain": [ 143 | "array([[8., 8.],\n", 144 | " [8., 8.]], dtype=float32)" 145 | ] 146 | }, 147 | "execution_count": 4, 148 | "metadata": {}, 149 | "output_type": "execute_result" 150 | } 151 | ], 152 | "source": [ 153 | "dz_dx.numpy()" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "Es posible obtener gradientes de la salida (output) con respecto a 
valores intermedios computados en un contexto tf.GradientTape *registrado*. Por defecto, los recursos mantenidos por GradientTape son liberados tan pronto el método *GradientTape.gradient()* es llamado. Para hacer múltiples llamadas es necesario crear la instancia de GradientTape en modo persistente. Cuando ya no se requiere más, se recomienda liberar el recurso. Veamos el ejemplo." 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 12, 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "name": "stdout", 170 | "output_type": "stream", 171 | "text": [ 172 | "108.0\n", 173 | "6.0\n" 174 | ] 175 | } 176 | ], 177 | "source": [ 178 | "x = tf.constant(3.0)\n", 179 | "with tf.GradientTape(persistent=True) as t:\n", 180 | "    t.watch(x)\n", 181 | "    y = x * x\n", 182 | "    z = y * y\n", 183 | "dz_dx = t.gradient(z,x) # 108.0 = 4*x**3 evaluated at x = 3.0\n", 184 | "dy_dx = t.gradient(y,x) # 6.0 = 2*x evaluated at x = 3.0\n", 185 | "del t  # delete the reference to the tape (free the resource)\n", 186 | "print(dz_dx.numpy())\n", 187 | "print(dy_dx.numpy())" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "
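Antes de continuar, conviene notar que para las variables entrenables no hace falta llamar a watch. Un esbozo mínimo ilustrativo (no es parte del cuaderno original; usa el mismo import tensorflow as tf de arriba):

```python
# Las variables creadas con tf.Variable son observadas automáticamente
# por la cinta; no es necesario llamar a t.watch(v).
v = tf.Variable(2.0)
with tf.GradientTape() as t:
    y = v * v * v              # y = v**3
dy_dv = t.gradient(y, v)       # dy/dv = 3*v**2 = 12.0 en v = 2.0
print(dy_dv.numpy())           # 12.0
```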

3. Registro del flujo de control

\n", 195 | "\n", 196 | "Debido a que las cintas (tapes) registran operaciones tal como ellas son ejecutadas, sentencias Python para el control del flujo (por ejemplo *if s* and *while s*) pueden ser manejadas de manera natural:" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 15, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "def f(x,y):\n", 206 | " output = 1.0\n", 207 | " for i in range(y):\n", 208 | " if i > 1 and i < 5:\n", 209 | " output = tf.multiply(output,x)\n", 210 | " return output\n", 211 | "\n", 212 | "def grad(x,y):\n", 213 | " with tf.GradientTape() as t:\n", 214 | " t.watch(x)\n", 215 | " out = f(x,y)\n", 216 | " return t.gradient(out,x)\n", 217 | "\n", 218 | "x = tf.convert_to_tensor(2.0)\n", 219 | "\n", 220 | "assert grad(x,6).numpy() == 12.0\n", 221 | "assert grad(x,5).numpy() == 12.0\n", 222 | "assert grad(x,4).numpy() == 4.0\n", 223 | " " 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": {}, 229 | "source": [ 230 | "

4. Gradientes de órdenes superiores

\n", 231 | "\n", 232 | "\n", 233 | "Las operaciones dentro del administrador de contexto *GradientTape* se registran para la diferenciación automática. Si los gradientes de órdenes superiores se calculan en ese contexto, el cálculo del gradiente también se registra. Como resultado, la misma API también funciona para gradientes de orden superior. Por ejemplo:" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 16, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "x = tf.Variable(1.0) # Creates a Tensorflow variable intialized to 1.0\n", 243 | "\n", 244 | "with tf.GradientTape() as t:\n", 245 | " with tf.GradientTape() as t2:\n", 246 | " y = x * x * x\n", 247 | " # Compute the gradient inside the 't' context manager\n", 248 | " # which means the gradient computation is differentiable as well.\n", 249 | " dy_dx = t2.gradient(y,x)\n", 250 | "d2y_d2x = t.gradient(dy_dx,x)\n", 251 | "\n", 252 | "assert dy_dx.numpy() == 3.0\n", 253 | "assert d2y_d2x.numpy() == 6.0" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "metadata": {}, 260 | "outputs": [], 261 | "source": [] 262 | } 263 | ], 264 | "metadata": { 265 | "kernelspec": { 266 | "display_name": "Python 3", 267 | "language": "python", 268 | "name": "python3" 269 | }, 270 | "language_info": { 271 | "codemirror_mode": { 272 | "name": "ipython", 273 | "version": 3 274 | }, 275 | "file_extension": ".py", 276 | "mimetype": "text/x-python", 277 | "name": "python", 278 | "nbconvert_exporter": "python", 279 | "pygments_lexer": "ipython3", 280 | "version": "3.7.5" 281 | } 282 | }, 283 | "nbformat": 4, 284 | "nbformat_minor": 4 285 | } 286 | -------------------------------------------------------------------------------- /Syllabus_Ciencia_Datos.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#
Curso de Ciencia de Datos
" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Autores" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "1. Alvaro Mauricio Montenegro Díaz, ammontenegrod@unal.edu.co\n", 22 | "2. Daniel Mauricio Montenegro Reyes, dextronomo@gmail.com " 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "## Referencias" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "1. Alvaro Montenegro y Daniel Montenegro, [Ciencia de Datos](https://github.com/AprendizajeProfundo/Ciencia-de-Datos), 2021\n", 37 | "2. Aston Zhang, Zachary C. Lipton, Mu Li, and Alexander J. Smola, [Dive into Deep Learning](https://d2l.ai/), 2021\n", 38 | "3. Christofer. M. Bishop, [Pattern Recognition and machine Learning](http://libgen.rs/search.php?req=bishop+pattern&lg_topic=libgen&open=0&view=simple&res=25&phrase=1&column=def)\n", 39 | ", Second edition,Springer, 2006\n", 40 | "4. James V Stone, [Information Theory: A Tutorial Introduction](https://arxiv.org/pdf/1802.05968.pdf)\n", 41 | "5. John Hunt, [A Beginners Guide to Python 3 Programming](http://libgen.rs/search.php?req=A+Beginners+Guide+to+Python+3+Programming+hunt&open=0&res=25&view=simple&phrase=1&column=def), 2019\n", 42 | "6. John Hunt,[Advanced Guide to Python 3 Programming](http://libgen.rs/search.php?req=Advanced+Guide+to+Python+3+Programming+hunt&open=0&res=25&view=simple&phrase=1&column=def). 2019\n", 43 | "5. Rowel Atienza, [Advanced Deep Learning with TensorFlow 2 and Keras: Apply DL, GANs, VAEs, deep RL, unsupervised learning, object detection and segmentation, and more, 2nd Edition](http://libgen.rs/search.php?req=advanced+deep+learning+tensorflow+keras+atienza&open=0&res=25&view=simple&phrase=1&column=def), 2020.\n", 44 | "8. Joseph Howse, Joe Minichino, [Learning OpenCV 4 Computer Vision with Python 3: Get to grips with tools, techniques, and algorithms for computer vision and machine learning](http://libgen.rs/search.php?req=opencv+4&open=0&res=25&view=simple&phrase=1&column=def), 2019\n", 45 | "9. [Librosa](https://librosa.org/doc/latest/index.html), 2021\n", 46 | "10. [Nltk](https://www.nltk.org/), 2021\n", 47 | "11. [Gensim](https://radimrehurek.com/gensim/), 2021\n", 48 | "12. [OpenCV](https://docs.opencv.org/master/d6/d00/tutorial_py_root.html), 2021\n", 49 | "13. Tensorfow, [Time series Forcasting](https://www.tensorflow.org/tutorials/structured_data/time_series), 2021\n", 50 | "14. Oleg Jarma, [Instalación de Anaconda](https://www.youtube.com/watch?v=yKG-bzLZxyI&t), 2020" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "## Tecnologías del curso" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "Python, Tensorflow, JupyterLab, Github, OpenCv, Librosa, Nltk, Gensim " 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "## Fuentes de datos" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "1. [Best Public Datasets for Machine Learning and Data Science](https://pub.towardsai.net/best-datasets-for-machine-learning-data-science-computer-vision-nlp-ai-c9541058cf4f)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "## Evaluación" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "1. 
Un proyecto que será desarrollado por máximo dos personas a lo largo del curso. Total 70%.\n", 93 | "2. Seguimiento del proyecto. Asistencia a asesorías extraclase. Total 10%.\n", 94 | "3. Video promocional de cinco minutos sobre el proyecto. Total 5%.\n", 95 | "4. Una exposición final sobre el proyecto. Total 15%." 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "## Posibles proyectos de curso" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "Revise los conjuntos de datos y tome una decisión la primera semana de clase." 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "## Contenido del curso" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "### Introducción a Python" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "1. Tipos de datos, Variables\n", 131 | "2. Estructuras de control\n", 132 | "3. Funciones\n", 133 | "4. Clases y objetos\n", 134 | "5. Herencia\n", 135 | "6. Manipulación de datos\n", 136 | "7. Visualización de datos" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "### Matemáticas para Ciencia de Datos" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "1. [Teoría de la Información](./Cuadernos/Teoria_Informacion.ipynb)\n", 151 | "2. [Diferenciación automática](./Cuadernos/JAX_AutoDiff.ipynb), [Diferenciación automática usando Tensorflow 2.1](./Cuadernos/AutoDiff_tf_2.ipynb)\n", 152 | "3. [Métodos de Optimización estocástica modernos](./Cuadernos/Optimizacion_M_Aprendizaje.ipynb)\n", 153 | "4. [Tensores y Álgebra de Tensores](./Cuadernos/Intro_tensores.ipynb), [Introducción a Numpy](./Cuadernos/Intro_Numpy.ipynb)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "### Reducción de dimensión" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "1. La maldición de la dimensionalidad.\n", 168 | "2. Distribuciones de probabilidad en dimensiones grandes.\n", 169 | "3. Sobre transformaciones topológicas. Concepto de sumergimiento (embedding) en dimensiones más altas.\n", 170 | "4. Dimensión intrínseca de un conjunto de datos. q-dimensión.\n", 171 | "5. Codificación y decodificación. Pérdida de información.\n", 172 | "6. Reducción de datos normales. Análisis de Componentes Principales (ACP).\n", 173 | "7. PCA probabilístico.\n", 174 | "8. Reducción de datos no normales. Análisis de Componentes Independientes (ICA).\n", 175 | "9. Isomap." 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "### Métodos No supervisados. Clustering" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "1. k-means\n", 190 | "2. Vecinos más cercanos\n", 191 | "3. Random Forest - clasificación (semi-supervisado)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "### Visualización de datos de altas dimensiones" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "1. Planos Factoriales\n", 206 | "2. Mapas Auto Organizados (SOM)\n", 207 | "3. 
t-SNE" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "### Procesamiento de Lenguaje Natural" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "1. Conceptos básicos: limpieza, lematización,introducción a gensim y nltk\n", 222 | "2. Corpus de textos y espacios de vectores\n", 223 | "3. Tópicos y transformaciones\n", 224 | "4. Consultas de similaridades\n", 225 | "5. Modelo word2vec\n", 226 | "6. Modelo doc2vec\n", 227 | "7. Modelo Fast Text\n", 228 | "8. Modelo LDA\n", 229 | "10. Embeddings. Modeleo Glove" 230 | ] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "### Métodos Supervisados -Clasificación" 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": {}, 242 | "source": [ 243 | "1. Máquinas de Soporte Vectorial (SVM)\n", 244 | "2. Random Forest - Clasificación" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "### Métodos supérvisados - Regresión" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "1. Conceptos generales\n", 259 | "2. Estimación secuencial\n", 260 | "3. Regularización; regresión Lasso\n", 261 | "4. Distribuciones aprioris y posteriores\n", 262 | "5. Modelos generales de regresión: GAMLSS.\n", 263 | "6. Random Forest - Regresión\n", 264 | "6. Regresión usando tensorflow." 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "### Aprendizaje Profundo" 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": {}, 277 | "source": [ 278 | "1. [Redes Neuronales](./Cuadernos/Intro_RNA.ipynb), [Red Neuronal desde cero usando keras](./Cuadernos/RNA_desde_cero_intro_keras.ipynb)\n", 279 | "2. [Redes convolucionadas](./Cuadernos/Convolucion_Redes.ipynb),[Visualizacion de filtros e imagenes filtradas](./Cuadernos/Convolucion_Redes_pretraining.ipynb), [Reconocimiento de digitos](./Cuadernos/Convolution_mnist.ipynb),[Reconocimiento de imagenes](Convolucion_mnist_fashion.ipynb)\n", 280 | "3. [Redes recurrentes LSTM](./Cuadernos/Intro_LSTM.ipynb), [Modelamiento de Series de tiempo](./Cuadernos/Times_series_dummy_intro.ipynb),\n", 281 | "[Predicción Valor acciones](./Cuadernos/Stock_Prices_Prediction.ipynb), [Predicción Apple](./Cuadernos/Stock_Prices_Apple.ipynb), [Series de Tiempo Multivariadas](./Cuadernos/Times_series_Multivariate.ipynb)\n", 282 | "4. [Redes recurrentes GRU](./Cuadernos/Intro_GRU.ipynb), [Predicción de texto 1](./Cuadernos/Char_generator_RNN.ipynb)\n", 283 | "5. [Autocodficadores](./Cuadernos/Auto_Encoder_clasifier.ipynb)\n", 284 | "6. [Introducción a Keras.sequential](./Cuadernos/Intro_Keras_Sequential.ipynb)\n", 285 | "7. [Introducción a la API funcional de Keras](./Cuadernos/Intro_Keras_Functional.ipynb)\n", 286 | "8. [Redes ResNet](./Cuadernos/ResNet.ipynb)\n", 287 | "9. [Redes neuronales generativas. Redes adversarias](./Cuadernos/GAN.ipynb)\n", 288 | "10. [Autocodificadores Variacionales](./Cuadernos/VAI_Introduction.ipynb)\n", 289 | "10. Modelos Atencionales\n", 290 | "11. Transformers\n", 291 | "12. Modelo Bert\n", 292 | "13. NLP-parte II. Aplicaciones, análisis de sentimiento, traductores, chatbots." 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "

Inteligencia Artificial hoy

\n", 300 | "\n", 301 | "\n", 302 | "Los agentes de IA entrenados por OpenAI jugaron 481 millones de juegos de escondite, muestran algunos comportamientos sorprendentes.\n", 303 | "\n", 304 | "[Gym Open AI](https://gym.openai.com/)\n", 305 | "\n", 306 | "[Artículo y video](https://openai.com/blog/emergent-tool-use/)" 307 | ] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "metadata": {}, 312 | "source": [ 313 | "[Desarrollos Aterradores de la IA](https://www.youtube.com/watch?v=IEBMi_LuFzE)" 314 | ] 315 | } 316 | ], 317 | "metadata": { 318 | "kernelspec": { 319 | "display_name": "Python 3", 320 | "language": "python", 321 | "name": "python3" 322 | }, 323 | "language_info": { 324 | "codemirror_mode": { 325 | "name": "ipython", 326 | "version": 3 327 | }, 328 | "file_extension": ".py", 329 | "mimetype": "text/x-python", 330 | "name": "python", 331 | "nbconvert_exporter": "python", 332 | "pygments_lexer": "ipython3", 333 | "version": "3.8.5" 334 | } 335 | }, 336 | "nbformat": 4, 337 | "nbformat_minor": 4 338 | } 339 | -------------------------------------------------------------------------------- /Cuadernos/Auto_Encoders.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Auto encoders" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Autor\n", 15 | "\n", 16 | "1. Alvaro Mauricio Montenegro Díaz, ammontenegrod@unal.edu.co\n", 17 | "2. Daniel Mauricio Montenegro Reyes, dextronomo@gmail.com \n" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "# Referencias" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "1. [Documentación de Keras](https://keras.io/getting-started/sequential-model-guide/)\n", 32 | "2. [Arvin Singh Kushwaha](https://towardsdatascience.com/how-to-make-an-autoencoder-2f2d99cd5103)\n", 33 | "3. [Gertjan vander Burg](https://gertjanvandenburg.com/blog/autoencoder/)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "# Introducción" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "# Estructura de un autoencoder" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "\n", 55 | "
\n", 56 | "
\n", 57 | "\n", 58 | "
\n", 59 | "
\n", 60 | "

Estructura de un Autoencoder

\n", 61 | "
\n", 62 | "
" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | " " 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "# Codificación en 2D de los datos de Mnist\n" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "
\n", 84 | "
\n", 85 | "\n", 86 | "
\n", 87 | "
\n", 88 | "

Codificación en 2D de los datos de MNIST

\n", 89 | "
\n", 90 | "
" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "# titulo" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "\n" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "# El Código inicial para mnist" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 5, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "'''Example of autoencoder model on MNIST dataset\n", 133 | "\n", 134 | "This autoencoder has modular design. The encoder, decoder and autoencoder\n", 135 | "are 3 models that share weights. For example, after training the\n", 136 | "autoencoder, the encoder can be used to generate latent vectors\n", 137 | "of input data for low-dim visualization like PCA\n", 138 | "'''\n", 139 | "\n", 140 | "from __future__ import absolute_import\n", 141 | "from __future__ import division\n", 142 | "from __future__ import print_function\n", 143 | "\n", 144 | "from tensorflow.keras.layers import Dense, Input\n", 145 | "from tensorflow.keras.layers import Conv2D, Flatten\n", 146 | "from tensorflow.keras.layers import Reshape, Conv2DTranspose\n", 147 | "from tensorflow.keras.models import Model\n", 148 | "from tensorflow.keras.datasets import mnist\n", 149 | "from tensorflow.keras.utils import plot_model\n", 150 | "from tensorflow.keras import backend as K\n", 151 | "\n", 152 | "import numpy as np\n", 153 | "import matplotlib.pyplot as plt" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "## Carga los datos" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 6, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "# load MNIST dataset\n", 170 | "(x_train, _), (x_test, _) = mnist.load_data()" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 7, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "# reshape to (28, 28, 1) and normalize input images\n", 180 | "image_size = x_train.shape[1]\n", 181 | "x_train = np.reshape(x_train, [-1, image_size, image_size, 1])\n", 182 | "x_test = np.reshape(x_test, [-1, image_size, image_size, 1])\n", 183 | "x_train = x_train.astype('float32') / 255\n", 184 | "x_test = x_test.astype('float32') / 255" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 8, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "# network parameters\n", 194 | "input_shape = (image_size, image_size, 1)\n", 195 | "batch_size = 32\n", 196 | "kernel_size = 3\n", 197 | "latent_dim = 16" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "# Construcción del codificador (encoder)" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": 13, 210 | "metadata": {}, 211 | "outputs": [ 212 | { 213 | "name": "stdout", 214 | "output_type": "stream", 215 | "text": [ 216 | "(None, 14, 14, 32)\n", 217 | "(None, 7, 7, 64)\n" 218 | ] 219 | } 220 | ], 221 | "source": [ 222 | "# encoder/decoder number of CNN layers and filters per layer\n", 223 | "layer_filters = [32, 64]\n", 224 | "\n", 225 | "# build the autoencoder model\n", 226 | "# first build the encoder model\n", 
227 | "inputs = Input(shape=input_shape, name='encoder_input')\n", 228 | "x = inputs\n", 229 | "# stack of Conv2D(32)-Conv2D(64)\n", 230 | "for filters in layer_filters:\n", 231 | " x = Conv2D(filters=filters,\n", 232 | " kernel_size=kernel_size,\n", 233 | " activation='relu',\n", 234 | " strides=2,\n", 235 | " padding='same')(x)\n", 236 | " shape = K.int_shape(x)\n", 237 | " print(shape)\n", 238 | " \n", 239 | "shape = K.int_shape(x) \n", 240 | "# generate latent vector\n", 241 | "x = Flatten()(x)\n", 242 | "latent = Dense(latent_dim, name='latent_vector')(x)\n", 243 | "\n", 244 | "# instantiate encoder model\n", 245 | "encoder = Model(inputs,\n", 246 | " latent,\n", 247 | " name='encoder')\n", 248 | "encoder.summary()\n", 249 | "plot_model(encoder,\n", 250 | " to_file='encoder.png',\n", 251 | " show_shapes=True) \n", 252 | " " 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "# Construcción del decodificador (decoder)" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [ 268 | "# build the decoder model\n", 269 | "latent_inputs = Input(shape=(latent_dim,), name='decoder_input')\n", 270 | "# use the shape (7, 7, 64) that was earlier saved\n", 271 | "x = Dense(shape[1] * shape[2] * shape[3])(latent_inputs)\n", 272 | "# from vector to suitable shape for transposed conv\n", 273 | "x = Reshape((shape[1], shape[2], shape[3]))(x)" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "# stack of Conv2DTranspose(64)-Conv2DTranspose(32)\n", 283 | "for filters in layer_filters[::-1]:\n", 284 | " x = Conv2DTranspose(filters=filters,\n", 285 | " kernel_size=kernel_size,\n", 286 | " activation='relu',\n", 287 | " strides=2,\n", 288 | " padding='same')(x)" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": null, 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [ 297 | "# reconstruct the input\n", 298 | "outputs = Conv2DTranspose(filters=1,\n", 299 | " kernel_size=kernel_size,\n", 300 | " activation='sigmoid',\n", 301 | " padding='same',\n", 302 | " name='decoder_output')(x)" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "# instantiate decoder model\n", 312 | "decoder = Model(latent_inputs, outputs, name='decoder')\n", 313 | "decoder.summary()\n", 314 | "plot_model(decoder, to_file='decoder.png', show_shapes=True)" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": null, 320 | "metadata": {}, 321 | "outputs": [], 322 | "source": [ 323 | "# autoencoder = encoder + decoder\n", 324 | "# instantiate autoencoder model\n", 325 | "autoencoder = Model(inputs,\n", 326 | " decoder(encoder(inputs)),\n", 327 | " name='autoencoder')\n", 328 | "autoencoder.summary()\n", 329 | "plot_model(autoencoder,\n", 330 | " to_file='autoencoder.png',\n", 331 | " show_shapes=True)" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": {}, 338 | "outputs": [], 339 | "source": [ 340 | "# Mean Square Error (MSE) loss function, Adam optimizer\n", 341 | "autoencoder.compile(loss='mse', optimizer='adam')\n", 342 | "\n" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "metadata": {}, 349 | "outputs": [], 350 | "source": [ 351 | "# train the 
autoencoder\n", 352 | "autoencoder.fit(x_train,\n", 353 | " x_train,\n", 354 | " validation_data=(x_test, x_test),\n", 355 | " epochs=1,\n", 356 | " batch_size=batch_size)" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": null, 362 | "metadata": {}, 363 | "outputs": [], 364 | "source": [ 365 | "# predict the autoencoder output from test data\n", 366 | "x_decoded = autoencoder.predict(x_test)" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": null, 372 | "metadata": {}, 373 | "outputs": [], 374 | "source": [ 375 | "# display the 1st 8 test input and decoded images\n", 376 | "imgs = np.concatenate([x_test[:8], x_decoded[:8]])\n", 377 | "imgs = imgs.reshape((4, 4, image_size, image_size))\n", 378 | "imgs = np.vstack([np.hstack(i) for i in imgs])\n", 379 | "plt.figure()\n", 380 | "plt.axis('off')\n", 381 | "plt.title('Input: 1st 2 rows, Decoded: last 2 rows')\n", 382 | "plt.imshow(imgs, interpolation='none', cmap='gray')\n", 383 | "plt.savefig('input_and_decoded.png')\n", 384 | "plt.show()\n", 385 | "\n", 386 | "# latent = encoder.predict(x_test)\n", 387 | "# print(\"Variance:\", K.var(latent))" 388 | ] 389 | } 390 | ], 391 | "metadata": { 392 | "kernelspec": { 393 | "display_name": "Python 3", 394 | "language": "python", 395 | "name": "python3" 396 | }, 397 | "language_info": { 398 | "codemirror_mode": { 399 | "name": "ipython", 400 | "version": 3 401 | }, 402 | "file_extension": ".py", 403 | "mimetype": "text/x-python", 404 | "name": "python", 405 | "nbconvert_exporter": "python", 406 | "pygments_lexer": "ipython3", 407 | "version": "3.7.7" 408 | } 409 | }, 410 | "nbformat": 4, 411 | "nbformat_minor": 4 412 | } 413 | -------------------------------------------------------------------------------- /Cuadernos/Logistic_Reg_AutoDiff.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "

Modelo Lineal de Clasificación con JAX

\n", 8 | "\n", 9 | "

Autores

\n", 10 | "\n", 11 | "1. Alvaro Mauricio Montenegro Díaz, ammontenegrod@unal.edu.co\n", 12 | "2. Daniel Mauricio Montenegro Reyes, dextronomo@gmail.com \n", 13 | "\n", 14 | "

Fork

\n", 15 | "\n", 16 | "

Referencias

" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "

1. Introducción

\n", 24 | "\n", 25 | "Con su versión actualizada de [Autograd](https://github.com/hips/autograd), [JAX](https://jax.readthedocs.io/en/latest/notebooks/autodiff_cookbook.html) puede diferenciar automáticamente el código nativo de Python y NumPy. Puede derivarse través de un gran subconjunto de características de Python, incluidos bucles, ifs, recursión y clousures, e incluso puede tomar derivadas de derivadas de derivadas. Admite la diferenciación tanto en modo inverso como en modo directo, y los dos pueden componerse arbitrariamente en cualquier orden.\n", 26 | "\n", 27 | "Lo nuevo es que JAX usa [XLA](https://www.tensorflow.org/xla) para compilar y ejecutar su código NumPy en aceleradores, como GPU y TPU. La compilación ocurre de forma predeterminada, con las llamadas de la biblioteca compiladas y ejecutadas justo a tiempo. Pero JAX incluso le permite compilar justo a tiempo sus propias funciones de Python en núcleos optimizados para XLA utilizando una API de una función. La compilación y la diferenciación automática se pueden componer de forma arbitraria, por lo que puede expresar algoritmos sofisticados y obtener el máximo rendimiento sin tener que abandonar Python." 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "# !pip install --upgrade jax jaxlib \n", 37 | "\n", 38 | "from __future__ import print_function\n", 39 | "import jax.numpy as np\n", 40 | "from jax import grad, jit, vmap\n", 41 | "from jax import random\n", 42 | "key = random.PRNGKey(0)\n", 43 | "# Current convention is to import original numpy as \"onp\"\n", 44 | "import numpy as onp\n", 45 | "import itertools\n", 46 | "\n", 47 | "\n", 48 | "#import random\n", 49 | "#import jax\n" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "

2. Función de Predicción

\n", 57 | "\n" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 84, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "def sigmoid(x):\n", 67 | " return 0.5*(np.tanh(x/2)+1)\n", 68 | "# more stable than 1.0/(1+np.exp(-x))\n", 69 | "\n", 70 | "# outputs probability of a label being true\n", 71 | "def predict(W,b,inputs):\n", 72 | " return sigmoid(np.dot(inputs,W)+b)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "

3. Función de Pérdida. Entropía cruzada

" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 84, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "\n", 89 | "# training loss: -log likelihood of trainig examples\n", 90 | "def loss(W,b,x,y):\n", 91 | " preds = predict(W,b,x)\n", 92 | " label_probs = preds*y + (1-preds)*(1-y)\n", 93 | " return -np.sum(np.log(label_probs))\n", 94 | "\n", 95 | "# initialize coefficients\n", 96 | "key, W_key, b_key = random.split(key,3)\n", 97 | "W = random.normal(key, (3,))\n", 98 | "b = random.normal(key,())" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "

4. Ejemplo. Datos de Juguete

" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "# Build a toy dataset\n", 115 | "inputs = np.array([[0.52, 1.12, 0.77],\n", 116 | " [0.88, -1.08, 0.15],\n", 117 | " [0.52, 0.06, -1.30],\n", 118 | " [0.74, -2.49, 1.39]])\n", 119 | "targets = np.array([True, True, False, True])" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "

5. Gradiente

" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "Usaremos la funcion *grad* con sus argumentos para diferenciar la función con respecto a sus parámetros ṕosicionales" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 52, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "# compile with jit\n", 143 | "# argsnums define positional params to derive with respect to\n", 144 | "grad_loss = jit(grad(loss,argnums=(0,1)))" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 53, 150 | "metadata": {}, 151 | "outputs": [ 152 | { 153 | "name": "stdout", 154 | "output_type": "stream", 155 | "text": [ 156 | "W_grad = [-0.13325673 0.7287398 -1.7607927 ]\n", 157 | "b_grad = 0.022453208\n" 158 | ] 159 | } 160 | ], 161 | "source": [ 162 | "W_grad, b_grad = grad_loss(W,b,inputs, targets)\n", 163 | "print(\"W_grad = \", W_grad)\n", 164 | "print(\"b_grad = \", b_grad)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "

6. Entrenamiento del modelo

" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 54, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "# train function\n", 181 | "def train(W,b,x,y, lr= 0.12):\n", 182 | " gradient = grad_loss(W,b,inputs,targets) \n", 183 | " W_grad, b_grad = grad_loss(W,b,inputs,targets)\n", 184 | " W -= W_grad*lr\n", 185 | " b -= b_grad*lr\n", 186 | " return(W,b)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 55, 192 | "metadata": { 193 | "collapsed": true, 194 | "jupyter": { 195 | "outputs_hidden": true 196 | } 197 | }, 198 | "outputs": [ 199 | { 200 | "name": "stdout", 201 | "output_type": "stream", 202 | "text": [ 203 | "Epoch 0: train loss 2.3193581104278564\n", 204 | "Epoch 1: train loss 2.01922345161438\n", 205 | "Epoch 2: train loss 1.7796587944030762\n", 206 | "Epoch 3: train loss 1.5825984477996826\n", 207 | "Epoch 4: train loss 1.4183536767959595\n", 208 | "Epoch 5: train loss 1.2804994583129883\n", 209 | "Epoch 6: train loss 1.1641706228256226\n", 210 | "Epoch 7: train loss 1.0654593706130981\n", 211 | "Epoch 8: train loss 0.9811764359474182\n", 212 | "Epoch 9: train loss 0.9087210297584534\n", 213 | "Epoch 10: train loss 0.8459861278533936\n", 214 | "Epoch 11: train loss 0.7912724614143372\n", 215 | "Epoch 12: train loss 0.7432132959365845\n", 216 | "Epoch 13: train loss 0.7007092833518982\n", 217 | "Epoch 14: train loss 0.6628734469413757\n", 218 | "Epoch 15: train loss 0.628989040851593\n", 219 | "Epoch 16: train loss 0.5984709858894348\n", 220 | "Epoch 17: train loss 0.5708418488502502\n", 221 | "Epoch 18: train loss 0.5457080006599426\n", 222 | "Epoch 19: train loss 0.522742509841919\n" 223 | ] 224 | } 225 | ], 226 | "source": [ 227 | "# \n", 228 | "weights, biases = [], []\n", 229 | "train_loss= []\n", 230 | "epochs = 20\n", 231 | "\n", 232 | "train_loss.append(loss(W,b,inputs,targets))\n", 233 | "\n", 234 | "for epoch in range(epochs):\n", 235 | " W,b = train(W,b,inputs, targets)\n", 236 | " weights.append(W)\n", 237 | " biases.append(b)\n", 238 | " losss = loss(W,b,inputs,targets)\n", 239 | " train_loss.append(losss)\n", 240 | " print(f\"Epoch {epoch}: train loss {losss}\")" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 60, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "name": "stdout", 250 | "output_type": "stream", 251 | "text": [ 252 | "weights\n", 253 | "[ 0.5526737 -0.0814647 -0.11806437]\n", 254 | "[ 0.55927765 -0.13807121 0.06845167]\n", 255 | "[ 0.5614956 -0.17740442 0.2367465 ]\n", 256 | "[ 0.561902 -0.2063056 0.39015874]\n", 257 | "[ 0.5618833 -0.22850716 0.53068244]\n", 258 | "[ 0.5621856 -0.24620296 0.6597552 ]\n", 259 | "[ 0.56318307 -0.2607605 0.77857214]\n", 260 | "[ 0.56502855 -0.27307138 0.888201 ]\n", 261 | "[ 0.5677453 -0.2837374 0.9896157]\n", 262 | "[ 0.57128537 -0.29317585 1.0837021 ]\n", 263 | "[ 0.5755653 -0.30168238 1.1712576 ]\n", 264 | "[ 0.58048826 -0.30947018 1.252992 ]\n", 265 | "[ 0.5859567 -0.3166952 1.3295317]\n", 266 | "[ 0.59187865 -0.32347307 1.4014258 ]\n", 267 | "[ 0.59817106 -0.32989037 1.4691548 ]\n", 268 | "[ 0.60476077 -0.33601263 1.5331378 ]\n", 269 | "[ 0.61158454 -0.34188995 1.5937407 ]\n", 270 | "[ 0.6185881 -0.34756085 1.6512834 ]\n", 271 | "[ 0.6257253 -0.3530553 1.706046 ]\n", 272 | "[ 0.63295746 -0.3583968 1.7582744 ]\n", 273 | "biases\n", 274 | "0.8808514\n", 275 | "0.8669749\n", 276 | "0.8486012\n", 277 | "0.8292053\n", 278 | "0.81065005\n", 279 | "0.7939027\n", 280 | "0.7793964\n", 281 | "0.7672425\n", 282 | 
"0.7573656\n", 283 | "0.74959135\n", 284 | "0.7437017\n", 285 | "0.7394684\n", 286 | "0.73667145\n", 287 | "0.73510873\n", 288 | "0.7345997\n", 289 | "0.73498607\n", 290 | "0.7361306\n", 291 | "0.7379152\n", 292 | "0.74023885\n", 293 | "0.7430152\n" 294 | ] 295 | } 296 | ], 297 | "source": [ 298 | "print('weights')\n", 299 | "for weight in weights:\n", 300 | " print(weight)\n", 301 | "print('biases')\n", 302 | "for bias in biases:\n", 303 | " print(bias)" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 71, 309 | "metadata": {}, 310 | "outputs": [ 311 | { 312 | "name": "stdout", 313 | "output_type": "stream", 314 | "text": [ 315 | "[-0.06078603 0.04339207 -0.41592562]\n" 316 | ] 317 | } 318 | ], 319 | "source": [ 320 | "print(grad(loss)(W,b,inputs,targets))" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "### Calculando el valor de la función y el gradiente con value_and_grad" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 82, 333 | "metadata": {}, 334 | "outputs": [ 335 | { 336 | "name": "stdout", 337 | "output_type": "stream", 338 | "text": [ 339 | "loss value: 0.5227425\n", 340 | "gradient value: (DeviceArray([-0.06078603, 0.04339207, -0.41592562], dtype=float32), DeviceArray(-0.02629587, dtype=float32))\n" 341 | ] 342 | } 343 | ], 344 | "source": [ 345 | "from jax import value_and_grad\n", 346 | "loss_val, Wb_grad = value_and_grad(loss,(0,1))(W,b,inputs, targets)\n", 347 | "print('loss value: ', loss_val)\n", 348 | "print('gradient value: ', Wb_grad)" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": null, 354 | "metadata": {}, 355 | "outputs": [], 356 | "source": [] 357 | } 358 | ], 359 | "metadata": { 360 | "kernelspec": { 361 | "display_name": "Python 3", 362 | "language": "python", 363 | "name": "python3" 364 | }, 365 | "language_info": { 366 | "codemirror_mode": { 367 | "name": "ipython", 368 | "version": 3 369 | }, 370 | "file_extension": ".py", 371 | "mimetype": "text/x-python", 372 | "name": "python", 373 | "nbconvert_exporter": "python", 374 | "pygments_lexer": "ipython3", 375 | "version": "3.7.5" 376 | } 377 | }, 378 | "nbformat": 4, 379 | "nbformat_minor": 4 380 | } 381 | -------------------------------------------------------------------------------- /Cuadernos/Convolucion_Redes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Redes Neuronales Convolucionadas (RNC)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# Autor\n", 15 | "\n", 16 | "1. Alvaro Mauricio Montenegro Díaz, ammontenegrod@unal.edu.co\n", 17 | "2. Daniel Mauricio Montenegro Reyes, dextronomo@gmail.com " 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "# References \n", 32 | "\n", 33 | "1. Ian Goodfellow, Yosua Bengio and Aaron Courville, *Deep Learning*, MIT press, 2016.\n", 34 | "2. Vincent Doumolin and Francesco Visin, *A guide to convolution arithmetic for deep learning*, ArXiv:1603.07285v2, 2018\n", 35 | "3. 
https://towardsdatascience.com/applied-deep-learning-part-4-convolutional-neural-networks-584bc134c1e2" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "\n", 43 | "\n" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "# Introducción\n", 51 | "\n", 52 | " \n", 53 | "\n", 54 | "Las redes RNC son un tipo especializado de red neuronal para procesar datos que tienen una topología conocida similar a una cuadrícula. Los ejemplos incluyen datos de series temporales, que pueden considerarse como una cuadrícula 1D que toma muestras a intervalos de tiempo regulares, y datos de imagen, que pueden considerarse como una cuadrícula de píxeles en 2D.\n", 55 | "\n", 56 | "\n", 57 | "El nombre \"red neuronal convolucional\" indica que la red emplea una operación matemática llamada convolución. La convolución es un tipo especializado de operación lineal. Las redes convolucionales son simplemente redes neuronales que utilizan convolución en lugar de una multiplicación matricial general en al menos una de sus capas.\n" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "# La operación de convolución\n", 65 | "\n", 66 | "En su forma más general, la convolución es una operación sobre dos funciones de argumento real.\n", 67 | "\n", 68 | "\n", 69 | "Para motivar la definición de convolución, comenzamos con ejemplos de dos funciones que podríamos usar.\n", 70 | "\n", 71 | "\n", 72 | "Supongamos que estamos rastreando la ubicación de una nave espacial con un sensor láser. Nuestro\n", 73 | " sensor láser proporciona una salida única $ x(t) $, la posición de la nave espacial en el momento\n", 74 | "$ t $. Tanto $ x $ como $ t $ tienen un valor real, es decir, podemos obtener una lectura diferente del sensor\n", 75 | "láser en cualquier instante en el tiempo.\n", 76 | "\n", 77 | "\n", 78 | "Ahora suponga que nuestro sensor láser es algo ruidoso. Para obtener una estimación menos ruidosa de la posición de la nave espacial, una buena idea es promediar varias mediciones.\n", 79 | "\n", 80 | "Por supuesto, las mediciones más recientes son más relevantes, por lo que usaremos un promedio ponderado que otorgue más peso a las mediciones recientes.\n", 81 | "\n", 82 | "Podemos hacer esto con una función de ponderación $ w(a) $, donde $ a $ es la edad de una medición.\n", 83 | "\n", 84 | "Si aplicamos una operación de promedio ponderado en cada momento, obtenemos una nueva\n", 85 | "función que proporciona una estimación suavizada de la posición $s$ de la nave espacial:\n", 86 | "\n", 87 | "$$\n", 88 | "s(t) = \int x(a) w(t-a)da\n", 89 | "$$\n", 90 | "\n", 91 | "Observe que si $a_1,\ldots, a_n$ es una muestra de la distribución cuya función de densidad es $w$, entonces se tiene que\n", 92 | "\n", 93 | "$$\n", 94 | "s(t)\approx \tfrac{1}{n}\sum_{i=1}^n x(t-a_i)\n", 95 | "$$\n", 96 | "\n", 97 | "Esta operación se llama **convolución**. La operación de convolución se denota típicamente con un asterisco:\n", 98 | "\n", 99 | "$$\n", 100 | "s(t) = (x ∗ w)(t)\n", 101 | "$$\n", 102 | "\n", 103 | "En el ejemplo, $ w $ debe ser una función de densidad de probabilidad válida, o\n", 104 | "el resultado no es un promedio ponderado.\n", 105 | "\n", 106 | "\n", 107 | "\n", 108 | "Además, $ w $ debe ser 0 para todos los argumentos negativos, o miraría hacia el futuro, lo que presumiblemente está más allá de nuestras capacidades. Sin embargo, estas limitaciones son particulares de nuestro ejemplo. 
En general, la convolución se define para cualquier función para la cual se define la integral anterior, y puede modificarse para otros fines además de tomar promedios ponderados.\n", 109 | "\n", 110 | "$\leadsto$ **Terminología**\n", 111 | "1. $x$ es la entrada **input**.\n", 112 | "2. $w$ es el **kernel** o filtro\n", 113 | "3. La salida es el **feature map**. \n", 114 | "\n", 115 | "**Discretización**\n", 116 | "\n", 117 | "En el ejemplo, la idea de un sensor láser que pueda proporcionar mediciones en cada instante en el tiempo no es realista.\n", 118 | "\n", 119 | "Por lo general, cuando trabajamos con **datos** en una computadora, el tiempo estará discretizado y nuestro sensor proporcionará datos a intervalos regulares. En el ejemplo, podría ser más realista suponer que nuestro láser proporciona una medición una vez por segundo. El índice de tiempo $ t $ solo puede tomar valores enteros. Si ahora suponemos que $x$ y $w$ se definen solo para valores enteros de $t$, se puede definir la convolución discreta:\n", 120 | "\n", 121 | "$$\n", 122 | "s(t) = (x ∗ w)(t) = \sum_{a=-\infty}^{\infty} x(a)w(t-a)\n", 123 | "$$" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "# Cuestiones prácticas\n", 131 | "\n", 132 | "$ \leadsto $ **Sumas finitas**\n", 133 | "\n", 134 | "En las aplicaciones de aprendizaje automático, la entrada suele ser un tensor de datos y el núcleo suele ser una matriz multidimensional de parámetros que son aprendidos por el algoritmo de aprendizaje.\n", 135 | "\n", 136 | "\n", 137 | "Como cada elemento de la entrada y del núcleo debe almacenarse explícitamente\n", 138 | "por separado, generalmente asumimos que estas funciones son cero en todas partes excepto\n", 139 | "en un conjunto finito de puntos para los que almacenamos los valores. Esto significa que en la práctica se\n", 140 | "puede implementar la suma infinita como una suma sobre un número finito de\n", 141 | "elementos de la matriz.\n", 142 | "\n", 143 | "A menudo usamos convoluciones en más de un eje a la vez. Por\n", 144 | "ejemplo, si usamos una imagen bidimensional $ I $ como entrada, probablemente también queremos usar un kernel bidimensional $ K $:\n", 145 | "\n", 146 | "$$\n", 147 | "S(i, j) = (I ∗ K)(i, j) = \sum_{m} \sum_{n} I(m, n) K(i-m, j-n).\n", 148 | "$$\n", 149 | "\n", 150 | "$ \leadsto $ **Conmutativo**\n", 151 | "\n", 152 | "La convolución es conmutativa. Podemos escribir\n", 153 | "\n", 154 | "$$\n", 155 | "S(i, j) = (K ∗ I)(i, j) = \sum_{m} \sum_{n} I(i-m, j-n) K(m, n).\n", 156 | "$$\n", 157 | "\n", 158 | "Esta propiedad es conveniente para el estudio matemático de convoluciones.\n", 159 | "\n", 160 | "\n", 161 | "$ \leadsto $ **Correlación cruzada**\n", 162 | "\n", 163 | "Por otro lado, muchas bibliotecas de redes neuronales implementan una\n", 164 | "función relacionada llamada correlación cruzada, que es lo mismo que la convolución\n", 165 | "pero sin voltear el kernel:\n", 166 | "\n", 167 | "$$\n", 168 | "S(i, j) = (I ∗ K)(i, j) = \sum_m \sum_n I(i + m, j + n) K(m, n).\n", 169 | "$$\n", 170 | "\n", 171 | "Muchas bibliotecas de aprendizaje automático implementan correlación cruzada pero la llaman convolución. 
Haremos lo mismo.\n", 172 | "\n", 173 | "# " 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "# Estructuras intrínsecas de datos donde la convolución es útil \n", 181 | "\n", 182 | "Las imágenes, los clips de sonido y muchos otros tipos de datos similares tienen estructura característica común.\n", 183 | "\n", 184 | "Más formalmente, comparten estas propiedades importantes:\n", 185 | "\n", 186 | "1. Se almacenan como matrices multidimensionales (tensores).\n", 187 | "2. Cuentan con uno o más ejes para los que importa el orden (por ejemplo, ejes de ancho y alto para una imagen, eje de tiempo para un clip de sonido).\n", 188 | "3. Un eje, llamado eje del canal (channel), se usa para acceder a diferentes vistas de los datos (por ejemplo, los canales rojo, verde y azul de una imagen en color, o los canales izquierdo y derecho de una pista de audio estéreo)." 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "# Convoluciones discretas\n", 196 | "\n", 197 | "Supongamos que tenemos un kernel como el mostrado en la imagen. El kernel es mostrado como una distribución no normalizada. Los cálculos son como se muestran en la imagen." 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "
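Como complemento a la imagen que sigue, un esbozo en numpy (hipotético, no es parte del cuaderno original) del cálculo "válido" de la correlación cruzada 2D que las bibliotecas llaman convolución:

```python
import numpy as np

def corr2d(I, K):
    # correlación cruzada 2D "válida": la ventana del kernel recorre la entrada
    hi, wi = I.shape
    hk, wk = K.shape
    S = np.zeros((hi - hk + 1, wi - wk + 1))
    for i in range(S.shape[0]):
        for j in range(S.shape[1]):
            # producto elemento a elemento de la ventana con el kernel, y suma
            S[i, j] = (I[i:i+hk, j:j+wk] * K).sum()
    return S

I = np.arange(16.0).reshape(4, 4)
K = np.array([[1.0, 0.0], [0.0, 1.0]])
print(corr2d(I, K))   # mapa de características de salida de 3 x 3
```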
\n", 205 | "
\n", 206 | "\n", 207 | "
\n", 208 | "
\n", 209 | "

Cálculo de una convolución

\n", 210 | "
\n", 211 | "
" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "La siguiente figura proporciona un ejemplo de una convolución discreta. El azul claro\n", 219 | "La cuadrícula se denomina mapa de entidades de entrada. Para mantener el dibujo simple, una sola entrada\n", 220 | "el mapa de características está representado, pero no es raro tener múltiples características\n", 221 | "mapas apilados uno sobre otro, como en caso de la imágenes de color.\n", 222 | "\n", 223 | "
\n", 224 | "
\n", 225 | "\n", 226 | "
\n", 227 | "
\n", 228 | "

Cálculo completo de una convolución

\n", 229 | "
\n", 230 | "
" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "\n", 238 | "En cada ubicación, se calcula el producto entre cada elemento del núcleo y el elemento de entrada que se superpone y los resultados se resumen para obtener la salida en la ubicación actual.\n", 239 | "\n", 240 | "El procedimiento puede repetirse usando diferentes núcleos para formar tantas características de salida\n", 241 | "mapas según lo deseado. Los resultados finales de este procedimiento se denominan mapas de características de salida.\n", 242 | "\n", 243 | "Si hay múltiples mapas de de entrada (digamos tres por cada imagen), el núcleo puede que ser tridimensional, o, de manera equivalente, cada uno de los mapas s estará filtrado con un núcleo distinto, y los mapas resultantes serán resumido por elementos para producir el mapa de características de salida." 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "## Relleno (padding)\n", 251 | "\n", 252 | "¿Qué sucede cuando aplica tres filtros de 5 x 5 x 3 a un volumen de entrada de 32 x 32 x 3? El volumen de salida sería 28 x 28 x 3. Observe que las dimensiones espaciales disminuyen. A medida que seguimos aplicando capas *conv*, el tamaño del volumen disminuirá más rápido de lo que quisiéramos. \n", 253 | "\n", 254 | "En las primeras capas de nuestra red, queremos preservar la mayor cantidad de información sobre el volumen de entrada original para poder extraer esas características de bajo nivel. Supongamos que queremos aplicar la misma capa *conv* pero queremos que el volumen de salida permanezca 32 x 32 x 3. Para hacer esto, podemos aplicar un relleno cero de tamaño 2 a esa capa. \n", 255 | "\n", 256 | "El relleno cero rellena el volumen de entrada con ceros alrededor del borde. Si pensamos en un relleno cero de dos, esto daría como resultado un volumen de entrada de 36 x 36 x 3.\n", 257 | "\n", 258 | "
\n", 259 | "
\n", 260 | "\n", 261 | "
\n", 262 | "
\n", 263 | "

Ilustración de relleno (padding)

\n", 264 | "
\n", 265 | "
\n" 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": {}, 271 | "source": [ 272 | "## Saltos (stripes)\n", 273 | "\n", 274 | "En este caso, deslizamos nuestra ventana por 1 píxel a la vez. En algunos casos, las personas deslizan las ventanas más de 1 píxel. Este número se llama zancada (stripe).\n", 275 | "\n" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "## Agregación (pooling)\n", 283 | "\n", 284 | "\n", 285 | "La capa de agrupación se usa principalmente inmediatamente después de la capa convolucional para reducir el tamaño espacial (solo ancho y alto, no profundidad). Esto reduce el número de parámetros, por lo tanto, se reduce el cálculo. Además, un número menor de parámetros evita el sobreajuste. La forma más común de agrupación es la agrupación máxima, donde tomamos un filtro de tamaño F * F y aplicamos la operación máxima sobre la parte de tamaño de F * F de la imagen.\n", 286 | "\n", 287 | "\n", 288 | "
\n", 289 | "
\n", 290 | "\n", 291 | "
\n", 292 | "
\n", 293 | "

Cálculo del pooling

\n", 294 | "
\n", 295 | "
\n" 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "# Arquitecture de redes RNC\n", 303 | "\n", 304 | "\n", 305 | "Todos los modelos RCN siguen una arquitectura similar, como se muestra en la figura a continuación.\n", 306 | "\n", 307 | "Hay una imagen de entrada con la que estamos trabajando. Realizamos una serie de operaciones de convolución + agrupación, seguidas de varias capas completamente conectadas. Si estamos realizando una clasificación multiclase, la salida es softmax. Ahora nos sumergiremos en cada componente." 308 | ] 309 | }, 310 | { 311 | "cell_type": "markdown", 312 | "metadata": {}, 313 | "source": [ 314 | "
\n", 315 | "
\n", 316 | "\n", 317 | "
\n", 318 | "
\n", 319 | "

Red Neuronal Convolucionada típica

\n", 320 | "
\n", 321 | "
" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | "
\n", 329 | "
\n", 330 | "\n", 331 | "
\n", 332 | "
\n", 333 | "

Ejemplo de funcionamiento

\n", 334 | "
\n", 335 | "
" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 | "metadata": {}, 342 | "outputs": [], 343 | "source": [] 344 | } 345 | ], 346 | "metadata": { 347 | "kernelspec": { 348 | "display_name": "Python 3", 349 | "language": "python", 350 | "name": "python3" 351 | }, 352 | "language_info": { 353 | "codemirror_mode": { 354 | "name": "ipython", 355 | "version": 3 356 | }, 357 | "file_extension": ".py", 358 | "mimetype": "text/x-python", 359 | "name": "python", 360 | "nbconvert_exporter": "python", 361 | "pygments_lexer": "ipython3", 362 | "version": "3.7.7" 363 | } 364 | }, 365 | "nbformat": 4, 366 | "nbformat_minor": 4 367 | } 368 | -------------------------------------------------------------------------------- /Codigos/Keras_api_funcional_resnet/resnet-cifar10-2.2.1.py: -------------------------------------------------------------------------------- 1 | """Trains a ResNet on the CIFAR10 dataset. 2 | 3 | ResNet v1 4 | [a] Deep Residual Learning for Image Recognition 5 | https://arxiv.org/pdf/1512.03385.pdf 6 | 7 | ResNet v2 8 | [b] Identity Mappings in Deep Residual Networks 9 | https://arxiv.org/pdf/1603.05027.pdf 10 | """ 11 | 12 | from __future__ import absolute_import 13 | from __future__ import division 14 | from __future__ import print_function 15 | 16 | from tensorflow.keras.layers import Dense, Conv2D 17 | from tensorflow.keras.layers import BatchNormalization, Activation 18 | from tensorflow.keras.layers import AveragePooling2D, Input 19 | from tensorflow.keras.layers import Flatten, add 20 | from tensorflow.keras.optimizers import Adam 21 | from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler 22 | from tensorflow.keras.callbacks import ReduceLROnPlateau 23 | from tensorflow.keras.preprocessing.image import ImageDataGenerator 24 | from tensorflow.keras.regularizers import l2 25 | from tensorflow.keras.models import Model 26 | from tensorflow.keras.datasets import cifar10 27 | from tensorflow.keras.utils import plot_model 28 | from tensorflow.keras.utils import to_categorical 29 | import numpy as np 30 | import os 31 | 32 | # training parameters 33 | batch_size = 32 # orig paper trained all networks with batch_size=128 34 | epochs = 200 35 | data_augmentation = True 36 | num_classes = 10 37 | 38 | # subtracting pixel mean improves accuracy 39 | subtract_pixel_mean = True 40 | 41 | # Model parameter 42 | # ---------------------------------------------------------------------------- 43 | # | | 200-epoch | Orig Paper| 200-epoch | Orig Paper| sec/epoch 44 | # Model | n | ResNet v1 | ResNet v1 | ResNet v2 | ResNet v2 | GTX1080Ti 45 | # |v1(v2)| %Accuracy | %Accuracy | %Accuracy | %Accuracy | v1 (v2) 46 | # ---------------------------------------------------------------------------- 47 | # ResNet20 | 3 (2)| 92.16 | 91.25 | ----- | ----- | 35 (---) 48 | # ResNet32 | 5(NA)| 92.46 | 92.49 | NA | NA | 50 ( NA) 49 | # ResNet44 | 7(NA)| 92.50 | 92.83 | NA | NA | 70 ( NA) 50 | # ResNet56 | 9 (6)| 92.71 | 93.03 | 93.01 | NA | 90 (100) 51 | # ResNet110 |18(12)| 92.65 | 93.39+-.16| 93.15 | 93.63 | 165(180) 52 | # ResNet164 |27(18)| ----- | 94.07 | ----- | 94.54 | ---(---) 53 | # ResNet1001| (111)| ----- | 92.39 | ----- | 95.08+-.14| ---(---) 54 | # --------------------------------------------------------------------------- 55 | n = 3 56 | 57 | # model version 58 | # orig paper: version = 1 (ResNet v1), 59 | # improved ResNet: version = 2 (ResNet v2) 60 | version = 1 61 | 62 | # computed depth from supplied model parameter n 63 | if version 
== 1: 64 | depth = n * 6 + 2 65 | elif version == 2: 66 | depth = n * 9 + 2 67 | 68 | # model name, depth and version 69 | model_type = 'ResNet%dv%d' % (depth, version) 70 | 71 | # load the CIFAR10 data. 72 | (x_train, y_train), (x_test, y_test) = cifar10.load_data() 73 | 74 | # input image dimensions. 75 | input_shape = x_train.shape[1:] 76 | 77 | # normalize data. 78 | x_train = x_train.astype('float32') / 255 79 | x_test = x_test.astype('float32') / 255 80 | 81 | # if subtract pixel mean is enabled 82 | if subtract_pixel_mean: 83 | x_train_mean = np.mean(x_train, axis=0) 84 | x_train -= x_train_mean 85 | x_test -= x_train_mean 86 | 87 | print('x_train shape:', x_train.shape) 88 | print(x_train.shape[0], 'train samples') 89 | print(x_test.shape[0], 'test samples') 90 | print('y_train shape:', y_train.shape) 91 | 92 | # convert class vectors to binary class matrices. 93 | y_train = to_categorical(y_train, num_classes) 94 | y_test = to_categorical(y_test, num_classes) 95 | 96 | 97 | def lr_schedule(epoch): 98 | """Learning Rate Schedule 99 | 100 | Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs. 101 | Called automatically every epoch as part of callbacks during training. 102 | 103 | # Arguments 104 | epoch (int): current epoch number (0-indexed) 105 | 106 | # Returns 107 | lr (float32): learning rate 108 | """ 109 | lr = 1e-3 110 | if epoch > 180: 111 | lr *= 0.5e-3 112 | elif epoch > 160: 113 | lr *= 1e-3 114 | elif epoch > 120: 115 | lr *= 1e-2 116 | elif epoch > 80: 117 | lr *= 1e-1 118 | print('Learning rate: ', lr) 119 | return lr 120 | 121 | 122 | def resnet_layer(inputs, 123 | num_filters=16, 124 | kernel_size=3, 125 | strides=1, 126 | activation='relu', 127 | batch_normalization=True, 128 | conv_first=True): 129 | """2D Convolution-Batch Normalization-Activation stack builder 130 | 131 | Arguments: 132 | inputs (tensor): input tensor from input image or previous layer 133 | num_filters (int): Conv2D number of filters 134 | kernel_size (int): Conv2D square kernel dimensions 135 | strides (int): Conv2D square stride dimensions 136 | activation (string): activation name 137 | batch_normalization (bool): whether to include batch normalization 138 | conv_first (bool): conv-bn-activation (True) or 139 | bn-activation-conv (False) 140 | 141 | Returns: 142 | x (tensor): tensor as input to the next layer 143 | """ 144 | conv = Conv2D(num_filters, 145 | kernel_size=kernel_size, 146 | strides=strides, 147 | padding='same', 148 | kernel_initializer='he_normal', 149 | kernel_regularizer=l2(1e-4)) 150 | 151 | x = inputs 152 | if conv_first: 153 | x = conv(x) 154 | if batch_normalization: 155 | x = BatchNormalization()(x) 156 | if activation is not None: 157 | x = Activation(activation)(x) 158 | else: 159 | if batch_normalization: 160 | x = BatchNormalization()(x) 161 | if activation is not None: 162 | x = Activation(activation)(x) 163 | x = conv(x) 164 | return x 165 | 166 | 167 | def resnet_v1(input_shape, depth, num_classes=10): 168 | """ResNet Version 1 Model builder [a] 169 | 170 | Stacks of 2 x (3 x 3) Conv2D-BN-ReLU 171 | Last ReLU is after the shortcut connection. 172 | At the beginning of each stage, the feature map size is halved 173 | (downsampled) by a convolutional layer with strides=2, while 174 | the number of filters is doubled. Within each stage, 175 | the layers have the same number of filters and the 176 | same feature map sizes.
177 | Feature map sizes: 178 | stage 0: 32x32, 16 179 | stage 1: 16x16, 32 180 | stage 2: 8x8, 64 181 | The number of parameters is approx. the same as in Table 6 of [a]: 182 | ResNet20 0.27M 183 | ResNet32 0.46M 184 | ResNet44 0.66M 185 | ResNet56 0.85M 186 | ResNet110 1.7M 187 | 188 | Arguments: 189 | input_shape (tensor): shape of input image tensor 190 | depth (int): number of core convolutional layers 191 | num_classes (int): number of classes (CIFAR10 has 10) 192 | 193 | Returns: 194 | model (Model): Keras model instance 195 | """ 196 | if (depth - 2) % 6 != 0: 197 | raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])') 198 | # start model definition. 199 | num_filters = 16 200 | num_res_blocks = int((depth - 2) / 6) 201 | 202 | inputs = Input(shape=input_shape) 203 | x = resnet_layer(inputs=inputs) 204 | # instantiate the stack of residual units 205 | for stack in range(3): 206 | for res_block in range(num_res_blocks): 207 | strides = 1 208 | # first layer but not first stack 209 | if stack > 0 and res_block == 0: 210 | strides = 2 # downsample 211 | y = resnet_layer(inputs=x, 212 | num_filters=num_filters, 213 | strides=strides) 214 | y = resnet_layer(inputs=y, 215 | num_filters=num_filters, 216 | activation=None) 217 | # first layer but not first stack 218 | if stack > 0 and res_block == 0: 219 | # linear projection residual shortcut 220 | # connection to match changed dims 221 | x = resnet_layer(inputs=x, 222 | num_filters=num_filters, 223 | kernel_size=1, 224 | strides=strides, 225 | activation=None, 226 | batch_normalization=False) 227 | x = add([x, y]) 228 | x = Activation('relu')(x) 229 | num_filters *= 2 230 | 231 | # add classifier on top. 232 | # v1 does not use BN after last shortcut connection-ReLU 233 | x = AveragePooling2D(pool_size=8)(x) 234 | y = Flatten()(x) 235 | outputs = Dense(num_classes, 236 | activation='softmax', 237 | kernel_initializer='he_normal')(y) 238 | 239 | # instantiate model. 240 | model = Model(inputs=inputs, outputs=outputs) 241 | return model 242 | 243 | 244 | def resnet_v2(input_shape, depth, num_classes=10): 245 | """ResNet Version 2 Model builder [b] 246 | 247 | Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D, 248 | also known as bottleneck layers. 249 | First shortcut connection per layer is 1 x 1 Conv2D. 250 | Second and onwards shortcut connection is identity. 251 | At the beginning of each stage, 252 | the feature map size is halved (downsampled) 253 | by a convolutional layer with strides=2, 254 | while the number of filter maps is 255 | doubled. Within each stage, the layers have 256 | the same number of filters and the same filter map sizes. 257 | Feature map sizes: 258 | conv1 : 32x32, 16 259 | stage 0: 32x32, 64 260 | stage 1: 16x16, 128 261 | stage 2: 8x8, 256 262 | 263 | Arguments: 264 | input_shape (tensor): shape of input image tensor 265 | depth (int): number of core convolutional layers 266 | num_classes (int): number of classes (CIFAR10 has 10) 267 | 268 | Returns: 269 | model (Model): Keras model instance 270 | """ 271 | if (depth - 2) % 9 != 0: 272 | raise ValueError('depth should be 9n+2 (eg 110 in [b])') 273 | # start model definition.
274 | num_filters_in = 16 275 | num_res_blocks = int((depth - 2) / 9) 276 | 277 | inputs = Input(shape=input_shape) 278 | # v2 performs Conv2D with BN-ReLU 279 | # on input before splitting into 2 paths 280 | x = resnet_layer(inputs=inputs, 281 | num_filters=num_filters_in, 282 | conv_first=True) 283 | 284 | # instantiate the stack of residual units 285 | for stage in range(3): 286 | for res_block in range(num_res_blocks): 287 | activation = 'relu' 288 | batch_normalization = True 289 | strides = 1 290 | if stage == 0: 291 | num_filters_out = num_filters_in * 4 292 | # first layer and first stage 293 | if res_block == 0: 294 | activation = None 295 | batch_normalization = False 296 | else: 297 | num_filters_out = num_filters_in * 2 298 | # first layer but not first stage 299 | if res_block == 0: 300 | # downsample 301 | strides = 2 302 | 303 | # bottleneck residual unit 304 | y = resnet_layer(inputs=x, 305 | num_filters=num_filters_in, 306 | kernel_size=1, 307 | strides=strides, 308 | activation=activation, 309 | batch_normalization=batch_normalization, 310 | conv_first=False) 311 | y = resnet_layer(inputs=y, 312 | num_filters=num_filters_in, 313 | conv_first=False) 314 | y = resnet_layer(inputs=y, 315 | num_filters=num_filters_out, 316 | kernel_size=1, 317 | conv_first=False) 318 | if res_block == 0: 319 | # linear projection residual shortcut connection 320 | # to match changed dims 321 | x = resnet_layer(inputs=x, 322 | num_filters=num_filters_out, 323 | kernel_size=1, 324 | strides=strides, 325 | activation=None, 326 | batch_normalization=False) 327 | x = add([x, y]) 328 | 329 | num_filters_in = num_filters_out 330 | 331 | # add classifier on top. 332 | # v2 has BN-ReLU before Pooling 333 | x = BatchNormalization()(x) 334 | x = Activation('relu')(x) 335 | x = AveragePooling2D(pool_size=8)(x) 336 | y = Flatten()(x) 337 | outputs = Dense(num_classes, 338 | activation='softmax', 339 | kernel_initializer='he_normal')(y) 340 | 341 | # instantiate model. 342 | model = Model(inputs=inputs, outputs=outputs) 343 | return model 344 | 345 | 346 | if version == 2: 347 | model = resnet_v2(input_shape=input_shape, depth=depth) 348 | else: 349 | model = resnet_v1(input_shape=input_shape, depth=depth) 350 | 351 | model.compile(loss='categorical_crossentropy', 352 | optimizer=Adam(lr=lr_schedule(0)), 353 | metrics=['accuracy']) 354 | model.summary() 355 | plot_model(model, to_file="%s.png" % model_type, show_shapes=True) 356 | print(model_type) 357 | 358 | # prepare model saving directory. 359 | save_dir = os.path.join(os.getcwd(), 'saved_models') 360 | model_name = 'cifar10_%s_model.{epoch:03d}.h5' % model_type 361 | if not os.path.isdir(save_dir): 362 | os.makedirs(save_dir) 363 | filepath = os.path.join(save_dir, model_name) 364 | 365 | # prepare callbacks for model saving and for learning rate adjustment. 366 | checkpoint = ModelCheckpoint(filepath=filepath, 367 | monitor='val_accuracy', 368 | verbose=1, 369 | save_best_only=True) 370 | 371 | lr_scheduler = LearningRateScheduler(lr_schedule) 372 | 373 | lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), 374 | cooldown=0, 375 | patience=5, 376 | min_lr=0.5e-6) 377 | 378 | callbacks = [checkpoint, lr_reducer, lr_scheduler] 379 | 380 | # run training, with or without data augmentation.
381 | if not data_augmentation: 382 | print('Not using data augmentation.') 383 | model.fit(x_train, y_train, 384 | batch_size=batch_size, 385 | epochs=epochs, 386 | validation_data=(x_test, y_test), 387 | shuffle=True, 388 | callbacks=callbacks) 389 | else: 390 | print('Using real-time data augmentation.') 391 | # this will do preprocessing and realtime data augmentation: 392 | datagen = ImageDataGenerator( 393 | # set input mean to 0 over the dataset 394 | featurewise_center=False, 395 | # set each sample mean to 0 396 | samplewise_center=False, 397 | # divide inputs by std of dataset 398 | featurewise_std_normalization=False, 399 | # divide each input by its std 400 | samplewise_std_normalization=False, 401 | # apply ZCA whitening 402 | zca_whitening=False, 403 | # randomly rotate images in the range (deg 0 to 180) 404 | rotation_range=0, 405 | # randomly shift images horizontally 406 | width_shift_range=0.1, 407 | # randomly shift images vertically 408 | height_shift_range=0.1, 409 | # randomly flip images 410 | horizontal_flip=True, 411 | # randomly flip images 412 | vertical_flip=False) 413 | 414 | # compute quantities required for featurewise normalization 415 | # (std, mean, and principal components if ZCA whitening is applied). 416 | datagen.fit(x_train) 417 | 418 | # fit the model on the batches generated by datagen.flow(). 419 | model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size), 420 | validation_data=(x_test, y_test), 421 | epochs=epochs, verbose=1, 422 | steps_per_epoch=len(x_train)//batch_size, 423 | callbacks=callbacks) 424 | 425 | # score trained model 426 | scores = model.evaluate(x_test, 427 | y_test, 428 | batch_size=batch_size, 429 | verbose=0) 430 | print('Test loss:', scores[0]) 431 | print('Test accuracy:', scores[1]) 432 | -------------------------------------------------------------------------------- /Cuadernos/Autodif_XOR_func.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "

Automatic Differentiation with JAX
Implementation of the XOR Function

" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "

1. Introduction

\n", 15 | "\n", 16 | "With its updated version of [Autograd](https://github.com/hips/autograd), [JAX](https://jax.readthedocs.io/en/latest/notebooks/autodiff_cookbook.html) can automatically differentiate native Python and NumPy code. It can differentiate through a large subset of Python features, including loops, ifs, recursion, and closures, and it can even take derivatives of derivatives of derivatives. It supports both reverse-mode and forward-mode differentiation, and the two can be composed arbitrarily in any order.\n", 17 | "\n", 18 | "What is new is that JAX uses [XLA](https://www.tensorflow.org/xla) to compile and run your NumPy code on accelerators such as GPUs and TPUs. Compilation happens by default, with library calls compiled and executed just in time. But JAX even lets you just-in-time compile your own Python functions into XLA-optimized kernels using a one-function API. Compilation and automatic differentiation can be composed arbitrarily, so you can express sophisticated algorithms and get maximum performance without leaving Python. A minimal sketch of this composition appears below.\n" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 36, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "# !pip install --upgrade jax jaxlib \n", 28 | "from __future__ import print_function\n", 29 | "import jax.numpy as np\n", 30 | "from jax import grad, jit, vmap\n", 31 | "from jax import random\n", 32 | "key = random.PRNGKey(0)\n", 33 | "# Current convention is to import original numpy as \"onp\"\n", 34 | "import numpy as onp\n", 35 | "import itertools\n", 36 | "\n", 37 | "import random\n", 38 | "import jax\n" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "
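A minimal sketch of the composition mentioned above, using a made-up scalar function f that is not part of the original notebook:

import jax

def f(x):
    return x ** 3 + 2.0 * x   # toy example

df = jax.grad(f)              # first derivative: 3x^2 + 2
d2f = jax.grad(jax.grad(f))   # a derivative of a derivative: 6x
fast_d2f = jax.jit(d2f)       # grad and jit compose in any order

print(df(2.0), d2f(2.0), fast_d2f(2.0))   # 14.0 12.0 12.0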

2. The XOR Function

\n", 46 | "\n", 47 | "In this document we implement a neural network that computes the XOR function. This is a very famous function in the history of artificial neural networks, since it triggered their first winter.\n", 48 | "\n", 49 | "The logical XOR function is defined by\n", 50 | "\n", 51 | "$$\n", 52 | "\\begin{aligned}\n", 53 | "f(0,0) &= 0\\\\\n", 54 | "f(0,1) &= 1\\\\\n", 55 | "f(1,0) &= 1\\\\\n", 56 | "f(1,1) &= 0\n", 57 | "\\end{aligned}\n", 58 | "$$\n", 59 | "\n", 60 | "We will use a neural network with a single hidden layer of 3 neurons and a hyperbolic tangent nonlinearity, trained with the *cross-entropy* loss function (written out explicitly below) and optimized via stochastic gradient descent. Let us implement this model and the loss function. Note that the code is exactly as you would write it in standard numpy.\n", 61 | "
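For reference, the binary *cross-entropy* loss mentioned above can be written explicitly; this is the form implemented in the next section:

$$
L(y, \hat{y}) = -y \log \hat{y} - (1-y) \log(1-\hat{y}),
$$

where $y \in \{0,1\}$ is the target and $\hat{y}$ is the network output. It is the negative log-likelihood of a Bernoulli model.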

3. Required Functions

" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 37, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "# define the output activation\n", 71 | "def sigmoid(x):\n", 72 | "# more stable 0.5*(np.tanh(x/2)+1)\n", 73 | " return 1.0/(1+np.exp(-x))\n", 74 | "\n", 75 | "# define the net\n", 76 | "def net(params, x):\n", 77 | " w1, b1, w2, b2 = params\n", 78 | " hidden = np.tanh(np.dot(w1,x) + b1)\n", 79 | " return (sigmoid(np.dot(w2,hidden) + b2))\n", 80 | "\n", 81 | "# cross entropy loss function\n", 82 | "def loss(params, x,y):\n", 83 | " out = net(params,x)\n", 84 | " cross_entropy = -y * np.log(out) - (1-y)*np.log(1-out) # esta es -log likelihood\n", 85 | " return cross_entropy\n", 86 | "\n", 87 | "# Utility function for testing whether the net produces the correct\n", 88 | "# output for all possible inputs\n", 89 | "def test_all_inputs(inputs, params):\n", 90 | " predictions = [int(net(params, inp) > 0.5) for inp in inputs]\n", 91 | " for inp, out in zip(inputs, predictions):\n", 92 | " print(inp, '->', out)\n", 93 | " return (predictions == [onp.bitwise_xor(*inp) for inp in inputs])\n", 94 | "\n", 95 | "def initial_params():\n", 96 | " return [\n", 97 | " onp.random.randn(3, 2), # w1\n", 98 | " onp.random.randn(3), # b1\n", 99 | " onp.random.randn(3), # w2\n", 100 | " onp.random.randn(), #b2\n", 101 | " ]" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "Hay algunos lugares donde queremos usar numpy estándar en lugar de jax.numpy. Uno de esos lugares es con la inicialización de parámetros. Nos gustaría inicializar nuestros parámetros al azar antes de entrenar nuestra red, que no es una operación para la que necesitamos derivados o compilación. JAX usa su propia biblioteca jax.random en lugar de numpy.random que proporciona un mejor soporte para la reproducibilidad (siembra) a través de diferentes transformaciones. Dado que no necesitamos transformar la inicialización de los parámetros de ninguna manera, es más simple usar numpy.random estándar en lugar de jax.random aquí." 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "

4. jax.grad

\n", 116 | "\n", 117 | "The first transformation we will use is *jax.grad*. *jax.grad* takes a function and returns a new function that computes the gradient of the original function. By default, the gradient is taken with respect to the first argument; this can be controlled through the argnums argument of jax.grad (a small illustration appears at the end of this section). To use gradient descent, we want to be able to compute the gradient of our loss function with respect to the parameters of our neural network. " 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 39, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "loss_grad = grad(loss)\n", 127 | "\n", 128 | "# Stochastic gradient descent\n", 129 | "# Learning rate\n", 130 | "learning_rate = 1.0\n", 131 | "# all possible inputs \n", 132 | "inputs = onp.array([[0,0],[0,1],[1,0],[1,1]])\n", 133 | "targets = onp.array([0,1,1,0])\n", 134 | "ide = onp.array([0,1,2,3])\n", 135 | "# Initialize parameters randomly\n", 136 | "params = initial_params()" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 40, 142 | "metadata": {}, 143 | "outputs": [ 144 | { 145 | "data": { 146 | "text/plain": [ 147 | "[array([[ 0.25280359, -1.05972091],\n", 148 | " [-0.86363086, -0.57583006],\n", 149 | " [-0.06159423, 0.98018524]]),\n", 150 | " array([1.37862389, 0.94909999, 1.6194938 ]),\n", 151 | " array([-0.14456356, 0.42562094, -0.34343166]),\n", 152 | " -0.6278622829106619]" 153 | ] 154 | }, 155 | "execution_count": 40, 156 | "metadata": {}, 157 | "output_type": "execute_result" 158 | } 159 | ], 160 | "source": [ 161 | "params" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 41, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "name": "stdout", 171 | "output_type": "stream", 172 | "text": [ 173 | "Iteration0\n", 174 | "[0 0] -> 0\n", 175 | "[0 1] -> 0\n", 176 | "[1 0] -> 0\n", 177 | "[1 1] -> 0\n", 178 | "Iteration100\n", 179 | "[0 0] -> 0\n", 180 | "[0 1] -> 1\n", 181 | "[1 0] -> 1\n", 182 | "[1 1] -> 1\n", 183 | "Iteration200\n", 184 | "[0 0] -> 0\n", 185 | "[0 1] -> 1\n", 186 | "[1 0] -> 1\n", 187 | "[1 1] -> 0\n" 188 | ] 189 | } 190 | ], 191 | "source": [ 192 | "for n in itertools.count():\n", 193 | " # grab a single random input\n", 194 | " ix = ide[onp.random.choice(ide.shape[0])]\n", 195 | " # input\n", 196 | " x = inputs[ix]\n", 197 | " # output\n", 198 | " y = targets[ix]\n", 199 | " # get the gradient of the loss for this input/output pair\n", 200 | " grads = loss_grad(params,x,y)\n", 201 | " # update parameters via gradient descent\n", 202 | " params = [param - learning_rate * grad \n", 203 | " for param, grad in zip(params,grads) ]\n", 204 | " # Every 100 iterations, check whether we've solved XOR\n", 205 | " if not n %100:\n", 206 | " print('Iteration{}'.format(n))\n", 207 | " if test_all_inputs(inputs, params):\n", 208 | " break\n", 209 | " " 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 42, 215 | "metadata": {}, 216 | "outputs": [ 217 | { 218 | "data": { 219 | "text/plain": [ 220 | "[DeviceArray([[ 3.7044861, -2.387884 ],\n", 221 | " [-2.489525 , -3.1954105],\n", 222 | " [-2.625351 , 3.7185783]], dtype=float32),\n", 223 | " DeviceArray([0.79461575, 0.5428428 , 1.2141498 ], dtype=float32),\n", 224 | " DeviceArray([-3.5103402, -2.874178 , -3.5018072], dtype=float32),\n", 225 | " DeviceArray(0.32925892, dtype=float32)]" 226 | ] 227 | }, 228 | "execution_count": 42, 229 | "metadata": {}, 230 | "output_type": "execute_result" 231 | } 232 | ], 233 |
"source": [ 234 | "params" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 43, 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "data": { 244 | "text/plain": [ 245 | "[DeviceArray([[ 3.7044861, -2.387884 ],\n", 246 | " [-2.489525 , -3.1954105],\n", 247 | " [-2.625351 , 3.7185783]], dtype=float32),\n", 248 | " DeviceArray([0.79461575, 0.5428428 , 1.2141498 ], dtype=float32),\n", 249 | " DeviceArray([-3.5103402, -2.874178 , -3.5018072], dtype=float32),\n", 250 | " DeviceArray(0.32925892, dtype=float32)]" 251 | ] 252 | }, 253 | "execution_count": 43, 254 | "metadata": {}, 255 | "output_type": "execute_result" 256 | } 257 | ], 258 | "source": [ 259 | "params" 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "

5. jax.jit

\n", 267 | "\n", 268 | "While carefully written numpy code can be reasonably performant, for modern machine learning we want our code to run as fast as possible. This often means running our code on different \"accelerators\" such as GPUs or TPUs. *JAX* provides a *JIT* (just-in-time) compiler that takes a standard *Python/numpy* function and compiles it to run efficiently on an accelerator. Compiling a function also avoids the overhead of the Python interpreter, which helps whether or not you are using an accelerator. In all, *jax.jit* can dramatically speed up your code with essentially no coding overhead; you just have to ask JAX to compile the function for you. Even our tiny neural network sees a fairly dramatic speedup when using *jax.jit* (a standalone sketch appears at the end of this section):" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 44, 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "name": "stdout", 278 | "output_type": "stream", 279 | "text": [ 280 | "16.8 ms ± 755 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", 281 | "641 µs ± 124 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" 282 | ] 283 | } 284 | ], 285 | "source": [ 286 | "# Time the original gradient function\n", 287 | "%timeit loss_grad(params, x, y)\n", 288 | "loss_grad = jax.jit(jax.grad(loss))\n", 289 | "# Run once to trigger JIT compilation\n", 290 | "loss_grad(params, x, y)\n", 291 | "%timeit loss_grad(params, x, y)" 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": {}, 297 | "source": [ 298 | "Let us run the loop again" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 46, 304 | "metadata": {}, 305 | "outputs": [ 306 | { 307 | "name": "stdout", 308 | "output_type": "stream", 309 | "text": [ 310 | "Iteration0\n", 311 | "[0 0] -> 0\n", 312 | "[0 1] -> 1\n", 313 | "[1 0] -> 1\n", 314 | "[1 1] -> 0\n" 315 | ] 316 | } 317 | ], 318 | "source": [ 319 | "for n in itertools.count():\n", 320 | " # grab a single random input\n", 321 | " ix = ide[onp.random.choice(ide.shape[0])]\n", 322 | " # input\n", 323 | " x = inputs[ix]\n", 324 | " # output\n", 325 | " y = targets[ix]\n", 326 | " # get the gradient of the loss for this input/output pair\n", 327 | " grads = loss_grad(params,x,y)\n", 328 | " # update parameters via gradient descent\n", 329 | " params = [param - learning_rate * grad \n", 330 | " for param, grad in zip(params,grads) ]\n", 331 | " # Every 100 iterations, check whether we've solved XOR\n", 332 | " if not n %100:\n", 333 | " print('Iteration{}'.format(n))\n", 334 | " if test_all_inputs(inputs, params):\n", 335 | " break\n", 336 | " " 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "
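A standalone sketch of jax.jit outside the training loop; normalize is a made-up example, and the speedup will vary by machine:

import jax
import jax.numpy as jnp

@jax.jit
def normalize(x):
    # traced once on the first call, then reused as a compiled XLA kernel
    return (x - x.mean()) / x.std()

x = jnp.arange(1000.0)
normalize(x)   # first call triggers compilation
normalize(x)   # subsequent calls skip the Python-level overhead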

6. jax.vmap

\n", 344 | "\n", 345 | "We have been training our neural network on a single example at a time. This is \"true\" stochastic gradient descent; in practice, when training modern machine learning models, we perform \"minibatch\" gradient descent, where we average the loss gradients over a small batch of examples at each gradient descent step. \n", 346 | "\n", 347 | "*JAX* provides *jax.vmap*, a transformation that automatically \"vectorizes\" a function. What this means is that it lets you compute the output of a function in parallel over some axis of the input. For us, it means we can apply the *jax.vmap* function transformation and immediately obtain a version of our loss-function gradient that can consume a minibatch of examples.\n", 348 | "\n", 349 | "*jax.vmap* takes additional arguments:\n", 350 | "\n", 351 | "- *in_axes* is a tuple or integer that tells *JAX* over which axes the function's arguments should be parallelized. The tuple must have the same length as the number of arguments of the function being vectorized, or be an integer when there is only one argument. In our example we will use *(None, 0, 0)*, which means \"do not parallelize over the first argument (params), and parallelize over the first (zeroth) dimension of the second and third arguments (x and y)\".\n", 352 | "- *out_axes* is analogous to in_axes, except that it specifies which axes of the function's output to parallelize over. In our case we will use 0, which means parallelize over the first (zeroth) dimension of the function's single output (the loss gradients).\n", 353 | "\n", 354 | "Note that we will have to change the training code a bit: we need to draw a batch of data instead of a single example at a time, and we must average the gradients over the batch before applying them to update the parameters. A minimal standalone sketch of vmap appears below, before the updated loop."
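A minimal standalone sketch of jax.vmap and in_axes; the affine function and the shapes are arbitrary choices for illustration:

import jax
import jax.numpy as jnp

def affine(w, x):
    return jnp.dot(w, x)                  # w: (3, 2), x: (2,) -> output (3,)

w = jnp.ones((3, 2))
xs = jnp.arange(10.0).reshape(5, 2)       # a batch of 5 inputs

# None: do not map over w; 0: map over the leading axis of xs
batched = jax.vmap(affine, in_axes=(None, 0), out_axes=0)
print(batched(w, xs).shape)               # (5, 3)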
355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": 47, 360 | "metadata": {}, 361 | "outputs": [ 362 | { 363 | "name": "stdout", 364 | "output_type": "stream", 365 | "text": [ 366 | "Iteration 0\n", 367 | "[0 0] -> 0\n", 368 | "[0 1] -> 0\n", 369 | "[1 0] -> 0\n", 370 | "[1 1] -> 0\n", 371 | "Iteration 100\n", 372 | "[0 0] -> 0\n", 373 | "[0 1] -> 1\n", 374 | "[1 0] -> 1\n", 375 | "[1 1] -> 0\n" 376 | ] 377 | } 378 | ], 379 | "source": [ 380 | "loss_grad = jax.jit(jax.vmap(jax.grad(loss), in_axes=(None, 0, 0), out_axes=0))\n", 381 | "\n", 382 | "params = initial_params()\n", 383 | "\n", 384 | "batch_size = 100\n", 385 | "\n", 386 | "for n in itertools.count():\n", 387 | " # Generate a batch of inputs\n", 388 | " x = inputs[onp.random.choice(inputs.shape[0], size=batch_size)]\n", 389 | " y = onp.bitwise_xor(x[:, 0], x[:, 1])\n", 390 | " # The call to loss_grad remains the same!\n", 391 | " grads = loss_grad(params, x, y)\n", 392 | " # Note that we now need to average gradients over the batch\n", 393 | " params = [param - learning_rate * np.mean(grad, axis=0)\n", 394 | " for param, grad in zip(params, grads)]\n", 395 | " if not n % 100:\n", 396 | " print('Iteration {}'.format(n))\n", 397 | " if test_all_inputs(inputs, params):\n", 398 | " break" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 49, 404 | "metadata": {}, 405 | "outputs": [ 406 | { 407 | "data": { 408 | "text/plain": [ 409 | "[DeviceArray([[-2.7300384 , 2.8734598 ],\n", 410 | " [ 0.0461734 , 0.20690341],\n", 411 | " [-2.25555 , 2.1526465 ]], dtype=float32),\n", 412 | " DeviceArray([ 1.4270214, 1.1485996, -1.0554334], dtype=float32),\n", 413 | " DeviceArray([-3.0328572, 1.0033779, 3.0124404], dtype=float32),\n", 414 | " DeviceArray(1.8351157, dtype=float32)]" 415 | ] 416 | }, 417 | "execution_count": 49, 418 | "metadata": {}, 419 | "output_type": "execute_result" 420 | } 421 | ], 422 | "source": [ 423 | "params" 424 | ] 425 | } 426 | ], 427 | "metadata": { 428 | "kernelspec": { 429 | "display_name": "Python 3", 430 | "language": "python", 431 | "name": "python3" 432 | }, 433 | "language_info": { 434 | "codemirror_mode": { 435 | "name": "ipython", 436 | "version": 3 437 | }, 438 | "file_extension": ".py", 439 | "mimetype": "text/x-python", 440 | "name": "python", 441 | "nbconvert_exporter": "python", 442 | "pygments_lexer": "ipython3", 443 | "version": "3.7.6" 444 | } 445 | }, 446 | "nbformat": 4, 447 | "nbformat_minor": 4 448 | } 449 | -------------------------------------------------------------------------------- /Cuadernos/Regresion_Lineal_tf_2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "

Simple Linear Regression Model

\n", 8 | "\n", 9 | "

Authors

\n", 10 | "\n", 11 | "1. Alvaro Mauricio Montenegro Díaz, ammontenegrod@unal.edu.co\n", 12 | "2. Daniel Mauricio Montenegro Reyes, dextronomo@gmail.com \n", 13 | "\n", 14 | "

Fork

\n", 15 | "\n", 16 | "

References

\n" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "

1. Introduction

\n", 24 | "\n", 25 | "This notebook introduces automatic differentiation using TensorFlow 2.1. As an illustration, we will implement a simple linear regression model. A minimal tf.GradientTape example appears below." 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "
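As a minimal, self-contained preview of the tf.GradientTape mechanism used throughout this notebook (the values here are arbitrary):

import tensorflow as tf

x = tf.Variable(3.0)
with tf.GradientTape() as tape:
    y = x * x                  # y = x^2
dy_dx = tape.gradient(y, x)    # dy/dx = 2x
print(dy_dx.numpy())           # 6.0 at x = 3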

2. Import Required Modules

\n" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 6, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "2.1.0\n" 45 | ] 46 | } 47 | ], 48 | "source": [ 49 | "from __future__ import absolute_import, division, print_function, unicode_literals\n", 50 | "\n", 51 | "import numpy as np\n", 52 | "import pandas as pd\n", 53 | "import seaborn as sb\n", 54 | "import matplotlib.pyplot as plt\n", 55 | "import tensorflow as tf\n", 56 | "import random\n", 57 | "\n", 58 | "print(tf.__version__)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "

3. Creating Synthetic Data

\n", 66 | "\n", 67 | "For this example we assume the data can be modeled by a straight line of the form\n", 68 | "\n", 69 | "$$\n", 70 | "\\begin{equation}\n", 71 | "y = wx + b \n", 72 | "\\end{equation}\n", 73 | "$$\n", 74 | "\n", 75 | "where $w = 1.2$ and $b = 2$, matching the code below. We assume the dependent variable $y$ is affected by a random observation error $e \\sim N(0, 1)$. \n", 76 | "\n", 77 | "The synthetic data are generated as follows. We generate $n = 1200$ equally spaced values $x_i$ in the interval $(0,6)$. The values of the dependent variable $y_i$ are generated from the equation of the line plus the error: $y_i = w x_i + b + e_i$." 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 7, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "w = 1.2\n", 87 | "b = 2\n", 88 | "n = 1200\n", 89 | "scale= 1.0\n", 90 | "\n", 91 | "x = np.linspace(0,6,n)\n", 92 | "y = w*x + b + np.random.normal(scale=scale, size= x.shape[0])\n", 93 | "# pandas object to descriptive analysis\n", 94 | "data = pd.DataFrame({'x': x, 'y': y})" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 8, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/html": [ 105 | "
\n", 106 | "[HTML table output stripped; the data.head() preview appears in the text/plain rendering below]
" 156 | ], 157 | "text/plain": [ 158 | " x y\n", 159 | "0 0.000000 2.307600\n", 160 | "1 0.005004 2.072407\n", 161 | "2 0.010008 0.994219\n", 162 | "3 0.015013 1.777229\n", 163 | "4 0.020017 1.410617" 164 | ] 165 | }, 166 | "execution_count": 8, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "data.head()" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "

4. A First Graphical Look at the Data

\n", 180 | "\n", 181 | "We will examine the relationships between the variables using a pairplot and a correlation plot." 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "#sb.scatterplot(data_f, label =\"Synthetic data\")\n", 191 | "#sb.pairplot(data_f, diag_kind =\"kde\")\n", 192 | "#plt.show()\n", 193 | "\n", 194 | "# Create an lmplot\n", 195 | "grid = sb.lmplot('x', 'y', data, height=7, truncate=True, markers='.', scatter_kws={\"s\": 100})\n", 196 | "\n", 197 | "# Rotate the labels on x-axis\n", 198 | "grid.set_xticklabels(rotation=30)\n", 199 | "# Access the Figure\n", 200 | "fig = grid.fig \n", 201 | "\n", 202 | "# Add a title to the Figure\n", 203 | "fig.suptitle('Linear regression \\n $y = 1.2x + 2 + \\epsilon$', fontsize=20)\n", 204 | "# Show the plot\n", 205 | "plt.show()" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 10, 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "data": { 215 | "text/html": [ 216 | "[styled HTML table stripped; correlation matrix: corr(x, x) = 1, corr(x, y) = 0.897203, corr(y, y) = 1]
" 241 | ], 242 | "text/plain": [ 243 | "" 244 | ] 245 | }, 246 | "execution_count": 10, 247 | "metadata": {}, 248 | "output_type": "execute_result" 249 | } 250 | ], 251 | "source": [ 252 | "correlation_data = data.corr()\n", 253 | "correlation_data.style.background_gradient(cmap='coolwarm', axis =None)" 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "

5. Descriptive Statistics

\n", 261 | "\n", 262 | "Central tendency and dispersion" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 11, 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "data": { 272 | "text/html": [ 273 | "[HTML table output stripped; the describe() summary appears in the text/plain rendering below]
" 327 | ], 328 | "text/plain": [ 329 | " count mean std min 25% 50% 75% \\\n", 330 | "x 1200.0 3.000000 1.734218 0.000000 1.500000 3.000000 4.50000 \n", 331 | "y 1200.0 5.591003 2.291917 0.317478 3.739724 5.584183 7.41657 \n", 332 | "\n", 333 | " max \n", 334 | "x 6.000000 \n", 335 | "y 10.917964 " 336 | ] 337 | }, 338 | "execution_count": 11, 339 | "metadata": {}, 340 | "output_type": "execute_result" 341 | } 342 | ], 343 | "source": [ 344 | "stats = data.describe()\n", 345 | "stats = stats.transpose()\n", 346 | "stats" 347 | ] 348 | }, 349 | { 350 | "cell_type": "markdown", 351 | "metadata": {}, 352 | "source": [ 353 | "

6. Prepare the Data for Training and Validation

\n" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 12, 359 | "metadata": {}, 360 | "outputs": [], 361 | "source": [ 362 | "# index to sample\n", 363 | "size_test = 0.2 # 20% for testing\n", 364 | "n_test = np.int(n*size_test )\n", 365 | "n_train = np.int(n-n_test)\n", 366 | "\n", 367 | "# index for sample for testing and training\n", 368 | "test_id = np.random.choice(range(x.shape[0]),n_test,replace =False)\n", 369 | "train_id = np.setdiff1d(range(x.shape[0]), test_id , assume_unique=True)\n", 370 | "\n", 371 | "# extract the samples\n", 372 | "x_test = x[test_id]\n", 373 | "x_train = x[train_id]\n", 374 | "y_test = y[test_id]\n", 375 | "y_train = y[train_id]" 376 | ] 377 | }, 378 | { 379 | "cell_type": "markdown", 380 | "metadata": {}, 381 | "source": [ 382 | "

7. Create a LinearModel Class

\n", 383 | "\n", 384 | "The class has two methods: **init** and **call**. *init* initializes *w* (weight) and *b* (bias) randomly, and *call* returns the predicted values using the equation $y = wx + b$. A quick usage sketch follows the code." 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": 13, 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "#LinearModel class\n", 394 | "import numpy as np\n", 395 | "\n", 396 | "class LinearModel:\n", 397 | " def __call__(self,x):\n", 398 | " return self.weight * x + self.bias\n", 399 | " \n", 400 | " def __init__(self):\n", 401 | " self.weight = tf.Variable(np.random.rand()) # generate a value in [0,1)\n", 402 | " self.bias = tf.Variable(np.random.rand()) # generate a value in [0,1)\n" 403 | ] 404 | }, 405 | { 406 | "cell_type": "markdown", 407 | "metadata": {}, 408 | "source": [ 409 | "
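A quick usage sketch of the class above, assuming the imports from section 2 are in scope; the input values are arbitrary:

model = LinearModel()
print(model.weight.numpy(), model.bias.numpy())   # random values in [0, 1)
print(model(np.array([0.0, 1.0, 2.0])).numpy())   # weight * x + bias, elementwise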

8. Define the Loss and Training Functions

\n", 410 | "\n", 411 | "The loss function will be the mean squared error, defined by\n", 412 | "\n", 413 | "$$\n", 414 | "\\begin{equation}\n", 415 | "loss = \\frac{1}{n} \\sum_{i=1}^{n} (y_i-\\tilde{y}_i)^2,\n", 416 | "\\end{equation}\n", 417 | "$$\n", 418 | "\n", 419 | "where $\\tilde{y}_i$ is the value predicted (pred) by the model for $x_i$.\n", 420 | "\n", 421 | "In the training function we introduce automatic differentiation with a *tf.GradientTape* context. The optimization method is gradient descent, with learning rate *lr*; the update rule is written out after the code." 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 14, 427 | "metadata": {}, 428 | "outputs": [], 429 | "source": [ 430 | "# loss function\n", 431 | "def loss(y, pred):\n", 432 | " return tf.reduce_mean(tf.math.square(y-pred))\n", 433 | "\n", 434 | "# train function\n", 435 | "def train(linear_model, x,y, lr= 0.12):\n", 436 | " with tf.GradientTape() as t:\n", 437 | " t.watch([linear_model.weight, linear_model.bias])\n", 438 | " current_loss = loss(y, linear_model(x))\n", 439 | " \n", 440 | " grad_weight, grad_bias = t.gradient(current_loss, [linear_model.weight, linear_model.bias])\n", 441 | " linear_model.weight.assign_sub(lr*grad_weight) # gradient descent step on the weight\n", 442 | " linear_model.bias.assign_sub(lr*grad_bias) # gradient descent step on the bias\n", 443 | " " 444 | ] 445 | }, 446 | { 447 | "cell_type": "markdown", 448 | "metadata": {}, 449 | "source": [ 450 | "
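The two assign_sub calls implement the standard gradient descent update, which in the notation above reads:

$$
w \leftarrow w - lr \cdot \frac{\partial \, loss}{\partial w}, \qquad
b \leftarrow b - lr \cdot \frac{\partial \, loss}{\partial b}
$$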

9. Training the Model

\n", 451 | "\n", 452 | "We are ready to run the linear regression model, that is, to *train the model*. We define a number of epochs (iterations) and pass lr = 0.06 to the train function (overriding its default of 0.12). We will use 200 epochs in this experiment." 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": null, 458 | "metadata": {}, 459 | "outputs": [], 460 | "source": [ 461 | "linear_model = LinearModel()\n", 462 | "weights, biases = [], []\n", 463 | "train_loss_p, test_loss_p = [], []\n", 464 | "epochs = 200\n", 465 | "\n", 466 | "for epoch in range(epochs):\n", 467 | " weights.append(linear_model.weight.numpy())\n", 468 | " biases.append(linear_model.bias.numpy())\n", 469 | " train(linear_model, x_train, y_train, lr =0.06)\n", 470 | " train_loss = loss(y_train,linear_model(x_train))\n", 471 | " test_loss = loss(y_test,linear_model(x_test))\n", 472 | " # save loss values to plot\n", 473 | " train_loss_p.append(train_loss)\n", 474 | " test_loss_p.append(test_loss)\n", 475 | " print(f\"Epoch {epoch}: train loss {train_loss.numpy()}: test loss {test_loss.numpy()}\")" 476 | ] 477 | }, 478 | { 479 | "cell_type": "markdown", 480 | "metadata": {}, 481 | "source": [ 482 | "

10. Validation: Loss Function Plots

" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": null, 488 | "metadata": {}, 489 | "outputs": [], 490 | "source": [ 491 | "# plots\n", 492 | "def loss_plots(train, test, y_min =0,y_max=2):\n", 493 | " plt.plot(train,color='red', label='train loss')\n", 494 | " plt.plot(test, color='blue', label='test loss')\n", 495 | " plt.ylim(y_min,y_max)\n", 496 | " plt.xlabel('Epoch')\n", 497 | " plt.ylabel('Loss')\n", 498 | " plt.legend()\n", 499 | " plt.show()\n", 500 | "\n", 501 | "#\n", 502 | "loss_plots(train_loss_p, test_loss_p,y_min=0.5,y_max = 3.0)" 503 | ] 504 | }, 505 | { 506 | "cell_type": "markdown", 507 | "metadata": {}, 508 | "source": [ 509 | "

11. Parameter Extraction

" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": 17, 515 | "metadata": {}, 516 | "outputs": [ 517 | { 518 | "name": "stdout", 519 | "output_type": "stream", 520 | "text": [ 521 | "weight = 1.193455\n", 522 | "bias = 2.0037918\n" 523 | ] 524 | } 525 | ], 526 | "source": [ 527 | "print(\"weight =\", linear_model.weight.numpy())\n", 528 | "print(\"bias =\",linear_model.bias.numpy())" 529 | ] 530 | }, 531 | { 532 | "cell_type": "code", 533 | "execution_count": null, 534 | "metadata": {}, 535 | "outputs": [], 536 | "source": [] 537 | } 538 | ], 539 | "metadata": { 540 | "kernelspec": { 541 | "display_name": "Python 3", 542 | "language": "python", 543 | "name": "python3" 544 | }, 545 | "language_info": { 546 | "codemirror_mode": { 547 | "name": "ipython", 548 | "version": 3 549 | }, 550 | "file_extension": ".py", 551 | "mimetype": "text/x-python", 552 | "name": "python", 553 | "nbconvert_exporter": "python", 554 | "pygments_lexer": "ipython3", 555 | "version": "3.7.6" 556 | } 557 | }, 558 | "nbformat": 4, 559 | "nbformat_minor": 4 560 | } 561 | --------------------------------------------------------------------------------