├── .gitignore ├── LICENSE ├── README.md ├── notebooks ├── 01_mac0460_Q.ipynb ├── 02_mac0460_Q.ipynb ├── EP4_MNIST.ipynb ├── README.md ├── cnn.ipynb ├── download_scripts │ ├── efigi.sh │ └── pi_car.sh ├── images │ ├── arbitrary_padding_no_strides_transposed.gif │ ├── cnn_arch.png │ ├── conv.png │ ├── dfn.png │ ├── dfn_description.png │ ├── dwarf.png │ ├── elliptical.png │ ├── image_classification.png │ ├── irregular.png │ ├── lenticular.png │ ├── logistic_regression.png │ ├── multchain.png │ ├── pytorch-logo-dark.png │ ├── simple_example.png │ ├── spiral.png │ ├── test_track.png │ ├── train_track.png │ └── vector_graph.png ├── pytorch_basico1.ipynb ├── pytorch_basico2.ipynb ├── regressao_linear1.ipynb ├── regressao_linear2.ipynb ├── transfer_learning.ipynb └── util │ ├── DataHolder.py │ ├── __init__.py │ ├── plots.py │ └── util.py ├── requirements.txt └── slides ├── LICENSE ├── README.md ├── backprop1 ├── Makefile ├── TikzFiles │ ├── 3x3.tex │ ├── 5x5.tex │ ├── BackPropScalar.tex │ ├── Compgraph1.tex │ ├── Compgraph2.tex │ ├── Compgraph3.tex │ ├── Compgraph4.tex │ ├── DFNclassification.tex │ ├── DFNclassification2.tex │ ├── DeepNN.tex │ ├── Dropout1.tex │ ├── Dropout2.tex │ ├── Entropy1.tex │ ├── Entropy2.tex │ ├── Kernel_image_pro.tex │ ├── KullbackLeibler.tex │ ├── NN.tex │ ├── OldNN1.tex │ ├── OldNN2.tex │ ├── OldNN3.tex │ ├── ReLU.tex │ ├── Sigmoid.tex │ ├── Softmax.tex │ ├── b1_path1.tex │ ├── b1_path1_grad.tex │ ├── b1_path2.tex │ ├── b1_path2_grad.tex │ ├── b1_path3.tex │ ├── b1_path3_grad.tex │ ├── batch_example_values.tex │ ├── batch_graph.tex │ ├── chain_rule_nodes.tex │ ├── convnet_arch.tex │ ├── div.tex │ ├── examples_values.tex │ ├── exp.tex │ ├── expanded_graph_0.tex │ ├── expanded_graph_1.tex │ ├── expanded_graph_10.tex │ ├── expanded_graph_11.tex │ ├── expanded_graph_12.tex │ ├── expanded_graph_13.tex │ ├── expanded_graph_14.tex │ ├── expanded_graph_2.tex │ ├── expanded_graph_3.tex │ ├── expanded_graph_4.tex │ ├── expanded_graph_5.tex │ ├── 
expanded_graph_6.tex │ ├── expanded_graph_7.tex │ ├── expanded_graph_8.tex │ ├── expanded_graph_9.tex │ ├── fashionMNIST.tex │ ├── feature_engineering.tex │ ├── feature_map.tex │ ├── log.tex │ ├── lr_graph.tex │ ├── lr_graph1.tex │ ├── lr_graph_grad.tex │ ├── minus1.tex │ ├── mult.tex │ ├── multiple_paths.tex │ ├── perceptron.tex │ ├── simple_example.tex │ ├── simple_example0.tex │ ├── simple_exampleCR.tex │ ├── soma.tex │ └── squ.tex ├── all_imports.tex ├── all_new_commands.tex ├── definitions │ ├── colors.tex │ └── styles.tex ├── header.tex ├── images │ └── logo.png ├── main.tex ├── my_references.bib └── pdf │ └── BackpropLecture1.pdf ├── backprop2 ├── Makefile ├── all_imports.tex ├── all_new_commands.tex ├── definitions │ ├── colors.tex │ └── styles.tex ├── header.tex ├── images │ └── logo.png ├── main.tex ├── my_references.bib └── pdf │ └── BackpropLecture2.pdf ├── images └── cc-logo.png └── install.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 MLIME 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MAC0460 - Introdução ao aprendizado de máquina 2 | [![License](https://img.shields.io/github/license/mashape/apistatus.svg?maxAge=2592000)](https://github.com/MLIME/MAC0460/blob/master/LICENSE) 3 | 4 | Nesse repositório estão os diferentes materiais da disciplina **MAC0460 - Introdução ao aprendizado de máquina** ministrada no Instituto de Matemática e Estatística (IME) da Universidade de São Paulo (USP). Mais informações podem ser encontradas [aqui](https://uspdigital.usp.br/jupiterweb/obterDisciplina?sgldis=MAC0460). 5 | 6 | Um pedaço da parte teórica do curso foi baseado no curso [**Learning from Data**](https://work.caltech.edu/telecourse.html); vale a pena conferir esse material. 7 | 8 | ## Uso 9 | 10 | Na pasta **notebooks** estão os exercícios práticos dados no curso; na pasta **slides** estão os materiais didáticos de algumas aulas. 11 | 12 | ### Instalação (Ubuntu / Debian) 13 | Para quem desejar, recomendamos a utilização de ambientes virtuais. 
14 | [Instale os pacotes necessários](http://railslide.io/virtualenvwrapper-python3.html) e crie um novo ambiente, ex: 15 | 16 | ``` 17 | $ mkvirtualenv mac0460 18 | $ workon mac0460 19 | ``` 20 | 21 | Para instalar o [Jupyter Notebook](http://jupyter.org/) basta rodar: 22 | 23 | ``` 24 | $ sudo apt install python3-pip python3-tk 25 | $ pip3 install --upgrade pip 26 | $ sudo pip3 install jupyter 27 | ``` 28 | 29 | Vamos usar uma série de bibliotecas de Python ao longo da disciplina, para instalar as principais rode: 30 | 31 | ``` 32 | $ pip3 install -r requirements.txt 33 | ``` 34 | 35 | É preciso ir no site do [PyTorch](https://pytorch.org/) para instalar essa biblioteca. 36 | 37 | Para acessar os notebooks basta rodar: 38 | 39 | ``` 40 | $ cd notebooks 41 | $ jupyter notebook 42 | ``` 43 | 44 | ### Pontos importantes 45 | 46 | - Usamos a biblioteca de deep learning PyTorch. Essa biblioteca está mudando constantemente. Os exercícios práticos foram desenvolvidos para a versão 0.4.0. Não é garantido que os notebooks vão funcionar para as versões futuras. 47 | 48 | - Partimos do pressuposto de que o usuário está usando Ubuntu ou Debian. A compatibilidade com outros sistemas não foi testada. 
49 | 50 | - Os notebooks foram feitos para a versão 3.5 do Python 51 | 52 | 53 | ### Referências 54 | 55 | Vale a pena se familiarizar com cada uma das bibliotecas que vão ser usadas: 56 | - [Jupyter](https://jupyter.readthedocs.io/en/latest/) 57 | - [NumPy](https://docs.scipy.org/doc/numpy-dev/user/quickstart.html) 58 | - [Matplotlib](https://matplotlib.org/tutorials/index.html) 59 | - [PyTorch](https://pytorch.org/tutorials/) 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /notebooks/02_mac0460_Q.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Objetivos\n", 8 | "\n", 9 | "Implementar e testar a solução analítica para o problema de regressão linear \n", 10 | "\n", 11 | "Este notebook depende de módulos auxiliares que estão na pasta util/ mais alguns imports do Python" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Regressão linear: equações normais\n", 19 | "\n", 20 | "\n", 21 | "Dado um dataset $\\{(\\mathbf{x}_{1}, y_{1}), \\dots ,(\\mathbf{x}_{N}, y_{N})\\}$ onde $\\mathbf{x}_i \\in \\mathbb{R}^{d}$ e $y_i \\in \\mathbb{R}$, queremos aproximar a função desconhecida $f:\\mathbb{R}^{d} \\rightarrow \\mathbb{R}$ (lembrando que $y_i =f(\\mathbf{x}_i)$) por meio de um modelo linear $h$:\n", 22 | "$$\n", 23 | "h(\\mathbf{x}_{i}; \\mathbf{w}, b) = \\mathbf{w}^\\top \\mathbf{x}_{i} + b\n", 24 | "$$\n", 25 | "\n", 26 | "Note que $h(\\mathbf{x}_{i}; \\mathbf{w}, b)$ é na verdade uma [transformação afim](https://en.wikipedia.org/wiki/Affine_transformation) de $\\mathbf{x}_{i}$. Como em outros lugares, vamos usar o termo \"linear\" também para caracterizar uma transformação afim.\n", 27 | "\n", 28 | "A saída de $h$ é uma transformação linear de $\\mathbf{x}_{i}$. 
Usamos a notação $h(\\mathbf{x}_{i}; \\mathbf{w}, b)$ para deixar claro que $h$ é um modelo parametrizado, i.e., a transformação $h$ é definida pelos parâmetros $\\mathbf{w}$ e $b$. Podemos pensar no vetor $\\mathbf{w}$ como um vetor de *pesos* controlando o efeito de cada *feature* na predição.\n", 29 | "\n", 30 | "Adicionando uma feature a mais na observação $\\mathbf{x}_{i}$ (com o valor 1) -- coordenada artificial -- podemos simplificar a notação do modelo:\n", 31 | "\n", 32 | "$$\n", 33 | "h(\\mathbf{x}_{i}; \\mathbf{w}) = \\hat{y}_{i} = \\mathbf{w}^\\top \\mathbf{x}_{i}\n", 34 | "$$\n", 35 | "\n", 36 | "Gostaríamos de encontrar os melhores parâmetros $\\mathbf{w}$ de modo que a predição $\\hat{y}_{i}$ seja a mais próxima de $y_{i}$ de acordo com alguma métrica de erro. Usando o *erro quadrático médio* como tal métrica podemos obter a seguinte função de custo:\n", 37 | "\n", 38 | "\\begin{equation}\n", 39 | "J(\\mathbf{w}) = \\frac{1}{N}\\sum_{i=1}^{N}(\\hat{y}_{i} - y_{i})^{2}\n", 40 | "\\end{equation}\n", 41 | "\n", 42 | "Desse modo, a tarefa de achar a função $h$ mais próxima de $f$ se torna a tarefa de encontrar os valores de $\\mathbf{w}$ para minimizar $J(\\mathbf{w})$.\n", 43 | "\n", 44 | "**Aqui vamos começar a explorar esse modelo olhando para um dataset bem simples**\n" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "# all imports\n", 54 | "import numpy as np\n", 55 | "import time\n", 56 | "from util.util import get_housing_prices_data, r_squared\n", 57 | "from util.plots import plot_points_regression \n", 58 | "\n", 59 | "%matplotlib inline" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "### O dataset\n", 67 | "\n", 68 | "Os dados com que vamos trabalhar são dados artificiais. Iremos gerar 100 observações com apenas uma *feature* e um valor associado a cada uma delas. 
Podemos interpretar essas observações como sendo um par *(metros quadrados de um imóvel, preço desse imóvel em $)*. Nossa tarefa é construir um modelo que consiga predizer o valor dos imóveis, dadas as suas áreas." 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "X, y = get_housing_prices_data(N=100)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "### Plotando os dados\n", 85 | "\n", 86 | "Acima temos algumas informações sobre os dados. Podemos também visualizar cada ponto." 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "plot_points_regression(X,\n", 96 | " y,\n", 97 | " title='Real estate prices prediction',\n", 98 | " xlabel=\"m\\u00b2\",\n", 99 | " ylabel='$')" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "### Equações normais\n", 107 | "\n", 108 | "Dados $f:\\mathbb{R}^{n\\times m} \\rightarrow \\mathbb{R}$ e $\\mathbf{A} \\in \\mathbb{R}^{n\\times m}$, definimos o gradiente de $f$ com respeito a $\\mathbf{A}$ como:\n", 109 | "\n", 110 | "\\begin{equation*}\n", 111 | "\\nabla_{\\mathbf{A}}f = \\frac{\\partial f}{\\partial \\mathbf{A}} = \\begin{bmatrix}\n", 112 | "\\frac{\\partial f}{\\partial \\mathbf{A}_{1,1}} & \\dots & \\frac{\\partial f}{\\partial \\mathbf{A}_{1,m}} \\\\\n", 113 | "\\vdots & \\ddots & \\vdots \\\\\n", 114 | "\\frac{\\partial f}{\\partial \\mathbf{A}_{n,1}} & \\dots & \\frac{\\partial f}{\\partial \\mathbf{A}_{n,m}}\n", 115 | "\\end{bmatrix}\n", 116 | "\\end{equation*}\n", 117 | "\n", 118 | "Seja $\\mathbf{X} \\in \\mathbb{R}^{N\\times m}$ a matriz cujas linhas são as observações do dataset (também chamada de *design matrix*) e seja $\\mathbf{y} \\in \\mathbb{R}^{N}$ o vetor contendo todos os valores de $y_{i}$ (i.e., $\\mathbf{X}_{i,:} = 
\\mathbf{x}_{i}$ e $\\mathbf{y}_{i} =y_{i}$). É fácil checar que: \n", 119 | "\n", 120 | "\n", 121 | "\\begin{equation}\n", 122 | "J(\\mathbf{w}) = \\frac{1}{N}(\\mathbf{X}\\mathbf{w} - \\mathbf{y})^{T}(\\mathbf{X}\\mathbf{w} - \\mathbf{y})\n", 123 | "\\end{equation}\n", 124 | "\n", 125 | "\n", 126 | "Usando certos conceitos básicos de derivada com matrizes podemos chegar no gradiente de $J(\\mathbf{w})$ com respeito a $\\mathbf{w}$:\n", 127 | "\n", 128 | "\\begin{equation}\n", 129 | "\\nabla_{\\mathbf{w}}J(\\mathbf{w}) = \\frac{2}{N} (\\mathbf{X}^{T}\\mathbf{X}\\mathbf{w} -\\mathbf{X}^{T}\\mathbf{y}) \n", 130 | "\\end{equation}\n", 131 | "\n", 132 | "Assim, quando $\\nabla_{\\mathbf{w}}J(\\mathbf{w}) = 0$ temos que \n", 133 | "\n", 134 | "\\begin{equation}\n", 135 | "\\mathbf{X}^{T}\\mathbf{X}\\mathbf{w} = \\mathbf{X}^{T}\\mathbf{y}\n", 136 | "\\end{equation}\n", 137 | "\n", 138 | "Desse modo,\n", 139 | "\n", 140 | "\\begin{equation}\n", 141 | "\\mathbf{w} = (\\mathbf{X}^{T}\\mathbf{X})^{-1}\\mathbf{X}^{T}\\mathbf{y}\n", 142 | "\\end{equation}\n", 143 | "\n", 144 | "Essas equações são conhecidas como **equações normais**, e a última expressão dá a solução em forma fechada quando $\\mathbf{X}^{T}\\mathbf{X}$ é inversível. Note que esse tipo de solução tem um custo, pois conforme cresce o número de variáveis, o tempo da inversão da matriz fica proibitivo. Vale a pena ler [esse material](http://cs229.stanford.edu/notes/cs229-notes1.pdf) para ver o argumento com mais detalhes." 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "### Exercício\n", 152 | "Usando apenas a biblioteca **NumPy** (uma introdução rápida a certas funcionalidades dessa biblioteca pode ser encontrada [aqui](http://cs231n.github.io/python-numpy-tutorial/)), complete as duas funções abaixo. Lembre que $\\mathbf{X} \\in \\mathbb{R}^{N\\times d}$; assim, será preciso adicionar uma componente com valor 1 a cada observação em $\\mathbf{X}$ para realizar a computação descrita acima." 
153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "def normal_equation_weights(X, y):\n", 162 | " \"\"\"\n", 163 | " Calculates the weights of a linear function using the normal equation method.\n", 164 | " You should add into X a new column with 1s.\n", 165 | "\n", 166 | " :param X: design matrix\n", 167 | " :type X: np.ndarray(shape=(N, d))\n", 168 | " :param y: regression targets\n", 169 | " :type y: np.ndarray(shape=(N, 1))\n", 170 | " :return: weight vector\n", 171 | " :rtype: np.ndarray(shape=(d+1, 1))\n", 172 | " \"\"\"\n", 173 | " \n", 174 | " # START OF YOUR CODE:\n", 175 | " raise NotImplementedError(\"Falta implementar normal_equation_weights()\")\n", 176 | " # END YOUR CODE\n", 177 | "\n", 178 | " return w" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "# teste da função normal_equation_weights()\n", 188 | "\n", 189 | "w = 0 # isto é desnecessário\n", 190 | "w = normal_equation_weights(X, y)\n", 191 | "print(\"Estimated w = \", w)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "def normal_equation_prediction(X, w):\n", 201 | " \"\"\"\n", 202 | " Calculates the prediction over a set of observations X using the linear function\n", 203 | " characterized by the weight vector w.\n", 204 | " You should add into X a new column with 1s.\n", 205 | "\n", 206 | " :param X: design matrix\n", 207 | " :type X: np.ndarray(shape=(N, d))\n", 208 | " :param w: weight vector\n", 209 | " :type w: np.ndarray(shape=(d+1, 1))\n", 210 | " :param y: regression prediction\n", 211 | " :type y: np.ndarray(shape=(N, 1))\n", 212 | " \"\"\"\n", 213 | " \n", 214 | " # START OF YOUR CODE:\n", 215 | " raise NotImplementedError(\"Falta implementar normal_equation_prediction()\")\n", 
216 | " # END YOUR CODE\n", 217 | " \n", 218 | " return prediction" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "\n", 226 | "Você pode usar a métrica [$R^2$](https://pt.wikipedia.org/wiki/R%C2%B2) para ver o quão bem o modelo linear está se ajustando aos dados.\n", 227 | "\n", 228 | "**Nesse caso $𝑅^2$ tem que estar próximo de 0.5.**" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "# teste da função normal_equation_prediction()\n", 238 | "prediction = normal_equation_prediction(X, w)\n", 239 | "r_2 = r_squared(y, prediction)\n", 240 | "plot_points_regression(X,\n", 241 | " y,\n", 242 | " title='Real estate prices prediction',\n", 243 | " xlabel=\"m\\u00b2\",\n", 244 | " ylabel='$',\n", 245 | " prediction=prediction,\n", 246 | " legend=True,\n", 247 | " r_squared=r_2)" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "## Testes adicionais\n", 255 | "\n", 256 | "Vamos fazer a predição para $x=650$\n" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "metadata": {}, 263 | "outputs": [], 264 | "source": [ 265 | "# usando a função de predição\n", 266 | "x = np.asarray([650]).reshape(1,1)\n", 267 | "prediction = normal_equation_prediction(x, w)\n", 268 | "print(\"Area = %.2f Predicted price = %.4f\" %(x[0], prediction))\n", 269 | "\n", 270 | "# de forma mais direta\n", 271 | "y = np.dot(np.asarray((1,x)), w)\n", 272 | "print(\"Area = %.2f Predicted price = %.4f\" %(x, y))\n" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "## Efeito do número de amostras e dimensão dos dados\n", 280 | "\n", 281 | "Varie o número de amostras $N$ e veja como varia o tempo de processamento.\n", 282 | "\n", 283 | "Teste o seu código para dados nos quais $𝑑>1$." 
284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "# Teste para diferentes valores de N\n", 293 | "X, y = get_housing_prices_data(N=1000000)\n", 294 | "init = time.time()\n", 295 | "w = normal_equation_weights(X, y)\n", 296 | "prediction = normal_equation_prediction(X,w)\n", 297 | "init = time.time() - init\n", 298 | "\n", 299 | "print(\"Tempo de execução = {:.8f}(s)\".format(init))" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [ 308 | "# Teste para dados de dimensão d>1" 309 | ] 310 | } 311 | ], 312 | "metadata": { 313 | "kernelspec": { 314 | "display_name": "Python 3", 315 | "language": "python", 316 | "name": "python3" 317 | }, 318 | "language_info": { 319 | "codemirror_mode": { 320 | "name": "ipython", 321 | "version": 3 322 | }, 323 | "file_extension": ".py", 324 | "mimetype": "text/x-python", 325 | "name": "python", 326 | "nbconvert_exporter": "python", 327 | "pygments_lexer": "ipython3", 328 | "version": "3.7.1" 329 | } 330 | }, 331 | "nbformat": 4, 332 | "nbformat_minor": 2 333 | } 334 | -------------------------------------------------------------------------------- /notebooks/README.md: -------------------------------------------------------------------------------- 1 | # Notebooks 2 | 3 | ## Organização 4 | 5 | Os notebooks aqui servem para fixar os conceitos, métodos e algoritmos apresentados em aula. Eles foram pensados na seguinte ordem: 6 | 7 | - Ano 2019 8 | * 01 Perceptron 9 | * 02 Regressão linear (solução analítica) 10 | 11 | - Ano 2018 12 | * regressao_linear1 13 | * regressao_linear2 14 | * pytorch_basico1 15 | * pytorch_basico2 16 | * cnn 17 | * transfer_learning 18 | 19 | ## Guia de atividades 20 | Cada notebook é auto explicativo. 
Os alunos devem rodar cada um para entender a questão que está sendo tratada, os exercícios estão claramente indicados no código. 21 | 22 | -------------------------------------------------------------------------------- /notebooks/download_scripts/efigi.sh: -------------------------------------------------------------------------------- 1 | wget https://www.dropbox.com/s/i1mx1mqjhcfmkwd/efigi_data_all_classes.zip 2 | unzip efigi_data_all_classes.zip 3 | rm efigi_data_all_classes.zip -------------------------------------------------------------------------------- /notebooks/download_scripts/pi_car.sh: -------------------------------------------------------------------------------- 1 | wget https://www.dropbox.com/s/7i0kb82ti1zia73/data.tar?dl=0 2 | tar xvf data.tar?dl=0 3 | rm data.tar?dl=0 4 | mv data self_driving_pi_car_data -------------------------------------------------------------------------------- /notebooks/images/arbitrary_padding_no_strides_transposed.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/arbitrary_padding_no_strides_transposed.gif -------------------------------------------------------------------------------- /notebooks/images/cnn_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/cnn_arch.png -------------------------------------------------------------------------------- /notebooks/images/conv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/conv.png -------------------------------------------------------------------------------- /notebooks/images/dfn.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/dfn.png -------------------------------------------------------------------------------- /notebooks/images/dfn_description.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/dfn_description.png -------------------------------------------------------------------------------- /notebooks/images/dwarf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/dwarf.png -------------------------------------------------------------------------------- /notebooks/images/elliptical.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/elliptical.png -------------------------------------------------------------------------------- /notebooks/images/image_classification.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/image_classification.png -------------------------------------------------------------------------------- /notebooks/images/irregular.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/irregular.png -------------------------------------------------------------------------------- /notebooks/images/lenticular.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/lenticular.png -------------------------------------------------------------------------------- /notebooks/images/logistic_regression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/logistic_regression.png -------------------------------------------------------------------------------- /notebooks/images/multchain.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/multchain.png -------------------------------------------------------------------------------- /notebooks/images/pytorch-logo-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/pytorch-logo-dark.png -------------------------------------------------------------------------------- /notebooks/images/simple_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/simple_example.png -------------------------------------------------------------------------------- /notebooks/images/spiral.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/spiral.png -------------------------------------------------------------------------------- /notebooks/images/test_track.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/test_track.png -------------------------------------------------------------------------------- /notebooks/images/train_track.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/train_track.png -------------------------------------------------------------------------------- /notebooks/images/vector_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/vector_graph.png -------------------------------------------------------------------------------- /notebooks/regressao_linear1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Regressão linear 1: equações normais\n", 8 | "\n", 9 | "\n", 10 | "Dado o dataset $(\\mathbf{x}_{1}, y_{1}), \\dots ,(\\mathbf{x}_{N}, y_{N})$ onde $\\mathbf{x}_i \\in \\mathbb{R}^{d}$ e $y_i \\in \\mathbb{R}$, podemos aproximar a função desconhecida $f:\\mathbb{R}^{d} \\rightarrow \\mathbb{R}$ (lembrando que $y_i =f(\\mathbf{x}_i)$) por meio de um modelo linear $h$:\n", 11 | "$$\n", 12 | "h(\\mathbf{x}_{i}; \\mathbf{w}, b) = \\mathbf{w}^\\top \\mathbf{x}_{i} + b\n", 13 | "$$\n", 14 | "\n", 15 | "Note que $h(\\mathbf{x}_{i}; \\mathbf{w}, b)$ é na verdade uma [transformação afim](https://en.wikipedia.org/wiki/Affine_transformation) de $\\mathbf{x}_{i}$. Como em outros lugares, vamos usar o termo \"linear\" também para caracterizar uma transformação afim.\n", 16 | "\n", 17 | "A saída de $h$ é uma transformação linear de $\\mathbf{x}_{i}$. 
Usamos a notação $h(\\mathbf{x}_{i}; \\mathbf{w}, b)$ para deixar claro que $h$ é um modelo parametrizado, i.e., a transformação $h$ é definida pelos parâmetros $\\mathbf{w}$ e $b$. Podemos pensar no vetor $\\mathbf{w}$ como um vetor de *pesos* controlando o efeito de cada *feature* na predição.\n", 18 | "\n", 19 | "Adicionando uma feature a mais na observação $\\mathbf{x}_{i}$ (com o valor 1) -- coordenada artificial -- podemos simplificar a notação do modelo:\n", 20 | "\n", 21 | "$$\n", 22 | "h(\\mathbf{x}_{i}; \\mathbf{w}) = \\hat{y}_{i} = \\mathbf{w}^\\top \\mathbf{x}_{i}\n", 23 | "$$\n", 24 | "\n", 25 | "Procuramos os melhores parâmetros $\\mathbf{w}$ de modo que a predição $\\hat{y}_{i}$ seja a mais próxima de $y_{i}$ de acordo com alguma métrica de erro. Usando o *erro quadrático médio* como tal métrica podemos obter a seguinte função de custo:\n", 26 | "\n", 27 | "\\begin{equation}\n", 28 | "J(\\mathbf{w}) = \\frac{1}{N}\\sum_{i=1}^{N}(\\hat{y}_{i} - y_{i})^{2}\n", 29 | "\\end{equation}\n", 30 | "\n", 31 | "Desse modo, a tarefa de achar a função $h$ mais próxima de $f$ se torna a tarefa de encontrar os valores de $\\mathbf{w}$ para minimizar $J(\\mathbf{w})$.\n", 32 | "\n", 33 | "**Aqui vamos começar a explorar esse modelo olhando para um dataset bem simples**\n" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "# all imports\n", 43 | "import numpy as np\n", 44 | "import time\n", 45 | "from util.util import get_housing_prices_data, r_squared\n", 46 | "from util.plots import plot_points_regression \n", 47 | "\n", 48 | "%matplotlib inline" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "### O dataset\n", 56 | "\n", 57 | "Os dados com que vamos trabalhar vão ser dados artificiais. Vamos pegar 100 observações com apenas uma *feature* (metros quadrados de um imóvel) e com isso vamos associar um valor (o preço desse imóvel em $). 
Nossa tarefa é construir um modelo que consiga predizer o valor dos imóveis." 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "X, y = get_housing_prices_data(N=100)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "### Plotando os dados\n", 74 | "\n", 75 | "Acima temos algumas informações sobre os dados, podemos também visualizar cada ponto." 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "plot_points_regression(X,\n", 85 | " y,\n", 86 | " title='Real estate prices prediction',\n", 87 | " xlabel=\"m\\u00b2\",\n", 88 | " ylabel='$')" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "### Equações normais\n", 96 | "\n", 97 | "Dados $f:\\mathbb{R}^{n\\times m} \\rightarrow \\mathbb{R}$ e $\\mathbf{A} \\in \\mathbb{R}^{n\\times m}$, definimos o gradiente de $f$ com respeito a $\\mathbf{A}$ como:\n", 98 | "\n", 99 | "\\begin{equation*}\n", 100 | "\\nabla_{\\mathbf{A}}f = \\frac{\\partial f}{\\partial \\mathbf{A}} = \\begin{bmatrix}\n", 101 | "\\frac{\\partial f}{\\partial \\mathbf{A}_{1,1}} & \\dots & \\frac{\\partial f}{\\partial \\mathbf{A}_{1,m}} \\\\\n", 102 | "\\vdots & \\ddots & \\vdots \\\\\n", 103 | "\\frac{\\partial f}{\\partial \\mathbf{A}_{n,1}} & \\dots & \\frac{\\partial f}{\\partial \\mathbf{A}_{n,m}}\n", 104 | "\\end{bmatrix}\n", 105 | "\\end{equation*}\n", 106 | "\n", 107 | "Seja $\\mathbf{X} \\in \\mathbb{R}^{N\\times m}$ a matriz cujas linhas são as observações do dataset (também chamada de *design matrix*) e seja $\\mathbf{y} \\in \\mathbb{R}^{N}$ o vetor contendo todos os valores de $y_{i}$ (i.e., $\\mathbf{X}_{i,:} = \\mathbf{x}_{i}$ e $\\mathbf{y}_{i} =y_{i}$). 
É fácil checar que: \n", 108 | "\n", 109 | "\n", 110 | "\\begin{equation}\n", 111 | "J(\\mathbf{w}) = \\frac{1}{N}(\\mathbf{X}\\mathbf{w} - \\mathbf{y})^{T}(\\mathbf{X}\\mathbf{w} - \\mathbf{y})\n", 112 | "\\end{equation}\n", 113 | "\n", 114 | "\n", 115 | "Usando certos conceitos básicos de derivada com matrizes podemos chegar no gradiente de $J(\\mathbf{w})$ com respeito a $\\mathbf{w}$:\n", 116 | "\n", 117 | "\\begin{equation}\n", 118 | "\\nabla_{\\mathbf{w}}J(\\mathbf{w}) = \\frac{2}{N} (\\mathbf{X}^{T}\\mathbf{X}\\mathbf{w} -\\mathbf{X}^{T}\\mathbf{y}) \n", 119 | "\\end{equation}\n", 120 | "\n", 121 | "Assim, quando $\\nabla_{\\mathbf{w}}J(\\mathbf{w}) = 0$ temos que \n", 122 | "\n", 123 | "\\begin{equation}\n", 124 | "\\mathbf{X}^{T}\\mathbf{X}\\mathbf{w} = \\mathbf{X}^{T}\\mathbf{y}\n", 125 | "\\end{equation}\n", 126 | "\n", 127 | "Desse modo, se $\\mathbf{X}^{T}\\mathbf{X}$ for invertível,\n", 128 | "\n", 129 | "\\begin{equation}\n", 130 | "\\mathbf{w} = (\\mathbf{X}^{T}\\mathbf{X})^{-1}\\mathbf{X}^{T}\\mathbf{y}\n", 131 | "\\end{equation}\n", 132 | "\n", 133 | "As equações que fornecem essa solução são conhecidas como **equações normais**. Note que esse tipo de solução tem um custo, pois conforme cresce o número de variáveis, o tempo da inversão da matriz fica proibitivo. Vale a pena ler [esse material](http://cs229.stanford.edu/notes/cs229-notes1.pdf) para ver o argumento com mais detalhes." 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "### Exercício\n", 141 | "Implemente a predição usando o método de equações normais. Usando apenas a biblioteca **NumPy** você deve completar a função abaixo (uma introdução rápida a certas funcionalidades dessa biblioteca pode ser encontrada [aqui](http://cs231n.github.io/python-numpy-tutorial/)). Lembre que $\\mathbf{X} \\in \\mathbb{R}^{N\\times d}$, você precisa adicionar uma componente com apenas 1s a cada observação em $\\mathbf{X}$ e realizar a computação descrita acima." 
142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "def normal_equation_prediction(X, y):\n", 151 | " \"\"\"\n", 152 | " Calculates the prediction using the normal equation method.\n", 153 | " You should add a new column with 1s.\n", 154 | "\n", 155 | " :param X: design matrix\n", 156 | " :type X: np.ndarray(shape=(N, d))\n", 157 | " :param y: regression targets\n", 158 | " :type y: np.ndarray(shape=(N, 1))\n", 159 | " :return: prediction\n", 160 | " :rtype: np.ndarray(shape=(N, 1))\n", 161 | " \"\"\"\n", 162 | " # YOUR CODE HERE:\n", 163 | " raise NotImplementedError(\"falta completar a função normal_equation_prediction\")\n", 164 | " # END YOUR CODE\n", 165 | " return prediction\n" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "prediction = normal_equation_prediction(X, y)\n", 175 | "r_2 = r_squared(y, prediction)\n", 176 | "plot_points_regression(X,\n", 177 | " y,\n", 178 | " title='Real estate prices prediction',\n", 179 | " xlabel=\"m\\u00b2\",\n", 180 | " ylabel='$',\n", 181 | " prediction=prediction,\n", 182 | " legend=True,\n", 183 | " r_squared=r_2)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "Você pode usar a métrica [$R^2$](https://pt.wikipedia.org/wiki/R%C2%B2) para ver o quão bem o modelo linear está se ajustando aos dados.\n", 191 | "\n", 192 | "**Nesse caso $R^2$ tem que estar próximo de $0.5$.**" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "X, y = get_housing_prices_data(N=1000000, verbose=False)\n", 202 | "init = time.time()\n", 203 | "prediction = normal_equation_prediction(X, y)\n", 204 | "init = time.time() - init\n", 205 | "print(\"Tempo de execução da função de predição = 
class GentleLoader:
    """
    Wrapper around ``torch.utils.data.DataLoader`` whose iterator casts
    every batch: inputs to ``torch.float`` and targets to ``torch.long``.

    :param dataset: dataset yielding (input, target) pairs
    :type dataset: torch.utils.data.Dataset
    :param batch_size: number of examples per batch
    :type batch_size: int
    :param shuffle: whether the data is reshuffled on every pass
    :type shuffle: bool
    """
    def __init__(self, dataset, batch_size, shuffle):
        # Honor the caller's choice instead of always shuffling; otherwise
        # ``shuffle=False`` would be silently ignored.
        self.dataLoader = DataLoader(dataset=dataset,
                                     batch_size=batch_size,
                                     shuffle=shuffle)

    def __iter__(self):
        """
        Yields ``(batch_X, batch_y)`` tuples with ``batch_X`` cast to
        ``torch.float`` and ``batch_y`` cast to ``torch.long``.
        """
        return ((batch_X.type(torch.float), batch_y.type(torch.long))
                for (batch_X, batch_y) in self.dataLoader)
19 | 20 | :param config: hyper params configuration 21 | :type config: LRConfig or DFNConfig 22 | :param train_dataset: dataset of training data 23 | :type train_dataset: torch.utils.data.dataset.TensorDataset 24 | :param test_dataset: dataset of test data 25 | :type test_dataset: torch.utils.data.dataset.TensorDataset 26 | :param valid_dataset: dataset of valid data 27 | :type valid_dataset: torch.utils.data.dataset.TensorDataset 28 | :param batch_size: batch size for training 29 | :type test_batch: batch size for the testing data 30 | :param test_batch: int 31 | """ 32 | def __init__(self, 33 | config, 34 | train_dataset, 35 | valid_dataset, 36 | test_dataset, 37 | test_batch=1000): 38 | batch_size = config.batch_size 39 | self.train_loader = GentleLoader(dataset=train_dataset, 40 | batch_size=batch_size, 41 | shuffle=True) 42 | self.valid_loader = GentleLoader(dataset=valid_dataset, 43 | batch_size=batch_size, 44 | shuffle=True) 45 | self.test_loader = GentleLoader(dataset=test_dataset, 46 | batch_size=test_batch, 47 | shuffle=True) 48 | 49 | 50 | class DataHolder(): 51 | """ 52 | Class to store all data. 
def plot_points_regression(x,
                           y,
                           title,
                           xlabel,
                           ylabel,
                           prediction=None,
                           legend=False,
                           r_squared=None,
                           position=(90, 100)):
    """
    Plots the data points and, when given, the model's prediction
    as a line over the same x values.

    :param x: design matrix
    :type x: np.array
    :param y: regression targets
    :type y: np.array
    :param title: plot's title
    :type title: str
    :param xlabel: x axis label
    :type xlabel: str
    :param ylabel: y axis label
    :type ylabel: str
    :param prediction: model's prediction
    :type prediction: np.array
    :param legend: param to control print legends
    :type legend: bool
    :param r_squared: r^2 value
    :type r_squared: float
    :param position: text position (data coordinates) of the r^2 box
    :type position: tuple
    """
    fig, ax = plt.subplots(1, 1, figsize=(8, 8))
    line1, = ax.plot(x, y, 'bo', label='Real data')
    # Collect only the lines that were actually drawn, so asking for a
    # legend without a prediction does not reference a missing line.
    handles = [line1]
    if prediction is not None:
        line2, = ax.plot(x, prediction, 'r', label='Predicted data')
        handles.append(line2)
    if legend:
        ax.legend(handles=handles, loc=2)
    ax.set_title(title,
                 fontsize=20,
                 fontweight='bold')
    if r_squared is not None:
        bbox_props = dict(boxstyle="square,pad=0.3",
                          fc="white", ec="black", lw=0.2)
        ax.text(position[0], position[1], "$R^2 ={:.4f}$".format(r_squared),
                size=15, bbox=bbox_props)

    ax.set_xlabel(xlabel, fontsize=20)
    ax.set_ylabel(ylabel, fontsize=20)
    plt.show()
71 | 72 | :param X: design matrix 73 | :type X: np.ndarray 74 | :param y: regression targets 75 | :type y: np.ndarray 76 | :param cost_function: function to compute regression cost 77 | :type cost_function: lambda: (np.ndarray, np.ndarray, np.ndarray) -> float 78 | :param title: plot's title 79 | :type title: str 80 | :param weights_list: list of weights 81 | :type weights_list: list 82 | :param cost_list: list of costs 83 | :type cost_list: list 84 | :param position: surfice rotation position 85 | :type position: tuple 86 | :param range_points: range of values for w 87 | :type range_points: tuple 88 | """ 89 | 90 | w_0, w_1 = 0, 0 91 | ms = np.linspace(w_0 - range_points[0] , w_0 + range_points[0], range_points[0]) 92 | bs = np.linspace(w_1 - range_points[1] , w_1 + range_points[1], range_points[1]) 93 | M, B = np.meshgrid(ms, bs) 94 | MB = np.stack((np.ravel(M), np.ravel(B)), axis=1) 95 | size = MB.shape[0] 96 | MB = MB.reshape((size, 2, 1)) 97 | zs = np.array([cost_function(X, y, MB[i]) 98 | for i in range(size)]) 99 | Z = zs.reshape(M.shape) 100 | fig = plt.figure(figsize=(20, 10)) 101 | ax = fig.add_subplot(111, projection='3d') 102 | ax.plot_surface(M, B, Z, rstride=1, cstride=1, color='b', alpha=0.2) 103 | ax.set_xlabel('w[0]', labelpad=30, fontsize=24, fontweight='bold') 104 | ax.set_ylabel('w[1]', labelpad=30, fontsize=24, fontweight='bold') 105 | ax.set_zlabel('J(w)', labelpad=30, fontsize=24, fontweight='bold') 106 | if weights_list is not None and cost_list is not None: 107 | ax.plot([weights_list[0][0]], 108 | [weights_list[0][1]], 109 | [cost_list[0]], 110 | markerfacecolor=(1.0, 0.0, 0.0, 1.0), 111 | markeredgecolor=(1.0, 0.0, 0.0, 1.0), 112 | marker='o', 113 | markersize=7) 114 | ax.plot([weights_list[-1][0]], 115 | [weights_list[-1][1]], 116 | [cost_list[-1]], 117 | markerfacecolor=(0.0, 0.0, 1.0, 1.0), 118 | markeredgecolor=(0.0, 0.0, 1.0, 1.0), 119 | marker='o', 120 | markersize=7) 121 | temp_red = 1.0 122 | temp_blue = 0.0 123 | size = 
def simple_step_plot(ylist,
                     yname,
                     title,
                     figsize=(4, 4),
                     labels=None):
    """
    Draws one curve per list of values against the step number
    (steps are counted from 1).

    :param ylist: list of values lists
    :type ylist: list
    :param yname: value name, used as the y axis label
    :type yname: str
    :param title: plot's title
    :type title: str
    :param figsize: plot's size
    :type figsize: tuple
    :param labels: label for each values list in ylist
    :type labels: list
    """
    # The x axis is sized from the first series only.
    steps = np.arange(1, len(ylist[0]) + 1, 1)
    fig, ax = plt.subplots(1, 1, figsize=figsize)
    for series in ylist:
        ax.plot(steps, series)
    plt.xlabel('step')
    plt.ylabel(yname)
    plt.title(title,
              fontsize=14,
              fontweight='bold')
    plt.grid(True)
    if labels is not None:
        plt.legend(labels,
                   loc='upper right')
    plt.show()
def plot_confusion_matrix(truth,
                          predictions,
                          classes,
                          normalize=False,
                          save=False,
                          cmap=plt.cm.Oranges,
                          path="confusion_matrix.png"):
    """
    Plots the confusion matrix together with the overall accuracy.
    Normalization (per true-label row) can be applied by setting
    `normalize=True`. 'cmap' controls the colors of the plot.

    :param truth: true labels
    :type truth: np array
    :param predictions: model predictions
    :type predictions: np array
    :param classes: list of classes in order
    :type classes: list
    :param normalize: param to normalize cm matrix
    :type normalize: bool
    :param save: param to save cm plot
    :type save: bool
    :param cmap: plt color map
    :type cmap: plt.cm
    :param path: path to save image
    :type path: str
    """
    hits = np.array(truth) == np.array(predictions)
    size = float(hits.shape[0])
    acc = np.sum(hits.astype("int32")) / size
    title = "Confusion matrix of {0} examples\n accuracy = {1:.6f}".format(int(size),  # noqa
                                                                           acc)
    cm = confusion_matrix(truth, predictions)
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A label that only shows up in `predictions` has an all-zero row;
        # dividing it by 1 keeps the row at zero instead of producing NaNs.
        cm = cm.astype('float') / np.where(row_totals == 0, 1, row_totals)
    fig = plt.figure(figsize=(9, 9))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title, fontsize=24, fontweight='bold')
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label', fontweight='bold')
    plt.xlabel('Predicted label', fontweight='bold')
    # Save before showing: once plt.show() returns, the figure may already
    # have been released, in which case savefig would write an empty image.
    if save:
        fig.savefig(path)
    plt.show()
def r_squared(y, y_hat):
    """
    Calculate the R^2 value (coefficient of determination):

        R^2 = 1 - SS_res / SS_tot

    where SS_res = sum((y - y_hat)^2) and SS_tot = sum((y - mean(y))^2).
    A perfect prediction gives 1.0; always predicting the mean of y
    gives 0.0.

    :param y: regression targets
    :type y: np array
    :param y_hat: prediction
    :type y_hat: np array
    :return: r^2 value
    :rtype: float
    """
    # Flatten both arrays so that shapes (N, 1) and (N,) cannot broadcast
    # into an (N, N) matrix, and accumulate in float64 for precision.
    y = np.asarray(y, dtype=np.float64).ravel()
    y_hat = np.asarray(y_hat, dtype=np.float64).ravel()
    ss_res = np.sum(np.square(y - y_hat))
    ss_tot = np.sum(np.square(y - np.mean(y)))
    if ss_tot == 0.0:
        # Constant targets: the ratio is undefined, so report a perfect
        # score only when the prediction is exact.
        return 1.0 if ss_res == 0.0 else 0.0
    return float(1.0 - ss_res / ss_tot)
78 | 79 | :param list1: list 80 | :type list1: list or np.array 81 | :param list2: list 82 | :type list2: list or np.array 83 | :param init: seed 84 | :type init: int 85 | """ 86 | np.random.seed(seed=init) 87 | np.random.shuffle(list1) 88 | np.random.seed(seed=init) 89 | np.random.shuffle(list2) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.14.1 2 | matplotlib==2.1.2 3 | scikit-learn==0.19.1 4 | -------------------------------------------------------------------------------- /slides/LICENSE: -------------------------------------------------------------------------------- 1 | CC0 1.0 Universal 2 | 3 | Statement of Purpose 4 | 5 | The laws of most jurisdictions throughout the world automatically confer 6 | exclusive Copyright and Related Rights (defined below) upon the creator and 7 | subsequent owner(s) (each and all, an "owner") of an original work of 8 | authorship and/or a database (each, a "Work"). 9 | 10 | Certain owners wish to permanently relinquish those rights to a Work for the 11 | purpose of contributing to a commons of creative, cultural and scientific 12 | works ("Commons") that the public can reliably and without fear of later 13 | claims of infringement build upon, modify, incorporate in other works, reuse 14 | and redistribute as freely as possible in any form whatsoever and for any 15 | purposes, including without limitation commercial purposes. These owners may 16 | contribute to the Commons to promote the ideal of a free culture and the 17 | further production of creative, cultural and scientific works, or to gain 18 | reputation or greater distribution for their Work in part through the use and 19 | efforts of others. 
20 | 21 | For these and/or other purposes and motivations, and without any expectation 22 | of additional consideration or compensation, the person associating CC0 with a 23 | Work (the "Affirmer"), to the extent that he or she is an owner of Copyright 24 | and Related Rights in the Work, voluntarily elects to apply CC0 to the Work 25 | and publicly distribute the Work under its terms, with knowledge of his or her 26 | Copyright and Related Rights in the Work and the meaning and intended legal 27 | effect of CC0 on those rights. 28 | 29 | 1. Copyright and Related Rights. A Work made available under CC0 may be 30 | protected by copyright and related or neighboring rights ("Copyright and 31 | Related Rights"). Copyright and Related Rights include, but are not limited 32 | to, the following: 33 | 34 | i. the right to reproduce, adapt, distribute, perform, display, communicate, 35 | and translate a Work; 36 | 37 | ii. moral rights retained by the original author(s) and/or performer(s); 38 | 39 | iii. publicity and privacy rights pertaining to a person's image or likeness 40 | depicted in a Work; 41 | 42 | iv. rights protecting against unfair competition in regards to a Work, 43 | subject to the limitations in paragraph 4(a), below; 44 | 45 | v. rights protecting the extraction, dissemination, use and reuse of data in 46 | a Work; 47 | 48 | vi. database rights (such as those arising under Directive 96/9/EC of the 49 | European Parliament and of the Council of 11 March 1996 on the legal 50 | protection of databases, and under any national implementation thereof, 51 | including any amended or successor version of such directive); and 52 | 53 | vii. other similar, equivalent or corresponding rights throughout the world 54 | based on applicable law or treaty, and any national implementations thereof. 55 | 56 | 2. Waiver. 
To the greatest extent permitted by, but not in contravention of, 57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and 58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright 59 | and Related Rights and associated claims and causes of action, whether now 60 | known or unknown (including existing as well as future claims and causes of 61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum 62 | duration provided by applicable law or treaty (including future time 63 | extensions), (iii) in any current or future medium and for any number of 64 | copies, and (iv) for any purpose whatsoever, including without limitation 65 | commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes 66 | the Waiver for the benefit of each member of the public at large and to the 67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver 68 | shall not be subject to revocation, rescission, cancellation, termination, or 69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work 70 | by the public as contemplated by Affirmer's express Statement of Purpose. 71 | 72 | 3. Public License Fallback. Should any part of the Waiver for any reason be 73 | judged legally invalid or ineffective under applicable law, then the Waiver 74 | shall be preserved to the maximum extent permitted taking into account 75 | Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver 76 | is so judged Affirmer hereby grants to each affected person a royalty-free, 77 | non transferable, non sublicensable, non exclusive, irrevocable and 78 | unconditional license to exercise Affirmer's Copyright and Related Rights in 79 | the Work (i) in all territories worldwide, (ii) for the maximum duration 80 | provided by applicable law or treaty (including future time extensions), (iii) 81 | in any current or future medium and for any number of copies, and (iv) for any 82 | purpose whatsoever, including without limitation commercial, advertising or 83 | promotional purposes (the "License"). The License shall be deemed effective as 84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the 85 | License for any reason be judged legally invalid or ineffective under 86 | applicable law, such partial invalidity or ineffectiveness shall not 87 | invalidate the remainder of the License, and in such case Affirmer hereby 88 | affirms that he or she will not (i) exercise any of his or her remaining 89 | Copyright and Related Rights in the Work or (ii) assert any associated claims 90 | and causes of action with respect to the Work, in either case contrary to 91 | Affirmer's express Statement of Purpose. 92 | 93 | 4. Limitations and Disclaimers. 94 | 95 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 96 | surrendered, licensed or otherwise affected by this document. 97 | 98 | b. Affirmer offers the Work as-is and makes no representations or warranties 99 | of any kind concerning the Work, express, implied, statutory or otherwise, 100 | including without limitation warranties of title, merchantability, fitness 101 | for a particular purpose, non infringement, or the absence of latent or 102 | other defects, accuracy, or the present or absence of errors, whether or not 103 | discoverable, all to the greatest extent permissible under applicable law. 104 | 105 | c. 
Affirmer disclaims responsibility for clearing rights of other persons 106 | that may apply to the Work or any use thereof, including without limitation 107 | any person's Copyright and Related Rights in the Work. Further, Affirmer 108 | disclaims responsibility for obtaining any necessary consents, permissions 109 | or other rights required for any use of the Work. 110 | 111 | d. Affirmer understands and acknowledges that Creative Commons is not a 112 | party to this document and has no duty or obligation with respect to this 113 | CC0 or use of the Work. 114 | 115 | For more information, please see 116 | 117 | -------------------------------------------------------------------------------- /slides/README.md: -------------------------------------------------------------------------------- 1 | # Slides 2 | 3 | ![alt text](images/cc-logo.png "CC") 4 | 5 | 6 | Licensed under [creative commons](https://github.com/MLIME/MAC0460/blob/master/2017/slides/LICENSE) 7 | 8 | ## Uso 9 | 10 | Slides com os diferentes materiais dados em aula (diferentes aulas estão em diferentes pastas). Cada pasta possui uma subpasta chamada "pdf" em que você pode pegar o material. 
Se quiser compilar o código em latex da pasta "foo" basta rodar (em Ubuntu / Debian): 11 | 12 | ``` 13 | $ bash install.sh 14 | $ cd foo/ 15 | $ make 16 | ``` 17 | 18 | -------------------------------------------------------------------------------- /slides/backprop1/Makefile: -------------------------------------------------------------------------------- 1 | BASE_NAME := main 2 | BUILD_DIR := build 3 | PDF_NAME := BackpropLecture1.pdf 4 | 5 | PDFLATEX_OPTIONS = -halt-on-error -aux-directory=$(BUILD_DIR) -output-directory=$(BUILD_DIR) --shell-escape 6 | LATEX := latex 7 | PDFLATEX = pdflatex $(PDFLATEX_OPTIONS) 8 | BIBTEX := bibtex 9 | 10 | pdf: $(BASE_NAME).pdf 11 | 12 | $(BASE_NAME).pdf: $(BASE_NAME).tex 13 | mkdir -p $(BUILD_DIR) 14 | $(PDFLATEX) $< 15 | $(BIBTEX) $(BUILD_DIR)/$(BASE_NAME) 16 | $(PDFLATEX) $< 17 | $(PDFLATEX) $< 18 | $(PDFLATEX) $< 19 | cp $(BUILD_DIR)/$(BASE_NAME).pdf $(PDF_NAME) 20 | 21 | clean: 22 | rm -rf build $(PDF_NAME) 23 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/3x3.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{0.7}{ 5 | \begin{tikzpicture}[scale=1.4,every node/.style={minimum size=1cm}, on grid] 6 | \draw[fill=base02,opacity=0.4] (0,0) rectangle (3,3); 7 | \draw[draw=base03,thick] (0,0) grid (3,3); 8 | \node (00) at (0.5,2.5) {\Large 0}; 9 | \node (01) at (1.5,2.5) {\Large 1}; 10 | \node (02) at (2.5,2.5) {\Large 2}; 11 | \node (10) at (0.5,1.5) {\Large 2}; 12 | \node (11) at (1.5,1.5) {\Large 2}; 13 | \node (12) at (2.5,1.5) {\Large 0}; 14 | \node (20) at (0.5,0.5) {\Large 0}; 15 | \node (21) at (1.5,0.5) {\Large 1}; 16 | \node (22) at (2.5,0.5) {\Large 2}; 17 | \end{tikzpicture} 18 | } % scalebox 19 | \end{figure} 20 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/5x5.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{0.7}{ 5 | \begin{tikzpicture}[scale=1.5,every node/.style={minimum size=2cm}, on grid] 6 | \draw[fill=blue2,opacity=1.2] (0,0) rectangle (5,5); 7 | \draw[draw=base03,thick] (0,0) grid (5,5); 8 | \node (00) at (0.5,4.5) {\LARGE 3}; 9 | \node (10) at (0.5,3.5) {\LARGE 0}; 10 | \node (20) at (0.5,2.5) {\LARGE 3}; 11 | \node (30) at (0.5,1.5) {\LARGE 2}; 12 | \node (40) at (0.5,0.5) {\LARGE 2}; 13 | 14 | \node (01) at (1.5,4.5) {\LARGE 3}; 15 | \node (11) at (1.5,3.5) {\LARGE 0}; 16 | \node (21) at (1.5,2.5) {\LARGE 1}; 17 | \node (31) at (1.5,1.5) {\LARGE 0}; 18 | \node (41) at (1.5,0.5) {\LARGE 0}; 19 | 20 | \node (02) at (2.5,4.5) {\LARGE 2}; 21 | \node (12) at (2.5,3.5) {\LARGE 1}; 22 | \node (22) at (2.5,2.5) {\LARGE 2}; 23 | \node (32) at (2.5,1.5) {\LARGE 0}; 24 | \node (42) at (2.5,0.5) {\LARGE 0}; 25 | 26 | \node (03) at (3.5,4.5) {\LARGE 1}; 27 | \node (13) at (3.5,3.5) {\LARGE 3}; 28 | \node (23) at (3.5,2.5) {\LARGE 2}; 29 | \node (33) at (3.5,1.5) {\LARGE 2}; 30 | \node (43) at (3.5,0.5) {\LARGE 0}; 31 | 32 | \node (04) at (4.5,4.5) {\LARGE 0}; 33 | \node (14) 
at (4.5,3.5) {\LARGE 1}; 34 | \node (24) at (4.5,2.5) {\LARGE 3}; 35 | \node (34) at (4.5,1.5) {\LARGE 2}; 36 | \node (44) at (4.5,0.5) {\LARGE 1}; 37 | \end{tikzpicture} 38 | } % scalebox 39 | \end{figure} 40 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/BackPropScalar.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{1.5}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | \node[op] (y) {y}; 9 | \node[op, above=30pt of y] (z) {z}; 10 | \node[op, below=30pt of y] (x) {x}; 11 | 12 | % gradients ============================= 13 | \visible<2->{\node[gradient, right=50pt of z] (delzdely) {$\frac{dz}{dy}$};} 14 | \visible<2->{\node[textonly, right=0.1pt of delzdely] {$=\frac{1}{x^{2}}$};} 15 | \visible<3->{\node[gradient, right=50pt of y] (delydelx) {$\frac{dy}{dx}$};} 16 | \visible<3->{\node[textonly, right=0.1pt of delydelx] {$=2x$};} 17 | \visible<4->{\node[gradient, right=80pt of x] (delzdelx) {$\frac{dz}{dx}$};} 18 | \visible<4->{\node[textonly, right=0.1pt of delzdelx] {$=\frac{2}{x}$};} 19 | 20 | 21 | % edges 22 | \path[tedge] (x) -- (y); 23 | \path[tedge] (y) -- (z); 24 | \visible<2->{\path[tedge] (z) -- (delzdely);} 25 | \visible<3->{\path[tedge] (y) -- (delydelx);} 26 | \visible<4->{\path[tedge] (x) -- (delzdelx);} 27 | \visible<4->{\path[tedge] (delydelx) -- (delzdelx);} 28 | \visible<4->{\path[tedge] (delzdely) to [bend right, out=-310, in=100, distance=40pt] (delzdelx);} 29 | 30 | 31 | % \visible<2->{\node[gradient, left=20pt of V] (grad-V) {$\nabla_{\textbf{V}}\textbf{L}$};} 32 | 33 | 34 | \end{tikzpicture} 35 | } % scalebox 36 | \end{figure} 37 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/Compgraph1.tex: -------------------------------------------------------------------------------- 1 | 
\begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{1.3}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % nodes 10 | \node[op] (W1) {$\vect{W}$}; 11 | \node[textonly, below=20pt of W1] (inv1) {}; 12 | \node[op, below=40pt of W1] (x1) {$\vect{x}$}; 13 | \node[op, right=30.5pt of inv1] (v1) {matmul}; 14 | \node[op, right=120pt of W1] (W2) {$\vect{W}$}; 15 | \node[textonly, below=20pt of W2] (inv2) {}; 16 | \node[op, below=40pt of W2] (x2) {$\vect{x}$}; 17 | \node[op, right=30.5pt of inv2] (v2) {$\vect{v}$}; 18 | \node[textonly, left=1.5pt of v2] (matmtul) {matmul}; 19 | \node[textonly, left=40pt of inv2] (inv3) {}; 20 | \node[textonly, below=50.5pt of inv3] (result) {{\large $\vect{v} = \vect{W} \vect{x} $}}; 21 | 22 | % edges 23 | \path[tedge] (W1) -- (v1); 24 | \path[tedge] (x1) -- (v1); 25 | \path[tedge] (W2) -- (v2); 26 | \path[tedge] (x2) -- (v2); 27 | 28 | \end{tikzpicture} 29 | } % scalebox 30 | \end{figure} 31 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/Compgraph2.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{1.15}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % nodes 10 | \node[op] (W1) {$\vect{W}_{1}$}; 11 | \node[textonly, below=40pt of W1] (inv1) {}; 12 | \node[op, below=20pt of inv1] (x) {$\vect{x}$}; 13 | \node[op, right=30.5pt of inv1] (v) {$\vect{v}$}; 14 | \node[textonly, left=1.5pt of v] (matmtul1) {matmul}; 15 | \node[op, right=35pt of v] (h) {$\vect{h}$}; 16 | \node[op, right=110pt of W1] (W2) {$\vect{W}_{2}$}; 17 | \node[op, right=35pt of h] (z) {$\vect{z}$}; 18 | \node[op, right=55pt of z] (y) {$\hat{\vect{y}}$}; 19 | \node[textonly, above left=1.5pt of z] (matmtul2) {matmul}; 20 | 21 | 22 | % edges 23 | \path[tedge] (W1) -- (v); 24 | \path[tedge] (x) -- (v); 25 | \path[tedge] (v) edge node[above=1pt] {{\Large$\sigma$}} (h); 26 | \path[tedge] (z) edge node[above=1pt] {{\Large softmax}} (y); 27 | \path[tedge] (W2) -- (z); 28 | \path[tedge] (h) -- (z); 29 | 30 | 31 | \end{tikzpicture} 32 | } % scalebox 33 | \end{figure} 34 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/Compgraph3.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{1.2}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % nodes on the left 10 | \node[op] (W1) {$\vect{W}_{1}$}; 11 | \node[textonly, below=20pt of W1] (inv1) {}; 12 | \node[op, right=30pt of inv1] (v) {$\vect{v}$}; 13 | \node[op, below right=30.5pt of v] (x) {$\vect{x}$}; 14 | \node[op, above=30.5pt of v] (hprime) {$\vect{h}^{\prime}$}; 15 | \node[textonly, above right=2pt of hprime] (h) {{\LARGE$\vect{h}$}}; 16 | 17 | 18 | %% namedscope in the left 19 | \begin{scope}[on background layer] 20 | \coordinate (p1) at (hprime.north); 21 | \coordinate (p2) at (v.south east); 22 | \coordinate (p3) at (W1.west); 23 | \tkzCircumCenter(p1,p2,p3) 24 | \tkzGetPoint{O} 25 | \tkzDrawCircle[draw=orange, line width=1.5pt, fill=orange!60](O,p1) 26 | \end{scope} 27 | 28 | % edges on the left 29 | \path[tedge] (W1) -- (v); 30 | \path[tedge] (x) -- (v); 31 | \path[tedge] (v) -- (hprime); 32 | 33 | % nodes on the right 34 | \node[op, right=30pt of x] (hh) {$\vect{h}$}; 35 | \node[op, above right=30.5pt of hh] (z) {$\vect{z}$}; 36 | \node[op, above=30.5pt of z] (yprime) {$\vect{y}^{\prime}$}; 37 | \node[textonly, right=30pt of z] (inv2) {}; 38 | \node[op, above=20pt of inv2] (W2) {$\vect{W}_{2}$}; 39 | \node[textonly, above left=1pt of yprime] (yhat) {{\LARGE$\hat{\vect{y}}$}}; 40 | 41 | 42 | %% namedscope in the right 43 | \begin{scope}[on background layer] 44 | \coordinate (p1) at (yprime.north); 45 | \coordinate (p2) at (W2.north east); 46 | \coordinate (p3) at (z.south west); 47 | \tkzCircumCenter(p1,p2,p3) 48 | \tkzGetPoint{O} 49 | \tkzDrawCircle[draw=orange, line width=1.5pt, fill=orange!60](O,p1) 50 | \end{scope} 51 | 52 | 53 | 54 | % edges on the right 55 | \path[tedge] (W2) -- (z); 56 | \path[tedge] (hh) -- (z); 57 | \path[tedge] (z) -- (yprime); 58 | 59 | 60 | \end{tikzpicture} 61 | } % scalebox 62 | \end{figure} 63 | -------------------------------------------------------------------------------- 
/slides/backprop1/TikzFiles/Compgraph4.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{1.5}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | \node[op] (h) {$\vect{h}$}; 9 | \node[op, above=30pt of h] (y) {$\hat{\vect{y}}$}; 10 | \node[op, below=30pt of h] (x) {$\vect{x}$}; 11 | 12 | 13 | % edges 14 | \path[tedge] (x) -- (h); 15 | \path[tedge] (h) -- (y); 16 | 17 | \end{tikzpicture} 18 | } % scalebox 19 | \end{figure} 20 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/DFNclassification.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{1.3}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % nodes 10 | \node[textonly] (vectorx) {$\begin{bmatrix}0.34\\ \vdots \\0.06\end{bmatrix}$}; 11 | \node[textonly, above=1pt of vectorx] (x) {$\vect{x}$}; 12 | \node[textonly, below=1pt of vectorx] (dimension1) {{\small$d\times 1$}}; 13 | \node[op, right=30pt of vectorx] (model) {$h(\vect{x}; \vect{\theta})$}; 14 | \node[textonly, right=30pt of model] (vectoryhat) {$\begin{bmatrix}p(y=1| \vect{x};\vect{\theta})\\ \vdots \\p(y=n| \vect{x};\vect{\theta})\end{bmatrix}$}; 15 | \node[textonly, above=1pt of vectoryhat] (yhat) {$\hat{\vect{y}}$}; 16 | \node[textonly, below=1pt of vectoryhat] (dimension2) {{\small$n\times 1$}}; 17 | 18 | 19 | 20 | % edges 21 | \path[tedge] (vectorx) -- (model); 22 | \path[tedge] (model) -- (vectoryhat); 23 | 24 | 25 | \end{tikzpicture} 26 | } % scalebox 27 | \end{figure} 28 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/DFNclassification2.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{1.3}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % nodes 10 | \node[textonly] (vectorx) {$\begin{bmatrix}0.34\\ \vdots \\0.06\end{bmatrix}$}; 11 | \node[textonly, above=1pt of vectorx] (x) {$\vect{x}$}; 12 | \node[textonly, below=1pt of vectorx] (dimension1) {{\small$d\times 1$}}; 13 | \node[op, right=30pt of vectorx] (model) {$h(\vect{x}; \vect{\theta})$}; 14 | \node[textonly, right=30pt of model] (vectoryhat) {$p(y=1| \vect{x};\vect{\theta})$}; 15 | \node[textonly, above=1pt of vectoryhat] (yhat) {$\hat{\vect{y}}$}; 16 | 17 | 18 | 19 | 20 | % edges 21 | \path[tedge] (vectorx) -- (model); 22 | \path[tedge] (model) -- (vectoryhat); 23 | 24 | 25 | \end{tikzpicture} 26 | } % scalebox 27 | \end{figure} 28 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/DeepNN.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{0.7}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | \node[op] (h1) {$\vect{h}^{(1)}$}; 9 | \node[op, below=30pt of h1] (x) {$\vect{x}$}; 10 | \node[op, above=30pt of h1] (h2) {$\vect{h}^{(2)}$}; 11 | \node[textonly, above=20pt of h2] (hdots) {{\LARGE$\vdots$}}; 12 | \node[op, above=20pt of hdots] (hn) {$\vect{h}^{(n)}$}; 13 | \node[op, above=30pt of hn] (y) {$\hat{\vect{y}}$}; 14 | 15 | %invisible nodes 16 | \node[textonly, above right=2pt of y] (yinv1) {}; 17 | \node[textonly, below right=2pt of y] (yinv2) {}; 18 | \node[textonly, right=26pt of y] (yinv3) {output layer}; 19 | 20 | \node[textonly, above right=2pt of hn] (hinv1) {}; 21 | \node[textonly, below right=2pt of h1] (hinv2) {}; 22 | \node[textonly, right=26pt of h2] (hinv3) {hidden layers}; 23 | 24 | \node[textonly, above right=2pt of x] (xinv1) {}; 25 | \node[textonly, below right=2pt of x] (xinv2) {}; 26 | \node[textonly, right=26pt of x] (xinv3) {input layer}; 27 | 28 | 29 | \visible<2->{\node[textonly, above left=0.1pt and 0.1pt of y] (d1) {};} 30 | \visible<2->{\node[textonly, below left=0.1pt and 0.1pt of x] (d2) {};} 31 | \visible<2->{\node[textonly, left=56pt of hdots] (d3) {{\large\alert{deep model}}};} 32 | 33 | 34 | % edges 35 | \path[tedge] (x) -- (h1); 36 | \path[tedge] (h1) -- (h2); 37 | \path[tedge] (h2) -- (hdots); 38 | \path[tedge] (hdots) -- (hn); 39 | \path[tedge] (hn) -- (y); 40 | 41 | % visual aid edges 42 | \draw[orange!120, line width=1mm] (yinv3) to [out=180,in=-80] (yinv1); 43 | \draw[orange!120, line width=1mm] (yinv3) to [out=180,in=80] (yinv2); 44 | 45 | 46 | \draw[orange!120, line width=1mm] (hinv3) to [out=180,in=-80] (hinv1); 47 | \draw[orange!120, line width=1mm] (hinv3) to [out=180,in=80] (hinv2); 48 | 49 | 50 | \draw[orange!120, line width=1mm] (xinv3) to [out=180,in=-80] (xinv1); 51 | \draw[orange!120, line width=1mm] (xinv3) to [out=180,in=80] (xinv2); 52 | 53 | 54 | \visible<2->{\draw[orange!180, 
line width=1mm] (d3) to [out=0,in=180] (d1);} 55 | \visible<2->{\draw[orange!180, line width=1mm] (d3) to [out=0,in=180] (d2);} 56 | 57 | 58 | 59 | \end{tikzpicture} 60 | } % scalebox 61 | \end{figure} 62 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/Dropout1.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{1.3}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | \node[op] (x2) {$x_2$}; 9 | \node[op, above=20pt of x2] (x1) {$x_1$}; 10 | \node[op, below=20pt of x2] (x3) {$x_3$}; 11 | \node[op, above right=10pt and 40pt of x2] (h2) {$h_2$}; 12 | \node[op, above=20pt of h2] (h1) {$h_1$}; 13 | \node[op, below=20pt of h2] (h3) {$h_3$}; 14 | \node[op, below=20pt of h3] (h4) {$h_4$}; 15 | \node[op, right=90pt of x2] (o) {$\hat{y}$}; 16 | 17 | % edges 18 | \path[tedge_dashed] (x1) edge node[pos=0.25, above=1.8pt, dashed] {\large{\alert{$0$}}} (h1); 19 | \path[tedge] (x1) edge node[above=1.2pt] {} (h2); 20 | \path[tedge] (x1) edge node[above=1.8pt] {} (h3); 21 | \path[tedge] (x1) edge node[above=1.8pt] {} (h4); 22 | 23 | \path[tedge] (x2) edge node[above=1.8pt] {} (h1); 24 | \path[tedge] (x2) edge node[above=1.8pt] {} (h2); 25 | \path[tedge] (x2) edge node[above=1.8pt] {} (h3); 26 | \path[tedge] (x2) edge node[above=1.8pt] {} (h4); 27 | 28 | \path[tedge] (x3) edge node[above=1.8pt] {} (h1); 29 | \path[tedge] (x3) edge node[above=1.8pt] {} (h2); 30 | \path[tedge] (x3) edge node[above=1.8pt] {} (h3); 31 | \path[tedge_dashed] (x3) edge node[above=1.0pt] {} (h4); 32 | 33 | \path[tedge_dashed] (h1) edge node[pos=0.25, above=1.8pt, right=0.1cm] {} (o); 34 | \path[tedge] (h2) edge node[above=1.8pt] {} (o); 35 | \path[tedge] (h3) edge node[above=1.8pt] {} (o); 36 | \path[tedge] (h4) edge node[above=1.8pt] {} (o); 37 | 38 | 39 | % info edges 40 | 41 | 42 | \end{tikzpicture} 43 | } % scalebox 
44 | \end{figure} -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/Dropout2.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{1.3}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | \node[op] (x2) {$x_2$}; 9 | \node[op, above=20pt of x2] (x1) {$x_1$}; 10 | \node[op, below=20pt of x2] (x3) {$x_3$}; 11 | \node[op, above right=10pt and 40pt of x2] (h2) {$h_2$}; 12 | \node[op, above=20pt of h2] (h1) {$h_1$}; 13 | \node[op, below=20pt of h2] (h3) {$h_3$}; 14 | \node[op, below=20pt of h3] (h4) {$h_4$}; 15 | \node[op, right=90pt of x2] (o) {$\hat{y}$}; 16 | % edges 17 | \path[tedge] (x1) edge node[pos=0.25, above=1.8pt] {} (h1); 18 | \path[tedge] (x1) edge node[above=1.2pt] {} (h2); 19 | \path[tedge] (x1) edge node[above=1.8pt] {} (h3); 20 | \path[tedge_dashed] (x1) edge node[above=1.8pt] {} (h4); 21 | 22 | \path[tedge] (x2) edge node[above=1.8pt] {} (h1); 23 | \path[tedge_dashed] (x2) edge node[above=1.8pt] {} (h2); 24 | \path[tedge] (x2) edge node[above=1.8pt] {} (h3); 25 | \path[tedge] (x2) edge node[above=1.8pt] {} (h4); 26 | 27 | \path[tedge_dashed] (x3) edge node[above=1.8pt] {} (h1); 28 | \path[tedge] (x3) edge node[above=1.8pt] {} (h2); 29 | \path[tedge] (x3) edge node[above=1.8pt] {} (h3); 30 | \path[tedge] (x3) edge node[above=1.0pt] {} (h4); 31 | 32 | \path[tedge] (h1) edge node[pos=0.25, above=1.8pt, right=0.1cm] {} (o); 33 | \path[tedge] (h2) edge node[above=1.8pt] {} (o); 34 | \path[tedge] (h3) edge node[above=1.8pt] {} (o); 35 | \path[tedge_dashed] (h4) edge node[below=1.8pt] {\large{\alert{$0$}}}(o); 36 | 37 | 38 | % info edges 39 | 40 | 41 | \end{tikzpicture} 42 | } % scalebox 43 | \end{figure} 44 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/Entropy1.tex: 
-------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{1.3}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % nodes 10 | \node[textonly] (pprob) {$\begin{bmatrix}0.8\\0.2\end{bmatrix}$}; 11 | \node[textonly, right=40pt of pprob] (qprob) {$\begin{bmatrix}0.5\\0.5\end{bmatrix}$}; 12 | \node[textonly, above=1pt of pprob] (p) {$\vect{p}$}; 13 | \node[textonly, above=1pt of qprob] (q) {$\vect{q}$}; 14 | 15 | 16 | \node[textonly, below=20pt of pprob] (Hp) {$H(\vect{p}) = 0.72$}; 17 | \node[textonly, below=20pt of qprob] (Hq) {$H(\vect{q}) = 1$}; 18 | \node[textonly, below=30pt of Hp] (inv1) {}; 19 | \node[textonly, right=-40pt of inv1] (Hquation) {{\Large$H(\vect{p}) = \sum_{i} \vect{p}_i\log_2\frac{1}{\vect{p}_i}$}}; % base 2: the values 0.72 and 1 shown above are in bits 20 | 21 | 22 | \end{tikzpicture} 23 | } % scalebox 24 | \end{figure} 25 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/Entropy2.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!]
2 | \centering 3 | 4 | \scalebox{1.0}{ 5 | \begin{tikzpicture} 6 | \begin{axis}% 7 | [ 8 | grid=major, 9 | xmin=0, 10 | xmax=1, 11 | axis x line=bottom, 12 | ytick={0,.5,1}, 13 | ymax=1.1, 14 | axis y line=middle, 15 | xlabel= $p$, 16 | ylabel= $H(\vect{p})$, 17 | ] 18 | \addplot% 19 | [ orange!180, 20 | ultra thick, 21 | % blue,% 22 | mark=none, 23 | samples=200, 24 | domain=0.0001:0.9999, 25 | ] 26 | (x,{(x*log2(1/x)) + ((1-x)*log2(1/(1-x)))}); 27 | \end{axis} 28 | \node[textonly] (pprob) at (8.75,2.8) {{\Large$\begin{bmatrix}p\\1-p\end{bmatrix}$}}; 29 | \node[textonly, above=1pt of pprob] (p) {{\Large$\vect{p}$}}; 30 | \end{tikzpicture} 31 | } % scalebox 32 | \end{figure} -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/Kernel_image_pro.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{0.70}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % nodes 10 | \node (original) 11 | {\includegraphics[width=.15\textwidth]{images/Vd-Orig.png}}; 12 | \node[above right= 20pt and 150pt of original] (edge) {\includegraphics[width=.15\textwidth]{images/Vd-Edge3.png}}; 13 | \node[below=20pt of edge] (sharpen) {\includegraphics[width=.15\textwidth]{images/Vd-Sharp.png}}; 14 | \node[below=20pt of sharpen] (blur) {\includegraphics[width=.15\textwidth]{images/Vd-Blur1.png}}; 15 | 16 | % edges 17 | \path[tedge, orange!120, line width=1mm] (original) to [out=90,in=180, looseness=9, distance=125pt] (edge); 18 | \path[tedge, orange!120, line width=1mm] (original) to [out=0,in=180] (sharpen); 19 | \path[tedge, orange!120, line width=1mm] (original) to [out=-90,in=180, looseness=9, distance=125pt] (blur); 20 | 21 | % nodes for kernels 22 | \node[op3, right=20pt of original] (kernel1) {$\begin{bmatrix}0 & -1 & 0\\ -1 & 5 & -1\\0 & -1 & 0\end{bmatrix}$}; 23 | \node[op3, above=10pt of kernel1] 
(kernel2) {$\begin{bmatrix}-1 & -1 & -1\\ -1 & 8 & -1\\-1 & -1 & -1\end{bmatrix}$}; 24 | \node[op3, below=10pt of kernel1] (kernel3) {$\frac{1}{16}\begin{bmatrix}1 & 2 & 1\\ 2 & 4 & 2\\1 & 2 & 1\end{bmatrix}$}; 25 | 26 | \end{tikzpicture} 27 | } % scalebox 28 | \vspace*{-10mm} 29 | \caption{Exemplo de aplicação de filtros em uma imagem (extraído de \url{https://en.wikipedia.org/wiki/Kernel_(image_processing)})} 30 | \end{figure} 31 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/KullbackLeibler.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{1.3}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % nodes 10 | \node[textonly] (p1prob) {$\begin{bmatrix}0.8\\0.2\end{bmatrix}$}; 11 | \node[textonly, right=30pt of p1prob] (q1prob) {$\begin{bmatrix}0.5\\0.5\end{bmatrix}$}; 12 | \node[textonly, above=1pt of p1prob] (p1) {$\vect{p}$}; 13 | \node[textonly, above=1pt of q1prob] (q1) {$\vect{q}$}; 14 | \node[textonly, right=20pt of q1prob] (p2prob) {$\begin{bmatrix}0.8\\0.2\end{bmatrix}$}; 15 | \node[textonly, right=30pt of p2prob] (q2prob) {$\begin{bmatrix}0.88\\0.12\end{bmatrix}$}; 16 | \node[textonly, above=1pt of p2prob] (p2) {$\vect{p}^{\prime}$}; 17 | \node[textonly, above=1pt of q2prob] (q2) {$\vect{q}^{\prime}$}; 18 | 19 | 20 | \node[textonly, below right=20pt and -15pt of p1prob] (Dkl1) {$D_{KL}(\vect{p}||\vect{q}) = 0.28$}; 21 | \node[textonly, below right=20pt and -15pt of p2prob] (Dkl2) {$D_{KL}(\vect{p}^{\prime}||\vect{q}^{\prime}) = 0.04$}; 22 | \node[textonly, below=20pt of Dkl1] (inv1) {}; 23 | \node[textonly, right=-40pt of inv1] (Dklequation) {{\Large$D_{KL}(\vect{p}||\vect{q}) = \sum_{i} \vect{p}_i\log_2\frac{\vect{p}_i}{\vect{q}_i}$}}; % base 2: the values 0.28 and 0.04 shown above are in bits 24 | 25 | 26 | 27 | % edges 28 | \draw[orange!120, line width=1mm] (Dkl1) to [out=150,in=-90] (p1prob); 29 | \draw[orange!120, line
width=1mm] (Dkl1) to [out=150,in=-100] (q1prob); 30 | 31 | \draw[orange!120, line width=1mm] (Dkl2) to [out=150,in=-90] (p2prob); 32 | \draw[orange!120, line width=1mm] (Dkl2) to [out=150,in=-100] (q2prob); 33 | 34 | \end{tikzpicture} 35 | } % scalebox 36 | \end{figure} 37 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/NN.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{1.5}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | \node[op] (h) {$\vect{h}$}; 9 | \node[op, above=30pt of h] (y) {$\hat{\vect{y}}$}; 10 | \node[op, below=30pt of h] (x) {$\vect{x}$}; 11 | \node[textonly, right=76pt of h] (inv) {}; 12 | \node[textonly, above=16pt of inv] (f1) {$\hat{\vect{y}} = f^{(2)}(f^{(1)}(\vect{x}; \vect{W}_1); \vect{W}_2)$}; 13 | \node[textonly, below=10pt of f1] (f2) {$\hat{\vect{y}} = \mathrm{softmax}( \vect{W}_2 (\sigma(\vect{W}_1\vect{x})))$}; % output written as in node (y); \mathrm keeps the function name upright 14 | 15 | 16 | % edges 17 | \path[tedge] (x) edge [out=90,in=-90] node[right] {$\vect{W}_{1}$} (h); 18 | \path[tedge] (h) edge [out=90,in=-90] node[right] {$\vect{W}_{2}$} (y); 19 | 20 | \end{tikzpicture} 21 | } % scalebox 22 | \end{figure} 23 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/OldNN1.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!]
2 | \centering 3 | 4 | \scalebox{0.7}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input layer 10 | \node[op] (x5) {$x_5$}; 11 | \node[op, above=2.5pt of x5] (x4) {$x_4$}; 12 | \node[op, above=2.5pt of x4] (x3) {$x_3$}; 13 | \node[op, above=2.5pt of x3] (x2) {$x_2$}; 14 | \node[op, above=2.5pt of x2] (x1) {$x_1$}; 15 | \node[op, below=2.5pt of x5] (x6) {$x_6$}; 16 | \node[op, below=2.5pt of x6] (x7) {$x_7$}; 17 | \node[op, below=2.5pt of x7] (x8) {$x_8$}; 18 | \node[op, below=2.5pt of x8] (x9) {$x_9$}; 19 | \node[op, below=2.5pt of x9] (x10) {$x_{10}$}; 20 | 21 | % hidden layer 22 | \node[op, right=130pt of x5] (v2) {$v_2$}; 23 | \node[op, below=2.5pt of v2] (v3) {$v_3$}; 24 | \node[op, below=2.5pt of v3] (v4) {$v_4$}; 25 | \node[op, above=2.5pt of v2] (v1) {$v_1$}; 26 | 27 | \node[op, right=40pt of v2] (h2) {$h_2$}; 28 | \node[op, below=2.5pt of h2] (h3) {$h_3$}; 29 | \node[op, below=2.5pt of h3] (h4) {$h_4$}; 30 | \node[op, above=2.5pt of h2] (h1) {$h_1$}; 31 | 32 | 33 | % output layer 34 | \node[op, right=60pt of h2] (z1) {$z_1$}; 35 | \node[op, right=60pt of h3] (z2) {$z_2$}; 36 | 37 | \node[op, right=50pt of z1] (y1) {$\hat{y}_1$}; 38 | \node[op, right=50pt of z2] (y2) {$\hat{y}_2$}; 39 | 40 | 41 | % edges input layer to hidden 42 | \path[tedge] (x1) -- (v1); 43 | \path[tedge] (x1) -- (v2); 44 | \path[tedge] (x1) -- (v3); 45 | \path[tedge] (x1) -- (v4); 46 | 47 | \path[tedge] (x2) -- (v1); 48 | \path[tedge] (x2) -- (v2); 49 | \path[tedge] (x2) -- (v3); 50 | \path[tedge] (x2) -- (v4); 51 | 52 | \path[tedge] (x3) -- (v1); 53 | \path[tedge] (x3) -- (v2); 54 | \path[tedge] (x3) -- (v3); 55 | \path[tedge] (x3) -- (v4); 56 | 57 | \path[tedge] (x4) -- (v1); 58 | \path[tedge] (x4) -- (v2); 59 | \path[tedge] (x4) -- (v3); 60 | \path[tedge] (x4) -- (v4); 61 | 62 | \path[tedge] (x5) -- (v1); 63 | \path[tedge] (x5) -- (v2); 64 | \path[tedge] (x5) -- (v3); 65 | \path[tedge] (x5) -- (v4); 66 | 67 | \path[tedge] (x6) -- (v1); 68 
| \path[tedge] (x6) -- (v2); 69 | \path[tedge] (x6) -- (v3); 70 | \path[tedge] (x6) -- (v4); 71 | 72 | \path[tedge] (x7) -- (v1); 73 | \path[tedge] (x7) -- (v2); 74 | \path[tedge] (x7) -- (v3); 75 | \path[tedge] (x7) -- (v4); 76 | 77 | \path[tedge] (x8) -- (v1); 78 | \path[tedge] (x8) -- (v2); 79 | \path[tedge] (x8) -- (v3); 80 | \path[tedge] (x8) -- (v4); 81 | 82 | \path[tedge] (x9) -- (v1); 83 | \path[tedge] (x9) -- (v2); 84 | \path[tedge] (x9) -- (v3); 85 | \path[tedge] (x9) -- (v4); 86 | 87 | \path[tedge] (x10) -- (v1); 88 | \path[tedge] (x10) -- (v2); 89 | \path[tedge] (x10) -- (v3); 90 | \path[tedge] (x10) -- (v4); 91 | 92 | % edges hidden to hidden 93 | \path[tedge] (v1) edge node[above=1pt] {{\Large$\sigma$}} (h1) ; 94 | \path[tedge] (v2) edge node[above=1pt] {{\Large$\sigma$}} (h2) ; 95 | \path[tedge] (v3) edge node[above=1pt] {{\Large$\sigma$}} (h3) ; 96 | \path[tedge] (v4) edge node[above=1pt] {{\Large$\sigma$}} (h4) ; 97 | 98 | % edges hidden to output 99 | \path[tedge] (h1) -- (z1); 100 | \path[tedge] (h1) -- (z2); 101 | 102 | \path[tedge] (h2) -- (z1); 103 | \path[tedge] (h2) -- (z2); 104 | 105 | \path[tedge] (h3) -- (z1); 106 | \path[tedge] (h3) -- (z2); 107 | 108 | \path[tedge] (h4) -- (z1); 109 | \path[tedge] (h4) -- (z2); 110 | 111 | % edges output to output 112 | \path[tedge] (z1) edge node[above=1pt] {{\Large softmax}} (y1) ; 113 | \path[tedge] (z2) edge node[above=1pt] {{\Large softmax}} (y2) ; 114 | 115 | 116 | 117 | 118 | \end{tikzpicture} 119 | } % scalebox 120 | \end{figure} 121 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/OldNN2.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{0.7}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input layer 10 | \node[op] (x5) {}; 11 | \node[op, above=2.5pt of x5] (x4) {}; 12 | \node[op, above=2.5pt of x4] (x3) {}; 13 | \node[op, above=2.5pt of x3] (x2) {}; 14 | \node[op, above=2.5pt of x2] (x1) {}; 15 | \node[op, below=2.5pt of x5] (x6) {}; 16 | \node[op, below=2.5pt of x6] (x7) {}; 17 | \node[op, below=2.5pt of x7] (x8) {}; 18 | \node[op, below=2.5pt of x8] (x9) {}; 19 | \node[op, below=2.5pt of x9] (x10) {}; 20 | 21 | \node[textonly, above=2.5pt of x1] (x) {{\LARGE$\vect{x}$}}; 22 | \node[textonly, below=4.5pt of x10] (input) {{\large Input layer}}; 23 | 24 | % hidden layer 25 | \node[op, right=130pt of x5] (h2) {}; 26 | \node[op, below=2.5pt of h2] (h3) {}; 27 | \node[op, below=2.5pt of h3] (h4) {}; 28 | \node[op, above=2.5pt of h2] (h1) {}; 29 | 30 | \node[textonly, above=2.5pt of h1] (h) {{\LARGE$\vect{h}$}}; 31 | \node[textonly, below=4.5pt of h4] (hidden) {{\large Hidden layer}}; 32 | 33 | % output layer 34 | \node[op, right=60pt of h2] (y1) {}; 35 | \node[op, right=60pt of h3] (y2) {}; 36 | 37 | \node[textonly, above=2.5pt of y1] (y) {{\LARGE$\hat{\vect{y}}$}}; 38 | \node[textonly, below=4.5pt of y2] (output) {{\large Output layer}}; 39 | 40 | % edges input layer to hidden 41 | \path[tedge] (x1) -- (h1); 42 | \path[tedge] (x1) -- (h2); 43 | \path[tedge] (x1) -- (h3); 44 | \path[tedge] (x1) -- (h4); 45 | 46 | \path[tedge] (x2) -- (h1); 47 | \path[tedge] (x2) -- (h2); 48 | \path[tedge] (x2) -- (h3); 49 | \path[tedge] (x2) -- (h4); 50 | 51 | \path[tedge] (x3) -- (h1); 52 | \path[tedge] (x3) -- (h2); 53 | \path[tedge] (x3) -- (h3); 54 | \path[tedge] (x3) -- (h4); 55 | 56 | \path[tedge] (x4) -- (h1); 57 | \path[tedge] (x4) -- (h2); 58 | \path[tedge] (x4) -- (h3); 59 | \path[tedge] (x4) -- (h4); 60 | 61 | \path[tedge] (x5) -- (h1); 62 | \path[tedge] (x5) -- (h2); 63 | \path[tedge] (x5) -- (h3); 64 | \path[tedge] (x5) -- (h4); 
65 | 66 | \path[tedge] (x6) -- (h1); 67 | \path[tedge] (x6) -- (h2); 68 | \path[tedge] (x6) -- (h3); 69 | \path[tedge] (x6) -- (h4); 70 | 71 | \path[tedge] (x7) -- (h1); 72 | \path[tedge] (x7) -- (h2); 73 | \path[tedge] (x7) -- (h3); 74 | \path[tedge] (x7) -- (h4); 75 | 76 | \path[tedge] (x8) -- (h1); 77 | \path[tedge] (x8) -- (h2); 78 | \path[tedge] (x8) -- (h3); 79 | \path[tedge] (x8) -- (h4); 80 | 81 | \path[tedge] (x9) -- (h1); 82 | \path[tedge] (x9) -- (h2); 83 | \path[tedge] (x9) -- (h3); 84 | \path[tedge] (x9) -- (h4); 85 | 86 | \path[tedge] (x10) -- (h1); 87 | \path[tedge] (x10) -- (h2); 88 | \path[tedge] (x10) -- (h3); 89 | \path[tedge] (x10) -- (h4); 90 | 91 | % edges hidden to output 92 | \path[tedge] (h1) -- (y1); 93 | \path[tedge] (h1) -- (y2); 94 | 95 | \path[tedge] (h2) -- (y1); 96 | \path[tedge] (h2) -- (y2); 97 | 98 | \path[tedge] (h3) -- (y1); 99 | \path[tedge] (h3) -- (y2); 100 | 101 | \path[tedge] (h4) -- (y1); 102 | \path[tedge] (h4) -- (y2); 103 | 104 | 105 | 106 | \end{tikzpicture} 107 | } % scalebox 108 | \end{figure} 109 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/OldNN3.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{0.7}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input layer 10 | \node[op] (x5) {}; 11 | \node[op, above=2.5pt of x5] (x4) {}; 12 | \node[op, above=2.5pt of x4] (x3) {}; 13 | \node[op, above=2.5pt of x3] (x2) {}; 14 | \node[op, above=2.5pt of x2] (x1) {}; 15 | \node[op, below=2.5pt of x5] (x6) {}; 16 | \node[op, below=2.5pt of x6] (x7) {}; 17 | \node[op, below=2.5pt of x7] (x8) {}; 18 | \node[op, below=2.5pt of x8] (x9) {}; 19 | \node[op, below=2.5pt of x9] (x10) {}; 20 | 21 | \node[textonly, above=2.5pt of x1] (x) {{\LARGE$\vect{x}$}}; 22 | \node[textonly, below=4.5pt of x10] (input) {{\large Input layer}}; 23 | 24 | % hidden layer 25 | \node[op, right=130pt of x5] (h2) {}; 26 | \node[op, below=2.5pt of h2] (h3) {}; 27 | \node[op, below=2.5pt of h3] (h4) {}; 28 | \node[op, above=2.5pt of h2] (h1) {}; 29 | 30 | \node[textonly, above=2.5pt of h1] (h) {{\LARGE$\vect{h}$}}; 31 | \node[textonly, below=4.5pt of h4] (hidden) {{\large Hidden layer}}; 32 | 33 | % output layer 34 | \node[op, right=60pt of h2] (y1) {}; 35 | \node[op, right=60pt of h3] (y2) {}; 36 | 37 | \node[textonly, above=2.5pt of y1] (y) {{\LARGE$\hat{\vect{y}}$}}; 38 | \node[textonly, below=4.5pt of y2] (output) {{\large Output layer}}; 39 | 40 | % edges input layer to hidden 41 | \draw[line width=0.5mm] (x1) -- (hidden); 42 | \draw[line width=0.5mm] (x10) -- (h); 43 | 44 | % edges hidden to output 45 | \draw [line width=0.5mm] (h1) -- (output); 46 | \draw [line width=0.5mm] (h4) -- (y); 47 | 48 | \end{tikzpicture} 49 | } % scalebox 50 | \end{figure} 51 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/ReLU.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{1.0}{ 5 | \begin{tikzpicture} 6 | \begin{axis}% 7 | [ 8 | grid=major, 9 | xmin=-6, 10 | xmax=6, 11 | axis x line=bottom, 12 | ytick={0}, 13 | ymax=10, 14 | axis y line=middle, 15 | ] 16 | \addplot% 17 | [ orange!180, 18 | ultra thick, 19 | % blue,% 20 | mark=none, 21 | samples=100, 22 | domain=-6:6, 23 | ] 24 | (x,{max(x,0)}); 25 | \end{axis} 26 | \node[textonly] (relu) at (8.95,2.8) {{\Large$g(x) = \max\{0,x\}$}}; % \max is the upright operator; bare "max" would be set as italic m*a*x 27 | \end{tikzpicture} 28 | } % scalebox 29 | \end{figure} -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/Sigmoid.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{1.0}{ 5 | \begin{tikzpicture} 6 | \begin{axis}% 7 | [ 8 | grid=major, 9 | xmin=-6, 10 | xmax=6, 11 | axis x line=bottom, 12 | ytick={0,.5,1}, 13 | ymax=1, 14 | axis y line=middle, 15 | ] 16 | \addplot% 17 | [ orange!180, 18 | ultra thick, 19 | % blue,% 20 | mark=none, 21 | samples=100, 22 | domain=-6:6, 23 | ] 24 | (x,{1/(1+exp(-x))}); 25 | \end{axis} 26 | \node[textonly] (sigmoid) at (8.75,2.8) {{\Large$\sigma(x) = \frac{1}{1 + e^{-x}}$}}; 27 | \end{tikzpicture} 28 | } % scalebox 29 | \end{figure} -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/Softmax.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!]
2 | \centering 3 | 4 | \scalebox{1.3}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % nodes 10 | \node[textonly] (logits) {$\begin{bmatrix}3.82\\5.35\\1.44\\-1.26\\2.71 \\1.98\end{bmatrix}$}; 11 | \node[textonly, right=60pt of logits] (softmax) {$\begin{bmatrix}0.16115195\\0.74422819\\0.01491471\\0.00100235\\0.05310907 \\0.02559374\end{bmatrix}$}; 12 | \node[textonly, below=15pt of logits] (inv1) {}; 13 | \node[textonly, right=10pt of inv1] (softmax_eq) {{\Large$\mathrm{softmax}(\vect{x})_i = \frac{e^{\vect{x}_i}}{\sum_j e^{\vect{x}_j}}$}}; % \mathrm keeps the function name upright instead of italic math letters 14 | 15 | 16 | 17 | % edges 18 | \path[tedge] (logits) edge node[above=1pt] {{\Large softmax}} (softmax); 19 | \end{tikzpicture} 20 | } % scalebox 21 | \end{figure} 22 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/b1_path1.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{0.6}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input x and W 10 | \node[op] (w11) {$w_{11}$}; 11 | \node[op, below=10pt of w11] (x1) {$x_{1}$}; 12 | \node[op, below=20pt of x1] (w12) {$w_{12}$}; 13 | \node[op, below=10pt of w12] (x2) {$x_{2}$}; 14 | 15 | \node[op, below=20pt of x2] (w21) {$w_{21}$}; 16 | \node[op, below=10pt of w21] (x11) {$x_{1}$}; 17 | \node[op, below=20pt of x11] (w22) {$w_{22}$}; 18 | \node[op, below=10pt of w22] (x22) {$x_{2}$}; 19 | 20 | % multiplication 21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$}; 22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$}; 23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$}; 24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$}; 25 | 26 | % sum 27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$}; 28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$}; 29 | \node[op2, right=25pt of sum1] (sum3) {$z_1$}; 30 |
\node[op, right=25pt of sum2] (sum4) {$z_2$}; 31 | \node[op2, above left=25pt and 10pt of sum3] (b1) {$b_1$}; 32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$}; 33 | 34 | % exp 35 | \node[op2, right=25pt of sum3] (exp1) {$h_1$}; 36 | \node[op, right=25pt of sum4] (exp2) {$h_2$}; 37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$}; 38 | \node[op2, right=35pt of exp1] (div1) {$\hat{y}_1$}; 39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$}; 40 | 41 | % log 42 | \node[op2, right=25pt of div1] (log1) {$\log$}; 43 | \node[op, right=25pt of div2] (log2) {$\log$}; 44 | \node[op2, right=25pt of log1] (mult5) {$*$}; 45 | \node[op, right=25pt of log2] (mult6) {$*$}; 46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$}; 47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$}; 48 | \node[op2, below right=65pt and 15pt of mult5] (sum6) {$+$}; 49 | \node[op2, right=25pt of sum6] (minus1) {$*-1$}; 50 | 51 | %edges 52 | \path[tedge] (w11) -- (mult1); 53 | \path[tedge] (x1) -- (mult1); 54 | \path[tedge] (w12) -- (mult2); 55 | \path[tedge] (x2) -- (mult2); 56 | \path[tedge] (w21) -- (mult3); 57 | \path[tedge] (x11) -- (mult3); 58 | \path[tedge] (w22) -- (mult4); 59 | \path[tedge] (x22) -- (mult4); 60 | 61 | \path[tedge] (mult1) -- (sum1); 62 | \path[tedge] (mult2) -- (sum1); 63 | \path[tedge] (mult3) -- (sum2); 64 | \path[tedge] (mult4) -- (sum2); 65 | \path[tedge] (sum1) -- (sum3); 66 | \path[tedge] (b1) -- (sum3); 67 | \path[tedge] (sum2) -- (sum4); 68 | \path[tedge] (b2) -- (sum4); 69 | 70 | \path[tedge] (sum3) -- (exp1); 71 | \path[tedge] (sum4) -- (exp2); 72 | \path[tedge] (exp1) -- (sum5); 73 | \path[tedge] (exp2) -- (sum5); 74 | \path[tedge] (exp1) -- (div1); 75 | \path[tedge] (exp2) -- (div2); 76 | \path[tedge] (sum5) -- (div1); 77 | \path[tedge] (sum5) -- (div2); 78 | 79 | 80 | \path[tedge] (div1) -- (log1); 81 | \path[tedge] (div2) -- (log2); 82 | \path[tedge] (log1) -- (mult5); 83 | \path[tedge] (y1) -- (mult5); 84 | 
\path[tedge] (log2) -- (mult6); 85 | \path[tedge] (y2) -- (mult6); 86 | \path[tedge] (mult5) -- (sum6); 87 | \path[tedge] (mult6) -- (sum6); 88 | \path[tedge] (sum6) -- (minus1); 89 | 90 | \end{tikzpicture} 91 | } % scalebox 92 | \end{figure} 93 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/b1_path1_grad.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{0.8}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % multiplication 10 | \node[op] (z1) {$z_1$}; 11 | \node[op, above left=25pt and 20pt of z1] (b1) {$b_1$}; 12 | 13 | % exp 14 | \node[op, right=25pt of z1] (exp1) {$h_1$}; 15 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$}; 16 | \node[op, below left=25pt and 10pt of div1] (H) {$H$}; 17 | 18 | % log 19 | \node[op, right=25pt of div1] (log1) {$\log$}; 20 | \node[op, right=25pt of log1] (mult5) {$*$}; 21 | \node[op, right=25pt of mult5] (sum6) {$+$}; 22 | \node[op, right=25pt of sum6] (minus1) {$*-1$}; 23 | \node[op, below left=25pt and 10pt of mult5] (y1) {$y_1$}; 24 | \node[textonly, below left=55pt and 10pt of sum6] (dots) {{\LARGE$\dots$}}; 25 | 26 | %gradients 1 27 | \node[gradient, above=10pt of sum6] (dsum6) {$-1$}; 28 | \node[gradient, above=10pt of mult5] (dmult5) {$1$}; 29 | \node[gradient, above=10pt of log1] (dlog1) {$y_1$}; 30 | \node[gradient, above=10pt of div1] (ddiv1) {$\frac{H}{h_1}$}; 31 | \node[gradient, above=10pt of exp1] (dexp1) {$\frac{1}{H}$}; 32 | \node[gradient, above=10pt of z1] (dz1) {$h_1$}; 33 | \node[gradient, above=10pt of b1] (db1) {$1$}; 34 | 35 | %gradients 2 36 | \node[gradient2, above=10pt of minus1] (dLdL) {$1$}; 37 | \node[gradient2, above right =30pt and 10pt of dsum6] (dLdLpp) {$-1$}; 38 | \node[gradient2, above left=30pt and 10pt of dsum6] (dLdmult5) {$-1$}; 39 | \node[gradient2, left=25pt of dLdmult5] (dLlog1) {$-y_1$}; 40 
| \node[gradient2, left=25pt of dLlog1] (dLdiv1) {$-y_1\frac{H}{h_1}$}; 41 | \node[gradient2, left=25pt of dLdiv1] (dLdexp1) {$\frac{-y_1}{h_1}$}; 42 | \node[gradient2, left=25pt of dLdexp1] (dLdz1) {$-y_1$}; 43 | \node[gradient2, above=35pt of db1] (dLdb1) {$-y_1$}; 44 | 45 | %edges 46 | \path[tedge] (b1) -- (z1); 47 | \path[tedge] (z1) -- (exp1); 48 | \path[tedge] (exp1) -- (div1); 49 | \path[tedge] (H) -- (div1); 50 | \path[tedge] (div1) -- (log1); 51 | \path[tedge] (log1) -- (mult5); 52 | \path[tedge] (y1) -- (mult5); 53 | \path[tedge] (mult5) -- (sum6); 54 | \path[tedge] (dots) -- (sum6); 55 | \path[tedge] (sum6) -- (minus1); 56 | 57 | %edges gradient 1 58 | % \path[tedge] (b1) -- (db1); 59 | % \path[tedge] (z1) -- (dz1); 60 | % \path[tedge] (exp1) -- (dexp1); 61 | % \path[tedge] (div1) -- (ddiv1); 62 | % \path[tedge] (log1) -- (dlog1); 63 | % \path[tedge] (mult5) -- (dmult5); 64 | % \path[tedge] (sum6) -- (dsum6); 65 | 66 | %edges gradient 2 67 | \path[tedge] (dsum6) -- (dLdLpp); 68 | \path[tedge] (dLdL) -- (dLdLpp); 69 | \path[tedge] (dLdLpp) -- (dLdmult5); 70 | \path[tedge] (dmult5) -- (dLdmult5); 71 | \path[tedge] (dLdmult5) -- (dLlog1); 72 | \path[tedge] (dlog1) -- (dLlog1); 73 | \path[tedge] (ddiv1) -- (dLdiv1); 74 | \path[tedge] (dLlog1) -- (dLdiv1); 75 | \path[tedge] (dexp1) -- (dLdexp1); 76 | \path[tedge] (dLdiv1) -- (dLdexp1); 77 | \path[tedge] (dz1) -- (dLdz1); 78 | \path[tedge] (dLdexp1) -- (dLdz1); 79 | \path[tedge] (db1) -- (dLdb1); 80 | \path[tedge] (dLdz1) -- (dLdb1); 81 | 82 | 83 | \end{tikzpicture} 84 | } % scalebox 85 | \end{figure} 86 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/b1_path2.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{0.6}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input x and W 10 | \node[op] (w11) {$w_{11}$}; 11 | \node[op, below=10pt of w11] (x1) {$x_{1}$}; 12 | \node[op, below=20pt of x1] (w12) {$w_{12}$}; 13 | \node[op, below=10pt of w12] (x2) {$x_{2}$}; 14 | 15 | \node[op, below=20pt of x2] (w21) {$w_{21}$}; 16 | \node[op, below=10pt of w21] (x11) {$x_{1}$}; 17 | \node[op, below=20pt of x11] (w22) {$w_{22}$}; 18 | \node[op, below=10pt of w22] (x22) {$x_{2}$}; 19 | 20 | % multiplication 21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$}; 22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$}; 23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$}; 24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$}; 25 | 26 | % sum 27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$}; 28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$}; 29 | \node[op2, right=25pt of sum1] (sum3) {$z_1$}; 30 | \node[op, right=25pt of sum2] (sum4) {$z_2$}; 31 | \node[op2, above left=25pt and 10pt of sum3] (b1) {$b_1$}; 32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$}; 33 | 34 | % exp 35 | \node[op2, right=25pt of sum3] (exp1) {$h_1$}; 36 | \node[op, right=25pt of sum4] (exp2) {$h_2$}; 37 | \node[op2, below right=65pt and 15pt of exp1] (sum5) {$H$}; 38 | \node[op2, right=35pt of exp1] (div1) {$\hat{y}_1$}; 39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$}; 40 | 41 | % log 42 | \node[op2, right=25pt of div1] (log1) {$\log$}; 43 | \node[op, right=25pt of div2] (log2) {$\log$}; 44 | \node[op2, right=25pt of log1] (mult5) {$*$}; 45 | \node[op, right=25pt of log2] (mult6) {$*$}; 46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$}; 47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$}; 48 | \node[op2, below right=65pt and 15pt of mult5] (sum6) {$+$}; 49 | \node[op2, right=25pt of sum6] (minus1) {$*-1$}; 50 | 51 | 52 | 53 | %edges 54 | 
\path[tedge] (w11) -- (mult1); 55 | \path[tedge] (x1) -- (mult1); 56 | \path[tedge] (w12) -- (mult2); 57 | \path[tedge] (x2) -- (mult2); 58 | \path[tedge] (w21) -- (mult3); 59 | \path[tedge] (x11) -- (mult3); 60 | \path[tedge] (w22) -- (mult4); 61 | \path[tedge] (x22) -- (mult4); 62 | 63 | \path[tedge] (mult1) -- (sum1); 64 | \path[tedge] (mult2) -- (sum1); 65 | \path[tedge] (mult3) -- (sum2); 66 | \path[tedge] (mult4) -- (sum2); 67 | \path[tedge] (sum1) -- (sum3); 68 | \path[tedge] (b1) -- (sum3); 69 | \path[tedge] (sum2) -- (sum4); 70 | \path[tedge] (b2) -- (sum4); 71 | 72 | \path[tedge] (sum3) -- (exp1); 73 | \path[tedge] (sum4) -- (exp2); 74 | \path[tedge] (exp1) -- (sum5); 75 | \path[tedge] (exp2) -- (sum5); 76 | \path[tedge] (exp1) -- (div1); 77 | \path[tedge] (exp2) -- (div2); 78 | \path[tedge] (sum5) -- (div1); 79 | \path[tedge] (sum5) -- (div2); 80 | 81 | 82 | \path[tedge] (div1) -- (log1); 83 | \path[tedge] (div2) -- (log2); 84 | \path[tedge] (log1) -- (mult5); 85 | \path[tedge] (y1) -- (mult5); 86 | \path[tedge] (log2) -- (mult6); 87 | \path[tedge] (y2) -- (mult6); 88 | \path[tedge] (mult5) -- (sum6); 89 | \path[tedge] (mult6) -- (sum6); 90 | \path[tedge] (sum6) -- (minus1); 91 | 92 | \end{tikzpicture} 93 | } % scalebox 94 | \end{figure} 95 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/b1_path2_grad.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{0.7}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % forward nodes on the path b_1 -> z_1 -> h_1 -> H -> \hat{y}_1 -> output 8 | 9 | % pre-activation z_1 and its bias b_1 10 | \node[op] (z1) {$z_1$}; 11 | \node[op, above left=25pt and 20pt of z1] (b1) {$b_1$}; 12 | 13 | % h_1, the sum H and the output \hat{y}_1 14 | \node[op, right=25pt of z1] (exp1) {$h_1$}; 15 | \node[op, right=25pt of exp1] (H) {$H$}; 16 | \node[op, right=35pt of H] (div1) {$\hat{y}_1$}; 17 | 18 | % log, product with y_1, sum with the remaining terms (dots) and negation 19 | \node[op, right=25pt of div1] (log1) {$\log$}; 20 | \node[op, right=25pt of log1] (mult5) {$*$}; 21 | \node[op, below left=25pt and 10pt of mult5] (y1) {$y_1$}; 22 | \node[op, right=25pt of mult5] (sum6) {$+$}; 23 | \node[op, right=25pt of sum6] (minus1) {$*-1$}; 24 | \node[textonly, below left=55pt and 10pt of sum6] (dots) {{\LARGE$\dots$}}; 25 | 26 | %gradients 1: local derivative of each node's successor w.r.t. that node 27 | \node[gradient, above=10pt of sum6] (dsum6) {$-1$}; 28 | \node[gradient, above=10pt of mult5] (dmult5) {$1$}; 29 | \node[gradient, above=10pt of log1] (dlog1) {$y_1$}; 30 | \node[gradient, above=10pt of div1] (ddiv1) {$\frac{H}{h_1}$}; 31 | \node[gradient, above=10pt of H] (dH) {$-\frac{h_1}{H^2}$}; 32 | \node[gradient, above=10pt of exp1] (dexp1) {$1$}; 33 | \node[gradient, above=10pt of z1] (dz1) {$h_1$}; 34 | \node[gradient, above=10pt of b1] (db1) {$1$}; 35 | 36 | %gradients 2: accumulated chain-rule products, built right to left 37 | \node[gradient2, above=10pt of minus1] (dLdL) {$1$}; 38 | \node[gradient2, above right =30pt and 10pt of dsum6] (dLdLpp) {$-1$}; 39 | \node[gradient2, above left=30pt and 10pt of dsum6] (dLdmult5) {$-1$}; 40 | \node[gradient2, left=25pt of dLdmult5] (dLlog1) {$-y_1$}; 41 | \node[gradient2, left=25pt of dLlog1] (dLdiv1) {$-y_1\frac{H}{h_1}$}; 42 | \node[gradient2, left=25pt of dLdiv1] (dLdH) {$\frac{y_1}{H}$}; 43 | \node[gradient2, left=25pt of dLdH] (dLdexp1) {$\frac{y_1}{H}$}; 44 | \node[gradient2, left=25pt of dLdexp1] (dLdz1) {$y_1\frac{h_1}{H}$}; 45 | \node[gradient2, above=35pt of db1] (dLdb1) {$y_1\frac{h_1}{H}$}; 46 | 47 | %edges of the forward graph 48 | \path[tedge] (b1) -- (z1); 49 | \path[tedge] (z1) -- (exp1); 50
| \path[tedge] (exp1) -- (H); 51 | \path[tedge] (H) -- (div1); 52 | 53 | \path[tedge] (div1) -- (log1); 54 | \path[tedge] (log1) -- (mult5); 55 | \path[tedge] (y1) -- (mult5); 56 | \path[tedge] (mult5) -- (sum6); 57 | \path[tedge] (dots) -- (sum6); 58 | \path[tedge] (sum6) -- (minus1); 59 | 60 | %edges gradient 1 61 | % \path[tedge] (b1) -- (db1); 62 | % \path[tedge] (z1) -- (dz1); 63 | % \path[tedge] (exp1) -- (dexp1); 64 | % \path[tedge] (div1) -- (ddiv1); 65 | % \path[tedge] (H) -- (dH); 66 | % \path[tedge] (log1) -- (dlog1); 67 | % \path[tedge] (mult5) -- (dmult5); 68 | % \path[tedge] (sum6) -- (dsum6); 69 | 70 | %edges gradient 2 71 | \path[tedge] (dsum6) -- (dLdLpp); 72 | \path[tedge] (dLdL) -- (dLdLpp); 73 | \path[tedge] (dLdLpp) -- (dLdmult5); 74 | \path[tedge] (dmult5) -- (dLdmult5); 75 | \path[tedge] (dLdmult5) -- (dLlog1); 76 | \path[tedge] (dlog1) -- (dLlog1); 77 | \path[tedge] (ddiv1) -- (dLdiv1); 78 | \path[tedge] (dLlog1) -- (dLdiv1); 79 | \path[tedge] (dexp1) -- (dLdexp1); 80 | \path[tedge] (dH) -- (dLdH); 81 | \path[tedge] (dLdiv1) -- (dLdH); 82 | \path[tedge] (dLdH) -- (dLdexp1); 83 | \path[tedge] (dz1) -- (dLdz1); 84 | \path[tedge] (dLdexp1) -- (dLdz1); 85 | \path[tedge] (db1) -- (dLdb1); 86 | \path[tedge] (dLdz1) -- (dLdb1); 87 | 88 | 89 | \end{tikzpicture} 90 | } % scalebox 91 | \end{figure} 92 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/b1_path3.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{0.6}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input x and W 10 | \node[op] (w11) {$w_{11}$}; 11 | \node[op, below=10pt of w11] (x1) {$x_{1}$}; 12 | \node[op, below=20pt of x1] (w12) {$w_{12}$}; 13 | \node[op, below=10pt of w12] (x2) {$x_{2}$}; 14 | 15 | \node[op, below=20pt of x2] (w21) {$w_{21}$}; 16 | \node[op, below=10pt of w21] (x11) {$x_{1}$}; 17 | \node[op, below=20pt of x11] (w22) {$w_{22}$}; 18 | \node[op, below=10pt of w22] (x22) {$x_{2}$}; 19 | 20 | % multiplication 21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$}; 22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$}; 23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$}; 24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$}; 25 | 26 | % sum 27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$}; 28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$}; 29 | \node[op2, right=25pt of sum1] (sum3) {$z_1$}; 30 | \node[op, right=25pt of sum2] (sum4) {$z_2$}; 31 | \node[op2, above left=25pt and 10pt of sum3] (b1) {$b_1$}; 32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$}; 33 | 34 | % exp 35 | \node[op2, right=25pt of sum3] (exp1) {$h_1$}; 36 | \node[op, right=25pt of sum4] (exp2) {$h_2$}; 37 | \node[op2, below right=65pt and 15pt of exp1] (sum5) {$H$}; 38 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$}; 39 | \node[op2, right=35pt of exp2] (div2) {$\hat{y}_2$}; 40 | 41 | % log 42 | \node[op, right=25pt of div1] (log1) {$\log$}; 43 | \node[op2, right=25pt of div2] (log2) {$\log$}; 44 | \node[op, right=25pt of log1] (mult5) {$*$}; 45 | \node[op2, right=25pt of log2] (mult6) {$*$}; 46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$}; 47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$}; 48 | \node[op2, below right=65pt and 15pt of mult5] (sum6) {$+$}; 49 | \node[op2, right=25pt of sum6] (minus1) {$*-1$}; 50 | 51 | 52 | 53 | %edges 54 | 
\path[tedge] (w11) -- (mult1); 55 | \path[tedge] (x1) -- (mult1); 56 | \path[tedge] (w12) -- (mult2); 57 | \path[tedge] (x2) -- (mult2); 58 | \path[tedge] (w21) -- (mult3); 59 | \path[tedge] (x11) -- (mult3); 60 | \path[tedge] (w22) -- (mult4); 61 | \path[tedge] (x22) -- (mult4); 62 | 63 | \path[tedge] (mult1) -- (sum1); 64 | \path[tedge] (mult2) -- (sum1); 65 | \path[tedge] (mult3) -- (sum2); 66 | \path[tedge] (mult4) -- (sum2); 67 | \path[tedge] (sum1) -- (sum3); 68 | \path[tedge] (b1) -- (sum3); 69 | \path[tedge] (sum2) -- (sum4); 70 | \path[tedge] (b2) -- (sum4); 71 | 72 | \path[tedge] (sum3) -- (exp1); 73 | \path[tedge] (sum4) -- (exp2); 74 | \path[tedge] (exp1) -- (sum5); 75 | \path[tedge] (exp2) -- (sum5); 76 | \path[tedge] (exp1) -- (div1); 77 | \path[tedge] (exp2) -- (div2); 78 | \path[tedge] (sum5) -- (div1); 79 | \path[tedge] (sum5) -- (div2); 80 | 81 | 82 | \path[tedge] (div1) -- (log1); 83 | \path[tedge] (div2) -- (log2); 84 | \path[tedge] (log1) -- (mult5); 85 | \path[tedge] (y1) -- (mult5); 86 | \path[tedge] (log2) -- (mult6); 87 | \path[tedge] (y2) -- (mult6); 88 | \path[tedge] (mult5) -- (sum6); 89 | \path[tedge] (mult6) -- (sum6); 90 | \path[tedge] (sum6) -- (minus1); 91 | 92 | \end{tikzpicture} 93 | } % scalebox 94 | \end{figure} 95 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/b1_path3_grad.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{0.7}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % forward nodes on the path b_1 -> z_1 -> h_1 -> H -> \hat{y}_2 -> output 8 | 9 | % pre-activation z_1 and its bias b_1 10 | \node[op] (z1) {$z_1$}; 11 | \node[op, above left=25pt and 20pt of z1] (b1) {$b_1$}; 12 | 13 | % h_1, the sum H and the output \hat{y}_2 (= h_2 / H) 14 | \node[op, right=25pt of z1] (exp1) {$h_1$}; 15 | \node[op, right=25pt of exp1] (H) {$H$}; 16 | \node[op, right=35pt of H] (div1) {$\hat{y}_2$}; 17 | 18 | % log, product with y_2, sum with the remaining terms (dots) and negation 19 | \node[op, right=25pt of div1] (log1) {$\log$}; 20 | \node[op, right=25pt of log1] (mult5) {$*$}; 21 | \node[op, below left=25pt and 10pt of mult5] (y2) {$y_2$}; 22 | \node[op, right=25pt of mult5] (sum6) {$+$}; 23 | \node[op, right=25pt of sum6] (minus1) {$*-1$}; 24 | \node[textonly, below left=55pt and 10pt of sum6] (dots) {{\LARGE$\dots$}}; 25 | 26 | %gradients 1: local derivative of each node's successor w.r.t. that node 27 | \node[gradient, above=10pt of sum6] (dsum6) {$-1$}; 28 | \node[gradient, above=10pt of mult5] (dmult5) {$1$}; 29 | \node[gradient, above=10pt of log1] (dlog1) {$y_2$}; 30 | \node[gradient, above=10pt of div1] (ddiv1) {$\frac{H}{h_2}$}; 31 | \node[gradient, above=10pt of H] (dH) {$-\frac{h_2}{H^2}$}; 32 | \node[gradient, above=10pt of exp1] (dexp1) {$1$}; 33 | \node[gradient, above=10pt of z1] (dz1) {$h_1$}; 34 | \node[gradient, above=10pt of b1] (db1) {$1$}; 35 | 36 | %gradients 2: accumulated chain-rule products, built right to left 37 | \node[gradient2, above=10pt of minus1] (dLdL) {$1$}; 38 | \node[gradient2, above right =30pt and 10pt of dsum6] (dLdLpp) {$-1$}; 39 | \node[gradient2, above left=30pt and 10pt of dsum6] (dLdmult5) {$-1$}; 40 | \node[gradient2, left=25pt of dLdmult5] (dLlog1) {$-y_2$}; 41 | \node[gradient2, left=25pt of dLlog1] (dLdiv1) {$-y_2\frac{H}{h_2}$}; 42 | \node[gradient2, left=25pt of dLdiv1] (dLdH) {$\frac{y_2}{H}$}; 43 | \node[gradient2, left=25pt of dLdH] (dLdexp1) {$\frac{y_2}{H}$}; 44 | \node[gradient2, left=25pt of dLdexp1] (dLdz1) {$y_2\frac{h_1}{H}$}; 45 | \node[gradient2, above=35pt of db1] (dLdb1) {$y_2\frac{h_1}{H}$}; 46 | 47 | %edges of the forward graph 48 | \path[tedge] (b1) -- (z1); 49 | \path[tedge] (z1) -- (exp1); 50
| \path[tedge] (exp1) -- (H); 51 | \path[tedge] (H) -- (div1); 52 | 53 | \path[tedge] (div1) -- (log1); 54 | \path[tedge] (log1) -- (mult5); 55 | \path[tedge] (y2) -- (mult5); 56 | \path[tedge] (mult5) -- (sum6); 57 | \path[tedge] (dots) -- (sum6); 58 | \path[tedge] (sum6) -- (minus1); 59 | 60 | %edges gradient 1 61 | % \path[tedge] (b1) -- (db1); 62 | % \path[tedge] (z1) -- (dz1); 63 | % \path[tedge] (exp1) -- (dexp1); 64 | % \path[tedge] (div1) -- (ddiv1); 65 | % \path[tedge] (H) -- (dH); 66 | % \path[tedge] (log1) -- (dlog1); 67 | % \path[tedge] (mult5) -- (dmult5); 68 | % \path[tedge] (sum6) -- (dsum6); 69 | 70 | %edges gradient 2 71 | \path[tedge] (dsum6) -- (dLdLpp); 72 | \path[tedge] (dLdL) -- (dLdLpp); 73 | \path[tedge] (dLdLpp) -- (dLdmult5); 74 | \path[tedge] (dmult5) -- (dLdmult5); 75 | \path[tedge] (dLdmult5) -- (dLlog1); 76 | \path[tedge] (dlog1) -- (dLlog1); 77 | \path[tedge] (ddiv1) -- (dLdiv1); 78 | \path[tedge] (dLlog1) -- (dLdiv1); 79 | \path[tedge] (dexp1) -- (dLdexp1); 80 | \path[tedge] (dH) -- (dLdH); 81 | \path[tedge] (dLdiv1) -- (dLdH); 82 | \path[tedge] (dLdH) -- (dLdexp1); 83 | \path[tedge] (dz1) -- (dLdz1); 84 | \path[tedge] (dLdexp1) -- (dLdz1); 85 | \path[tedge] (db1) -- (dLdb1); 86 | \path[tedge] (dLdz1) -- (dLdb1); 87 | 88 | 89 | \end{tikzpicture} 90 | } % scalebox 91 | \end{figure} 92 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/batch_example_values.tex: -------------------------------------------------------------------------------- 1 | {\large\textbf{batch\_size = 3} 2 | }\begin{figure}[ht!] 
3 | \centering 4 | 5 | \scalebox{1.3}{ 6 | \begin{tikzpicture}[auto] 7 | 8 | % operations ============================= 9 | 10 | % nodes 11 | \node[textonly] (x1) {$\begin{bmatrix}0.2\\0.7\end{bmatrix}$}; 12 | \node[textonly, below=30pt of x1] (y1) {$\begin{bmatrix}1\\0\end{bmatrix}$}; 13 | \node[textonly, right=10pt of x1] (x2) {$\begin{bmatrix}0.8\\0.1\end{bmatrix}$}; 14 | \node[textonly, below=30pt of x2] (y2) {$\begin{bmatrix}0\\1\end{bmatrix}$}; 15 | \node[textonly, right=10pt of x2] (x3) {$\begin{bmatrix}0.3\\0.5\end{bmatrix}$}; 16 | \node[textonly, below=30pt of x3] (y3) {$\begin{bmatrix}1\\0\end{bmatrix}$}; 17 | 18 | 19 | \node[textonly, above=1pt of x1] (x1name) {$\vect{x}_1$}; 20 | \node[textonly, above=1pt of y1] (y1name) {$\vect{y}_1$}; 21 | \node[textonly, above=1pt of x2] (x2name) {$\vect{x}_2$}; 22 | \node[textonly, above=1pt of y2] (y2name) {$\vect{y}_2$}; 23 | \node[textonly, above=1pt of x3] (x3name) {$\vect{x}_3$}; 24 | \node[textonly, above=1pt of y3] (y3name) {$\vect{y}_3$}; 25 | 26 | 27 | \end{tikzpicture} 28 | } % scalebox 29 | \end{figure} -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/batch_graph.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{0.8}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | \begin{scope}[xshift=0cm,yshift=0cm] 10 | \begin{scope}[xshift=0cm,yshift=0cm] 11 | \node[placeholder] (x1) at (1,3.5) {$\begin{bmatrix}0.3\\0.5\end{bmatrix}$}; 12 | \end{scope} 13 | \begin{scope}[xshift=-0.6cm,yshift=-0.6cm] 14 | \node[placeholder] (x2) at (1,3.5) {$\begin{bmatrix}0.8\\0.1\end{bmatrix}$}; 15 | \end{scope} 16 | \begin{scope}[xshift=-1.2cm,yshift=-1.2cm] 17 | \node[placeholder] (x3) at (1,3.5) {$\begin{bmatrix}0.2\\0.7\end{bmatrix}$}; 18 | \end{scope} 19 | \end{scope} 20 | 21 | \begin{scope}[xshift=0cm,yshift=0cm] 22 | \begin{scope}[xshift=0cm,yshift=0cm] 23 | \node[placeholder] (y1) at (1,1) {$\begin{bmatrix}1\\0\end{bmatrix}$}; 24 | \end{scope} 25 | \begin{scope}[xshift=-0.6cm,yshift=-0.6cm] 26 | \node[placeholder] (y2) at (1,1) {$\begin{bmatrix}0\\1\end{bmatrix}$}; 27 | \end{scope} 28 | \begin{scope}[xshift=-1.2cm,yshift=-1.2cm] 29 | \node[placeholder] (y3) at (1,1) {$\begin{bmatrix}1\\0\end{bmatrix}$}; 30 | \end{scope} 31 | \end{scope} 32 | 33 | \begin{scope}[xshift=0cm,yshift=0cm] 34 | \begin{scope}[xshift=0cm,yshift=0cm] 35 | \node[op] (df1) at (7,1) {\large{L}}; 36 | \end{scope} 37 | \begin{scope}[xshift=-0.4cm,yshift=-0.4cm] 38 | \node[op] (df2) at (7,1) {\large{L}}; 39 | \end{scope} 40 | \begin{scope}[xshift=-0.8cm,yshift=-0.8cm] 41 | \node[op] (df3) at (7,1) {\large{L}}; 42 | \end{scope} 43 | \end{scope} 44 | 45 | 46 | \begin{scope}[xshift=0cm,yshift=0cm] 47 | \begin{scope}[xshift=0cm,yshift=0cm] 48 | \node[gradient2] (grad1) at (12,1) {$-0.26$}; 49 | \end{scope} 50 | \begin{scope}[xshift=-0.6cm,yshift=-0.6cm] 51 | \node[gradient2] (grad2) at (12,1) {$0.54$}; 52 | \end{scope} 53 | \begin{scope}[xshift=-1.2cm,yshift=-1.2cm] 54 | \node[gradient2] (grad3) at (12,1) {$-0.19$}; 55 | \end{scope} 56 | \end{scope} 57 | 58 | 59 | \node[textonly, right=10pt of x2] (inv1) {}; 60 | \node[textonly, right=10pt of y2] (inv2) {}; 
61 | \node[textonly, left=10pt of df2] (inv3) {}; 62 | \node[textonly, right=10pt of df2] (inv4) {}; 63 | \node[textonly, left=10pt of grad2] (inv5) {}; 64 | 65 | %edges 66 | \path[tedge, orange!120, line width=1.5mm] (inv1) -- (inv3); 67 | \path[tedge, orange!120, line width=1.5mm] (inv2) -- (inv3); 68 | \path[tedge, green2!120, line width=1.5mm] (inv4) -- (inv5); 69 | 70 | \end{tikzpicture} 71 | } % scalebox 72 | \end{figure} -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/chain_rule_nodes.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{1.0}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | \node[op] (nt) {$u_j$}; 9 | \node[op, above left=50pt of nt] (a) {$u_{j-1}$}; 10 | \node[op, below left=40pt of nt] (b) {$u_{j-2}$}; 11 | \node[op, right=40pt of nt] (ntp) {$u_{j+1}$}; 12 | \node[textonly, right=20pt of ntp] (ntpp) {$\dots$}; 13 | \node[op, right=40pt of ntpp] (nT) {$u_{n}$}; 14 | \node[gradient, above=15pt of nt] (dnt) {$\frac{\partial u_{j+1}}{\partial u_{j}}$}; 15 | \node[gradient2, above=15pt of ntp] (dntp) {$\frac{\partial u_{n}}{\partial u_{j+1}}$}; 16 | \node[gradient2, above=15pt of dnt] (ddnt) {$\frac{\partial u_{n}}{\partial u_{j}}$}; 17 | \node[gradient2, above=20pt of nT] (dLdL) {$\frac{\partial u_{n}}{\partial u_{n}}$}; 18 | \node[textonly, right=0.1pt of dLdL] {$=1$}; 19 | 20 | % edges ============================= 21 | \path[tedge] (a) -- (nt); 22 | \path[tedge] (b) -- (nt); 23 | \path[tedge] (nt) -- (ntp); 24 | \path[tedge] (nt) -- (dnt); 25 | \path[tedge] (ntp) -- (ntpp); 26 | \path[tedge] (ntpp) -- (nT); 27 | \path[tedge] (ntp) -- (dntp); 28 | \path[tedge] (dnt) -- (ddnt); 29 | \path[tedge] (dntp) -- (ddnt); 30 | 31 | \end{tikzpicture} 32 | } % scalebox 33 | \end{figure} 34 | 
-------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/convnet_arch.tex: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture} 2 | \node (convlayer) at (0, 0) {CONV}; 3 | \node (poollayer) at (1.5, 0) {POOL}; 4 | \node (actvlayer) at (3, 0) {ReLU}; 5 | \node (otherlayer) at (4.5, 0) {(...)}; 6 | \node (fullyconnected) at (6, 0) {FC}; 7 | \draw [->, thin] (convlayer.east) -- (poollayer.west); 8 | \draw [->, thin] (poollayer.east) -- (actvlayer.west); 9 | \draw [->, thin] (actvlayer.east) -- (otherlayer.west); 10 | \draw [->, thin] (otherlayer.east) -- (fullyconnected.west); 11 | \end{tikzpicture} -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/div.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{1.2}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | \node[op] (times) {$div$}; 9 | \node[op, above left=20pt of times] (a) {$a$}; 10 | \node[op, below left=20pt of times] (b) {$b$}; 11 | \node[gradient, above left=15pt and 20pt of a] (da) {$\frac{\partial f}{\partial a}$}; 12 | \node[gradient, below left=15pt and 20pt of b] (db) {$\frac{\partial f}{\partial b}$}; 13 | \node[textonly, right=0.1pt of da] {$=\frac{1}{b}$}; 14 | \node[textonly, right=0.1pt of db] {$=\frac{-a}{b^{2}}$}; 15 | \node[textonly, right=0.1pt of times] {$=f(a,b) =\frac{a}{b}$}; 16 | 17 | % edges 18 | \path[tedge] (a) -- (times); 19 | \path[tedge] (b) -- (times); 20 | \path[tedge] (b) -- (da); 21 | \path[tedge] (a) -- (db); 22 | \path[tedge] (b) -- (db); 23 | 24 | 25 | \end{tikzpicture} 26 | } % scalebox 27 | \end{figure} 28 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/examples_values.tex: 
-------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{1.3}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % nodes 10 | \node[textonly] (W) {$\begin{bmatrix}0.65 & 1.19\\0.69 & -0.92\end{bmatrix}$}; 11 | \node[textonly, right=40pt of W] (x) {$\begin{bmatrix}0.2\\0.7\end{bmatrix}$}; 12 | \node[textonly, below=30pt of W] (b) {$\begin{bmatrix}0\\0\end{bmatrix}$}; 13 | \node[textonly, below=30pt of x] (y) {$\begin{bmatrix}1\\0\end{bmatrix}$}; 14 | \node[textonly, left=1pt of W] (Wname) {$\vect{W}=$}; 15 | \node[textonly, left=1pt of x] (xname) {$\vect{x}=$}; 16 | \node[textonly, left=1pt of b] (bname) {$\vect{b}=$}; 17 | \node[textonly, left=1pt of y] (yname) {$\vect{y}=$}; 18 | 19 | 20 | \end{tikzpicture} 21 | } % scalebox 22 | \end{figure} 23 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/exp.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{1.5}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | \node[op] (times) {$exp$}; 9 | \node[op, left=20pt of times] (a) {$a$}; 10 | \node[gradient, above=30pt of a] (da) {$\frac{\partial f}{\partial a}$}; 11 | \node[textonly, right=0.1pt of da] {$=e^{a}$}; 12 | \node[textonly, right=0.1pt of times] {$=f(a) = e^{a}$}; 13 | 14 | % edges 15 | \path[tedge] (a) -- (times); 16 | \path[tedge] (a) -- (da); 17 | 18 | \end{tikzpicture} 19 | } % scalebox 20 | \end{figure} 21 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/expanded_graph_0.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{0.6}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input x and W 10 | \node[op] (w11) {$w_{11}$}; 11 | \node[op, below=10pt of w11] (x1) {$x_{1}$}; 12 | \node[op, below=20pt of x1] (w12) {$w_{12}$}; 13 | \node[op, below=10pt of w12] (x2) {$x_{2}$}; 14 | 15 | \node[op, below=20pt of x2] (w21) {$w_{21}$}; 16 | \node[op, below=10pt of w21] (x11) {$x_{1}$}; 17 | \node[op, below=20pt of x11] (w22) {$w_{22}$}; 18 | \node[op, below=10pt of w22] (x22) {$x_{2}$}; 19 | 20 | % multiplication 21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$}; 22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$}; 23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$}; 24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$}; 25 | 26 | % sum 27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$}; 28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$}; 29 | \node[op, right=25pt of sum1] (sum3) {$z_1$}; 30 | \node[op, right=25pt of sum2] (sum4) {$z_2$}; 31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$}; 32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$}; 33 | 34 | % exp 35 | \node[op, right=25pt of sum3] (exp1) {$h_1$}; 36 | \node[op, right=25pt of sum4] (exp2) {$h_2$}; 37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$}; 38 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$}; 39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$}; 40 | 41 | % log 42 | \node[op, right=25pt of div1] (log1) {$\log$}; 43 | \node[op, right=25pt of div2] (log2) {$\log$}; 44 | \node[op, right=25pt of log1] (mult5) {$*$}; 45 | \node[op, right=25pt of log2] (mult6) {$*$}; 46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$}; 47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$}; 48 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$}; 49 | \node[op, right=25pt of sum6] (minus1) {$*-1$}; 50 | 51 | 52 | 53 | %edges 54 | \path[tedge] 
(w11) -- (mult1); 55 | \path[tedge] (x1) -- (mult1); 56 | \path[tedge] (w12) -- (mult2); 57 | \path[tedge] (x2) -- (mult2); 58 | \path[tedge] (w21) -- (mult3); 59 | \path[tedge] (x11) -- (mult3); 60 | \path[tedge] (w22) -- (mult4); 61 | \path[tedge] (x22) -- (mult4); 62 | 63 | \path[tedge] (mult1) -- (sum1); 64 | \path[tedge] (mult2) -- (sum1); 65 | \path[tedge] (mult3) -- (sum2); 66 | \path[tedge] (mult4) -- (sum2); 67 | \path[tedge] (sum1) -- (sum3); 68 | \path[tedge] (b1) -- (sum3); 69 | \path[tedge] (sum2) -- (sum4); 70 | \path[tedge] (b2) -- (sum4); 71 | 72 | \path[tedge] (sum3) -- (exp1); 73 | \path[tedge] (sum4) -- (exp2); 74 | \path[tedge] (exp1) -- (sum5); 75 | \path[tedge] (exp2) -- (sum5); 76 | \path[tedge] (exp1) -- (div1); 77 | \path[tedge] (exp2) -- (div2); 78 | \path[tedge] (sum5) -- (div1); 79 | \path[tedge] (sum5) -- (div2); 80 | 81 | 82 | \path[tedge] (div1) -- (log1); 83 | \path[tedge] (div2) -- (log2); 84 | \path[tedge] (log1) -- (mult5); 85 | \path[tedge] (y1) -- (mult5); 86 | \path[tedge] (log2) -- (mult6); 87 | \path[tedge] (y2) -- (mult6); 88 | \path[tedge] (mult5) -- (sum6); 89 | \path[tedge] (mult6) -- (sum6); 90 | \path[tedge] (sum6) -- (minus1); 91 | 92 | \end{tikzpicture} 93 | } % scalebox 94 | \end{figure} 95 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/expanded_graph_1.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
% expanded_graph_1: frame in which the eight input nodes (w11, x1, w12, x2, w21, x11, w22, x22) use the `state` style and show numeric values; every other node keeps the `op` style with a symbolic label.
% x11 and x22 are second copies of the x_1 / x_2 inputs (same displayed values as x1 / x2) that feed mult3 and mult4.
% NOTE(review): the `op`, `state` and `tedge` styles are not defined in this file - presumably they come from the deck's shared style definitions; confirm.
2 | \centering 3 | 4 | \scalebox{0.6}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input x and W 10 | \node[state] (w11) {$0.65$}; 11 | \node[state, below=10pt of w11] (x1) {$0.2$}; 12 | \node[state, below=20pt of x1] (w12) {$1.19$}; 13 | \node[state, below=10pt of w12] (x2) {$0.7$}; 14 | 15 | \node[state, below=20pt of x2] (w21) {$0.69$}; 16 | \node[state, below=10pt of w21] (x11) {$0.2$}; 17 | \node[state, below=20pt of x11] (w22) {$-0.92$}; 18 | \node[state, below=10pt of w22] (x22) {$0.7$}; 19 | 20 | % multiplication 21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$}; 22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$}; 23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$}; 24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$}; 25 | 26 | % sum 27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$}; 28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$}; 29 | \node[op, right=25pt of sum1] (sum3) {$z_1$}; 30 | \node[op, right=25pt of sum2] (sum4) {$z_2$}; 31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$}; 32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$}; 33 | 34 | % exp 35 | \node[op, right=25pt of sum3] (exp1) {$h_1$}; 36 | \node[op, right=25pt of sum4] (exp2) {$h_2$}; 37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$}; 38 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$}; 39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$}; 40 | 41 | % log 42 | \node[op, right=25pt of div1] (log1) {$\log$}; 43 | \node[op, right=25pt of div2] (log2) {$\log$}; 44 | \node[op, right=25pt of log1] (mult5) {$*$}; 45 | \node[op, right=25pt of log2] (mult6) {$*$}; 46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$}; 47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$}; 48 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$}; 49 | \node[op, right=25pt of sum6] (minus1) {$*-1$}; 50 | 51 | 52 | 53 | %edges 54 | 
\path[tedge] (w11) -- (mult1); 55 | \path[tedge] (x1) -- (mult1); 56 | \path[tedge] (w12) -- (mult2); 57 | \path[tedge] (x2) -- (mult2); 58 | \path[tedge] (w21) -- (mult3); 59 | \path[tedge] (x11) -- (mult3); 60 | \path[tedge] (w22) -- (mult4); 61 | \path[tedge] (x22) -- (mult4); 62 | 63 | \path[tedge] (mult1) -- (sum1); 64 | \path[tedge] (mult2) -- (sum1); 65 | \path[tedge] (mult3) -- (sum2); 66 | \path[tedge] (mult4) -- (sum2); 67 | \path[tedge] (sum1) -- (sum3); 68 | \path[tedge] (b1) -- (sum3); 69 | \path[tedge] (sum2) -- (sum4); 70 | \path[tedge] (b2) -- (sum4); 71 | 72 | \path[tedge] (sum3) -- (exp1); 73 | \path[tedge] (sum4) -- (exp2); 74 | \path[tedge] (exp1) -- (sum5); 75 | \path[tedge] (exp2) -- (sum5); 76 | \path[tedge] (exp1) -- (div1); 77 | \path[tedge] (exp2) -- (div2); 78 | \path[tedge] (sum5) -- (div1); 79 | \path[tedge] (sum5) -- (div2); 80 | 81 | 82 | \path[tedge] (div1) -- (log1); 83 | \path[tedge] (div2) -- (log2); 84 | \path[tedge] (log1) -- (mult5); 85 | \path[tedge] (y1) -- (mult5); 86 | \path[tedge] (log2) -- (mult6); 87 | \path[tedge] (y2) -- (mult6); 88 | \path[tedge] (mult5) -- (sum6); 89 | \path[tedge] (mult6) -- (sum6); 90 | \path[tedge] (sum6) -- (minus1); 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | \end{tikzpicture} 106 | } % scalebox 107 | \end{figure} 108 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/expanded_graph_10.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
% expanded_graph_10: frame in which the two log nodes (log1, log2) and the two target nodes (y1, y2) use the `state` style and show numeric values (-0.207, -1.67, 1, 0); every other node keeps the `op` style with a symbolic label.
% NOTE(review): the `op`, `state` and `tedge` styles are not defined in this file - presumably they come from the deck's shared style definitions; confirm.
2 | \centering 3 | 4 | \scalebox{0.6}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input x and W 10 | \node[op] (w11) {$w_{11}$}; 11 | \node[op, below=10pt of w11] (x1) {$x_{1}$}; 12 | \node[op, below=20pt of x1] (w12) {$w_{12}$}; 13 | \node[op, below=10pt of w12] (x2) {$x_{2}$}; 14 | 15 | \node[op, below=20pt of x2] (w21) {$w_{21}$}; 16 | \node[op, below=10pt of w21] (x11) {$x_{1}$}; 17 | \node[op, below=20pt of x11] (w22) {$w_{22}$}; 18 | \node[op, below=10pt of w22] (x22) {$x_{2}$}; 19 | 20 | % multiplication 21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$}; 22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$}; 23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$}; 24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$}; 25 | 26 | % sum 27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$}; 28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$}; 29 | \node[op, right=25pt of sum1] (sum3) {$z_1$}; 30 | \node[op, right=25pt of sum2] (sum4) {$z_2$}; 31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$}; 32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$}; 33 | 34 | % exp 35 | \node[op, right=25pt of sum3] (exp1) {$h_1$}; 36 | \node[op, right=25pt of sum4] (exp2) {$h_2$}; 37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$}; 38 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$}; 39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$}; 40 | 41 | % log 42 | \node[state, right=25pt of div1] (log1) {$-0.207$}; 43 | \node[state, right=25pt of div2] (log2) {$-1.67$}; 44 | \node[op, right=25pt of log1] (mult5) {$*$}; 45 | \node[op, right=25pt of log2] (mult6) {$*$}; 46 | \node[state, above left=25pt and 10pt of mult5] (y1) {$1$}; 47 | \node[state, below left=25pt and 10pt of mult6] (y2) {$0$}; 48 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$}; 49 | \node[op, right=25pt of sum6] (minus1) {$*-1$}; 50 | 51 | 52 | 53 | %edges 54 | 
\path[tedge] (w11) -- (mult1); 55 | \path[tedge] (x1) -- (mult1); 56 | \path[tedge] (w12) -- (mult2); 57 | \path[tedge] (x2) -- (mult2); 58 | \path[tedge] (w21) -- (mult3); 59 | \path[tedge] (x11) -- (mult3); 60 | \path[tedge] (w22) -- (mult4); 61 | \path[tedge] (x22) -- (mult4); 62 | 63 | \path[tedge] (mult1) -- (sum1); 64 | \path[tedge] (mult2) -- (sum1); 65 | \path[tedge] (mult3) -- (sum2); 66 | \path[tedge] (mult4) -- (sum2); 67 | \path[tedge] (sum1) -- (sum3); 68 | \path[tedge] (b1) -- (sum3); 69 | \path[tedge] (sum2) -- (sum4); 70 | \path[tedge] (b2) -- (sum4); 71 | 72 | \path[tedge] (sum3) -- (exp1); 73 | \path[tedge] (sum4) -- (exp2); 74 | \path[tedge] (exp1) -- (sum5); 75 | \path[tedge] (exp2) -- (sum5); 76 | \path[tedge] (exp1) -- (div1); 77 | \path[tedge] (exp2) -- (div2); 78 | \path[tedge] (sum5) -- (div1); 79 | \path[tedge] (sum5) -- (div2); 80 | 81 | 82 | \path[tedge] (div1) -- (log1); 83 | \path[tedge] (div2) -- (log2); 84 | \path[tedge] (log1) -- (mult5); 85 | \path[tedge] (y1) -- (mult5); 86 | \path[tedge] (log2) -- (mult6); 87 | \path[tedge] (y2) -- (mult6); 88 | \path[tedge] (mult5) -- (sum6); 89 | \path[tedge] (mult6) -- (sum6); 90 | \path[tedge] (sum6) -- (minus1); 91 | 92 | \end{tikzpicture} 93 | } % scalebox 94 | \end{figure} 95 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/expanded_graph_11.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
% expanded_graph_11: frame in which the two product nodes fed by the log and target nodes (mult5, mult6) use the `state` style and show numeric values (-0.207 and 0); every other node keeps the `op` style with a symbolic label.
% NOTE(review): the `op`, `state` and `tedge` styles are not defined in this file - presumably they come from the deck's shared style definitions; confirm.
2 | \centering 3 | 4 | \scalebox{0.6}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input x and W 10 | \node[op] (w11) {$w_{11}$}; 11 | \node[op, below=10pt of w11] (x1) {$x_{1}$}; 12 | \node[op, below=20pt of x1] (w12) {$w_{12}$}; 13 | \node[op, below=10pt of w12] (x2) {$x_{2}$}; 14 | 15 | \node[op, below=20pt of x2] (w21) {$w_{21}$}; 16 | \node[op, below=10pt of w21] (x11) {$x_{1}$}; 17 | \node[op, below=20pt of x11] (w22) {$w_{22}$}; 18 | \node[op, below=10pt of w22] (x22) {$x_{2}$}; 19 | 20 | % multiplication 21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$}; 22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$}; 23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$}; 24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$}; 25 | 26 | % sum 27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$}; 28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$}; 29 | \node[op, right=25pt of sum1] (sum3) {$z_1$}; 30 | \node[op, right=25pt of sum2] (sum4) {$z_2$}; 31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$}; 32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$}; 33 | 34 | % exp 35 | \node[op, right=25pt of sum3] (exp1) {$h_1$}; 36 | \node[op, right=25pt of sum4] (exp2) {$h_2$}; 37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$}; 38 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$}; 39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$}; 40 | 41 | % log 42 | \node[op, right=25pt of div1] (log1) {$\log$}; 43 | \node[op, right=25pt of div2] (log2) {$\log$}; 44 | \node[state, right=25pt of log1] (mult5) {$-0.207$}; 45 | \node[state, right=25pt of log2] (mult6) {$0$}; 46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$}; 47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$}; 48 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$}; 49 | \node[op, right=25pt of sum6] (minus1) {$*-1$}; 50 | 51 | 52 | 53 | %edges 54 | 
\path[tedge] (w11) -- (mult1); 55 | \path[tedge] (x1) -- (mult1); 56 | \path[tedge] (w12) -- (mult2); 57 | \path[tedge] (x2) -- (mult2); 58 | \path[tedge] (w21) -- (mult3); 59 | \path[tedge] (x11) -- (mult3); 60 | \path[tedge] (w22) -- (mult4); 61 | \path[tedge] (x22) -- (mult4); 62 | 63 | \path[tedge] (mult1) -- (sum1); 64 | \path[tedge] (mult2) -- (sum1); 65 | \path[tedge] (mult3) -- (sum2); 66 | \path[tedge] (mult4) -- (sum2); 67 | \path[tedge] (sum1) -- (sum3); 68 | \path[tedge] (b1) -- (sum3); 69 | \path[tedge] (sum2) -- (sum4); 70 | \path[tedge] (b2) -- (sum4); 71 | 72 | \path[tedge] (sum3) -- (exp1); 73 | \path[tedge] (sum4) -- (exp2); 74 | \path[tedge] (exp1) -- (sum5); 75 | \path[tedge] (exp2) -- (sum5); 76 | \path[tedge] (exp1) -- (div1); 77 | \path[tedge] (exp2) -- (div2); 78 | \path[tedge] (sum5) -- (div1); 79 | \path[tedge] (sum5) -- (div2); 80 | 81 | 82 | \path[tedge] (div1) -- (log1); 83 | \path[tedge] (div2) -- (log2); 84 | \path[tedge] (log1) -- (mult5); 85 | \path[tedge] (y1) -- (mult5); 86 | \path[tedge] (log2) -- (mult6); 87 | \path[tedge] (y2) -- (mult6); 88 | \path[tedge] (mult5) -- (sum6); 89 | \path[tedge] (mult6) -- (sum6); 90 | \path[tedge] (sum6) -- (minus1); 91 | 92 | \end{tikzpicture} 93 | } % scalebox 94 | \end{figure} 95 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/expanded_graph_12.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
% expanded_graph_12: frame in which the node that sums mult5 and mult6 (sum6) uses the `state` style and shows the numeric value -0.207; every other node keeps the `op` style with a symbolic label.
% NOTE(review): the `op`, `state` and `tedge` styles are not defined in this file - presumably they come from the deck's shared style definitions; confirm.
2 | \centering 3 | 4 | \scalebox{0.6}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input x and W 10 | \node[op] (w11) {$w_{11}$}; 11 | \node[op, below=10pt of w11] (x1) {$x_{1}$}; 12 | \node[op, below=20pt of x1] (w12) {$w_{12}$}; 13 | \node[op, below=10pt of w12] (x2) {$x_{2}$}; 14 | 15 | \node[op, below=20pt of x2] (w21) {$w_{21}$}; 16 | \node[op, below=10pt of w21] (x11) {$x_{1}$}; 17 | \node[op, below=20pt of x11] (w22) {$w_{22}$}; 18 | \node[op, below=10pt of w22] (x22) {$x_{2}$}; 19 | 20 | % multiplication 21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$}; 22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$}; 23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$}; 24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$}; 25 | 26 | % sum 27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$}; 28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$}; 29 | \node[op, right=25pt of sum1] (sum3) {$z_1$}; 30 | \node[op, right=25pt of sum2] (sum4) {$z_2$}; 31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$}; 32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$}; 33 | 34 | % exp 35 | \node[op, right=25pt of sum3] (exp1) {$h_1$}; 36 | \node[op, right=25pt of sum4] (exp2) {$h_2$}; 37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$}; 38 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$}; 39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$}; 40 | 41 | % log 42 | \node[op, right=25pt of div1] (log1) {$\log$}; 43 | \node[op, right=25pt of div2] (log2) {$\log$}; 44 | \node[op, right=25pt of log1] (mult5) {$*$}; 45 | \node[op, right=25pt of log2] (mult6) {$*$}; 46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$}; 47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$}; 48 | \node[state, below right=65pt and 15pt of mult5] (sum6) {$-0.207$}; 49 | \node[op, right=25pt of sum6] (minus1) {$*-1$}; 50 | 51 | 52 | 53 | %edges 54 | 
\path[tedge] (w11) -- (mult1); 55 | \path[tedge] (x1) -- (mult1); 56 | \path[tedge] (w12) -- (mult2); 57 | \path[tedge] (x2) -- (mult2); 58 | \path[tedge] (w21) -- (mult3); 59 | \path[tedge] (x11) -- (mult3); 60 | \path[tedge] (w22) -- (mult4); 61 | \path[tedge] (x22) -- (mult4); 62 | 63 | \path[tedge] (mult1) -- (sum1); 64 | \path[tedge] (mult2) -- (sum1); 65 | \path[tedge] (mult3) -- (sum2); 66 | \path[tedge] (mult4) -- (sum2); 67 | \path[tedge] (sum1) -- (sum3); 68 | \path[tedge] (b1) -- (sum3); 69 | \path[tedge] (sum2) -- (sum4); 70 | \path[tedge] (b2) -- (sum4); 71 | 72 | \path[tedge] (sum3) -- (exp1); 73 | \path[tedge] (sum4) -- (exp2); 74 | \path[tedge] (exp1) -- (sum5); 75 | \path[tedge] (exp2) -- (sum5); 76 | \path[tedge] (exp1) -- (div1); 77 | \path[tedge] (exp2) -- (div2); 78 | \path[tedge] (sum5) -- (div1); 79 | \path[tedge] (sum5) -- (div2); 80 | 81 | 82 | \path[tedge] (div1) -- (log1); 83 | \path[tedge] (div2) -- (log2); 84 | \path[tedge] (log1) -- (mult5); 85 | \path[tedge] (y1) -- (mult5); 86 | \path[tedge] (log2) -- (mult6); 87 | \path[tedge] (y2) -- (mult6); 88 | \path[tedge] (mult5) -- (sum6); 89 | \path[tedge] (mult6) -- (sum6); 90 | \path[tedge] (sum6) -- (minus1); 91 | 92 | \end{tikzpicture} 93 | } % scalebox 94 | \end{figure} 95 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/expanded_graph_13.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
% expanded_graph_13: frame in which the final node (minus1) uses the `state` style and shows the numeric value 0.207; every other graph node keeps the `op` style with a symbolic label.
% An extra `gradient`-styled node dLdb1 (label: partial L / partial b_1) is placed to the right of b1 and linked by an edge from b1; both the node and its edge are wrapped in \visible<2->, i.e. (assuming beamer overlay semantics) they appear from the second overlay on.
% NOTE(review): the `op`, `state`, `gradient` and `tedge` styles are not defined in this file - presumably they come from the deck's shared style definitions; confirm.
2 | \centering 3 | 4 | \scalebox{0.6}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input x and W 10 | \node[op] (w11) {$w_{11}$}; 11 | \node[op, below=10pt of w11] (x1) {$x_{1}$}; 12 | \node[op, below=20pt of x1] (w12) {$w_{12}$}; 13 | \node[op, below=10pt of w12] (x2) {$x_{2}$}; 14 | 15 | \node[op, below=20pt of x2] (w21) {$w_{21}$}; 16 | \node[op, below=10pt of w21] (x11) {$x_{1}$}; 17 | \node[op, below=20pt of x11] (w22) {$w_{22}$}; 18 | \node[op, below=10pt of w22] (x22) {$x_{2}$}; 19 | 20 | % multiplication 21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$}; 22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$}; 23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$}; 24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$}; 25 | 26 | % sum 27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$}; 28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$}; 29 | \node[op, right=25pt of sum1] (sum3) {$z_1$}; 30 | \node[op, right=25pt of sum2] (sum4) {$z_2$}; 31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$}; 32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$}; 33 | 34 | % exp 35 | \node[op, right=25pt of sum3] (exp1) {$h_1$}; 36 | \node[op, right=25pt of sum4] (exp2) {$h_2$}; 37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$}; 38 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$}; 39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$}; 40 | 41 | % log 42 | \node[op, right=25pt of div1] (log1) {$\log$}; 43 | \node[op, right=25pt of div2] (log2) {$\log$}; 44 | \node[op, right=25pt of log1] (mult5) {$*$}; 45 | \node[op, right=25pt of log2] (mult6) {$*$}; 46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$}; 47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$}; 48 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$}; 49 | \node[state, right=25pt of sum6] (minus1) {$0.207$}; 50 | 51 | % gradient 52 | 
\visible<2->{\node[gradient, right=25pt of b1] (dLdb1) {$\frac{\partial L}{\partial b_1}$};} 53 | 54 | 55 | %edges 56 | \path[tedge] (w11) -- (mult1); 57 | \path[tedge] (x1) -- (mult1); 58 | \path[tedge] (w12) -- (mult2); 59 | \path[tedge] (x2) -- (mult2); 60 | \path[tedge] (w21) -- (mult3); 61 | \path[tedge] (x11) -- (mult3); 62 | \path[tedge] (w22) -- (mult4); 63 | \path[tedge] (x22) -- (mult4); 64 | 65 | \path[tedge] (mult1) -- (sum1); 66 | \path[tedge] (mult2) -- (sum1); 67 | \path[tedge] (mult3) -- (sum2); 68 | \path[tedge] (mult4) -- (sum2); 69 | \path[tedge] (sum1) -- (sum3); 70 | \path[tedge] (b1) -- (sum3); 71 | \path[tedge] (sum2) -- (sum4); 72 | \path[tedge] (b2) -- (sum4); 73 | 74 | \path[tedge] (sum3) -- (exp1); 75 | \path[tedge] (sum4) -- (exp2); 76 | \path[tedge] (exp1) -- (sum5); 77 | \path[tedge] (exp2) -- (sum5); 78 | \path[tedge] (exp1) -- (div1); 79 | \path[tedge] (exp2) -- (div2); 80 | \path[tedge] (sum5) -- (div1); 81 | \path[tedge] (sum5) -- (div2); 82 | 83 | 84 | \path[tedge] (div1) -- (log1); 85 | \path[tedge] (div2) -- (log2); 86 | \path[tedge] (log1) -- (mult5); 87 | \path[tedge] (y1) -- (mult5); 88 | \path[tedge] (log2) -- (mult6); 89 | \path[tedge] (y2) -- (mult6); 90 | \path[tedge] (mult5) -- (sum6); 91 | \path[tedge] (mult6) -- (sum6); 92 | \path[tedge] (sum6) -- (minus1); 93 | 94 | \visible<2->{\path[tedge] (b1) -- (dLdb1);} 95 | 96 | \end{tikzpicture} 97 | } % scalebox 98 | \end{figure} 99 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/expanded_graph_14.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
% expanded_graph_14: frame in which the final node (minus1) uses the `state` style and shows the numeric value 0.207, and a `gradient`-styled node dLdb1 showing the numeric value -0.19 is placed to the right of b1 and linked by an edge from b1 (no overlay wrapper here, so it is always drawn).
% Every other graph node keeps the `op` style with a symbolic label.
% NOTE(review): the `op`, `state`, `gradient` and `tedge` styles are not defined in this file - presumably they come from the deck's shared style definitions; confirm.
2 | \centering 3 | 4 | \scalebox{0.6}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input x and W 10 | \node[op] (w11) {$w_{11}$}; 11 | \node[op, below=10pt of w11] (x1) {$x_{1}$}; 12 | \node[op, below=20pt of x1] (w12) {$w_{12}$}; 13 | \node[op, below=10pt of w12] (x2) {$x_{2}$}; 14 | 15 | \node[op, below=20pt of x2] (w21) {$w_{21}$}; 16 | \node[op, below=10pt of w21] (x11) {$x_{1}$}; 17 | \node[op, below=20pt of x11] (w22) {$w_{22}$}; 18 | \node[op, below=10pt of w22] (x22) {$x_{2}$}; 19 | 20 | % multiplication 21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$}; 22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$}; 23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$}; 24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$}; 25 | 26 | % sum 27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$}; 28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$}; 29 | \node[op, right=25pt of sum1] (sum3) {$z_1$}; 30 | \node[op, right=25pt of sum2] (sum4) {$z_2$}; 31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$}; 32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$}; 33 | 34 | % exp 35 | \node[op, right=25pt of sum3] (exp1) {$h_1$}; 36 | \node[op, right=25pt of sum4] (exp2) {$h_2$}; 37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$}; 38 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$}; 39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$}; 40 | 41 | % log 42 | \node[op, right=25pt of div1] (log1) {$\log$}; 43 | \node[op, right=25pt of div2] (log2) {$\log$}; 44 | \node[op, right=25pt of log1] (mult5) {$*$}; 45 | \node[op, right=25pt of log2] (mult6) {$*$}; 46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$}; 47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$}; 48 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$}; 49 | \node[state, right=25pt of sum6] (minus1) {$0.207$}; 50 | 51 | % gradient 52 | \node[gradient, 
right=25pt of b1] (dLdb1) {$-0.19$}; 53 | 54 | 55 | %edges 56 | \path[tedge] (w11) -- (mult1); 57 | \path[tedge] (x1) -- (mult1); 58 | \path[tedge] (w12) -- (mult2); 59 | \path[tedge] (x2) -- (mult2); 60 | \path[tedge] (w21) -- (mult3); 61 | \path[tedge] (x11) -- (mult3); 62 | \path[tedge] (w22) -- (mult4); 63 | \path[tedge] (x22) -- (mult4); 64 | 65 | \path[tedge] (mult1) -- (sum1); 66 | \path[tedge] (mult2) -- (sum1); 67 | \path[tedge] (mult3) -- (sum2); 68 | \path[tedge] (mult4) -- (sum2); 69 | \path[tedge] (sum1) -- (sum3); 70 | \path[tedge] (b1) -- (sum3); 71 | \path[tedge] (sum2) -- (sum4); 72 | \path[tedge] (b2) -- (sum4); 73 | 74 | \path[tedge] (sum3) -- (exp1); 75 | \path[tedge] (sum4) -- (exp2); 76 | \path[tedge] (exp1) -- (sum5); 77 | \path[tedge] (exp2) -- (sum5); 78 | \path[tedge] (exp1) -- (div1); 79 | \path[tedge] (exp2) -- (div2); 80 | \path[tedge] (sum5) -- (div1); 81 | \path[tedge] (sum5) -- (div2); 82 | 83 | 84 | \path[tedge] (div1) -- (log1); 85 | \path[tedge] (div2) -- (log2); 86 | \path[tedge] (log1) -- (mult5); 87 | \path[tedge] (y1) -- (mult5); 88 | \path[tedge] (log2) -- (mult6); 89 | \path[tedge] (y2) -- (mult6); 90 | \path[tedge] (mult5) -- (sum6); 91 | \path[tedge] (mult6) -- (sum6); 92 | \path[tedge] (sum6) -- (minus1); 93 | 94 | \path[tedge] (b1) -- (dLdb1); 95 | 96 | \end{tikzpicture} 97 | } % scalebox 98 | \end{figure} 99 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/expanded_graph_2.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
% expanded_graph_2: frame in which the four weight-times-input product nodes (mult1..mult4) use the `state` style and show numeric values (0.13, 0.83, 0.14, -0.64); every other node keeps the `op` style with a symbolic label.
% NOTE(review): the `op`, `state` and `tedge` styles are not defined in this file - presumably they come from the deck's shared style definitions; confirm.
2 | \centering 3 | 4 | \scalebox{0.6}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input x and W 10 | \node[op] (w11) {$w_{11}$}; 11 | \node[op, below=10pt of w11] (x1) {$x_{1}$}; 12 | \node[op, below=20pt of x1] (w12) {$w_{12}$}; 13 | \node[op, below=10pt of w12] (x2) {$x_{2}$}; 14 | 15 | \node[op, below=20pt of x2] (w21) {$w_{21}$}; 16 | \node[op, below=10pt of w21] (x11) {$x_{1}$}; 17 | \node[op, below=20pt of x11] (w22) {$w_{22}$}; 18 | \node[op, below=10pt of w22] (x22) {$x_{2}$}; 19 | 20 | % multiplication 21 | \node[state, below right=1pt and 20pt of w11] (mult1) {$0.13$}; 22 | \node[state, below right=1pt and 20pt of w12] (mult2) {$0.83$}; 23 | \node[state, below right=1pt and 20pt of w21] (mult3) {$0.14$}; 24 | \node[state, below right=1pt and 20pt of w22] (mult4) {$-0.64$}; 25 | 26 | % sum 27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$}; 28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$}; 29 | \node[op, right=25pt of sum1] (sum3) {$z_1$}; 30 | \node[op, right=25pt of sum2] (sum4) {$z_2$}; 31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$}; 32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$}; 33 | 34 | % exp 35 | \node[op, right=25pt of sum3] (exp1) {$h_1$}; 36 | \node[op, right=25pt of sum4] (exp2) {$h_2$}; 37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$}; 38 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$}; 39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$}; 40 | 41 | % log 42 | \node[op, right=25pt of div1] (log1) {$\log$}; 43 | \node[op, right=25pt of div2] (log2) {$\log$}; 44 | \node[op, right=25pt of log1] (mult5) {$*$}; 45 | \node[op, right=25pt of log2] (mult6) {$*$}; 46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$}; 47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$}; 48 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$}; 49 | \node[op, right=25pt of sum6] (minus1) {$*-1$}; 50 | 51 | 52 | 53 | 
%edges 54 | \path[tedge] (w11) -- (mult1); 55 | \path[tedge] (x1) -- (mult1); 56 | \path[tedge] (w12) -- (mult2); 57 | \path[tedge] (x2) -- (mult2); 58 | \path[tedge] (w21) -- (mult3); 59 | \path[tedge] (x11) -- (mult3); 60 | \path[tedge] (w22) -- (mult4); 61 | \path[tedge] (x22) -- (mult4); 62 | 63 | \path[tedge] (mult1) -- (sum1); 64 | \path[tedge] (mult2) -- (sum1); 65 | \path[tedge] (mult3) -- (sum2); 66 | \path[tedge] (mult4) -- (sum2); 67 | \path[tedge] (sum1) -- (sum3); 68 | \path[tedge] (b1) -- (sum3); 69 | \path[tedge] (sum2) -- (sum4); 70 | \path[tedge] (b2) -- (sum4); 71 | 72 | \path[tedge] (sum3) -- (exp1); 73 | \path[tedge] (sum4) -- (exp2); 74 | \path[tedge] (exp1) -- (sum5); 75 | \path[tedge] (exp2) -- (sum5); 76 | \path[tedge] (exp1) -- (div1); 77 | \path[tedge] (exp2) -- (div2); 78 | \path[tedge] (sum5) -- (div1); 79 | \path[tedge] (sum5) -- (div2); 80 | 81 | 82 | \path[tedge] (div1) -- (log1); 83 | \path[tedge] (div2) -- (log2); 84 | \path[tedge] (log1) -- (mult5); 85 | \path[tedge] (y1) -- (mult5); 86 | \path[tedge] (log2) -- (mult6); 87 | \path[tedge] (y2) -- (mult6); 88 | \path[tedge] (mult5) -- (sum6); 89 | \path[tedge] (mult6) -- (sum6); 90 | \path[tedge] (sum6) -- (minus1); 91 | 92 | \end{tikzpicture} 93 | } % scalebox 94 | \end{figure} 95 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/expanded_graph_3.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
% expanded_graph_3: frame in which the two nodes that sum the products (sum1, sum2) use the `state` style and show numeric values (0.96 and -0.5); every other node keeps the `op` style with a symbolic label.
% NOTE(review): the `op`, `state` and `tedge` styles are not defined in this file - presumably they come from the deck's shared style definitions; confirm.
2 | \centering 3 | 4 | \scalebox{0.6}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input x and W 10 | \node[op] (w11) {$w_{11}$}; 11 | \node[op, below=10pt of w11] (x1) {$x_{1}$}; 12 | \node[op, below=20pt of x1] (w12) {$w_{12}$}; 13 | \node[op, below=10pt of w12] (x2) {$x_{2}$}; 14 | 15 | \node[op, below=20pt of x2] (w21) {$w_{21}$}; 16 | \node[op, below=10pt of w21] (x11) {$x_{1}$}; 17 | \node[op, below=20pt of x11] (w22) {$w_{22}$}; 18 | \node[op, below=10pt of w22] (x22) {$x_{2}$}; 19 | 20 | % multiplication 21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$}; 22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$}; 23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$}; 24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$}; 25 | 26 | % sum 27 | \node[state, below right=25pt and 20pt of mult1] (sum1) {$0.96$}; 28 | \node[state, below right=25pt and 20pt of mult3] (sum2) {$-0.5$}; 29 | \node[op, right=25pt of sum1] (sum3) {$z_1$}; 30 | \node[op, right=25pt of sum2] (sum4) {$z_2$}; 31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$}; 32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$}; 33 | 34 | % exp 35 | \node[op, right=25pt of sum3] (exp1) {$h_1$}; 36 | \node[op, right=25pt of sum4] (exp2) {$h_2$}; 37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$}; 38 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$}; 39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$}; 40 | 41 | % log 42 | \node[op, right=25pt of div1] (log1) {$\log$}; 43 | \node[op, right=25pt of div2] (log2) {$\log$}; 44 | \node[op, right=25pt of log1] (mult5) {$*$}; 45 | \node[op, right=25pt of log2] (mult6) {$*$}; 46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$}; 47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$}; 48 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$}; 49 | \node[op, right=25pt of sum6] (minus1) {$*-1$}; 50 | 51 | 52 | 53 | %edges 54 | 
\path[tedge] (w11) -- (mult1); 55 | \path[tedge] (x1) -- (mult1); 56 | \path[tedge] (w12) -- (mult2); 57 | \path[tedge] (x2) -- (mult2); 58 | \path[tedge] (w21) -- (mult3); 59 | \path[tedge] (x11) -- (mult3); 60 | \path[tedge] (w22) -- (mult4); 61 | \path[tedge] (x22) -- (mult4); 62 | 63 | \path[tedge] (mult1) -- (sum1); 64 | \path[tedge] (mult2) -- (sum1); 65 | \path[tedge] (mult3) -- (sum2); 66 | \path[tedge] (mult4) -- (sum2); 67 | \path[tedge] (sum1) -- (sum3); 68 | \path[tedge] (b1) -- (sum3); 69 | \path[tedge] (sum2) -- (sum4); 70 | \path[tedge] (b2) -- (sum4); 71 | 72 | \path[tedge] (sum3) -- (exp1); 73 | \path[tedge] (sum4) -- (exp2); 74 | \path[tedge] (exp1) -- (sum5); 75 | \path[tedge] (exp2) -- (sum5); 76 | \path[tedge] (exp1) -- (div1); 77 | \path[tedge] (exp2) -- (div2); 78 | \path[tedge] (sum5) -- (div1); 79 | \path[tedge] (sum5) -- (div2); 80 | 81 | 82 | \path[tedge] (div1) -- (log1); 83 | \path[tedge] (div2) -- (log2); 84 | \path[tedge] (log1) -- (mult5); 85 | \path[tedge] (y1) -- (mult5); 86 | \path[tedge] (log2) -- (mult6); 87 | \path[tedge] (y2) -- (mult6); 88 | \path[tedge] (mult5) -- (sum6); 89 | \path[tedge] (mult6) -- (sum6); 90 | \path[tedge] (sum6) -- (minus1); 91 | 92 | \end{tikzpicture} 93 | } % scalebox 94 | \end{figure} 95 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/expanded_graph_4.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
% expanded_graph_4: frame in which the two product-sum nodes (sum1, sum2) and the two bias nodes (b1, b2) use the `state` style and show numeric values (0.96, -0.5, 0, 0); every other node keeps the `op` style with a symbolic label.
% NOTE(review): the `op`, `state` and `tedge` styles are not defined in this file - presumably they come from the deck's shared style definitions; confirm.
2 | \centering 3 | 4 | \scalebox{0.6}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input x and W 10 | \node[op] (w11) {$w_{11}$}; 11 | \node[op, below=10pt of w11] (x1) {$x_{1}$}; 12 | \node[op, below=20pt of x1] (w12) {$w_{12}$}; 13 | \node[op, below=10pt of w12] (x2) {$x_{2}$}; 14 | 15 | \node[op, below=20pt of x2] (w21) {$w_{21}$}; 16 | \node[op, below=10pt of w21] (x11) {$x_{1}$}; 17 | \node[op, below=20pt of x11] (w22) {$w_{22}$}; 18 | \node[op, below=10pt of w22] (x22) {$x_{2}$}; 19 | 20 | % multiplication 21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$}; 22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$}; 23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$}; 24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$}; 25 | 26 | % sum 27 | \node[state, below right=25pt and 20pt of mult1] (sum1) {$0.96$}; 28 | \node[state, below right=25pt and 20pt of mult3] (sum2) {$-0.5$}; 29 | \node[op, right=25pt of sum1] (sum3) {$z_1$}; 30 | \node[op, right=25pt of sum2] (sum4) {$z_2$}; 31 | \node[state, above left=25pt and 10pt of sum3] (b1) {$0$}; 32 | \node[state, below left=25pt and 10pt of sum4] (b2) {$0$}; 33 | 34 | % exp 35 | \node[op, right=25pt of sum3] (exp1) {$h_1$}; 36 | \node[op, right=25pt of sum4] (exp2) {$h_2$}; 37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$}; 38 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$}; 39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$}; 40 | 41 | % log 42 | \node[op, right=25pt of div1] (log1) {$\log$}; 43 | \node[op, right=25pt of div2] (log2) {$\log$}; 44 | \node[op, right=25pt of log1] (mult5) {$*$}; 45 | \node[op, right=25pt of log2] (mult6) {$*$}; 46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$}; 47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$}; 48 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$}; 49 | \node[op, right=25pt of sum6] (minus1) {$*-1$}; 50 | 51 | 52 | 53 | %edges 54 | 
\path[tedge] (w11) -- (mult1); 55 | \path[tedge] (x1) -- (mult1); 56 | \path[tedge] (w12) -- (mult2); 57 | \path[tedge] (x2) -- (mult2); 58 | \path[tedge] (w21) -- (mult3); 59 | \path[tedge] (x11) -- (mult3); 60 | \path[tedge] (w22) -- (mult4); 61 | \path[tedge] (x22) -- (mult4); 62 | 63 | \path[tedge] (mult1) -- (sum1); 64 | \path[tedge] (mult2) -- (sum1); 65 | \path[tedge] (mult3) -- (sum2); 66 | \path[tedge] (mult4) -- (sum2); 67 | \path[tedge] (sum1) -- (sum3); 68 | \path[tedge] (b1) -- (sum3); 69 | \path[tedge] (sum2) -- (sum4); 70 | \path[tedge] (b2) -- (sum4); 71 | 72 | \path[tedge] (sum3) -- (exp1); 73 | \path[tedge] (sum4) -- (exp2); 74 | \path[tedge] (exp1) -- (sum5); 75 | \path[tedge] (exp2) -- (sum5); 76 | \path[tedge] (exp1) -- (div1); 77 | \path[tedge] (exp2) -- (div2); 78 | \path[tedge] (sum5) -- (div1); 79 | \path[tedge] (sum5) -- (div2); 80 | 81 | 82 | \path[tedge] (div1) -- (log1); 83 | \path[tedge] (div2) -- (log2); 84 | \path[tedge] (log1) -- (mult5); 85 | \path[tedge] (y1) -- (mult5); 86 | \path[tedge] (log2) -- (mult6); 87 | \path[tedge] (y2) -- (mult6); 88 | \path[tedge] (mult5) -- (sum6); 89 | \path[tedge] (mult6) -- (sum6); 90 | \path[tedge] (sum6) -- (minus1); 91 | 92 | \end{tikzpicture} 93 | } % scalebox 94 | \end{figure} 95 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/expanded_graph_5.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
% expanded_graph_5: frame in which the two nodes that combine each product sum with its bias (sum3, sum4 - labelled z_1 / z_2 in the other frames) use the `state` style and show numeric values (0.96 and -0.5); every other node keeps the `op` style with a symbolic label.
% NOTE(review): the `op`, `state` and `tedge` styles are not defined in this file - presumably they come from the deck's shared style definitions; confirm.
2 | \centering 3 | 4 | \scalebox{0.6}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input x and W 10 | \node[op] (w11) {$w_{11}$}; 11 | \node[op, below=10pt of w11] (x1) {$x_{1}$}; 12 | \node[op, below=20pt of x1] (w12) {$w_{12}$}; 13 | \node[op, below=10pt of w12] (x2) {$x_{2}$}; 14 | 15 | \node[op, below=20pt of x2] (w21) {$w_{21}$}; 16 | \node[op, below=10pt of w21] (x11) {$x_{1}$}; 17 | \node[op, below=20pt of x11] (w22) {$w_{22}$}; 18 | \node[op, below=10pt of w22] (x22) {$x_{2}$}; 19 | 20 | % multiplication 21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$}; 22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$}; 23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$}; 24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$}; 25 | 26 | % sum 27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$}; 28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$}; 29 | \node[state, right=25pt of sum1] (sum3) {$0.96$}; 30 | \node[state, right=25pt of sum2] (sum4) {$-0.5$}; 31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$}; 32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$}; 33 | 34 | % exp 35 | \node[op, right=25pt of sum3] (exp1) {$h_1$}; 36 | \node[op, right=25pt of sum4] (exp2) {$h_2$}; 37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$}; 38 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$}; 39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$}; 40 | 41 | % log 42 | \node[op, right=25pt of div1] (log1) {$\log$}; 43 | \node[op, right=25pt of div2] (log2) {$\log$}; 44 | \node[op, right=25pt of log1] (mult5) {$*$}; 45 | \node[op, right=25pt of log2] (mult6) {$*$}; 46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$}; 47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$}; 48 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$}; 49 | \node[op, right=25pt of sum6] (minus1) {$*-1$}; 50 | 51 | 52 | 53 | %edges 54 | 
\path[tedge] (w11) -- (mult1); 55 | \path[tedge] (x1) -- (mult1); 56 | \path[tedge] (w12) -- (mult2); 57 | \path[tedge] (x2) -- (mult2); 58 | \path[tedge] (w21) -- (mult3); 59 | \path[tedge] (x11) -- (mult3); 60 | \path[tedge] (w22) -- (mult4); 61 | \path[tedge] (x22) -- (mult4); 62 | 63 | \path[tedge] (mult1) -- (sum1); 64 | \path[tedge] (mult2) -- (sum1); 65 | \path[tedge] (mult3) -- (sum2); 66 | \path[tedge] (mult4) -- (sum2); 67 | \path[tedge] (sum1) -- (sum3); 68 | \path[tedge] (b1) -- (sum3); 69 | \path[tedge] (sum2) -- (sum4); 70 | \path[tedge] (b2) -- (sum4); 71 | 72 | \path[tedge] (sum3) -- (exp1); 73 | \path[tedge] (sum4) -- (exp2); 74 | \path[tedge] (exp1) -- (sum5); 75 | \path[tedge] (exp2) -- (sum5); 76 | \path[tedge] (exp1) -- (div1); 77 | \path[tedge] (exp2) -- (div2); 78 | \path[tedge] (sum5) -- (div1); 79 | \path[tedge] (sum5) -- (div2); 80 | 81 | 82 | \path[tedge] (div1) -- (log1); 83 | \path[tedge] (div2) -- (log2); 84 | \path[tedge] (log1) -- (mult5); 85 | \path[tedge] (y1) -- (mult5); 86 | \path[tedge] (log2) -- (mult6); 87 | \path[tedge] (y2) -- (mult6); 88 | \path[tedge] (mult5) -- (sum6); 89 | \path[tedge] (mult6) -- (sum6); 90 | \path[tedge] (sum6) -- (minus1); 91 | 92 | \end{tikzpicture} 93 | } % scalebox 94 | \end{figure} 95 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/expanded_graph_6.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{0.6}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input x and W 10 | \node[op] (w11) {$w_{11}$}; 11 | \node[op, below=10pt of w11] (x1) {$x_{1}$}; 12 | \node[op, below=20pt of x1] (w12) {$w_{12}$}; 13 | \node[op, below=10pt of w12] (x2) {$x_{2}$}; 14 | 15 | \node[op, below=20pt of x2] (w21) {$w_{21}$}; 16 | \node[op, below=10pt of w21] (x11) {$x_{1}$}; 17 | \node[op, below=20pt of x11] (w22) {$w_{22}$}; 18 | \node[op, below=10pt of w22] (x22) {$x_{2}$}; 19 | 20 | % multiplication 21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$}; 22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$}; 23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$}; 24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$}; 25 | 26 | % sum 27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$}; 28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$}; 29 | \node[op, right=25pt of sum1] (sum3) {$z_1$}; 30 | \node[op, right=25pt of sum2] (sum4) {$z_2$}; 31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$}; 32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$}; 33 | 34 | % exp 35 | \node[state, right=25pt of sum3] (exp1) {$2.61$}; 36 | \node[state, right=25pt of sum4] (exp2) {$0.6$}; 37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$}; 38 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$}; 39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$}; 40 | 41 | % log 42 | \node[op, right=25pt of div1] (log1) {$\log$}; 43 | \node[op, right=25pt of div2] (log2) {$\log$}; 44 | \node[op, right=25pt of log1] (mult5) {$*$}; 45 | \node[op, right=25pt of log2] (mult6) {$*$}; 46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$}; 47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$}; 48 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$}; 49 | \node[op, right=25pt of sum6] (minus1) {$*-1$}; 50 | 51 | 52 | 53 | %edges 54 | 
\path[tedge] (w11) -- (mult1); 55 | \path[tedge] (x1) -- (mult1); 56 | \path[tedge] (w12) -- (mult2); 57 | \path[tedge] (x2) -- (mult2); 58 | \path[tedge] (w21) -- (mult3); 59 | \path[tedge] (x11) -- (mult3); 60 | \path[tedge] (w22) -- (mult4); 61 | \path[tedge] (x22) -- (mult4); 62 | 63 | \path[tedge] (mult1) -- (sum1); 64 | \path[tedge] (mult2) -- (sum1); 65 | \path[tedge] (mult3) -- (sum2); 66 | \path[tedge] (mult4) -- (sum2); 67 | \path[tedge] (sum1) -- (sum3); 68 | \path[tedge] (b1) -- (sum3); 69 | \path[tedge] (sum2) -- (sum4); 70 | \path[tedge] (b2) -- (sum4); 71 | 72 | \path[tedge] (sum3) -- (exp1); 73 | \path[tedge] (sum4) -- (exp2); 74 | \path[tedge] (exp1) -- (sum5); 75 | \path[tedge] (exp2) -- (sum5); 76 | \path[tedge] (exp1) -- (div1); 77 | \path[tedge] (exp2) -- (div2); 78 | \path[tedge] (sum5) -- (div1); 79 | \path[tedge] (sum5) -- (div2); 80 | 81 | 82 | \path[tedge] (div1) -- (log1); 83 | \path[tedge] (div2) -- (log2); 84 | \path[tedge] (log1) -- (mult5); 85 | \path[tedge] (y1) -- (mult5); 86 | \path[tedge] (log2) -- (mult6); 87 | \path[tedge] (y2) -- (mult6); 88 | \path[tedge] (mult5) -- (sum6); 89 | \path[tedge] (mult6) -- (sum6); 90 | \path[tedge] (sum6) -- (minus1); 91 | 92 | \end{tikzpicture} 93 | } % scalebox 94 | \end{figure} 95 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/expanded_graph_7.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{0.6}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input x and W 10 | \node[op] (w11) {$w_{11}$}; 11 | \node[op, below=10pt of w11] (x1) {$x_{1}$}; 12 | \node[op, below=20pt of x1] (w12) {$w_{12}$}; 13 | \node[op, below=10pt of w12] (x2) {$x_{2}$}; 14 | 15 | \node[op, below=20pt of x2] (w21) {$w_{21}$}; 16 | \node[op, below=10pt of w21] (x11) {$x_{1}$}; 17 | \node[op, below=20pt of x11] (w22) {$w_{22}$}; 18 | \node[op, below=10pt of w22] (x22) {$x_{2}$}; 19 | 20 | % multiplication 21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$}; 22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$}; 23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$}; 24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$}; 25 | 26 | % sum 27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$}; 28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$}; 29 | \node[op, right=25pt of sum1] (sum3) {$z_1$}; 30 | \node[op, right=25pt of sum2] (sum4) {$z_2$}; 31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$}; 32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$}; 33 | 34 | % exp 35 | \node[state, right=25pt of sum3] (exp1) {$2.61$}; 36 | \node[state, right=25pt of sum4] (exp2) {$0.6$}; 37 | \node[state, below right=65pt and 15pt of exp1] (sum5) {$3.22$}; 38 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$}; 39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$}; 40 | 41 | % log 42 | \node[op, right=25pt of div1] (log1) {$\log$}; 43 | \node[op, right=25pt of div2] (log2) {$\log$}; 44 | \node[op, right=25pt of log1] (mult5) {$*$}; 45 | \node[op, right=25pt of log2] (mult6) {$*$}; 46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$}; 47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$}; 48 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$}; 49 | \node[op, right=25pt of sum6] (minus1) {$*-1$}; 50 | 51 | 52 | 53 | %edges 54 | 
\path[tedge] (w11) -- (mult1); 55 | \path[tedge] (x1) -- (mult1); 56 | \path[tedge] (w12) -- (mult2); 57 | \path[tedge] (x2) -- (mult2); 58 | \path[tedge] (w21) -- (mult3); 59 | \path[tedge] (x11) -- (mult3); 60 | \path[tedge] (w22) -- (mult4); 61 | \path[tedge] (x22) -- (mult4); 62 | 63 | \path[tedge] (mult1) -- (sum1); 64 | \path[tedge] (mult2) -- (sum1); 65 | \path[tedge] (mult3) -- (sum2); 66 | \path[tedge] (mult4) -- (sum2); 67 | \path[tedge] (sum1) -- (sum3); 68 | \path[tedge] (b1) -- (sum3); 69 | \path[tedge] (sum2) -- (sum4); 70 | \path[tedge] (b2) -- (sum4); 71 | 72 | \path[tedge] (sum3) -- (exp1); 73 | \path[tedge] (sum4) -- (exp2); 74 | \path[tedge] (exp1) -- (sum5); 75 | \path[tedge] (exp2) -- (sum5); 76 | \path[tedge] (exp1) -- (div1); 77 | \path[tedge] (exp2) -- (div2); 78 | \path[tedge] (sum5) -- (div1); 79 | \path[tedge] (sum5) -- (div2); 80 | 81 | 82 | \path[tedge] (div1) -- (log1); 83 | \path[tedge] (div2) -- (log2); 84 | \path[tedge] (log1) -- (mult5); 85 | \path[tedge] (y1) -- (mult5); 86 | \path[tedge] (log2) -- (mult6); 87 | \path[tedge] (y2) -- (mult6); 88 | \path[tedge] (mult5) -- (sum6); 89 | \path[tedge] (mult6) -- (sum6); 90 | \path[tedge] (sum6) -- (minus1); 91 | 92 | \end{tikzpicture} 93 | } % scalebox 94 | \end{figure} 95 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/expanded_graph_8.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{0.6}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input x and W 10 | \node[op] (w11) {$w_{11}$}; 11 | \node[op, below=10pt of w11] (x1) {$x_{1}$}; 12 | \node[op, below=20pt of x1] (w12) {$w_{12}$}; 13 | \node[op, below=10pt of w12] (x2) {$x_{2}$}; 14 | 15 | \node[op, below=20pt of x2] (w21) {$w_{21}$}; 16 | \node[op, below=10pt of w21] (x11) {$x_{1}$}; 17 | \node[op, below=20pt of x11] (w22) {$w_{22}$}; 18 | \node[op, below=10pt of w22] (x22) {$x_{2}$}; 19 | 20 | % multiplication 21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$}; 22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$}; 23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$}; 24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$}; 25 | 26 | % sum 27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$}; 28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$}; 29 | \node[op, right=25pt of sum1] (sum3) {$z_1$}; 30 | \node[op, right=25pt of sum2] (sum4) {$z_2$}; 31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$}; 32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$}; 33 | 34 | % exp 35 | \node[op, right=25pt of sum3] (exp1) {$h_1$}; 36 | \node[op, right=25pt of sum4] (exp2) {$h_2$}; 37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$}; 38 | \node[state, right=35pt of exp1] (div1) {$0.81$}; 39 | \node[state, right=35pt of exp2] (div2) {$0.19$}; 40 | 41 | % log 42 | \node[op, right=25pt of div1] (log1) {$\log$}; 43 | \node[op, right=25pt of div2] (log2) {$\log$}; 44 | \node[op, right=25pt of log1] (mult5) {$*$}; 45 | \node[op, right=25pt of log2] (mult6) {$*$}; 46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$}; 47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$}; 48 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$}; 49 | \node[op, right=25pt of sum6] (minus1) {$*-1$}; 50 | 51 | 52 | 53 | %edges 54 | \path[tedge] (w11) 
-- (mult1); 55 | \path[tedge] (x1) -- (mult1); 56 | \path[tedge] (w12) -- (mult2); 57 | \path[tedge] (x2) -- (mult2); 58 | \path[tedge] (w21) -- (mult3); 59 | \path[tedge] (x11) -- (mult3); 60 | \path[tedge] (w22) -- (mult4); 61 | \path[tedge] (x22) -- (mult4); 62 | 63 | \path[tedge] (mult1) -- (sum1); 64 | \path[tedge] (mult2) -- (sum1); 65 | \path[tedge] (mult3) -- (sum2); 66 | \path[tedge] (mult4) -- (sum2); 67 | \path[tedge] (sum1) -- (sum3); 68 | \path[tedge] (b1) -- (sum3); 69 | \path[tedge] (sum2) -- (sum4); 70 | \path[tedge] (b2) -- (sum4); 71 | 72 | \path[tedge] (sum3) -- (exp1); 73 | \path[tedge] (sum4) -- (exp2); 74 | \path[tedge] (exp1) -- (sum5); 75 | \path[tedge] (exp2) -- (sum5); 76 | \path[tedge] (exp1) -- (div1); 77 | \path[tedge] (exp2) -- (div2); 78 | \path[tedge] (sum5) -- (div1); 79 | \path[tedge] (sum5) -- (div2); 80 | 81 | 82 | \path[tedge] (div1) -- (log1); 83 | \path[tedge] (div2) -- (log2); 84 | \path[tedge] (log1) -- (mult5); 85 | \path[tedge] (y1) -- (mult5); 86 | \path[tedge] (log2) -- (mult6); 87 | \path[tedge] (y2) -- (mult6); 88 | \path[tedge] (mult5) -- (sum6); 89 | \path[tedge] (mult6) -- (sum6); 90 | \path[tedge] (sum6) -- (minus1); 91 | 92 | \end{tikzpicture} 93 | } % scalebox 94 | \end{figure} 95 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/expanded_graph_9.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{0.6}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input x and W 10 | \node[op] (w11) {$w_{11}$}; 11 | \node[op, below=10pt of w11] (x1) {$x_{1}$}; 12 | \node[op, below=20pt of x1] (w12) {$w_{12}$}; 13 | \node[op, below=10pt of w12] (x2) {$x_{2}$}; 14 | 15 | \node[op, below=20pt of x2] (w21) {$w_{21}$}; 16 | \node[op, below=10pt of w21] (x11) {$x_{1}$}; 17 | \node[op, below=20pt of x11] (w22) {$w_{22}$}; 18 | \node[op, below=10pt of w22] (x22) {$x_{2}$}; 19 | 20 | % multiplication 21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$}; 22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$}; 23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$}; 24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$}; 25 | 26 | % sum 27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$}; 28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$}; 29 | \node[op, right=25pt of sum1] (sum3) {$z_1$}; 30 | \node[op, right=25pt of sum2] (sum4) {$z_2$}; 31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$}; 32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$}; 33 | 34 | % exp 35 | \node[op, right=25pt of sum3] (exp1) {$h_1$}; 36 | \node[op, right=25pt of sum4] (exp2) {$h_2$}; 37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$}; 38 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$}; 39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$}; 40 | 41 | % log 42 | \node[state, right=25pt of div1] (log1) {$-0.207$}; 43 | \node[state, right=25pt of div2] (log2) {$-1.67$}; 44 | \node[op, right=25pt of log1] (mult5) {$*$}; 45 | \node[op, right=25pt of log2] (mult6) {$*$}; 46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$}; 47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$}; 48 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$}; 49 | \node[op, right=25pt of sum6] (minus1) {$*-1$}; 50 | 51 | 52 | 53 | %edges 54 | 
\path[tedge] (w11) -- (mult1); 55 | \path[tedge] (x1) -- (mult1); 56 | \path[tedge] (w12) -- (mult2); 57 | \path[tedge] (x2) -- (mult2); 58 | \path[tedge] (w21) -- (mult3); 59 | \path[tedge] (x11) -- (mult3); 60 | \path[tedge] (w22) -- (mult4); 61 | \path[tedge] (x22) -- (mult4); 62 | 63 | \path[tedge] (mult1) -- (sum1); 64 | \path[tedge] (mult2) -- (sum1); 65 | \path[tedge] (mult3) -- (sum2); 66 | \path[tedge] (mult4) -- (sum2); 67 | \path[tedge] (sum1) -- (sum3); 68 | \path[tedge] (b1) -- (sum3); 69 | \path[tedge] (sum2) -- (sum4); 70 | \path[tedge] (b2) -- (sum4); 71 | 72 | \path[tedge] (sum3) -- (exp1); 73 | \path[tedge] (sum4) -- (exp2); 74 | \path[tedge] (exp1) -- (sum5); 75 | \path[tedge] (exp2) -- (sum5); 76 | \path[tedge] (exp1) -- (div1); 77 | \path[tedge] (exp2) -- (div2); 78 | \path[tedge] (sum5) -- (div1); 79 | \path[tedge] (sum5) -- (div2); 80 | 81 | 82 | \path[tedge] (div1) -- (log1); 83 | \path[tedge] (div2) -- (log2); 84 | \path[tedge] (log1) -- (mult5); 85 | \path[tedge] (y1) -- (mult5); 86 | \path[tedge] (log2) -- (mult6); 87 | \path[tedge] (y2) -- (mult6); 88 | \path[tedge] (mult5) -- (sum6); 89 | \path[tedge] (mult6) -- (sum6); 90 | \path[tedge] (sum6) -- (minus1); 91 | 92 | \end{tikzpicture} 93 | } % scalebox 94 | \end{figure} 95 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/fashionMNIST.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{1.05}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % nodes 10 | \node (shirt) 11 | {\includegraphics[width=.15\textwidth]{images/shirt.png}}; 12 | \node[textonly, below=1pt of shirt] (dimension0) {{\small$28\times 28$}}; 13 | \node[textonly, above right= 10pt and 90pt of shirt] (vector) {$\begin{bmatrix}0.34\\ \vdots \\0.06\end{bmatrix}$}; 14 | \node[textonly, above=1pt of vector] (x) {$\vect{x}$}; 15 | \node[textonly, below=1pt of vector] (dimension1) {{\small$784\times 1$}}; 16 | 17 | \node[textonly, below=30pt of vector] (y) {$y=1$}; 18 | 19 | \node[textonly, below=30pt of y] (one-hot) {$\begin{bmatrix}1\\ \vdots \\0\end{bmatrix}$}; 20 | 21 | \node[textonly, above=1pt of one-hot] (yvector) {$\vect{y}$}; 22 | \node[textonly, below=1pt of one-hot] (dimension2) {{\small$10\times 1$}}; 23 | 24 | 25 | 26 | % edges 27 | \path[tedge, orange!120, line width=1mm] (shirt) -- (vector); 28 | \path[tedge, orange!120, line width=1mm] (shirt) -- (y); 29 | \path[tedge, orange!120, line width=1mm] (shirt) -- (one-hot); 30 | 31 | 32 | 33 | \end{tikzpicture} 34 | } % scalebox 35 | \end{figure} 36 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/feature_engineering.tex: -------------------------------------------------------------------------------- 1 | \begin{center} 2 | \begin{tikzpicture} 3 | \node (original) at (1, 6) 4 | {\includegraphics[width=.15\textwidth]{images/original.png}}; 5 | \node (segmented) at (5, 6) 6 | {\includegraphics[width=.15\textwidth]{images/segmented.png}}; 7 | 8 | \node (circled) at (0, 3) 9 | {\includegraphics[width=.15\textwidth]{images/circled.png}}; 10 | \node (rected) at (3, 3) 11 | {\includegraphics[width=.15\textwidth]{images/rected.png}}; 12 | \node (ellipsed) at (6, 3) 13 | {\includegraphics[width=.15\textwidth]{images/ellipsed.png}}; 14 | 15 | \draw[->, thick] (original.east) -- (segmented.west); 16 | 
\draw[->, thick] (segmented.south) -- (circled.north); 17 | \draw[->, thick] (segmented.south) -- (rected.north); 18 | \draw[->, thick] (segmented.south) -- (ellipsed.north); 19 | \draw[->, thick] (circled.south) -- (0.5, 0.7); 20 | \draw[->, thick] (rected.south) -- (3, 0.7); 21 | \draw[->, thick] (ellipsed.south) -- (5.5, 0.7); 22 | 23 | \foreach \i in {0,...,12} 24 | { 25 | \pgfkeys{/pgf/number format/.cd,fixed,precision=0} 26 | \pgfmathsetmacro\myvalue{abs(rand) * 10} 27 | \draw[fill=blue!45!white] (0.5 * \i, 0) rectangle (0.5 + 0.5 * \i, 0.5) node[pos=.5]{\pgfmathprintnumber\myvalue}; 28 | } 29 | \end{tikzpicture} 30 | \end{center} -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/feature_map.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{0.7}{ 5 | \begin{tikzpicture}[scale=1.4,every node/.style={minimum size=1cm}, on grid] 6 | \draw[fill=cyan,opacity=1] (0,0) rectangle (3,3); % fully opaque 3x3 cyan background for the feature map; opacity must stay within [0,1] (it was 1.2) 7 | \draw[draw=base03,thick] (0,0) grid (3,3); 8 | \node (00) at (0.5,2.5) {\large 12.0}; 9 | \node (01) at (1.5,2.5) {\large 12.0}; 10 | \node (02) at (2.5,2.5) {\large 17.0}; 11 | \node (10) at (0.5,1.5) {\large 10.0}; 12 | \node (11) at (1.5,1.5) {\large 17.0}; 13 | \node (12) at (2.5,1.5) {\large 19.0}; 14 | \node (20) at (0.5,0.5) {\large 9.0}; 15 | \node (21) at (1.5,0.5) {\large 6.0}; 16 | \node (22) at (2.5,0.5) {\large 14.0}; 17 | \end{tikzpicture} 18 | } % scalebox 19 | \end{figure} 20 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/log.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{1.5}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | \node[op] (times) {$\log$}; 9 | \node[op, left=20pt of times] (a) {$a$}; 10 | \node[gradient, above=30pt of a] (da) {$\frac{\partial f}{\partial a}$}; 11 | \node[textonly, right=0.1pt of da] {$=\frac{1}{a}$}; 12 | \node[textonly, right=0.1pt of times] {$=f(a)=\log(a)$}; 13 | 14 | % edges 15 | \path[tedge] (a) -- (times); 16 | \path[tedge] (a) -- (da); 17 | 18 | \end{tikzpicture} 19 | } % scalebox 20 | \end{figure} 21 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/lr_graph.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{0.8}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | 9 | % input x and W 10 | \node[op] (w1) {$w_{1}$}; 11 | \node[op, below=10pt of w1] (x1) {$x_{1}$}; 12 | \node[op, below=20pt of x1] (w2) {$w_{2}$}; 13 | \node[op, below=10pt of w2] (x2) {$x_{2}$}; 14 | 15 | % multiplication 16 | \node[op, below right=1pt and 40pt of w1] (mult1) {$*$}; 17 | \node[op, below right=1pt and 40pt of w2] (mult2) {$*$}; 18 | % sum 19 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$}; 20 | \node[op, right=65pt of sum1] (sum2) {$+$}; 21 | \node[op, below=35pt of sum2] (minus) {$-1$}; 22 | \node[op, left=25pt of minus] (y) {$y$}; 23 | \node[op, right=45pt of sum2] (squ) {$squ$}; 24 | 25 | 26 | %edges 27 | \path[tedge] (w1) -- (mult1); 28 | \path[tedge] (x1) -- (mult1); 29 | \path[tedge] (w2) -- (mult2); 30 | \path[tedge] (x2) -- (mult2); 31 | 32 | \path[tedge] (mult1) -- (sum1); 33 | \path[tedge] (mult2) -- (sum1); 34 | \path[tedge] (sum1) -- (sum2); 35 | \path[tedge] (y) -- (minus); 36 | \path[tedge] (minus) -- (sum2); 37 | \path[tedge] (sum2) -- (squ); 38 | \end{tikzpicture} 39 | } % scalebox 40 | \end{figure} 41 | 
-------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/lr_graph1.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{0.8}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations: graph for squ(w_1 x_1 + w_2 x_2 + (-1 node fed by y)); the nodes w1, mult1, sum1, sum2 and squ use style op2 instead of op, i.e. the chain from w_1 to the output (op2 is defined outside this file; presumably a highlight -- confirm in definitions/styles.tex) ============================= 8 | 9 | % inputs: weights w_1, w_2 and features x_1, x_2, stacked vertically 10 | \node[op2] (w1) {$w_{1}$}; 11 | \node[op, below=10pt of w1] (x1) {$x_{1}$}; 12 | \node[op, below=20pt of x1] (w2) {$w_{2}$}; 13 | \node[op, below=10pt of w2] (x2) {$x_{2}$}; 14 | 15 | % multiplication: one product node per (w_i, x_i) pair 16 | \node[op2, below right=1pt and 40pt of w1] (mult1) {$*$}; 17 | \node[op, below right=1pt and 40pt of w2] (mult2) {$*$}; 18 | % sums: sum1 adds the two products, sum2 adds sum1 and the -1 node (whose input is y); squ is the final node 19 | \node[op2, below right=25pt and 20pt of mult1] (sum1) {$+$}; 20 | \node[op2, right=65pt of sum1] (sum2) {$+$}; 21 | \node[op, below=35pt of sum2] (minus) {$-1$}; 22 | \node[op, left=25pt of minus] (y) {$y$}; 23 | \node[op2, right=45pt of sum2] (squ) {$squ$}; 24 | 25 | 26 | % edges: inputs -> products -> sum1 -> sum2 -> squ, plus y -> -1 -> sum2 27 | \path[tedge] (w1) -- (mult1); 28 | \path[tedge] (x1) -- (mult1); 29 | \path[tedge] (w2) -- (mult2); 30 | \path[tedge] (x2) -- (mult2); 31 | 32 | \path[tedge] (mult1) -- (sum1); 33 | \path[tedge] (mult2) -- (sum1); 34 | \path[tedge] (sum1) -- (sum2); 35 | \path[tedge] (y) -- (minus); 36 | \path[tedge] (minus) -- (sum2); 37 | \path[tedge] (sum2) -- (squ); 38 | \end{tikzpicture} 39 | } % scalebox 40 | \end{figure} 41 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/lr_graph_grad.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{0.7}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations: forward graph (inputs, products, sums, squ), always visible; the gradient nodes further below are revealed step by step with \visible ============================= 8 | 9 | % input x and W 10 | \node[op] (w1) {$w_{1}$}; 11 | \node[op, below=10pt of w1] (x1) {$x_{1}$}; 12 | \node[op, below=20pt of x1] (w2) {$w_{2}$}; 13 | \node[op, below=10pt of w2] (x2) {$x_{2}$}; 14 | 15 | % multiplication 16 | \node[op, below right=1pt and 40pt of w1] (mult1) {$*$}; 17 | \node[op, below right=1pt and 40pt of w2] (mult2) {$*$}; 18 | % sum 19 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$}; 20 | \node[op, right=65pt of sum1] (sum2) {$+$}; 21 | \node[op, below=35pt of sum2] (minus) {$-1$}; 22 | \node[op, left=25pt of minus] (y) {$y$}; 23 | \node[op, right=45pt of sum2] (squ) {$squ$}; 24 | 25 | 26 | 27 | % gradients 1: `gradient`-style nodes placed directly above sum2, sum1, mult1 and w1, revealed on overlay steps 3, 5, 7 and 9 (presumably the local derivative at each node -- confirm against the slide text) 28 | \visible<3->{\node[gradient, above=10pt of sum2] (dsum2) {$2(\hat{y} - y)$};} 29 | \visible<5->{\node[gradient, above=10pt of sum1] (dsum1) {$1$};} 30 | \visible<7->{\node[gradient, above=10pt of mult1] (dmult1) {$1$};} 31 | \visible<9->{\node[gradient, above=10pt of w1] (dw1) {$x_1$};} 32 | 33 | % gradients 2: `gradient2`-style nodes revealed on overlay steps 2, 4, 6, 8 and 10, interleaved with the nodes above; labels go from 1 above squ to 2(\hat{y} - y)x_1 at dLdw1 (presumably the chain-rule product accumulated from the output back to w_1) 34 | \visible<2->{\node[gradient2, above=35pt of squ] (dLdL) {$1$};} 35 | \visible<4->{\node[gradient2, above right=35pt and 15pt of dsum2] (dLdLpp) {$2(\hat{y} - y)$};} 36 | \visible<6->{\node[gradient2, above left=35pt and 15pt of dsum2] (dLdsum2) {$2(\hat{y} - y)$};} 37 | \visible<8->{\node[gradient2, above left=10pt and 25pt of dLdsum2] (dLdmult1) {$2(\hat{y} - y)$};} 38 | \visible<10->{\node[gradient2, left=25pt of dLdmult1] (dLdw1) {$2(\hat{y} - y)x_1$};} 39 | 40 | 41 | 42 | 43 | % edges: forward-graph edges first (always visible), then the gradient edges, each wrapped in \visible 44 | \path[tedge] (w1) -- (mult1); 45 | \path[tedge] (x1) -- (mult1); 46 | \path[tedge] (w2) -- (mult2); 47 | \path[tedge] (x2) -- (mult2); 48 | 49 | \path[tedge] (mult1) -- (sum1); 50 | \path[tedge] (mult2) -- (sum1); 51 | \path[tedge] (sum1) -- (sum2); 52 | \path[tedge] (y) -- (minus); 53 | \path[tedge] (minus) -- (sum2); 54 | \path[tedge] (sum2) -- (squ); 55 | 56 | \visible<4->{\path[tedge] (dLdL) -- 
(dLdLpp);} 57 | \visible<4->{\path[tedge] (dsum2) -- (dLdLpp);} 58 | \visible<3->{\path[tedge] (sum2) -- (dsum2);} 59 | \visible<6->{\path[tedge] (dLdLpp) -- (dLdsum2);} 60 | 61 | % \visible<4->{\path[tedge] (dsum2) -- (dLdsum2);} 62 | \visible<6->{\path[tedge] (dsum1) -- (dLdsum2);} 63 | \visible<8->{\path[tedge] (dLdsum2) -- (dLdmult1);} 64 | \visible<8->{\path[tedge] (dmult1) -- (dLdmult1);} 65 | \visible<10->{\path[tedge] (dLdmult1) -- (dLdw1);} 66 | \visible<10->{\path[tedge] (dw1) -- (dLdw1);} 67 | 68 | 69 | \end{tikzpicture} 70 | } % scalebox 71 | \end{figure} 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/minus1.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{1.5}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | \node[op] (times) {$*-1$}; 9 | \node[op, left=20pt of times] (a) {$a$}; 10 | \node[gradient, above=30pt of a] (da) {$\frac{\partial f}{\partial a}$}; 11 | \node[textonly, right=0.1pt of da] {$=-1$}; 12 | \node[textonly, right=0.1pt of times] {$=f(a) =-a$}; 13 | 14 | % edges 15 | \path[tedge] (a) -- (times); 16 | 17 | \end{tikzpicture} 18 | } % scalebox 19 | \end{figure} 20 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/mult.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{1.2}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | \node[op] (times) {$\times$}; 9 | \node[op, above left=20pt of times] (a) {$a$}; 10 | \node[op, below left=20pt of times] (b) {$b$}; 11 | \node[gradient, above left=15pt and 20pt of a] (da) {$\frac{\partial f}{\partial a}$}; 12 | \node[gradient, below left=15pt and 20pt of b] (db) {$\frac{\partial f}{\partial b}$}; 13 | \node[textonly, right=0.1pt of da] {$=b$}; 14 | \node[textonly, right=0.1pt of db] {$=a$}; 15 | \node[textonly, right=0.1pt of times] {$=f(a,b) =a*b$}; 16 | 17 | % edges 18 | \path[tedge] (a) -- (times); 19 | \path[tedge] (b) -- (times); 20 | \path[tedge] (a) -- (db); 21 | \path[tedge] (b) -- (da); 22 | 23 | 24 | \end{tikzpicture} 25 | } % scalebox 26 | \end{figure} 27 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/multiple_paths.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{1.2}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | \node[op] (div) {$div$}; 9 | \node[textonly, right=0.1pt of div] {$=f(x,y)=\frac{x}{y}=\frac{a*c}{a+b}$}; 10 | \node[op, above left=20pt of div] (mult) {$*$}; 11 | \node[op, below left=20pt of div] (plus) {$+$}; 12 | \node[op, below=20pt of plus] (b2) {$b$}; 13 | \node[op, above=20pt of mult] (b1) {$c$}; 14 | \node[op, left=55pt of div] (a) {$a$}; 15 | \node[textonly, above right=5pt and 5pt of div] (inv1) {$\frac{\partial f}{\partial x} \frac{\partial x}{\partial a}$}; 16 | \node[textonly, above left=2pt and 2pt of a] (inv2) {}; 17 | \node[textonly, below right=5pt and 5pt of div] (inv3) {$\frac{\partial f}{\partial y} \frac{\partial y}{\partial a}$}; 18 | \node[textonly, below left=2pt and 2pt of a] (inv4) {}; 19 | 20 | 21 | % edges 22 | \path[tedge] (a) -- (mult); 23 | \path[tedge] (b1) -- (mult); 24 | \path[tedge] (a) -- (plus); 25 | \path[tedge] (b2) -- (plus); 26 | \path[tedge] (plus) -- (div); 27 | \path[tedge] (mult) -- (div); 28 | \path[tedge, nephritis!60, line width=1mm] (inv1) to [out=120,in=80] (inv2); 29 | \path[tedge, nephritis!60, line width=1mm] (inv3) to [out=-120,in=-80] (inv4); 30 | 31 | 32 | 33 | \end{tikzpicture} 34 | } % scalebox 35 | \end{figure} 36 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/perceptron.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{1.15}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | \node[op] (x2) {$x_2$}; 9 | \node[op, above=40pt of x2] (x1) {$x_1$}; 10 | \node[op, below=40pt of x2] (x3) {$x_3$}; 11 | \node[op, right=60pt of x2] (vk) {$v$}; 12 | \node[op, right=40pt of vk] (yk) {$\hat{y}$}; 13 | \node[textonly, below right=20pt of x3] (Synaptic) {Synaptic link}; 14 | \node[textonly, below right=50pt of vk] (Activation) {Activation link}; 15 | \node[textonly, above right=28pt and -46pt of yk] (f) {$\hat{y} = f(\vect{x};\vect{\theta})= g\left(\sum_{i=1}^{3} \theta_ix_i\right)$}; 16 | 17 | 18 | % edges 19 | \path[tedge] (x1) edge node[above=1.8pt] {$\theta_{1}$} (vk); 20 | \path[tedge] (x2) edge node[above=0.2pt] {$\theta_{2}$} (vk); 21 | \path[tedge] (x3) edge node[above=3.8pt] {$\theta_{3}$} (vk); 22 | \path[tedge] (vk) edge node[above=1pt] {{\Large$g$}} (yk) ; 23 | 24 | % info edges 25 | \draw[orange!120, line width=1mm] (Synaptic) to [out=150,in=0] (x3); 26 | \draw[orange!120, line width=1mm] (Synaptic) to [out=150,in=-100] (vk); 27 | 28 | \draw[orange!120, line width=1mm] (Activation) to [out=170,in=-40] (vk); 29 | \draw[orange!120, line width=1mm] (Activation) to [out=170,in=-100] (yk); 30 | 31 | 32 | 33 | \end{tikzpicture} 34 | } % scalebox 35 | \end{figure} 36 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/simple_example.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{1.0}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | \node[op] (z) {$z$}; 9 | \node[op, above left=50pt of z] (x) {$x$}; 10 | \node[op, below left=40pt of z] (y) {$y$}; 11 | \node[op, right=40pt of z] (u) {$u$}; 12 | 13 | 14 | % edges ============================= 15 | \path[tedge] (x) -- (z); 16 | \path[tedge] (y) -- (z); 17 | \path[tedge] (z) -- (u); 18 | \end{tikzpicture} 19 | } % scalebox 20 | \end{figure} 21 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/simple_example0.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{1.0}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | \node[op] (x) {$x$}; 9 | \node[op, right=40pt of x] (y) {$y$}; 10 | \node[op, right=40pt of y] (u) {$u$}; 11 | 12 | 13 | % edges ============================= 14 | \path[tedge] (x) -- (y); 15 | \path[tedge] (y) -- (u); 16 | \end{tikzpicture} 17 | } % scalebox 18 | \end{figure} 19 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/simple_exampleCR.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{0.7}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | \node[op] (x) {$x$}; 9 | \node[op, right=40pt of x] (y) {$y$}; 10 | \node[op, right=40pt of y] (u) {$u$}; 11 | 12 | 13 | % edges ============================= 14 | \path[tedge] (x) -- (y); 15 | \path[tedge] (y) -- (u); 16 | \end{tikzpicture} 17 | } % scalebox 18 | \end{figure} 19 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/soma.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 
2 | \centering 3 | 4 | \scalebox{1.2}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | \node[op] (times) {$+$}; 9 | \node[op, above left=20pt of times] (a) {$a$}; 10 | \node[op, below left=20pt of times] (b) {$b$}; 11 | \node[gradient, above=15pt of a] (da) {$\frac{\partial f}{\partial a}$}; 12 | \node[gradient, below=15pt of b] (db) {$\frac{\partial f}{\partial b}$}; 13 | \node[textonly, right=0.1pt of da] {$=1$}; 14 | \node[textonly, right=0.1pt of db] {$=1$}; 15 | \node[textonly, right=0.1pt of times] {$=f(a,b) = a+b$}; 16 | 17 | % edges 18 | \path[tedge] (a) -- (times); 19 | \path[tedge] (b) -- (times); 20 | 21 | 22 | \end{tikzpicture} 23 | } % scalebox 24 | \end{figure} 25 | -------------------------------------------------------------------------------- /slides/backprop1/TikzFiles/squ.tex: -------------------------------------------------------------------------------- 1 | \begin{figure}[ht!] 2 | \centering 3 | 4 | \scalebox{1.5}{ 5 | \begin{tikzpicture}[auto] 6 | 7 | % operations ============================= 8 | \node[op] (times) {$squ$}; 9 | \node[op, left=20pt of times] (a) {$a$}; 10 | \node[gradient, above=30pt of a] (da) {$\frac{\partial f}{\partial a}$}; 11 | \node[textonly, right=0.1pt of da] {$=2a$}; 12 | \node[textonly, right=0.1pt of times] {$=f(a)=a^2$}; 13 | 14 | % edges 15 | \path[tedge] (a) -- (times); 16 | \path[tedge] (a) -- (da); 17 | 18 | \end{tikzpicture} 19 | } % scalebox 20 | \end{figure} 21 | -------------------------------------------------------------------------------- /slides/backprop1/all_imports.tex: -------------------------------------------------------------------------------- 1 | \usepackage[utf8]{inputenc} 2 | \usepackage[T1]{fontenc} 3 | \usepackage{lmodern} 4 | \usepackage{appendixnumberbeamer} 5 | \usepackage{hyperref} 6 | \usepackage{booktabs} 7 | \usepackage{bm} 8 | \usepackage[scale=2]{ccicons} 9 | \usepackage[outputdir=build]{minted} 10 | \usepackage{pgfplots} 11 | 
\usepackage{array,colortbl,xcolor} 12 | \usepgfplotslibrary{dateplot} 13 | \usepackage{setspace} 14 | \usepackage{etoolbox} 15 | \usepackage{xspace} 16 | \usepackage{tikz} 17 | \usetikzlibrary{shapes,arrows,positioning,fit,backgrounds} 18 | \usepackage{tkz-euclide} 19 | \usepackage{soul} 20 | \usepackage{ragged2e} 21 | \usepackage{algorithm,algorithmic} 22 | -------------------------------------------------------------------------------- /slides/backprop1/all_new_commands.tex: -------------------------------------------------------------------------------- 1 | \newcommand{\themename}{\textbf{\textsc{metropolis}}\xspace} 2 | \newcommand{\vect}[1]{\bm{#1}} 3 | \newcommand{\myprime}[1]{{#1}^{\prime}} 4 | \newcommand{\grad}[2]{\nabla_{#1} {#2}} 5 | \newcommand{\dotp}[2]{{#1}^{\top}{#2}} 6 | \newcommand{\dotpPright}[2]{{#1}^{\top}\left({#2}\right)} 7 | \newcommand{\outerp}[2]{\left({#1}\right){#2}^{\top}} 8 | \newcommand{\Jacobian}[2]{\frac{\partial #1}{\partial #2}} 9 | \newcommand{\Vocab}{\mathbb{V}} 10 | \DeclareMathOperator*{\argmin}{arg\,min} 11 | \DeclareMathOperator*{\argmax}{arg\,max} 12 | \DeclareMathOperator{\E}{\mathbb{E}} 13 | 14 | 15 | % Quote with author reference at the end 16 | \let\oldquote\quote 17 | \let\endoldquote\endquote 18 | \renewenvironment{quote}[2][] 19 | {\if\relax\detokenize{#1}\relax 20 | \def\quoteauthor{#2}% 21 | \else 22 | \def\quoteauthor{#2~---~#1}% 23 | \fi 24 | \oldquote} 25 | {\par\nobreak\smallskip\hfill(\quoteauthor)% 26 | \endoldquote\addvspace{\bigskipamount}} 27 | %----------------------------------------- 28 | -------------------------------------------------------------------------------- /slides/backprop1/definitions/colors.tex: -------------------------------------------------------------------------------- 1 | \definecolor{blue}{RGB}{159, 192, 176} 2 | \definecolor{blue2}{RGB}{38,139,210} 3 | \definecolor{green}{RGB}{160, 227, 127} 4 | \definecolor{green2}{RGB}{132, 164, 76} 5 | \definecolor{orange}{RGB}{243, 188, 125} 6 
| \definecolor{red}{RGB}{253, 123, 84} 7 | \definecolor{nephritis}{RGB}{39, 174, 96} 8 | \definecolor{emerald}{RGB}{46, 204, 113} 9 | \definecolor{turquoise}{RGB}{39, 174, 96} 10 | \definecolor{green-sea}{RGB}{22, 160, 133} 11 | \definecolor{base02}{RGB}{7,54,66} 12 | \definecolor{base03}{RGB}{0,43,54} 13 | \definecolor{cyan}{RGB}{42,161,152} -------------------------------------------------------------------------------- /slides/backprop1/definitions/styles.tex: -------------------------------------------------------------------------------- 1 | % Tikzstyles for Computation Graphs 2 | 3 | % nodes 4 | \tikzstyle{noop} = [circle, draw=none, fill=red, minimum size = 10pt] 5 | \tikzstyle{op} = [circle, draw=red, line width=1.5pt, fill=red!70, text=black, text centered, font=\bf \normalsize, minimum size = 25pt] 6 | \tikzstyle{op2} = [circle, draw=orange, line width=1.5pt, fill=orange!70, text=black, text centered, font=\bf \normalsize, minimum size = 25pt] 7 | \tikzstyle{op3} = [circle, draw=orange, line width=1.5pt, fill=orange!70, text=black, text centered, font=\bf \scriptsize, minimum size = 7pt] 8 | \tikzstyle{placeholder} = [circle, draw=red, line width=1.5pt, fill=red!30, text=black, text centered, font=\bf \normalsize, minimum size = 25pt] 9 | \tikzstyle{state} = [circle, draw=blue, line width=1.5pt, fill=blue!70, text=black, text centered, font=\bf \normalsize, minimum size = 25pt] 10 | \tikzstyle{gradient} = [circle, draw=nephritis, line width=1.5pt, fill=nephritis!60, text=black, text centered, font=\bf \normalsize, minimum size = 25pt] 11 | \tikzstyle{gradient2} = [circle, draw=green2, line width=1.5pt, fill=green2!60, text=black, text centered, font=\bf \normalsize, minimum size = 25pt] 12 | \tikzstyle{textonly} = [draw=none, fill=none, text centered, font=\bf \normalsize] 13 | 14 | % edges 15 | % \tikzstyle{tedge} = [draw, thick, >=stealth, ->] 16 | \tikzstyle{tedge} = [draw, thick, >=latex, ->] 17 | \tikzstyle{tedge_dashed} = [draw, thick, >=latex, ->, 
dashed] 18 | 19 | % namedscope 20 | \tikzstyle{namedscope} = [circle, draw=orange, line width=1.5pt, fill=orange!60, align=center, inner sep=0pt] 21 | 22 | % \tikzstyle{container} = [draw=none, rectangle, dotted, inner ysep=1.5em] 23 | % \tikzstyle{novertex} = [draw=none, fill=none, text centered] 24 | % \tikzstyle{predicate} = [ellipse, draw, thick, text centered, rounded corners, minimum size=30pt] 25 | % \tikzstyle{aux} = [rectangle, draw, thick, text centered, rounded corners, minimum size=30pt] 26 | % \tikzstyle{ledge} = [draw, dashed, thick, >=stealth, ->] 27 | % \tikzstyle{pedge} = [draw, thick, >=stealth, ->] 28 | -------------------------------------------------------------------------------- /slides/backprop1/header.tex: -------------------------------------------------------------------------------- 1 | \title{MAC0460 - Introdução ao aprendizado de máquina \\ \vspace{0.2 cm} Back-propagation 1} 2 | \date{\today} 3 | 4 | \author{ 5 | Felipe Salvatore\\ 6 | \url{https://felipessalvatore.github.io/}\vspace{0.4 cm} 7 | \and\\ 8 | Nina S. T. 
Hirata\\ 9 | \url{https://www.ime.usp.br/~nina/} 10 | \vspace{0.4 cm} 11 | } 12 | 13 | \institute{\textbf{IME-USP}: Institute of Mathematics and Statistics, University of São Paulo} 14 | 15 | 16 | -------------------------------------------------------------------------------- /slides/backprop1/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/slides/backprop1/images/logo.png -------------------------------------------------------------------------------- /slides/backprop1/main.tex: -------------------------------------------------------------------------------- 1 | \documentclass[10pt]{beamer} 2 | \usetheme{metropolis} 3 | % all imports 4 | \input{all_imports} 5 | 6 | \AtBeginEnvironment{quote}{\singlespacing} 7 | 8 | % new commands 9 | \input{all_new_commands} 10 | 11 | % definitions 12 | \input{definitions/colors} 13 | \input{definitions/styles} 14 | 15 | \input{header} 16 | 17 | \begin{document} 18 | \nocite{DeepLearningbook} 19 | 20 | \maketitle 21 | 22 | \section{Revisão: regressão logística} 23 | 24 | \begin{frame}{O problema de classificação} 25 | 26 | \begin{itemize} 27 | \item [] Em vários casos a função desconhecida $f:\mathbb{R}^{d} \rightarrow \mathbb{R}$ que queremos aproximar é uma \textbf{distribuição de probabilidade}. 28 | \vspace{0.3cm} 29 | \item[] Temos um vetor $\vect{x}$ e queremos saber a qual das classes $k_1, \dots, k_n$ ele pertence. Um modo de formular esse problema como um problema de aprendizado supervisionado é coletar um conjunto de dados $(\vect{x}_{1}, y_{1}), \dots ,(\vect{x}_{N}, y_{N})$ onde $y_i \in \{k_1, \dots, k_n\}$ e tentar estimar $p(y | \vect{x})$ por meio de uma família de modelos $p(y | \vect{x}; \vect{\theta})$. 
30 | \end{itemize} 31 | \end{frame} 32 | 33 | 34 | \begin{frame}{Classificação com duas classes} 35 | Quando $y$ é uma variável binária definimos o modelo $p(y | \vect{x}; \vect{\theta})$ do seguinte modo: 36 | \Large{ 37 | \begin{align*} 38 | \hat{y} &= p(y=1| \vect{x}; \vect{\theta})\\ 39 | &= h(\vect{x}; \vect{\theta}) \\ 40 | &= \sigma(z)\\ 41 | \end{align*} 42 | } 43 | em que 44 | \begin{equation*} 45 | z = \dotp{\vect{w}}{\vect{x}} + b 46 | \end{equation*} 47 | 48 | \end{frame} 49 | 50 | \begin{frame}[fragile]{Revisão: função sigmoide} 51 | \input{TikzFiles/Sigmoid} 52 | \end{frame} 53 | 54 | \begin{frame}{Classificação} 55 | \input{TikzFiles/DFNclassification2} 56 | \end{frame} 57 | 58 | \begin{frame}{Classificação para várias classes} 59 | E quando $y$ é uma variável com $n$ valores definimos $p(y | \vect{x}; \vect{\theta})$ do seguinte modo: 60 | \Large{ 61 | \begin{align*} 62 | \hat{\vect{y}} &= p(y| \vect{x}; \vect{\theta})\\ 63 | &= h(\vect{x}; \vect{\theta}) \\ 64 | &= softmax(\vect{z})\\ 65 | \end{align*} 66 | } 67 | em que 68 | \begin{equation*} 69 | \vect{z} = \vect{W}\vect{x} + \vect{b} 70 | \end{equation*} 71 | 72 | \end{frame} 73 | 74 | \begin{frame}[fragile]{Revisão: função softmax} 75 | \input{TikzFiles/Softmax} 76 | \end{frame} 77 | 78 | \begin{frame}{Classificação} 79 | \input{TikzFiles/DFNclassification} 80 | \end{frame} 81 | 82 | 83 | \begin{frame}{Princípio da máxima verossimilhança} 84 | Os parâmetros $\vect{\theta}$ vão ser adaptados de modo que $p(y| \vect{x};\vect{\theta})$ seja a distribuição mais adequada para os dados 85 | \begin{equation*} 86 | (\vect{x}^{(1)},y^{(1)}), \dots, (\vect{x}^{(N)},y^{(N)}) 87 | \end{equation*} 88 | \end{frame} 89 | 90 | \begin{frame}{Classificação} 91 | A função que queremos maximizar é 92 | \Large{ 93 | \begin{align*} 94 | \mathcal{L}(\vect{\theta}) &= \E_{\vect{x},y \sim p_{data}} \log p(y| \vect{x}; \vect{\theta})\\ 95 | &= \frac{1}{N}\sum_{i=1}^{N}\log p(y^{(i)}| \vect{x}^{(i)}; \vect{\theta})\\ 96 
| \end{align*} 97 | } 98 | \end{frame} 99 | 100 | \begin{frame}{Revisão: entropia} 101 | \input{TikzFiles/Entropy1} 102 | \end{frame} 103 | 104 | \begin{frame}{Revisão: entropia} 105 | \input{TikzFiles/Entropy2} 106 | \end{frame} 107 | 108 | \begin{frame}{Revisão: divergência Kullback-Leibler} 109 | \input{TikzFiles/KullbackLeibler} 110 | \end{frame} 111 | 112 | \begin{frame}{Revisão: entropia cruzada} 113 | \Large{ 114 | \begin{align*} 115 | CE(\vect{p},\vect{q}) &= H(\vect{p}) + D_{KL}(\vect{p}||\vect{q})\\ 116 | \vspace{0.2cm} 117 | &= -\sum_{i}\vect{p}_{i}\log(\vect{q}_{i}) 118 | \end{align*} 119 | } 120 | \vspace{0.2cm} 121 | \begin{equation*} 122 | \argmin_{\vect{q}} CE(\vect{p},\vect{q}) = \argmin_{\vect{q}} D_{KL}(\vect{p}||\vect{q}) 123 | \end{equation*} 124 | \end{frame} 125 | 126 | \begin{frame}[fragile]{Entropia cruzada e verossimilhança} 127 | 128 | Assumindo que $\vect{y}$ é one-hot temos que: 129 | 130 | \Large{ 131 | \begin{align*} 132 | L(\vect{y}^{(i)}, {\hat{\vect{y}}}^{(i)}) &= CE(\vect{y}^{(i)}, {\hat{\vect{y}}}^{(i)})\\ 133 | &= -\sum_{k=1}^{n} \vect{y}^{(i)}_{k}\log p(y=k| \vect{x}^{(i)}; \vect{\theta})\\ 134 | &= - \log p(y^{(i)}| \vect{x}^{(i)}; \vect{\theta})\\ 135 | \end{align*} 136 | } 137 | \end{frame} 138 | 139 | \begin{frame}{Entropia cruzada e verossimilhança} 140 | E a função que queremos minimizar é 141 | \Large{ 142 | \begin{align*} 143 | J(\vect{\theta}) &= \frac{1}{N}\sum_{i=1}^{N} L(\vect{y}^{(i)}, {\hat{\vect{y}}}^{(i)})\\ 144 | &= - \frac{1}{N}\sum_{i=1}^{N}\log p(y^{(i)}| \vect{x}^{(i)}; \vect{\theta})\\ 145 | &= - \mathcal{L}(\vect{\theta}) 146 | \end{align*} 147 | 148 | \vspace{0.2cm} 149 | \begin{equation*} 150 | \argmax_{\vect{\theta}} \mathcal{L}(\vect{\theta}) = \argmin_{\vect{\theta}} J(\vect{\theta}) 151 | \end{equation*} 152 | } 153 | \end{frame} 154 | 155 | \begin{frame}{Treinando um modelo} 156 | \Large{ 157 | \begin{itemize} 158 | \item $\hat{\vect{y}} = f(\vect{x}; \vect{\theta})$ 159 | \item $J(\vect{\theta}) = 
\frac{1}{m}\sum_{i=1}^{m} L(\vect{y}^{(i)}, {\hat{\vect{y}}}^{(i)})$ 160 | \item \text{algum algoritmo de otimização (e.g., \textbf{SGD})}: 161 | \begin{equation*} 162 | \vect{\theta}^{novo} \leftarrow \vect{\theta}^{velho} - \eta \grad{\vect{\theta}}{J(\vect{\theta})} 163 | \end{equation*} 164 | \vspace{0.3cm} 165 | \end{itemize} 166 | } 167 | 168 | Vamos ver como computar $\grad{\vect{\theta}}{J(\vect{\theta})}$ de modo eficiente para uma função arbitrária $J$. 169 | 170 | \end{frame} 171 | 172 | \section{Grafo de computação (caso escalar)} 173 | 174 | \begin{frame}{Grafo de computação} 175 | 176 | Considere os seguintes conjuntos de funções: 177 | \Large{ 178 | \begin{itemize} 179 | \item $OP_1 = \{ \lambda x. -x, \lambda x. x^2, \lambda x. e^x, \lambda x. log(x), \lambda x. x \}$ 180 | \item $OP_2 = \{ \lambda xy. x + y, \lambda xy. x * y, \lambda xy. \frac{x}{y} \}$ 181 | \item $OP = OP_1 \cup OP_2$ 182 | \end{itemize} 183 | } 184 | \end{frame} 185 | 186 | \begin{frame}{Grafo de computação} 187 | Um grafo de computação definido em $OP$ $\mathcal{G} = (\mathcal{V}, \mathcal{E}_1, \mathcal{E}_2)$ é um grafo acíclico dirigido (DAG) tal que cada elemento $u \in \mathcal{V}$ indica uma variável, se $(x,y) \in \mathcal{E}_1$ então $f(x)=y$ onde $f \in OP_1 \cup \{g(x,\alpha) | \alpha \in \mathbb{R} , g \in OP_2\}$, e se $(x,y) \in \mathcal{E}_2$ então $f(x)=y$ onde $f \in \{g(\alpha, x) | \alpha \in \mathbb{R} , g \in OP_2\}$. 188 | 189 | \vspace{0.3cm} 190 | \begin{itemize} 191 | \item $Pa(x) = \{y \in \mathcal{V} | (y,x) \in \mathcal{E}_1 \cup \mathcal{E}_2 \}$. 192 | \item $S(x) = \{y \in \mathcal{V} | (x,y) \in \mathcal{E}_1 \cup \mathcal{E}_2 \}$. 
193 | \end{itemize} 194 | 195 | \end{frame} 196 | 197 | 198 | \begin{frame}{Grafo de computação} 199 | 200 | \input{TikzFiles/simple_example0} 201 | \Large{ 202 | \begin{itemize} 203 | \item $y = x^2$ 204 | \item $u = e^{y}$ 205 | \end{itemize} 206 | } 207 | \end{frame} 208 | 209 | \begin{frame}{Grafo de computação} 210 | 211 | \input{TikzFiles/simple_example} 212 | \Large{ 213 | \begin{itemize} 214 | \item $z = x + y$ 215 | \item $u = \log(z)$ 216 | \end{itemize} 217 | } 218 | 219 | \end{frame} 220 | 221 | \begin{frame}{Grafo de computação} 222 | \Large{ 223 | Queremos representar uma função $L$ por um grafo definido em $OP$ pois as derivadas parciais das funções de $OP$ são simples de calcular. E com \alert{a regra da cadeia} podemos combinar as derivadas das funções locais para obter a derivada parcial de $L$ com respeito a quaisquer parâmetros. 224 | } 225 | \end{frame} 226 | 227 | \begin{frame}{Grafo de computação} 228 | \Large{ 229 | Como todas as funções em $OP$ são diferenciáveis, podemos estender $\mathcal{G}$ em $\mathcal{G}^{\prime}$ adicionando todas as derivadas parciais dos filhos em relação aos pais junto com as respectivas dependências. 
230 | } 231 | \end{frame} 232 | 233 | 234 | \begin{frame}{Estendendo o grafo de operações básicas: soma} 235 | \input{TikzFiles/soma} 236 | \end{frame} 237 | 238 | \begin{frame}{Estendendo o grafo de operações básicas: multiplicação} 239 | \input{TikzFiles/mult} 240 | \end{frame} 241 | 242 | \begin{frame}{Estendendo o grafo de operações básicas: divisão} 243 | \input{TikzFiles/div} 244 | \end{frame} 245 | 246 | \begin{frame}{Estendendo o grafo de operações básicas: negativo} 247 | \input{TikzFiles/minus1} 248 | \end{frame} 249 | 250 | \begin{frame}{Estendendo o grafo de operações básicas: exponenciação} 251 | \input{TikzFiles/exp} 252 | \end{frame} 253 | 254 | \begin{frame}{Estendendo o grafo de operações básicas: logaritmo} 255 | \input{TikzFiles/log} 256 | \end{frame} 257 | 258 | \begin{frame}{Estendendo o grafo de operações básicas: ao quadrado} 259 | \input{TikzFiles/squ} 260 | \end{frame} 261 | 262 | 263 | \begin{frame}{Regra da cadeia} 264 | \Large{ 265 | \begin{itemize} 266 | \item $f:\mathbb{R} \rightarrow\mathbb{R}$, $g:\mathbb{R} \rightarrow\mathbb{R}$. 
267 | \item $y = g(x)$ 268 | \item $u = f(g(x)) = f(y)$ 269 | 270 | \end{itemize} 271 | 272 | \input{TikzFiles/simple_exampleCR} 273 | 274 | \[ 275 | \frac{\partial u}{\partial x} = \frac{\partial u}{\partial y} \frac{\partial y}{\partial x} 276 | \] 277 | } 278 | \end{frame} 279 | 280 | \begin{frame}{Aplicando a regra da cadeia} 281 | \input{TikzFiles/chain_rule_nodes} 282 | \Large{ 283 | \begin{itemize} 284 | \item $\frac{\partial u_{n}}{\partial u_{j}} = \frac{\partial u_{n}}{\partial u_{j+1}} \frac{\partial u_{j+1}}{\partial u_{j}}$ 285 | \end{itemize} 286 | } 287 | 288 | \end{frame} 289 | 290 | \begin{frame}{Exemplo 1: regressão linear} 291 | \Large{ 292 | \begin{align*} 293 | J(\vect{w}) & = \frac{1}{N}\sum_{i=1}^{N}L(y_{i}, \hat{y}_{i})\\ 294 | & = \frac{1}{N}\sum_{i=1}^{N}(\hat{y}_{i} - y_{i})^{2}\\ 295 | & = \frac{1}{N}\sum_{i=1}^{N}(\vect{w}^\top\vect{x}_{i} - y_{i})^{2}\\ 296 | \end{align*} 297 | } 298 | \end{frame} 299 | 300 | \begin{frame}{Simplificação} 301 | \Large{ 302 | \begin{itemize} 303 | \item $\vect{w} = \begin{bmatrix}w_{1} \\ w_{2}\end{bmatrix}$ 304 | 305 | \vspace{0.8cm} 306 | 307 | \item $\vect{x} = \begin{bmatrix}x_1 \\ x_2\end{bmatrix}$ 308 | 309 | \end{itemize} 310 | } 311 | \end{frame} 312 | 313 | 314 | 315 | \begin{frame}{Grafo de $L(\hat{y}, y)$} 316 | \input{TikzFiles/lr_graph} 317 | \end{frame} 318 | 319 | 320 | \begin{frame}{Caminho de $w_1$} 321 | \input{TikzFiles/lr_graph1} 322 | \end{frame} 323 | 324 | \begin{frame}{Derivada de $L$ em relação a $w_1$} 325 | \input{TikzFiles/lr_graph_grad} 326 | \end{frame} 327 | 328 | 329 | \begin{frame}{Regra da cadeia para várias variáveis} 330 | \Large{ 331 | \begin{itemize} 332 | \item $z = f(x,y)$ 333 | \item $x = f_{1}(a)$. 
334 | \item $y = f_{2}(a)$ 335 | \end{itemize} 336 | \[ 337 | \frac{\partial z}{ \partial a} = \frac{\partial z}{\partial x} \frac{\partial x}{\partial a} + \frac{\partial z}{\partial y} \frac{\partial y}{\partial a} 338 | \] 339 | } 340 | \end{frame} 341 | 342 | 343 | \begin{frame}{Exemplo} 344 | \input{TikzFiles/multiple_paths} 345 | \end{frame} 346 | 347 | \begin{frame}{Exemplo 2: regressão logística} 348 | \Large{ 349 | \begin{equation*} 350 | \hat{\vect{y}} = softmax(\vect{W}\vect{x} + \vect{b}) 351 | \end{equation*} 352 | \begin{equation*} 353 | L(\vect{y},\hat{\vect{y}}) = CE(\vect{y},\hat{\vect{y}}) 354 | \end{equation*} 355 | 356 | 357 | \begin{equation*} 358 | L(\vect{y},\hat{\vect{y}}) = - \sum_{i}\vect{y}_{i} \log \left(\frac{exp(\sum_{k} \vect{W}_{i,k}\vect{x}_{k} + \vect{b}_{i})}{\sum_{j}exp(\sum_{k}\vect{W}_{j,k}\vect{x}_{k} + \vect{b}_{j})} \right) 359 | \end{equation*} 360 | } 361 | \end{frame} 362 | 363 | \begin{frame}{Simplificação} 364 | \Large{ 365 | \begin{itemize} 366 | \item $\begin{bmatrix}z_1\\z_2\end{bmatrix} = \begin{bmatrix}w_{11} & w_{12}\\w_{21} & w_{22}\end{bmatrix}* \begin{bmatrix}x_1\\x_2\end{bmatrix} + \begin{bmatrix}b_1\\b_2\end{bmatrix}$ 367 | 368 | \vspace{0.4cm} 369 | 370 | \item $\begin{bmatrix}h_1\\h_2\end{bmatrix} = \begin{bmatrix}exp(z_1)\\exp(z_2)\end{bmatrix}$ 371 | 372 | \vspace{0.4cm} 373 | 374 | \item $H = h_1 + h_2$ 375 | 376 | \vspace{0.4cm} 377 | 378 | \item $\begin{bmatrix}\hat{y}_1\\\hat{y}_2\end{bmatrix} = \begin{bmatrix}\frac{h_1}{H}\\\frac{h_2}{H}\end{bmatrix}$ 379 | \end{itemize} 380 | } 381 | \end{frame} 382 | 383 | 384 | 385 | \begin{frame}{Grafo de $L(\hat{\vect{y}}, \vect{y})$} 386 | \input{TikzFiles/expanded_graph_0} 387 | \end{frame} 388 | 389 | \begin{frame}{Caminho de $b_1$: 1} 390 | \input{TikzFiles/b1_path1} 391 | \end{frame} 392 | 393 | \begin{frame}{Caminho de $b_1$: 2} 394 | \input{TikzFiles/b1_path2} 395 | \end{frame} 396 | 397 | \begin{frame}{Caminho de $b_1$: 3} 398 | 
\input{TikzFiles/b1_path3} 399 | \end{frame} 400 | 401 | \begin{frame}{Derivada parcial de L com respeito a $b_1$: 1} 402 | \input{TikzFiles/b1_path1_grad} 403 | \end{frame} 404 | 405 | \begin{frame}{Derivada parcial de L com respeito a $b_1$: 2} 406 | \input{TikzFiles/b1_path2_grad} 407 | \end{frame} 408 | 409 | \begin{frame}{Derivada parcial de L com respeito a $b_1$: 3} 410 | \input{TikzFiles/b1_path3_grad} 411 | \end{frame} 412 | 413 | \begin{frame}{Derivada parcial de L com respeito a $b_1$} 414 | \Large{ 415 | \begin{align*} 416 | \frac{\partial L}{\partial b_1} &= -y_1 + y_1\frac{h_1}{H} + y_2\frac{h_1}{H}\\ 417 | \vspace{0.2cm} 418 | \visible<2->{&= y_1\left(\frac{h_1}{H} -1\right) + y_2\left(\frac{h_1}{H} -0\right)\\} 419 | \visible<3->{&= y_1\left(\hat{y}_1 -1\right) + y_2\left(\hat{y}_1 -0\right)\\} 420 | \visible<4->{&= \hat{y}_1 - y_1 \;\;\;\; \text{(quando} \;\; y \;\; \text{é um vetor one-hot)}} 421 | \end{align*} 422 | } 423 | \end{frame} 424 | 425 | \begin{frame}{Exemplo} 426 | \input{TikzFiles/examples_values} 427 | \end{frame} 428 | 429 | \begin{frame}{Forward} 430 | \input{TikzFiles/expanded_graph_0} 431 | \end{frame} 432 | 433 | \begin{frame}{Forward} 434 | \input{TikzFiles/expanded_graph_1} 435 | \end{frame} 436 | 437 | \begin{frame}{Forward} 438 | \input{TikzFiles/expanded_graph_2} 439 | \end{frame} 440 | 441 | \begin{frame}{Forward} 442 | \input{TikzFiles/expanded_graph_3} 443 | \end{frame} 444 | 445 | \begin{frame}{Forward} 446 | \input{TikzFiles/expanded_graph_4} 447 | \end{frame} 448 | 449 | \begin{frame}{Forward} 450 | \input{TikzFiles/expanded_graph_5} 451 | \end{frame} 452 | 453 | \begin{frame}{Forward} 454 | \input{TikzFiles/expanded_graph_6} 455 | \end{frame} 456 | 457 | \begin{frame}{Forward} 458 | \input{TikzFiles/expanded_graph_7} 459 | \end{frame} 460 | 461 | \begin{frame}{Forward} 462 | \input{TikzFiles/expanded_graph_8} 463 | \end{frame} 464 | 465 | \begin{frame}{Forward} 466 | \input{TikzFiles/expanded_graph_9} 467 | 
\end{frame} 468 | 469 | \begin{frame}{Forward} 470 | \input{TikzFiles/expanded_graph_10} 471 | \end{frame} 472 | 473 | \begin{frame}{Forward} 474 | \input{TikzFiles/expanded_graph_11} 475 | \end{frame} 476 | 477 | \begin{frame}{Forward} 478 | \input{TikzFiles/expanded_graph_12} 479 | \end{frame} 480 | 481 | \begin{frame}{Forward} 482 | \input{TikzFiles/expanded_graph_13} 483 | \end{frame} 484 | 485 | \begin{frame}{Backward} 486 | \input{TikzFiles/expanded_graph_14} 487 | \end{frame} 488 | 489 | 490 | \begin{frame}{Algoritmo de back-propagation (caso escalar)} 491 | \begin{algorithm}[H] 492 | \begin{algorithmic}[1] 493 | \STATE \textbf{Require:} Computational graph $\mathcal{G} = (\{ u_1, \dots, u_n \}, \mathcal{E}_1, \mathcal{E}_2)$, where $u_n$ is a leaf node. 494 | \STATE Initialize $grad\_table$, a data structure that will store the derivatives that have been computed (at the end $grad\_table[u_i] = \frac{\partial u_n}{\partial u_i}$). 495 | \STATE $grad\_table[u_n] \leftarrow 1$ 496 | \FOR{$j=n-1$ down to $1$} 497 | \STATE $grad\_table[u_j] \leftarrow \sum_{u_{i} \in S(u_{j})}grad\_table[u_i]\frac{\partial u_i}{\partial u_j}$ 498 | \ENDFOR 499 | \RETURN $grad\_table$ 500 | \end{algorithmic} 501 | \caption{Back-propagation (scalar case)} 502 | \label{alg:seq} 503 | \end{algorithm} 504 | \end{frame} 505 | 506 | 507 | \begin{frame}[allowframebreaks]{Referências} 508 | 509 | \bibliography{my_references} 510 | \bibliographystyle{abbrv} 511 | 512 | \end{frame} 513 | 514 | 515 | 516 | 517 | \end{document} -------------------------------------------------------------------------------- /slides/backprop1/my_references.bib: -------------------------------------------------------------------------------- 1 | % RL 2 | 3 | @Book{Sutton98a, 4 | Title = {Reinforcement Learning : An Introduction}, 5 | Author = {Richard S. Sutton and Andrew G. 
Barto}, 6 | Publisher = {MIT Press}, 7 | Year = {1998}, 8 | } 9 | 10 | % machine learning 11 | 12 | @book{DeepLearningbook, 13 | author = "Ian Goodfellow and Yoshua Bengio and Aaron Courville", 14 | title = "Deep Learning", 15 | year = "2017", 16 | publisher = "MIT Press", 17 | } 18 | 19 | @book{learningfromdata, 20 | title = {Learning From Data: A short course}, 21 | author = {Yaser S. Abu-Mostafa and Malik Magdon-Ismail and Hsuan-Tien Lin}, 22 | publisher = {AMLBook.com}, 23 | isbn = {1600490069, 978-1600490064}, 24 | year = {2012}, 25 | series = {}, 26 | edition = {1}, 27 | volume = {}, 28 | url = {http://gen.lib.rus.ec/book/index.php?md5=BCF7C1FF782654437CA474770AB041D5} 29 | } 30 | -------------------------------------------------------------------------------- /slides/backprop1/pdf/BackpropLecture1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/slides/backprop1/pdf/BackpropLecture1.pdf -------------------------------------------------------------------------------- /slides/backprop2/Makefile: -------------------------------------------------------------------------------- 1 | BASE_NAME := main 2 | BUILD_DIR := build 3 | PDF_NAME := BackpropLecture2.pdf 4 | 5 | PDFLATEX_OPTIONS = -halt-on-error -aux-directory=$(BUILD_DIR) -output-directory=$(BUILD_DIR) --shell-escape 6 | LATEX := latex 7 | PDFLATEX = pdflatex $(PDFLATEX_OPTIONS) 8 | BIBTEX := bibtex 9 | 10 | pdf: $(BASE_NAME).pdf 11 | 12 | $(BASE_NAME).pdf: $(BASE_NAME).tex 13 | mkdir -p $(BUILD_DIR) 14 | $(PDFLATEX) $< 15 | $(BIBTEX) $(BUILD_DIR)/$(BASE_NAME) 16 | $(PDFLATEX) $< 17 | $(PDFLATEX) $< 18 | $(PDFLATEX) $< 19 | cp $(BUILD_DIR)/$(BASE_NAME).pdf $(PDF_NAME) 20 | 21 | clean: 22 | rm -rf build $(PDF_NAME) 23 | -------------------------------------------------------------------------------- /slides/backprop2/all_imports.tex: 
-------------------------------------------------------------------------------- 1 | \usepackage[utf8]{inputenc} 2 | \usepackage[T1]{fontenc} 3 | \usepackage{lmodern} 4 | \usepackage{appendixnumberbeamer} 5 | \usepackage{hyperref} 6 | \usepackage{booktabs} 7 | \usepackage{amsmath} 8 | \usepackage{bm} 9 | \usepackage[scale=2]{ccicons} 10 | \usepackage[outputdir=build]{minted} 11 | \usepackage{pgfplots} 12 | \usepackage{array,colortbl,xcolor} 13 | \usepgfplotslibrary{dateplot} 14 | \usepackage{setspace} 15 | \usepackage{etoolbox} 16 | \usepackage{xspace} 17 | \usepackage{tikz} 18 | \usetikzlibrary{shapes,arrows,positioning,fit,backgrounds} 19 | \usepackage{tkz-euclide} 20 | \usepackage{soul} 21 | \usepackage{ragged2e} 22 | \usepackage{algorithm,algorithmic} 23 | -------------------------------------------------------------------------------- /slides/backprop2/all_new_commands.tex: -------------------------------------------------------------------------------- 1 | \newcommand{\themename}{\textbf{\textsc{metropolis}}\xspace} 2 | \newcommand{\vect}[1]{\bm{#1}} 3 | \newcommand{\myprime}[1]{{#1}^{\prime}} 4 | \newcommand{\grad}[2]{\nabla_{#1} {#2}} 5 | \newcommand{\dotp}[2]{{#1}^{\top}{#2}} 6 | \newcommand{\dotpPright}[2]{{#1}^{\top}\left({#2}\right)} 7 | \newcommand{\outerp}[2]{\left({#1}\right){#2}^{\top}} 8 | \newcommand{\Jacobian}[2]{\frac{\partial #1}{\partial #2}} 9 | \newcommand{\Vocab}{\mathbb{V}} 10 | \DeclareMathOperator*{\argmin}{arg\,min} 11 | \DeclareMathOperator*{\argmax}{arg\,max} 12 | \DeclareMathOperator{\E}{\mathbb{E}} 13 | 14 | 15 | % Quote with author reference at the end 16 | \let\oldquote\quote 17 | \let\endoldquote\endquote 18 | \renewenvironment{quote}[2][] 19 | {\if\relax\detokenize{#1}\relax 20 | \def\quoteauthor{#2}% 21 | \else 22 | \def\quoteauthor{#2~---~#1}% 23 | \fi 24 | \oldquote} 25 | {\par\nobreak\smallskip\hfill(\quoteauthor)% 26 | \endoldquote\addvspace{\bigskipamount}} 27 | %----------------------------------------- 28 | 
-------------------------------------------------------------------------------- /slides/backprop2/definitions/colors.tex: -------------------------------------------------------------------------------- 1 | \definecolor{blue}{RGB}{159, 192, 176} 2 | \definecolor{blue2}{RGB}{38,139,210} 3 | \definecolor{green}{RGB}{160, 227, 127} 4 | \definecolor{green2}{RGB}{132, 164, 76} 5 | \definecolor{orange}{RGB}{243, 188, 125} 6 | \definecolor{red}{RGB}{253, 123, 84} 7 | \definecolor{nephritis}{RGB}{39, 174, 96} 8 | \definecolor{emerald}{RGB}{46, 204, 113} 9 | \definecolor{turquoise}{RGB}{39, 174, 96} 10 | \definecolor{green-sea}{RGB}{22, 160, 133} 11 | \definecolor{base02}{RGB}{7,54,66} 12 | \definecolor{base03}{RGB}{0,43,54} 13 | \definecolor{cyan}{RGB}{42,161,152} -------------------------------------------------------------------------------- /slides/backprop2/definitions/styles.tex: -------------------------------------------------------------------------------- 1 | % Tikzstyles for Computation Graphs 2 | 3 | % nodes 4 | \tikzstyle{noop} = [circle, draw=none, fill=red, minimum size = 10pt] 5 | \tikzstyle{op} = [circle, draw=red, line width=1.5pt, fill=red!70, text=black, text centered, font=\bf \normalsize, minimum size = 25pt] 6 | \tikzstyle{op2} = [circle, draw=orange, line width=1.5pt, fill=orange!70, text=black, text centered, font=\bf \normalsize, minimum size = 25pt] 7 | \tikzstyle{op3} = [circle, draw=orange, line width=1.5pt, fill=orange!70, text=black, text centered, font=\bf \scriptsize, minimum size = 7pt] 8 | \tikzstyle{placeholder} = [circle, draw=red, line width=1.5pt, fill=red!30, text=black, text centered, font=\bf \normalsize, minimum size = 25pt] 9 | \tikzstyle{state} = [circle, draw=blue, line width=1.5pt, fill=blue!70, text=black, text centered, font=\bf \normalsize, minimum size = 25pt] 10 | \tikzstyle{gradient} = [circle, draw=nephritis, line width=1.5pt, fill=nephritis!60, text=black, text centered, font=\bf \normalsize, minimum size = 25pt] 11 | 
\tikzstyle{gradient2} = [circle, draw=green2, line width=1.5pt, fill=green2!60, text=black, text centered, font=\bf \normalsize, minimum size = 25pt] 12 | \tikzstyle{textonly} = [draw=none, fill=none, text centered, font=\bf \normalsize] 13 | 14 | % edges 15 | % \tikzstyle{tedge} = [draw, thick, >=stealth, ->] 16 | \tikzstyle{tedge} = [draw, thick, >=latex, ->] 17 | \tikzstyle{tedge_dashed} = [draw, thick, >=latex, ->, dashed] 18 | 19 | % namedscope 20 | \tikzstyle{namedscope} = [circle, draw=orange, line width=1.5pt, fill=orange!60, align=center, inner sep=0pt] 21 | 22 | % \tikzstyle{container} = [draw=none, rectangle, dotted, inner ysep=1.5em] 23 | % \tikzstyle{novertex} = [draw=none, fill=none, text centered] 24 | % \tikzstyle{predicate} = [ellipse, draw, thick, text centered, rounded corners, minimum size=30pt] 25 | % \tikzstyle{aux} = [rectangle, draw, thick, text centered, rounded corners, minimum size=30pt] 26 | % \tikzstyle{ledge} = [draw, dashed, thick, >=stealth, ->] 27 | % \tikzstyle{pedge} = [draw, thick, >=stealth, ->] 28 | -------------------------------------------------------------------------------- /slides/backprop2/header.tex: -------------------------------------------------------------------------------- 1 | \title{MAC0460 - Introdução ao aprendizado de máquina \\ \vspace{0.2 cm} Back-propagation 2} 2 | \date{\today} 3 | 4 | \author{ 5 | Felipe Salvatore\\ 6 | \url{https://felipessalvatore.github.io/}\vspace{0.4 cm} 7 | \and\\ 8 | Nina S. T. 
Hirata\\ 9 | \url{https://www.ime.usp.br/~nina/} 10 | \vspace{0.4 cm} 11 | } 12 | 13 | \institute{\textbf{IME-USP}: Institute of Mathematics and Statistics, University of São Paulo} 14 | 15 | 16 | -------------------------------------------------------------------------------- /slides/backprop2/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/slides/backprop2/images/logo.png -------------------------------------------------------------------------------- /slides/backprop2/my_references.bib: -------------------------------------------------------------------------------- 1 | % machine learning 2 | 3 | @book{DeepLearningbook, 4 | author = "Ian Goodfellow and Yoshua Bengio and Aaron Courville", 5 | title = "Deep Learning", 6 | year = "2017", 7 | publisher = "MIT Press", 8 | } 9 | 10 | @book{learningfromdata, 11 | title = {Learning From Data: A short course}, 12 | author = {Yaser S. 
Abu-Mostafa and Malik Magdon-Ismail and Hsuan-Tien Lin}, 13 | publisher = {AMLBook.com}, 14 | isbn = {1600490069, 978-1600490064}, 15 | year = {2012}, 16 | series = {}, 17 | edition = {1}, 18 | volume = {}, 19 | url = {http://gen.lib.rus.ec/book/index.php?md5=BCF7C1FF782654437CA474770AB041D5} 20 | } 21 | % learningfromdata: BibTeX splits author lists only on the word "and", never on commas 22 | @article{DBLP:journals/corr/abs-1802-01528, 23 | author = {Terence Parr and 24 | Jeremy Howard}, 25 | title = {The Matrix Calculus You Need For Deep Learning}, 26 | journal = {CoRR}, 27 | volume = {abs/1802.01528}, 28 | year = {2018}, 29 | url = {http://arxiv.org/abs/1802.01528}, 30 | archivePrefix = {arXiv}, 31 | eprint = {1802.01528}, 32 | timestamp = {Thu, 01 Mar 2018 15:00:45 +0100}, 33 | biburl = {https://dblp.org/rec/bib/journals/corr/abs-1802-01528}, 34 | bibsource = {dblp computer science bibliography, https://dblp.org} 35 | } 36 | 37 | @misc{VectorCalculus, 38 | title = {{Vector Calculus} (in Mathematics for Machine Learning)}, 39 | howpublished = {\url{https://mml-book.github.io/book/chapter05.pdf}} 40 | } 41 | 42 | @misc{graphsbackprop, 43 | title = {{Computational Graphs, and Backpropagation} (course notes for NLP by Michael Collins)}, 44 | howpublished = {\url{http://www.cs.columbia.edu/~mcollins/ff2.pdf}} 45 | } -------------------------------------------------------------------------------- /slides/backprop2/pdf/BackpropLecture2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/slides/backprop2/pdf/BackpropLecture2.pdf -------------------------------------------------------------------------------- /slides/images/cc-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/slides/images/cc-logo.png -------------------------------------------------------------------------------- /slides/install.sh:
-------------------------------------------------------------------------------- 1 | sudo apt-get install texlive-latex-base 2 | sudo apt-get install latex-beamer 3 | sudo apt-get install texlive-latex-extra 4 | sudo apt-get install texlive-fonts-extra 5 | sudo apt-get install texlive-science 6 | --------------------------------------------------------------------------------